Beispiel #1
0
 def setUp(self):
     """Delete stored export-test forms, then create DOMAIN and an admin web user."""
     export_key = [DOMAIN, "http://www.commcarehq.org/export/test"]
     # Every doc returned for the export test key is wrapped and deleted.
     for stale_doc in get_docs(export_key):
         XFormInstance.wrap(stale_doc).delete()
     create_domain(DOMAIN)
     web_user = WebUser.create(None, "test", "foobar")
     self.couch_user = web_user
     web_user.add_domain_membership(DOMAIN, is_admin=True)
     web_user.save()
Beispiel #2
0
def _get_new_form_json(xml, xform_id):
    """Convert form XML to JSON, adjust its datetimes and scrub its meta.

    Returns the dict produced by ``convert_xform_to_json`` after
    ``adjust_datetimes`` and ``scrub_meta`` have been applied.
    """
    new_json = convert_xform_to_json(xml)
    with force_phone_timezones_should_be_processed():
        adjust_datetimes(new_json)
    # Per the original note, scrubbing the wrapper changes `new_json` in
    # place because of how jsonobject works.
    wrapper = XFormInstance.wrap({'form': new_json, '_id': xform_id})
    scrub_meta(wrapper)
    return new_json
Beispiel #3
0
def deidentify_form(doctransform):
    """Overwrite the configured properties of a form with generated values.

    The form's xmlns selects an entry of FORM_CONFIG mapping "a/b/c" style
    property paths to generator functions. Each path is walked in both the
    form JSON and the "form.xml" attachment, and the last step is replaced
    with ``str(generatorfunc())`` in both.

    Returns the mutated ``doctransform``, or None when the xmlns has no
    FORM_CONFIG entry.
    """
    assert(doctransform.doc["doc_type"] == "XFormInstance")
    form = XFormInstance.wrap(doctransform.doc)
    xml = doctransform.attachments.get("form.xml", "")
    if form.xmlns not in FORM_CONFIG:
        # We don't know how to deidentify this form, so return nothing to
        # keep potentially identified data from sneaking in.
        return None

    rootxml = etree.XML(xml)
    for proppath, generatorfunc in FORM_CONFIG[form.xmlns].items():
        steps = proppath.split("/")
        final_index = len(steps) - 1
        json_node = form.form
        xml_node = rootxml
        for index, step in enumerate(steps):
            if step not in json_node:
                # No such property on this form; nothing to touch.
                break
            tag = "{%(ns)s}%(val)s" % {"ns": form.xmlns, "val": step}
            if index == final_index:
                # Last step: override the property in both representations.
                replacement = str(generatorfunc())
                json_node[step] = replacement
                xml_node.find(tag).text = replacement
            else:
                # Otherwise drill in.
                json_node = json_node[step]
                # NOTE: currently will not work with repeated nodes
                xml_node = xml_node.find(tag)

    doctransform.doc = form._doc
    doctransform.attachments["form.xml"] = etree.tostring(rootxml)
    return doctransform
Beispiel #4
0
def _fix_replacement_form_problem_in_couch(doc):
    """Repair a replacement form that swap_duplicate_xforms created.

    That command stored "problem" text on the replacement form, which
    makes it count as an error form and throws off the diff counts at the
    end of this migration. The problem text is moved into a history entry,
    ``problem`` is cleared and ``deprecated_form_id`` is set to the id
    parsed from the problem text.

    NOTE the replacement form's _id does not match instanceID in its
    form.xml. That issue is not resolved here.

    See:
    - corehq/apps/cleanup/management/commands/swap_duplicate_xforms.py
    - couchforms/_design/views/all_submissions_by_domain/map.js

    Returns the saved form as JSON.
    """
    original_problem = doc["problem"]
    assert original_problem.startswith(PROBLEM_TEMPLATE_START), doc
    assert doc["doc_type"] == "XFormInstance", doc
    # The deprecated form id sits between the template prefix and " on ".
    after_prefix = original_problem[len(PROBLEM_TEMPLATE_START):]
    deprecated_id = after_prefix.split(" on ", 1)[0]

    form = XFormInstance.wrap(doc)
    form.deprecated_form_id = deprecated_id
    resolution = XFormOperation(
        user="******",
        date=datetime.utcnow(),
        operation="Resolved bad duplicate form during couch-to-sql "
        "migration. Original problem: %s" % original_problem,
    )
    form.history.append(resolution)
    form.problem = None

    old_form = XFormInstance.get(deprecated_id)
    if old_form.initial_processing_complete:
        if not form.initial_processing_complete:
            form.initial_processing_complete = True
    form.save()
    return form.to_json()
Beispiel #5
0
    def swap_doc_types(self, log_file, bad_xform_id, duplicate_xform_id, domain, dry_run):
        """Swap the doc types of a bad form and its duplicate.

        The form with ``bad_xform_id`` becomes an XFormDuplicate and the form
        with ``duplicate_xform_id`` becomes an XFormInstance; both get a
        "problem" text describing the swap. If the bad form's problem
        already matches PROBLEM_TEMPLATE_START it is logged as already fixed
        and nothing is changed. Documents are saved only when ``dry_run`` is
        false.
        """
        bad_xform = XFormInstance.get(bad_xform_id)

        # confirm that the doc hasn't already been fixed
        existing_problem = getattr(bad_xform, "problem", None) or ""
        if existing_problem and re.match(PROBLEM_TEMPLATE_START, existing_problem):
            self.log_already_fixed(log_file, bad_xform_id, domain)
            return

        duplicate_xform = XFormInstance.get(duplicate_xform_id)
        timestamp = datetime.now().isoformat()

        # Convert the XFormInstance to an XFormDuplicate
        bad_xform.doc_type = XFormDuplicate.__name__
        bad_xform.problem = BAD_FORM_PROBLEM_TEMPLATE.format(duplicate_xform_id, timestamp)
        bad_xform = XFormDuplicate.wrap(bad_xform.to_json())

        # Convert the XFormDuplicate to an XFormInstance
        duplicate_xform.problem = FIXED_FORM_PROBLEM_TEMPLATE.format(
            id_=bad_xform_id, datetime_=timestamp
        )
        duplicate_xform.doc_type = XFormInstance.__name__
        duplicate_xform = XFormInstance.wrap(duplicate_xform.to_json())

        self.log_swap(log_file, bad_xform_id, domain, duplicate_xform_id, dry_run)

        if dry_run:
            return
        duplicate_xform.save()
        bad_xform.save()
Beispiel #6
0
def _get_submissions_for_patient_by_date(patient, visit_dates, schema='http://dev.commcarehq.org/pact/dots_form'):
    """Return a patient's submissions grouped by visit date.

    Arguments:
        patient: patient object; ``patient.couchdoc.pact_id`` is the first
            element of each view key.
        visit_dates: iterable of dates (``year``, ``month`` and ``day`` are
            read from each).
        schema: xmlns used as the last element of each view key.

    Queries the 'pactcarehq/all_submits_by_patient_date' view with one key
    ``[pact_id, year, month, day, schema]`` per visit date.

    Returns a dict mapping each visit date that produced a view row to the
    list of XFormInstance objects wrapped from that row's value.
    """
    keys = []
    date_by_key = {}
    for visit_date in visit_dates:
        key = [patient.couchdoc.pact_id, visit_date.year, visit_date.month, visit_date.day, schema]
        keys.append(key)
        # Index by the key tuple rather than a separator-less string join:
        # joined strings collide (2011, 1, 12 and 2011, 11, 2 both give
        # "...2011112"), which attached rows to the wrong visit date.
        date_by_key[tuple(key)] = visit_date

    submit_reduction = XFormInstance.view('pactcarehq/all_submits_by_patient_date', keys=keys)

    ret = {}
    for row in submit_reduction:
        visit_date = date_by_key[tuple(row['key'])]
        ret[visit_date] = [XFormInstance.wrap(x) for x in row['value']]
    return ret
Beispiel #7
0
    def rows(self):
        """Return the report rows, one per column slug.

        For each user id of the report, the view is queried in descending
        order from the end date to the start date with ``limit=1``; every
        column slug is evaluated on the returned form with ``xpath``. Users
        without a form get NO_VALUE in every column. The per-user rows go
        through the report's formatter and are then transposed: each
        returned row is the translated slug followed by the i-th entry of
        every formatted row.
        """
        domain = self.report.filter_values['domain']
        startdate = self.report.filter_values['startdate']
        enddate = self.report.filter_values['enddate']
        key_base = 'submission xmlns user'

        def newest_submission(user_id):
            # Descending view: start at the end date, stop at the start date.
            return XFormInstance.get_db().view(
                'reports_forms/all_forms',
                startkey=[key_base, domain, self.xmlns, user_id, enddate],
                endkey=[key_base, domain, self.xmlns, user_id, startdate],
                limit=1,
                reduce=False,
                include_docs=True,
                descending=True,
            ).one()

        # todo this will do one couch view hit per relevant user. could be optimized to sql or something if desired
        per_user = []
        for user_id in self.report.get_user_ids():
            view_row = newest_submission(user_id)
            if not view_row:
                values = [NO_VALUE] * len(self.column_slugs)
            else:
                form = XFormInstance.wrap(view_row['doc'])
                values = [form.xpath(slug) for slug in self.column_slugs]
            per_user.append((user_id, values))

        # format
        formatted = list(self.report.format.format_output(per_user))
        # transpose
        transposed = []
        for index, slug in enumerate(self.column_slugs):
            transposed.append([_(slug)] + [entry[index] for entry in formatted])
        return transposed
def _get_new_form_json(xml, xform_id):
    """Build form JSON from XML with datetimes adjusted and meta scrubbed."""
    converted = convert_xform_to_json(xml)
    with force_phone_timezones_should_be_processed():
        adjust_datetimes(converted)
    wrapped = XFormInstance.wrap({'form': converted, '_id': xform_id})
    # Per the original note, this is in place because of how jsonobject
    # works, so `converted` carries the scrubbed meta.
    scrub_meta(wrapped)
    return converted
    def handle(self, *args, **options):
        """Archive the forms whose ids are listed in a CSV file.

        Expects two positional args: the path of a CSV file whose first
        column holds form ids, and the archiving user, which is passed to
        ``xform.archive`` as ``user_id``. Problems are printed rather than
        raised, so a form that fails to archive does not stop the run.
        """
        if len(args) < 2:
            print("please specify a filepath and an archiving_user")
            return
        filepath = args[0]
        archiving_user = args[1]

        try:
            id_file = open(filepath)
        except Exception as e:
            print("there was an issue opening the file: %s" % e)
            return

        # `with` closes the handle once the ids are read; it used to stay
        # open for the rest of the command.
        with id_file:
            try:
                # Skip blank rows so a stray empty line does not abort the run.
                form_ids = [row[0] for row in csv.reader(id_file) if row]
            except Exception as e:
                print("there was an issue reading the file %s" % e)
                return

        for xform_doc in iter_docs(XFormInstance.get_db(), form_ids):
            try:
                xform = XFormInstance.wrap(xform_doc)
                xform.archive(user_id=archiving_user)
                print("Archived form %s in domain %s" % (
                    xform._id, xform.domain
                ))
            except Exception as e:
                # .get() so a doc lacking these keys cannot raise from
                # inside the handler and end the loop.
                print("Issue archiving XFORM %s for domain %s: %s" % (
                    xform_doc.get('_id'), xform_doc.get('domain'), e
                ))
Beispiel #10
0
    def handle(self, *args, **options):
        """Archive the forms whose ids are listed in a CSV file.

        Expects two positional args: the path of a CSV file whose first
        column holds form ids, and the archiving user, which is passed to
        ``xform.archive`` as ``user``. Problems are printed rather than
        raised, so a form that fails to archive does not stop the run.
        """
        if len(args) < 2:
            print("please specify a filepath and an archiving_user")
            return
        filepath = args[0]
        archiving_user = args[1]

        try:
            id_file = open(filepath)
        except Exception as e:
            print("there was an issue opening the file: %s" % e)
            return

        # `with` closes the handle once the ids are read; it used to stay
        # open for the rest of the command.
        with id_file:
            try:
                # Skip blank rows so a stray empty line does not abort the run.
                form_ids = [row[0] for row in csv.reader(id_file) if row]
            except Exception as e:
                print("there was an issue reading the file %s" % e)
                return

        for xform_doc in iter_docs(XFormInstance.get_db(), form_ids):
            try:
                xform = XFormInstance.wrap(xform_doc)
                xform.archive(user=archiving_user)
                print("Archived form %s in domain %s" % (xform._id,
                                                         xform.domain))
            except Exception as e:
                # .get() so a doc lacking these keys cannot raise from
                # inside the handler and end the loop.
                print("Issue archiving XFORM %s for domain %s: %s" % (
                    xform_doc.get('_id'), xform_doc.get('domain'), e))
    def swap_doc_types(self, log_file, bad_xform_id, duplicate_xform_id, domain, dry_run):
        """Turn the bad form into an XFormDuplicate and its duplicate into
        an XFormInstance, recording the swap in each form's problem text.

        Nothing is changed when the bad form's problem already matches
        PROBLEM_TEMPLATE_START (that case is logged as already fixed), and
        nothing is saved when ``dry_run`` is true.
        """
        bad_xform = XFormInstance.get(bad_xform_id)

        # confirm that the doc hasn't already been fixed:
        try:
            current_problem = bad_xform.problem or ""
        except AttributeError:
            current_problem = None
        already_fixed = bool(current_problem) and re.match(PROBLEM_TEMPLATE_START, current_problem)
        if already_fixed:
            self.log_already_fixed(log_file, bad_xform_id, domain)
            return

        duplicate_xform = XFormInstance.get(duplicate_xform_id)
        swapped_at = datetime.now().isoformat()

        # Convert the XFormInstance to an XFormDuplicate
        bad_xform.doc_type = XFormDuplicate.__name__
        bad_problem = BAD_FORM_PROBLEM_TEMPLATE.format(duplicate_xform_id, swapped_at)
        bad_xform.problem = bad_problem
        bad_xform = XFormDuplicate.wrap(bad_xform.to_json())

        # Convert the XFormDuplicate to an XFormInstance
        fixed_problem = FIXED_FORM_PROBLEM_TEMPLATE.format(id_=bad_xform_id, datetime_=swapped_at)
        duplicate_xform.problem = fixed_problem
        duplicate_xform.doc_type = XFormInstance.__name__
        duplicate_xform = XFormInstance.wrap(duplicate_xform.to_json())

        self.log_swap(log_file, bad_xform_id, domain, duplicate_xform_id, dry_run)

        if not dry_run:
            for doc in (duplicate_xform, bad_xform):
                doc.save()
Beispiel #12
0
def archive_forms(domain, user, uploaded_data):
    """Archive the forms listed in ``uploaded_data`` on behalf of ``user``.

    Each row of ``uploaded_data`` supplies a "form_id". Forms belonging to
    another domain, forms whose archive call raises, and ids for which no
    document was returned are reported under "errors"; archived forms are
    reported under "success".

    Returns a dict with "errors" and "success" message lists.
    """
    errors = []
    successes = []

    form_ids = [row.get("form_id") for row in uploaded_data]
    not_found = set(form_ids)

    for xform_doc in iter_docs(XFormInstance.get_db(), form_ids):
        xform = XFormInstance.wrap(xform_doc)
        not_found.discard(xform["_id"])

        if xform["domain"] != domain:
            wrong_domain = _(u"XFORM {form_id} does not belong to domain {domain}").format(
                form_id=xform["_id"], domain=xform["domain"]
            )
            errors.append(wrong_domain)
            continue

        description = _(u"XFORM {form_id} for domain {domain} by user '{username}'").format(
            form_id=xform["_id"], domain=xform["domain"], username=user.username
        )

        try:
            xform.archive(user=user.username)
            successes.append(_(u"Successfully archived {form}").format(form=description))
        except Exception as e:
            errors.append(_(u"Could not archive {form}: {error}").format(form=description, error=e))

    # Ids that never came back from the database.
    for form_id in not_found:
        errors.append(_(u"Could not find XForm {form_id}").format(form_id=form_id))

    return {"errors": errors, "success": successes}
Beispiel #13
0
    def test_normal_form_with_problem_and_case_updates(self):
        """Check migration of a replacement form carrying fixed-problem text.

        "test-form" is turned into an XFormDuplicate and a copy saved as
        "new-form" with the fixed-form problem text. After migration the
        case must list only "new-form", there must be no diffs, and the
        migrated form must have ``deprecated_form_id == "test-form"`` and no
        problem.
        """
        bad_form = submit_form_locally(TEST_FORM, self.domain_name).xform
        assert bad_form._id == "test-form", bad_form

        # Copy the submitted form under a new id, with its own attachment.
        replacement = XFormInstance.wrap(bad_form.to_json())
        replacement._id = "new-form"
        replacement._rev = None
        replacement.problem = FIXED_FORM_PROBLEM_TEMPLATE.format(
            id_="test-form", datetime_="a day long ago")
        assert len(replacement.external_blobs) == 1, replacement.external_blobs
        replacement.external_blobs.pop("form.xml")
        with bad_form.fetch_attachment("form.xml", stream=True) as xml:
            replacement.put_attachment(xml, "form.xml", content_type="text/xml")
        replacement.save()

        # Mark the original as the duplicate.
        bad_form.doc_type = "XFormDuplicate"
        bad_form.problem = BAD_FORM_PROBLEM_TEMPLATE.format(
            "new-form", "a day long ago")
        bad_form.save()

        case_before = self._get_case("test-case")
        self.assertEqual(case_before.xform_ids, ["test-form"])

        self._do_migration_and_assert_flags(self.domain_name)

        case_after = self._get_case("test-case")
        self.assertEqual(case_after.xform_ids, ["new-form"])
        self._compare_diffs([])
        migrated = FormAccessors(self.domain_name).get_form('new-form')
        self.assertEqual(migrated.deprecated_form_id, "test-form")
        self.assertIsNone(migrated.problem)
Beispiel #14
0
    def rows(self):
        """Return one report row per column slug.

        For each user id of the report, 'all_forms/view' is queried in
        descending order from the end date to the start date with
        ``limit=1``; every column slug is read from the returned form with
        ``get_data``. Users without a form get NO_VALUE in every column. The
        formatted per-user rows are transposed so each returned row is the
        translated slug followed by the i-th entry of every formatted row.
        """
        domain = self.report.filter_values['domain']
        startdate = self.report.filter_values['startdate']
        enddate = self.report.filter_values['enddate']
        key_base = 'submission xmlns user'

        def last_form_row(user_id):
            # Descending view: start at the end date, stop at the start date.
            return XFormInstance.get_db().view(
                'all_forms/view',
                startkey=[key_base, domain, self.xmlns, user_id, enddate],
                endkey=[key_base, domain, self.xmlns, user_id, startdate],
                limit=1,
                reduce=False,
                include_docs=True,
                descending=True,
            ).one()

        # todo this will do one couch view hit per relevant user. could be optimized to sql or something if desired
        collected = []
        for user_id in self.report.get_user_ids():
            found = last_form_row(user_id)
            if found:
                form = XFormInstance.wrap(found['doc'])
                values = [form.get_data(slug) for slug in self.column_slugs]
            else:
                values = [NO_VALUE] * len(self.column_slugs)
            collected.append((user_id, values))

        # format
        formatted = list(self.report.format.format_output(collected))
        # transpose
        result = []
        for position, slug in enumerate(self.column_slugs):
            column = [formatted_row[position] for formatted_row in formatted]
            result.append([_(slug)] + column)
        return result
def _save_form_and_case(test):
    """Save the bug form and bug case fixtures and schedule their deletion.

    Returns ``(form, case)``; each document's ``delete`` is registered with
    ``test.addCleanup``.
    """
    saved = []
    fixtures = (
        (XFormInstance, 'bug_form.json'),
        (CommCareCase, 'bug_case.json'),
    )
    for doc_class, fixture_name in fixtures:
        doc = doc_class.wrap(_get_doc_data(fixture_name))
        doc.save()
        test.addCleanup(doc.delete)
        saved.append(doc)
    form, case = saved
    return form, case
Beispiel #16
0
    def _process_main_forms(self):
        """Process every 'XFormInstance' change, then drain the queues and pool.

        Forms whose ``problem`` starts with PROBLEM_TEMPLATE_START are first
        repaired with ``_fix_replacement_form_problem_in_couch``; forms with
        any other problem are recorded in ``errors_with_normal_doc_type`` and
        skipped. Exceptions raised while wrapping or processing a form
        (including the ``received_on`` ordering assertion) are logged and
        the loop moves on to the next change.
        """
        last_received_on = datetime.min
        # form_id needs to be on self to release appropriately
        self.queues = PartiallyLockingQueue("form_id",
                                            max_size=10000,
                                            run_timestamp=self.run_timestamp)
        pool = Pool(15)
        self._rebuild_queues(pool)

        # process main forms (including cases and ledgers)
        changes = self._get_resumable_iterator(['XFormInstance'], 'main_forms')

        # form_id needs to be on self to release appropriately
        # NOTE(review): this replaces the queue created above (the one built
        # with run_timestamp) after _rebuild_queues(pool) has run. Confirm
        # that discarding the first queue here is intended.
        self.queues = PartiallyLockingQueue("form_id", max_size=10000)

        for change in self._with_progress(['XFormInstance'], changes):
            log.debug('Processing doc: {}({})'.format('XFormInstance',
                                                      change.id))
            form = change.get_document()
            if form.get('problem'):
                if six.text_type(
                        form['problem']).startswith(PROBLEM_TEMPLATE_START):
                    # Replacement form: fix it and continue with the
                    # repaired JSON.
                    form = _fix_replacement_form_problem_in_couch(form)
                else:
                    # Any other problem: record the id and skip the form.
                    self.errors_with_normal_doc_type.append(change.id)
                    continue
            try:
                wrapped_form = XFormInstance.wrap(form)
                form_received = wrapped_form.received_on
                # Changes are expected in non-decreasing received_on order; a
                # violation is caught and logged by the except below.
                assert last_received_on <= form_received
                last_received_on = form_received
                self._try_to_process_form(wrapped_form, pool)
                self._try_to_process_queues(pool)
            except Exception:
                log.exception("Error migrating form %s", change.id)

        # finish up the queues once all changes have been iterated through
        update_interval = timedelta(seconds=10)
        next_check = datetime.now()
        while self.queues.has_next():
            wrapped_form = self.queues.get_next()
            if wrapped_form:
                pool.spawn(self._migrate_form_and_associated_models_async,
                           wrapped_form)
            else:
                gevent.sleep(0.01)  # swap greenlets

            remaining_items = self.queues.remaining_items + len(pool)
            now = datetime.now()
            # Log progress only once `now` has passed next_check, which then
            # advances by update_interval.
            if now > next_check:
                log.info('Waiting on {} docs'.format(remaining_items))
                next_check += update_interval

        # Wait for the spawned greenlets, logging every 10 seconds until the
        # pool is empty.
        while not pool.join(timeout=10):
            log.info('Waiting on {} docs'.format(len(pool)))

        self._log_main_forms_processed_count()
Beispiel #17
0
 def forms_by_case(self):
     """Group the OPM forms of ``self.cases`` by case id.

     Returns a defaultdict mapping the form's ``form.case.@case_id`` to the
     wrapped XFormInstance objects; forms whose xmlns is not in OPM_XMLNSs
     are ignored.
     """
     assert self.cases is not None, "SharedDataProvider was not instantiated with cases"
     grouped = defaultdict(list)
     form_ids = chain(*(case.xform_ids for case in self.cases))
     for doc in iter_docs(XFormInstance.get_db(), form_ids):
         if form_is_opm(doc):
             grouped[doc["form"]["case"]["@case_id"]].append(XFormInstance.wrap(doc))
     return grouped
Beispiel #18
0
def _save_form_and_case(test):
    """Persist the bug form and case fixtures; both are deleted on cleanup.

    Returns the saved ``(form, case)`` pair.
    """
    def persist(doc):
        # Save now, delete when the test finishes.
        doc.save()
        test.addCleanup(doc.delete)
        return doc

    form = persist(XFormInstance.wrap(_get_doc_data('bug_form.json')))
    case = persist(CommCareCase.wrap(_get_doc_data('bug_case.json')))
    return form, case
Beispiel #19
0
 def test_error_with_normal_doc_type_migration(self):
     """Submit a form with an empty case_id, re-save its JSON in couch with
     doc_type 'XFormInstance', run the migration, and expect exactly one
     'XFormError' form id and no diffs."""
     bad_form_xml = """<data xmlns="example.com/foo">
             <meta>
                 <instanceID>im-a-bad-form</instanceID>
             </meta>
         <case case_id="" xmlns="http://commcarehq.org/case/transaction/v2">
             <update><foo>bar</foo></update>
         </case>
         </data>"""
     submit_form_locally(bad_form_xml, self.domain_name)

     # Force the stored document back to the normal doc type.
     stored_json = FormAccessors(self.domain_name).get_form('im-a-bad-form').to_json()
     stored_json['doc_type'] = 'XFormInstance'
     XFormInstance.wrap(stored_json).save()

     self._do_migration_and_assert_flags(self.domain_name)
     error_form_ids = self._get_form_ids('XFormError')
     self.assertEqual(1, len(error_form_ids))
     self._compare_diffs([])
Beispiel #20
0
 def forms_by_case(self):
     """Return OPM forms of ``self.cases`` keyed by their case id.

     Only forms whose xmlns is in OPM_XMLNSs are included; each is wrapped
     as an XFormInstance and appended under ``form.case.@case_id``.
     """
     assert self.cases is not None, \
         "SharedDataProvider was not instantiated with cases"
     by_case = defaultdict(list)
     every_form_id = chain(*(case.xform_ids for case in self.cases))
     for form_doc in iter_docs(XFormInstance.get_db(), every_form_id):
         if form_doc['xmlns'] not in OPM_XMLNSs:
             continue
         by_case[form_doc['form']['case']['@case_id']].append(
             XFormInstance.wrap(form_doc))
     return by_case
Beispiel #21
0
 def test_error_with_normal_doc_type_migration(self):
     """A form with an empty case_id, stored in couch under doc_type
     'XFormInstance', must yield one 'XFormError' form id and no diffs
     after migration."""
     form_xml = """<data xmlns="example.com/foo">
             <meta>
                 <instanceID>im-a-bad-form</instanceID>
             </meta>
         <case case_id="" xmlns="http://commcarehq.org/case/transaction/v2">
             <update><foo>bar</foo></update>
         </case>
         </data>"""
     submit_form_locally(form_xml, self.domain_name)

     submitted = FormAccessors(self.domain_name).get_form('im-a-bad-form')
     as_json = submitted.to_json()
     # Store the document with the normal doc type before migrating.
     as_json['doc_type'] = 'XFormInstance'
     XFormInstance.wrap(as_json).save()

     self._do_migration_and_assert_flags(self.domain_name)
     self.assertEqual(1, len(self._get_form_ids('XFormError')))
     self._compare_diffs([])
Beispiel #22
0
def _process_main_forms(domain):
    """Migrate every main form of ``domain`` (including cases and ledgers).

    Forms are taken from ``_get_main_form_iterator(domain)`` and must
    arrive in non-decreasing ``received_on`` order; this is asserted.
    """
    previous_received_on = datetime.min
    all_changes = _get_main_form_iterator(domain).iter_all_changes()
    for change in all_changes:
        form_doc = change.get_document()
        wrapped_form = XFormInstance.wrap(form_doc)
        received_on = iso_string_to_datetime(form_doc['received_on'])
        assert previous_received_on <= received_on
        previous_received_on = received_on
        print('processing form {}: {}'.format(form_doc['_id'], received_on))
        _migrate_form_and_associated_models(domain, wrapped_form)
    def _process_main_forms(self):
        """Process every main form change, then drain the queues and pool.

        Forms with a truthy ``problem`` are recorded in
        ``errors_with_normal_doc_type`` and skipped. A form that touches
        cases is spawned only if ``self.queues.try_obj`` accepts it; a form
        without cases is spawned immediately. After each change, every form
        the queue hands out via ``get_next`` is spawned.
        """
        last_received_on = datetime.min
        # process main forms (including cases and ledgers)
        changes = _get_main_form_iterator(self.domain).iter_all_changes()
        # form_id needs to be on self to release appropriately
        self.queues = PartiallyLockingQueue("form_id", max_size=10000)

        pool = Pool(15)
        for change in self._with_progress(['XFormInstance'], changes):
            self.log_debug('Processing doc: {}({})'.format(
                'XFormInstance', change.id))
            form = change.get_document()
            if form.get('problem', None):
                # Record the id and skip forms that carry a problem.
                self.errors_with_normal_doc_type.append(change.id)
                continue
            wrapped_form = XFormInstance.wrap(form)
            form_received = wrapped_form.received_on
            # Changes are expected in non-decreasing received_on order.
            assert last_received_on <= form_received
            last_received_on = form_received

            case_ids = get_case_ids_from_form(wrapped_form)
            if case_ids:  # if this form involves a case check if we can process it
                if self.queues.try_obj(case_ids, wrapped_form):
                    pool.spawn(self._migrate_form_and_associated_models_async,
                               wrapped_form)
                elif self.queues.full:
                    sleep(0.01)  # swap greenlets
            else:  # if not, just go ahead and process it
                pool.spawn(self._migrate_form_and_associated_models_async,
                           wrapped_form)

            # regularly check if we can empty the queues
            while True:
                new_wrapped_form = self.queues.get_next()
                if not new_wrapped_form:
                    break
                pool.spawn(self._migrate_form_and_associated_models_async,
                           new_wrapped_form)

        # finish up the queues once all changes have been iterated through
        while self.queues.has_next():
            wrapped_form = self.queues.get_next()
            if wrapped_form:
                pool.spawn(self._migrate_form_and_associated_models_async,
                           wrapped_form)
            else:
                sleep(0.01)  # swap greenlets

            remaining_items = self.queues.remaining_items + len(pool)
            # Log only when the remaining count is a multiple of 10.
            if remaining_items % 10 == 0:
                self.log_info('Waiting on {} docs'.format(remaining_items))

        # Wait for the spawned greenlets, logging every 10 seconds until the
        # pool is empty.
        while not pool.join(timeout=10):
            self.log_info('Waiting on {} docs'.format(len(pool)))
 def _compare_forms(self, actual_json, expected_json, msg):
     """Assert that ``actual_json`` equals the normalized ``expected_json``.

     Fields that legitimately differ per run are copied from
     ``actual_json`` into ``expected_json`` (which is updated in place)
     before it is round-tripped through XFormInstance and compared.
     """
     copied_keys = ('received_on', '_rev', '#export_tag', 'auth_context')
     overrides = dict((key, actual_json[key]) for key in copied_keys)
     overrides['domain'] = self.domain
     overrides['initial_processing_complete'] = True
     expected_json.update(overrides)
     normalized = XFormInstance.wrap(expected_json).to_json()
     self.assertDictEqual(actual_json, normalized, msg)
Beispiel #25
0
def _get_form_or_404(id):
    """Return the XFormInstance stored under ``id`` or raise Http404.

    Http404 is raised when no document exists or when the document's
    doc_type is not 'XFormInstance'.
    """
    # maybe this should be a more general utility a-la-django's get_object_or_404
    try:
        doc = XFormInstance.get_db().get(id)
    except ResourceNotFound:
        raise Http404()

    if doc.get('doc_type') != 'XFormInstance':
        raise Http404()
    return XFormInstance.wrap(doc)
Beispiel #26
0
def archive_or_restore_forms(domain, user, form_ids, archive_or_restore, task=None, from_excel=False):
    """Archive or unarchive the given forms and collect per-form messages.

    Arguments:
        domain: forms belonging to any other domain are reported as errors.
        user: ``user.username`` is passed as ``user_id`` to archive/unarchive.
        form_ids: ids of the forms to process (``len()`` is taken of it).
        archive_or_restore: object whose ``is_archive_mode()`` selects the
            operation and whose ``success_text`` is used in the summary.
        task: when given, progress is reported through
            ``DownloadBase.set_progress`` using the success count.
        from_excel: when true, the bare response dict is returned.

    Returns ``response`` (with "errors" and "success" lists) if
    ``from_excel`` is true, otherwise ``{"messages": response}`` where
    ``response`` additionally carries "success_count_msg".
    """
    response = {
        'errors': [],
        'success': [],
    }

    missing_forms = set(form_ids)
    success_count = 0

    if task:
        DownloadBase.set_progress(task, 0, len(form_ids))

    for xform_doc in iter_docs(XFormInstance.get_db(), form_ids):
        xform = XFormInstance.wrap(xform_doc)
        missing_forms.discard(xform['_id'])

        if xform['domain'] != domain:
            response['errors'].append(_(u"XFORM {form_id} does not belong to domain {domain}").format(
                form_id=xform['_id'], domain=xform['domain']))
            continue

        xform_string = _(u"XFORM {form_id} for domain {domain} by user '{username}'").format(
            form_id=xform['_id'],
            domain=xform['domain'],
            username=user.username)

        try:
            if archive_or_restore.is_archive_mode():
                xform.archive(user_id=user.username)
                message = _(u"Successfully archived {form}").format(form=xform_string)
            else:
                xform.unarchive(user_id=user.username)
                message = _(u"Successfully unarchived {form}").format(form=xform_string)
            response['success'].append(message)
            success_count = success_count + 1
        except Exception as e:
            # NOTE(review): this message says "archive" even in restore mode.
            response['errors'].append(_(u"Could not archive {form}: {error}").format(
                form=xform_string, error=e))

        if task:
            DownloadBase.set_progress(task, success_count, len(form_ids))

    # Ids for which no document was returned.
    for missing_form_id in missing_forms:
        response['errors'].append(
            _(u"Could not find XForm {form_id}").format(form_id=missing_form_id))

    if from_excel:
        return response

    # Translate the template first and fill it in afterwards; formatting
    # before the lookup produced a string that is never in the catalog.
    response["success_count_msg"] = _("{success_msg} {count} form(s)").format(
        success_msg=archive_or_restore.success_text,
        count=success_count)
    return {"messages": response}
Beispiel #27
0
def _get_error_submissions_without_xmlns():
    """Find submissions with xmlns 'undefined' that are not 'xforminstance' docs.

    Returns ``(total, submissions)``: ``result.total`` of the ES query and a
    lazy generator of XFormInstance-wrapped couch docs for the hits.
    """
    query = FormES().xmlns('undefined')
    query = query.remove_default_filter("is_xform_instance")
    query = query.filter(NOT(doc_type('xforminstance')))
    query = query.source(['_id'])
    result = query.run()

    total_error_submissions = result.total
    hit_ids = (hit['_id'] for hit in result.hits)
    docs = iter_docs(XFormInstance.get_db(), hit_ids)
    error_submissions = (XFormInstance.wrap(doc) for doc in docs)
    return total_error_submissions, error_submissions
Beispiel #28
0
 def process_xform(self, doc):
     """Process XFormInstance document asynchronously

     If the document cannot be wrapped, the error is logged and empty form
     diffs are saved for it; otherwise the wrapped form is handed to
     ``_try_to_process_form`` and the queues are emptied where possible.
     """
     form_id = doc["_id"]
     log.debug('Processing doc: XFormInstance(%s)', form_id)
     try:
         wrapped_form = XFormInstance.wrap(doc)
     except Exception:
         log.exception("Error migrating form %s", form_id)
         self.statedb.save_form_diffs(doc, {})
         return
     self._try_to_process_form(wrapped_form)
     self._try_to_empty_queues()
Beispiel #29
0
 def _compare_forms(self, actual_json, expected_json, msg):
     """Assert that ``actual_json`` equals the normalized ``expected_json``.

     Per-run fields and the ids of external blobs are copied from
     ``actual_json`` into ``expected_json`` (updated in place) before it is
     round-tripped through XFormInstance and compared.
     """
     copied_keys = ('received_on', '_rev', '#export_tag', 'auth_context')
     overrides = dict((key, actual_json[key]) for key in copied_keys)
     overrides['domain'] = self.domain
     overrides['initial_processing_complete'] = True
     expected_json.update(overrides)

     # Blob ids are generated on save, so take them from the actual form.
     actual_blobs = actual_json.get("external_blobs", {})
     for blob_name, blob_meta in actual_blobs.iteritems():
         expected_json["external_blobs"][blob_name]["id"] = blob_meta["id"]

     normalized = XFormInstance.wrap(expected_json).to_json()
     self.assertDictEqual(actual_json, normalized, msg)
 def _compare_forms(self, actual_json, expected_json, msg):
     """Compare a stored form's JSON with the expected JSON.

     ``expected_json`` is updated in place with the domain, the
     processing flag and the per-run fields taken from ``actual_json``,
     then normalized through XFormInstance before the dict comparison.
     """
     per_run_fields = ("received_on", "_rev", "#export_tag", "auth_context")
     replacements = {"domain": self.domain, "initial_processing_complete": True}
     replacements.update((field, actual_json[field]) for field in per_run_fields)
     expected_json.update(replacements)
     round_tripped = XFormInstance.wrap(expected_json).to_json()
     self.assertDictEqual(actual_json, round_tripped, msg)
Beispiel #31
0
 def _compare_forms(self, actual_json, expected_json, msg):
     """Compare a stored form's JSON with the expected JSON.

     ``expected_json`` is updated in place with the domain, the
     processing flag, the per-run fields and the external blob ids taken
     from ``actual_json``, then normalized through XFormInstance before
     the dict comparison.
     """
     per_run_fields = ('received_on', '_rev', '#export_tag', 'auth_context')
     replacements = {'domain': self.domain, 'initial_processing_complete': True}
     replacements.update((field, actual_json[field]) for field in per_run_fields)
     expected_json.update(replacements)

     # Use the blob ids of the actual form in the expectation.
     for blob_name, blob_meta in actual_json.get("external_blobs", {}).iteritems():
         expected_blob = expected_json["external_blobs"][blob_name]
         expected_blob["id"] = blob_meta["id"]

     round_tripped = XFormInstance.wrap(expected_json).to_json()
     self.assertDictEqual(actual_json, round_tripped, msg)
def _get_error_submissions_without_xmlns():
    """Return the count and documents of 'undefined'-xmlns error submissions.

    The ES query drops the default "is_xform_instance" filter and excludes
    docs of type 'xforminstance'. Returns ``(result.total, generator)``
    where the generator lazily wraps the matching couch docs as
    XFormInstance.
    """
    es_query = FormES().xmlns('undefined')
    es_query = es_query.remove_default_filter("is_xform_instance")
    es_query = es_query.filter(NOT(doc_type('xforminstance')))
    result = es_query.source(['_id']).run()

    total_error_submissions = result.total
    matching_docs = iter_docs(
        XFormInstance.get_db(),
        (hit['_id'] for hit in result.hits),
    )
    error_submissions = (XFormInstance.wrap(doc) for doc in matching_docs)
    return total_error_submissions, error_submissions
Beispiel #33
0
    def _process_main_forms(self):
        """Process every 'XFormInstance' change, then drain the queues and pool.

        Forms with a truthy ``problem`` are recorded in
        ``errors_with_normal_doc_type`` and skipped. Exceptions raised while
        wrapping or processing a form (including the ``received_on``
        ordering assertion) are logged and the loop moves on to the next
        change.
        """
        last_received_on = datetime.min
        # form_id needs to be on self to release appropriately
        self.queues = PartiallyLockingQueue("form_id", max_size=10000, run_timestamp=self.run_timestamp)
        pool = Pool(15)
        self._rebuild_queues(pool)

        # process main forms (including cases and ledgers)
        changes = self._get_resumable_iterator(['XFormInstance'], 'main_forms')

        # form_id needs to be on self to release appropriately
        # NOTE(review): this replaces the queue created above (the one built
        # with run_timestamp) after _rebuild_queues(pool) has run. Confirm
        # that discarding the first queue here is intended.
        self.queues = PartiallyLockingQueue("form_id", max_size=10000)

        for change in self._with_progress(['XFormInstance'], changes):
            log.debug('Processing doc: {}({})'.format('XFormInstance', change.id))
            form = change.get_document()
            if form.get('problem', None):
                # Record the id and skip forms that carry a problem.
                self.errors_with_normal_doc_type.append(change.id)
                continue
            try:
                wrapped_form = XFormInstance.wrap(form)
                form_received = wrapped_form.received_on
                # Changes are expected in non-decreasing received_on order; a
                # violation is caught and logged by the except below.
                assert last_received_on <= form_received
                last_received_on = form_received
                self._try_to_process_form(wrapped_form, pool)
                self._try_to_process_queues(pool)
            except Exception:
                log.exception("Error migrating form %s", change.id)

        # finish up the queues once all changes have been iterated through
        update_interval = timedelta(seconds=10)
        next_check = datetime.now()
        while self.queues.has_next():
            wrapped_form = self.queues.get_next()
            if wrapped_form:
                pool.spawn(self._migrate_form_and_associated_models_async, wrapped_form)
            else:
                gevent.sleep(0.01)  # swap greenlets

            remaining_items = self.queues.remaining_items + len(pool)
            now = datetime.now()
            # Log progress only once `now` has passed next_check, which then
            # advances by update_interval.
            if now > next_check:
                log.info('Waiting on {} docs'.format(remaining_items))
                next_check += update_interval

        # Wait for the spawned greenlets, logging every 10 seconds until the
        # pool is empty.
        while not pool.join(timeout=10):
            log.info('Waiting on {} docs'.format(len(pool)))

        self._log_main_forms_processed_count()
Beispiel #34
0
def form_multimedia_export(request, domain, app_id):
    """Return a zip download of the multimedia attached to matching forms.

    ``xmlns``, ``startdate`` and ``enddate`` are required GET parameters; a
    400 response is returned when any of them is missing. ``name`` optionally
    sets the zip file name, which otherwise defaults to the ``@name`` of the
    first form processed. Attachments whose content type is ``text/xml`` are
    not included in the archive.
    """
    try:
        xmlns = request.GET["xmlns"]
        startdate = request.GET["startdate"]
        enddate = request.GET["enddate"]
        zip_name = request.GET.get("name", None)
    except KeyError:
        return HttpResponseBadRequest()

    def filename(form, question_id, extension):
        # ``extension`` comes from os.path.splitext and already starts with
        # ".", so it is appended without an additional separator.
        return "%s-%s-%s-%s%s" % (form['form']['@name'],
                                  unidecode(question_id),
                                  form['form']['meta']['username'],
                                  form['_id'], extension)

    view_key = [domain, app_id, xmlns]
    stream_file = cStringIO.StringIO()
    zf = zipfile.ZipFile(stream_file, mode='w', compression=zipfile.ZIP_STORED)
    unknown_number = 0
    form_ids = {f['id'] for f in XFormInstance.get_db().view("attachments/attachments",
                                         start_key=view_key + [startdate],
                                         end_key=view_key + [enddate, {}],
                                         reduce=False)}
    for form in iter_docs(XFormInstance.get_db(), form_ids):
        f = XFormInstance.wrap(form)
        if not zip_name:
            zip_name = unidecode(form['form']['@name'])
        for attachment_name in form['_attachments'].keys():
            if form['_attachments'][attachment_name]['content_type'] == 'text/xml':
                continue
            extension = unicode(os.path.splitext(attachment_name)[1])
            try:
                question_id = unicode('-'.join(find_question_id(form['form'], attachment_name)))
            except TypeError:
                # find_question_id gave nothing joinable; use a placeholder id
                question_id = unicode('unknown' + str(unknown_number))
                unknown_number += 1
            fname = filename(form, question_id, extension)
            zi = zipfile.ZipInfo(fname, parse(form['received_on']).timetuple())
            zf.writestr(zi, f.fetch_attachment(attachment_name, stream=True).read())

    zf.close()

    # Report the real size of the archive rather than an estimate of the zip
    # overhead, which can disagree with the bytes actually written.
    payload = stream_file.getvalue()
    response = HttpResponse(payload, mimetype="application/zip")
    response['Content-Length'] = len(payload)
    response['Content-Disposition'] = 'attachment; filename=%s.zip' % zip_name
    return response
 def _process_main_forms(self):
     """Migrate each main form of the domain, one after another.

     Documents with a truthy ``problem`` value are recorded in
     ``errors_with_normal_doc_type`` and not migrated. The loop asserts
     that ``received_on`` never decreases from one form to the next.
     """
     previous_received_on = datetime.min
     # process main forms (including cases and ledgers)
     all_changes = _get_main_form_iterator(self.domain).iter_all_changes()
     for change in self._with_progress(['XFormInstance'], all_changes):
         self.log_debug('Processing doc: {}({})'.format('XFormInstance', change.id))
         raw_form = change.get_document()
         if not raw_form.get('problem', None):
             wrapped_form = XFormInstance.wrap(raw_form)
             received_on = wrapped_form.received_on
             assert previous_received_on <= received_on
             previous_received_on = received_on
             self._migrate_form_and_associated_models(wrapped_form)
         else:
             self.errors_with_normal_doc_type.append(change.id)
Beispiel #36
0
class BiharCase(CommCareCase):
    """Case document that can be dumped to, and rebuilt from, plain JSON."""
    doc_type = 'CommCareCase'

    def dump_json(self):
        """Return a dict holding the case JSON and the JSON of its forms."""
        case_json = self.to_json()
        forms_json = []
        for form in self.get_forms():
            forms_json.append(form.to_json())
        return {'case': case_json, 'forms': forms_json}

    @classmethod
    def from_dump(cls, json_dump):
        """Rebuild a case and its forms from the output of ``dump_json``."""
        restored = BiharCase.wrap(json_dump['case'])
        restored._forms = [XFormInstance.wrap(raw) for raw in json_dump['forms']]
        # index the wrapped forms by their document id
        restored._forms_cache = {form._id: form for form in restored._forms}
        return restored
Beispiel #37
0
    def handle(self, *args, **options):
        """Update form indicators in bulk for every indicator domain.

        For each domain, all form indicator definitions of its namespaces
        are collected and applied to the XFormInstance docs whose ids are
        returned for 2013-08-01 through 2013-10-15.
        """
        xform_db = XFormInstance.get_db()

        for domain in get_indicator_domains():
            namespaces = get_namespaces(domain)
            indicators = [
                definition
                for namespace in namespaces
                for definition in FormIndicatorDefinition.get_all(namespace, domain)
            ]

            window_start = datetime.date(2013, 8, 1)
            window_end = datetime.date(2013, 10, 15)
            form_ids = get_form_ids_by_type(
                domain, 'XFormInstance', start=window_start, end=window_end)

            for raw_doc in iter_docs(xform_db, form_ids):
                wrapped = XFormInstance.wrap(raw_doc)
                wrapped.update_indicators_in_bulk(indicators, logger=logging)
Beispiel #38
0
    def _process_main_forms(self):
        """Migrate all main forms of the domain using a greenlet pool.

        A single ``PartiallyLockingQueue`` tied to ``self.run_timestamp`` is
        created, handed to ``_rebuild_queues`` and then used for the whole
        run. Documents with a truthy ``problem`` value are recorded in
        ``errors_with_normal_doc_type`` and skipped. After all changes have
        been iterated, remaining queued forms are spawned on the pool and
        the method waits for the pool to finish.
        """
        last_received_on = datetime.min
        # form_id needs to be on self to release appropriately
        self.queues = PartiallyLockingQueue("form_id",
                                            max_size=10000,
                                            run_timestamp=self.run_timestamp)
        pool = Pool(15)
        self._rebuild_queues(pool)

        # process main forms (including cases and ledgers)
        changes = _get_main_form_iterator(self.domain).iter_all_changes(
            resumable_key=self._get_resumable_iterator_key('main_forms'))

        # NOTE: self.queues must not be re-created here. Replacing it would
        # throw away whatever _rebuild_queues() left in the queue and would
        # detach the queue from this run's timestamp.

        for change in self._with_progress(['XFormInstance'], changes):
            self.log_debug('Processing doc: {}({})'.format(
                'XFormInstance', change.id))
            form = change.get_document()
            if form.get('problem', None):
                self.errors_with_normal_doc_type.append(change.id)
                continue
            wrapped_form = XFormInstance.wrap(form)
            form_received = wrapped_form.received_on
            assert last_received_on <= form_received
            last_received_on = form_received
            self._try_to_process_form(wrapped_form, pool)
            self._try_to_process_queues(pool)

        # finish up the queues once all changes have been iterated through
        while self.queues.has_next():
            wrapped_form = self.queues.get_next()
            if wrapped_form:
                pool.spawn(self._migrate_form_and_associated_models_async,
                           wrapped_form)
            else:
                gevent.sleep(0.01)  # swap greenlets

            remaining_items = self.queues.remaining_items + len(pool)
            if remaining_items % 10 == 0:
                self.log_info('Waiting on {} docs'.format(remaining_items))

        # pool.join returns falsy on timeout, so keep reporting until done
        while not pool.join(timeout=10):
            self.log_info('Waiting on {} docs'.format(len(pool)))

        self._log_main_forms_processed_count()
Beispiel #39
0
def get_submissions_without_xmlns():
    """Return ``(count, docs)`` for submissions keyed as "undefined" xmlns.

    ``count`` adds the number of rows from the ``couchforms/by_xmlns`` view
    to the count reported by ``_get_error_submissions_without_xmlns``;
    ``docs`` lazily chains the wrapped forms with the error submissions.
    """
    rows = XFormInstance.get_db().view(
        'couchforms/by_xmlns',
        key="undefined",
        include_docs=False,
        reduce=False,
    ).all()
    row_count = len(rows)
    row_ids = (row['id'] for row in rows)
    raw_docs = iter_docs(XFormInstance.get_db(), row_ids)
    wrapped_docs = (XFormInstance.wrap(raw) for raw in raw_docs)

    error_count, error_docs = _get_error_submissions_without_xmlns()
    combined_total = row_count + error_count
    return (combined_total, chain(wrapped_docs, error_docs))
Beispiel #40
0
 def process_xform(self, doc):
     """Process XFormInstance document asynchronously

     A document with a ``problem`` value is repaired through
     ``_fix_replacement_form_problem_in_couch`` when the problem text starts
     with ``PROBLEM_TEMPLATE_START``; otherwise it is recorded in the state
     db as a problem form and skipped. A document that cannot be wrapped is
     logged and skipped instead of being processed.
     """
     form_id = doc["_id"]
     log.debug('Processing doc: XFormInstance(%s)', form_id)
     if doc.get('problem'):
         if six.text_type(
                 doc['problem']).startswith(PROBLEM_TEMPLATE_START):
             doc = _fix_replacement_form_problem_in_couch(doc)
         else:
             self.statedb.add_problem_form(form_id)
             return
     try:
         wrapped_form = XFormInstance.wrap(doc)
     except Exception:
         log.exception("Error migrating form %s", form_id)
         # Nothing to process: without this return the code below would
         # fail with a NameError on the unbound ``wrapped_form``.
         return
     self._try_to_process_form(wrapped_form)
     self._try_to_empty_queues()
Beispiel #41
0
def prepare_planning_db(domain):
    """Fill the planning db for ``domain`` from its XFormInstance docs.

    For each form whose submission XML can be fetched, the form JSON is
    rebuilt from the XML, the rebuilt form and its diff against the stored
    doc are saved, and the case actions and stock report helpers derived
    from it are recorded. Returns the result of ``prepare_case_json``.
    """
    planning_db = PlanningDB.init(get_planning_db_filepath(domain))
    xform_ids = get_form_ids_by_type(domain, 'XFormInstance')
    xform_db = XFormInstance.get_db()

    for xform in iter_docs(xform_db, xform_ids):
        xform_id = xform['_id']
        try:
            xml = _get_submission_xml(xform, xform_db)
        except ResourceNotFound:
            # no submission XML available for this form: skip it
            continue
        new_form_json = _get_new_form_json(xml, xform_id)

        case_updates = get_case_updates(new_form_json)
        rebuilt_doc = deepcopy(xform)
        rebuilt_doc['form'] = new_form_json
        xformdoc = XFormInstance.wrap(rebuilt_doc)
        xformdoc_json = xformdoc.to_json()

        planning_db.add_form(xform_id, xformdoc_json)
        form_diffs = json_diff(xform, xformdoc_json)
        planning_db.add_diffs('form', xform_id, form_diffs)

        case_actions = []
        for case_update in case_updates:
            for action in case_update.get_case_actions(xformdoc):
                case_actions.append(
                    (case_update.id, action.xform_id, action.to_json()))

        stock_report_helpers, stock_case_actions = get_stock_actions(xformdoc)
        case_actions.extend(stock_case_actions)

        # group (form id, action) pairs under the case they belong to
        actions_by_case = collections.defaultdict(list)
        for case_id, action_form_id, case_action in case_actions:
            actions_by_case[case_id].append((action_form_id, case_action))

        for case_id, grouped_actions in actions_by_case.items():
            planning_db.ensure_case(case_id)
            planning_db.add_case_actions(case_id, grouped_actions)

        helper_json = [helper.to_json() for helper in stock_report_helpers]
        planning_db.add_stock_report_helpers(helper_json)
    return prepare_case_json(planning_db)
Beispiel #42
0
def prepare_planning_db(domain):
    """Build the planning database for ``domain`` and return its case JSON.

    Each XFormInstance doc with retrievable submission XML gets its form
    JSON regenerated; the regenerated doc, its diff against the stored doc,
    its case actions and its stock report helpers are written to the
    planning db. The return value comes from ``prepare_case_json``.
    """
    db_filepath = get_planning_db_filepath(domain)
    planning_db = PlanningDB.init(db_filepath)
    xform_ids = get_form_ids_by_type(domain, 'XFormInstance')
    xform_db = XFormInstance.get_db()

    for stored_form in iter_docs(xform_db, xform_ids):
        form_id = stored_form['_id']
        try:
            xml = _get_submission_xml(stored_form, xform_db)
        except ResourceNotFound:
            # forms without submission XML are left out
            continue
        new_form_json = _get_new_form_json(xml, form_id)

        case_updates = get_case_updates(new_form_json)
        form_copy = deepcopy(stored_form)
        form_copy['form'] = new_form_json
        xformdoc = XFormInstance.wrap(form_copy)
        xformdoc_json = xformdoc.to_json()

        planning_db.add_form(form_id, xformdoc_json)
        planning_db.add_diffs(
            'form', form_id, json_diff(stored_form, xformdoc_json))

        collected_actions = [
            (update.id, action.xform_id, action.to_json())
            for update in case_updates
            for action in update.get_case_actions(xformdoc)
        ]

        stock_report_helpers, stock_case_actions = get_stock_actions(xformdoc)
        collected_actions.extend(stock_case_actions)

        # bucket the actions per case id
        per_case = collections.defaultdict(list)
        for case_id, source_form_id, case_action in collected_actions:
            per_case[case_id].append((source_form_id, case_action))

        for case_id, actions_for_case in per_case.items():
            planning_db.ensure_case(case_id)
            planning_db.add_case_actions(case_id, actions_for_case)

        planning_db.add_stock_report_helpers(
            [helper.to_json() for helper in stock_report_helpers])
    return prepare_case_json(planning_db)
def get_submissions_without_xmlns():
    """Return the count and a lazy iterable of submissions lacking an xmlns.

    Rows come from the ``couchforms/by_xmlns`` view under the key
    "undefined"; their docs are wrapped lazily and chained with the error
    submissions from ``_get_error_submissions_without_xmlns``, whose count
    is added to the total.
    """
    view_rows = XFormInstance.get_db().view(
        'couchforms/by_xmlns',
        key="undefined",
        include_docs=False,
        reduce=False,
    ).all()
    normal_total = len(view_rows)
    doc_ids = (row['id'] for row in view_rows)
    fetched_docs = iter_docs(XFormInstance.get_db(), doc_ids)
    normal_docs = (XFormInstance.wrap(doc) for doc in fetched_docs)

    error_total, error_docs = _get_error_submissions_without_xmlns()
    all_docs = chain(normal_docs, error_docs)
    return (normal_total + error_total, all_docs)
Beispiel #44
0
    def new_form_from_old(cls, existing_form, xml, value_responses_map, user_id):
        """Create an edited copy of ``existing_form`` and deprecate the old one.

        Each question path in ``value_responses_map`` is walked through the
        copy's form data and its final key is set to the mapped response.
        The serialized ``xml`` is attached as ``form.xml`` before
        ``apply_deprecation`` is called. Returns ``(existing_form, new_form)``
        as returned by ``apply_deprecation``.
        """
        from corehq.form_processor.parsers.form import apply_deprecation
        new_form = XFormInstance.wrap(existing_form.to_json())

        for question, response in six.iteritems(value_responses_map):
            node = new_form.form_data
            path = XFormQuestionValueIterator(question)
            for (qid, repeat_index) in path:
                node = node[qid]
                if repeat_index is None:
                    continue
                # step into the selected repeat entry
                node = node[repeat_index]
            node[path.last()] = response

        serialized_xml = etree.tostring(xml)
        new_form._deferred_blobs = None     # will be re-populated by apply_deprecation
        new_form.external_blobs.clear()     # will be re-populated by apply_deprecation
        new_form.deferred_put_attachment(serialized_xml, "form.xml", content_type="text/xml")
        deprecated_and_new = apply_deprecation(existing_form, new_form)
        existing_form, new_form = deprecated_and_new
        return (existing_form, new_form)
    def new_form_from_old(cls, existing_form, xml, value_responses_map,
                          user_id):
        """Copy ``existing_form``, apply the responses and deprecate the old form.

        Every question path in ``value_responses_map`` is followed through
        the copy's form data and its last key receives the mapped response.
        Returns the ``(existing_form, new_form)`` pair produced by
        ``apply_deprecation``.
        """
        from corehq.form_processor.parsers.form import apply_deprecation
        new_form = XFormInstance.wrap(existing_form.to_json())

        for question, response in six.iteritems(value_responses_map):
            cursor = new_form.form_data
            steps = XFormQuestionValueIterator(question)
            for (qid, repeat_index) in steps:
                cursor = cursor[qid]
                if repeat_index is None:
                    continue
                # descend into the addressed repeat entry
                cursor = cursor[repeat_index]
            cursor[steps.last()] = response

        # both are re-populated by apply_deprecation
        new_form._deferred_blobs = None
        new_form.external_blobs.clear()
        result = apply_deprecation(existing_form, new_form)
        existing_form, new_form = result
        return (existing_form, new_form)
Beispiel #46
0
 def _process_main_forms(self):
     """Migrate the domain's main forms, logging the id of a failing form.

     Documents with a truthy ``problem`` value are added to
     ``errors_with_normal_doc_type`` and skipped. A failure while migrating
     a form is logged with the change id and then re-raised.
     """
     previous_received_on = datetime.min
     # process main forms (including cases and ledgers)
     all_changes = _get_main_form_iterator(self.domain).iter_all_changes()
     for change in self._with_progress(['XFormInstance'], all_changes):
         self.log_debug('Processing doc: {}({})'.format('XFormInstance', change.id))
         raw_form = change.get_document()
         if raw_form.get('problem', None):
             self.errors_with_normal_doc_type.append(change.id)
         else:
             wrapped_form = XFormInstance.wrap(raw_form)
             received_on = wrapped_form.received_on
             assert previous_received_on <= received_on
             previous_received_on = received_on
             try:
                 self._migrate_form_and_associated_models(wrapped_form)
             except:
                 self.log_error("Unable to migrate form: {}".format(change.id))
                 raise
Beispiel #47
0
    def handle(self, *args, **options):
        """Update form indicators in bulk for every indicator domain.

        Form ids are read from the ``couchforms/all_submissions_by_domain``
        view between the keys ending in "2013-08-01" and "2013-10-15".
        """
        xform_db = XFormInstance.get_db()

        for domain in get_indicator_domains():
            namespaces = get_namespaces(domain)
            indicators = [
                definition
                for namespace in namespaces
                for definition in FormIndicatorDefinition.get_all(namespace, domain)
            ]

            key_prefix = [domain, "by_type", "XFormInstance"]
            rows = xform_db.view(
                'couchforms/all_submissions_by_domain',
                startkey=key_prefix + ["2013-08-01"],
                endkey=key_prefix + ["2013-10-15"],
                reduce=False,
                include_docs=False,
            ).all()
            form_ids = []
            for row in rows:
                form_ids.append(row['id'])

            for raw_doc in iter_docs(xform_db, form_ids):
                wrapped = XFormInstance.wrap(raw_doc)
                wrapped.update_indicators_in_bulk(indicators, logger=logging)
Beispiel #48
0
def archive_forms(domain, user, uploaded_data):
    """Archive the forms listed in ``uploaded_data`` and report the outcome.

    ``uploaded_data`` is an iterable of rows providing ``form_id`` via
    ``.get``. Returns a dict with ``errors`` and ``success`` message lists:
    forms from another domain, forms whose archiving raised, and ids that
    were not found are reported as errors.
    """
    errors = []
    successes = []
    response = {
        'errors': errors,
        'success': successes,
    }

    form_ids = []
    for row in uploaded_data:
        form_ids.append(row.get('form_id'))
    # ids still in this set after the loop were never returned by the db
    not_found = set(form_ids)

    for raw_doc in iter_docs(XFormInstance.get_db(), form_ids):
        xform = XFormInstance.wrap(raw_doc)
        not_found.discard(xform['_id'])

        if xform['domain'] != domain:
            wrong_domain_message = _(
                u"XFORM {form_id} does not belong to domain {domain}"
            ).format(form_id=xform['_id'], domain=xform['domain'])
            errors.append(wrong_domain_message)
            continue

        xform_string = _(
            u"XFORM {form_id} for domain {domain} by user '{username}'"
        ).format(form_id=xform['_id'],
                 domain=xform['domain'],
                 username=user.username)

        try:
            xform.archive(user=user.username)
            successes.append(
                _(u"Successfully archived {form}").format(form=xform_string))
        except Exception as e:
            errors.append(
                _(u"Could not archive {form}: {error}").format(
                    form=xform_string, error=e))

    for missing_id in not_found:
        errors.append(
            _(u"Could not find XForm {form_id}").format(form_id=missing_id))

    return response
    def handle(self, *args, **options):
        """Bulk-update form indicators for each indicator domain.

        The forms are those returned by the
        ``couchforms/all_submissions_by_domain`` view for keys ending in
        "2013-08-01" through "2013-10-15".
        """
        xform_db = XFormInstance.get_db()

        for domain in get_indicator_domains():
            namespaces = get_namespaces(domain)
            indicators = []
            for namespace in namespaces:
                indicators += list(FormIndicatorDefinition.get_all(namespace, domain))

            base_key = [domain, "by_type", "XFormInstance"]
            start_key = base_key + ["2013-08-01"]
            end_key = base_key + ["2013-10-15"]
            view_result = xform_db.view(
                'couchforms/all_submissions_by_domain',
                startkey=start_key,
                endkey=end_key,
                reduce=False,
                include_docs=False
            )
            form_ids = [row['id'] for row in view_result.all()]

            for raw_doc in iter_docs(xform_db, form_ids):
                form_doc = XFormInstance.wrap(raw_doc)
                form_doc.update_indicators_in_bulk(indicators, logger=logging)
    def _process_skipped_forms(self):
        """Migrate forms that a previous migration skipped.

        Docs that cannot be wrapped are logged and left out. Progress is
        logged after every 100 migrated forms. Case diffs are not computed
        here.
        """
        migrated = 0
        with self.counter('skipped_forms', 'XFormInstance.id') as add_form:
            for doc in self._iter_skipped_forms():
                try:
                    form = XFormInstance.wrap(doc)
                except Exception:
                    log.exception("Error wrapping form %s", doc)
                    continue
                self._migrate_form_and_associated_models(form)
                add_form()
                migrated += 1
                if not migrated % 100:
                    log.info("migrated %s previously skipped forms",
                             migrated)
            if not self.stopper.clean_break:
                self.counter.pop("XFormInstance.id")
        log.info("finished migrating %s previously skipped forms", migrated)
Beispiel #51
0
    def handle(self, filepath, archiving_user, **options):
        """Archive every form whose id is in the first column of a csv file.

        ``filepath`` is the csv file to read and ``archiving_user`` is passed
        to ``archive`` as ``user_id``. Problems opening or reading the file
        are printed and end the command; a failure on one form is printed
        and the remaining forms are still processed.
        """
        try:
            form_file = open(filepath)
        except Exception as e:
            print("there was an issue opening the file: %s" % e)
            return

        # The context manager closes the file on every exit path.
        with form_file:
            try:
                # Blank rows are ignored rather than aborting the whole run.
                form_ids = [row[0] for row in csv.reader(form_file) if row]
            except Exception as e:
                print("there was an issue reading the file %s" % e)
                return

        for xform_doc in iter_docs(XFormInstance.get_db(), form_ids):
            try:
                xform = XFormInstance.wrap(xform_doc)
                xform.archive(user_id=archiving_user)
                print("Archived form %s in domain %s" %
                      (xform._id, xform.domain))
            except Exception as e:
                # .get keeps the error report itself from raising when the
                # raw doc lacks one of these keys
                print("Issue archiving XFORM %s for domain %s: %s" %
                      (xform_doc.get('_id'), xform_doc.get('domain'), e))
Beispiel #52
0
 def _process_missing_forms(self):
     """Migrate forms that a previous migration missed.

     Docs that cannot be wrapped are logged and skipped. A form counts as
     processed unless its doc type is in ``UNPROCESSED_DOC_TYPES``. Each
     migrated form is marked as no longer missing in the state db.
     """
     migrated = 0
     with self.counter('missing_forms', 'XFormInstance.id') as add_form:
         missing = _iter_missing_forms(self.statedb, self.stopper)
         for doc_type, doc in missing:
             try:
                 form = XFormInstance.wrap(doc)
             except Exception:
                 log.exception("Error wrapping form %s", doc)
                 continue
             is_processed = doc_type not in UNPROCESSED_DOC_TYPES
             case_ids = get_case_ids(form)
             self._migrate_form(form, case_ids, form_is_processed=is_processed)
             self.statedb.doc_not_missing(doc_type, form.form_id)
             add_form()
             migrated += 1
             if not migrated % 100:
                 log.info("migrated %s previously missed forms",
                          migrated)
     log.info("finished migrating %s previously missed forms", migrated)
Beispiel #53
0
def deidentify_form(doctransform):
    """Replace configured properties of a form doc and its XML attachment.

    For a form whose xmlns is in ``FORM_CONFIG``, each configured property
    path is followed through both the form JSON and ``form.xml``; when the
    whole path exists, the value is replaced with the generator's output.
    Returns the updated ``doctransform``, or ``None`` when the xmlns is not
    configured.
    """
    assert (doctransform.doc["doc_type"] == "XFormInstance")
    form = XFormInstance.wrap(doctransform.doc)
    xml = doctransform.attachments.get("form.xml", "")
    if form.xmlns not in FORM_CONFIG:
        # if we didn't know how to deidentify it, we don't want
        # to return anything, to prevent potentially identified
        # data from sneaking in
        return None

    rootxml = etree.XML(xml)
    for proppath, generatorfunc in FORM_CONFIG[form.xmlns].items():
        parts = proppath.split("/")
        final_depth = len(parts) - 1
        node = form.form
        xmlnode = rootxml
        for depth, part in enumerate(parts):
            if part not in node:
                # no index to the property, so assume we don't
                # need to touch it
                break
            xml_index = "{%(ns)s}%(val)s" % {"ns": form.xmlns, "val": part}
            if depth == final_depth:
                # override prop on the last step
                replacement = str(generatorfunc())
                node[part] = replacement
                xmlnode.find(xml_index).text = replacement
            else:
                # or drill in
                node = node[part]
                # NOTE: currently will not work with repeated nodes
                xmlnode = xmlnode.find(xml_index)
    doctransform.doc = form._doc
    doctransform.attachments["form.xml"] = etree.tostring(rootxml)
    return doctransform
Beispiel #54
0
    def _process_skipped_forms(self, cached=False):
        """Migrate forms that a previous migration skipped.

        ``cached`` is forwarded to ``_iter_skipped_forms``; when it is
        truthy each migrated form is also marked as not missing in the
        state db. Docs that cannot be wrapped are logged and left out.
        Case diffs are not computed here.
        """
        migrated = 0
        with self.counter('skipped_forms', 'XFormInstance.id') as add_form:
            for doc_type, doc in _iter_skipped_forms(
                    self.statedb, self.stopper, cached):
                try:
                    form = XFormInstance.wrap(doc)
                except Exception:
                    log.exception("Error wrapping form %s", doc)
                    continue
                self._migrate_form_and_associated_models(form)
                if cached:
                    self.statedb.doc_not_missing(doc_type, form.form_id)
                add_form()
                migrated += 1
                if not migrated % 100:
                    log.info("migrated %s previously skipped forms",
                             migrated)
        log.info("finished migrating %s previously skipped forms", migrated)
Beispiel #55
0
def build_form_multimedia_zip(domain, xmlns, startdate, enddate, app_id, export_id, zip_name, download_id):
    """Build a zip of form multimedia attachments and expose it for download.

    Form ids come from the ``attachments/attachments`` view for
    ``[domain, app_id, xmlns]`` between ``startdate`` and ``enddate``. When
    ``export_id`` is given, only attachments whose question id matches a
    column of that export schema are included. The zip is written either to
    the shared transfer directory (and reused if it already exists) or to a
    temporary file. Progress is reported through
    ``DownloadBase.set_progress``.
    """
    def find_question_id(form, value):
        # Depth-first search for ``value``; returns the list of keys leading
        # to it, or None when it is not found.
        for k, v in form.iteritems():
            if isinstance(v, dict):
                ret = find_question_id(v, value)
                if ret:
                    return [k] + ret
            else:
                if v == value:
                    return [k]

        return None

    def filename(form_info, question_id, extension):
        # Case names, when present, are prepended to the entry name.
        fname = u"%s-%s-%s-%s%s"
        if form_info["cases"]:
            fname = u"-".join(form_info["cases"]) + u"-" + fname
        return fname % (form_info["name"], unidecode(question_id), form_info["user"], form_info["id"], extension)

    case_ids = set()

    def extract_form_info(form, properties=None, case_ids=case_ids):
        # Collect the metadata and attachment descriptions of one form and
        # add its case ids to the shared ``case_ids`` set.
        unknown_number = 0
        meta = form["form"].get("meta", dict())
        # get case ids
        case_blocks = extract_case_blocks(form)
        cases = {c["@case_id"] for c in case_blocks}
        case_ids |= cases

        form_info = {
            "form": form,
            "attachments": list(),
            "name": form["form"].get("@name", "unknown form"),
            "user": meta.get("username", "unknown_user"),
            "cases": cases,
            "id": form["_id"],
        }
        for k, v in form["_attachments"].iteritems():
            if v["content_type"] == "text/xml":
                continue
            try:
                question_id = unicode(u"-".join(find_question_id(form["form"], k)))
            except TypeError:
                # find_question_id returned None; use a placeholder id
                question_id = unicode(u"unknown" + unicode(unknown_number))
                unknown_number += 1

            if not properties or question_id in properties:
                extension = unicode(os.path.splitext(k)[1])
                form_info["attachments"].append(
                    {
                        "size": v["length"],
                        "name": k,
                        "question_id": question_id,
                        "extension": extension,
                        "timestamp": parse(form["received_on"]).timetuple(),
                    }
                )

        return form_info

    key = [domain, app_id, xmlns]
    form_ids = {
        f["id"]
        for f in XFormInstance.get_db().view(
            "attachments/attachments", start_key=key + [startdate], end_key=key + [enddate, {}], reduce=False
        )
    }

    properties = set()
    if export_id:
        schema = FormExportSchema.get(export_id)
        for table in schema.tables:
            # - in question id is replaced by . in excel exports
            properties |= {c.display.replace(".", "-") for c in table.columns}

    if not app_id:
        zip_name = "Unrelated Form"
    forms_info = list()
    for form in iter_docs(XFormInstance.get_db(), form_ids):
        if not zip_name:
            zip_name = unidecode(form["form"].get("@name", "unknown form"))
        forms_info.append(extract_form_info(form, properties))

    num_forms = len(forms_info)
    DownloadBase.set_progress(build_form_multimedia_zip, 0, num_forms)

    # get case names
    case_id_to_name = {c: c for c in case_ids}
    for case in iter_docs(CommCareCase.get_db(), case_ids):
        if case["name"]:
            case_id_to_name[case["_id"]] = case["name"]

    use_transfer = settings.SHARED_DRIVE_CONF.transfer_enabled
    if use_transfer:
        params = "_".join(map(str, [xmlns, startdate, enddate, export_id, num_forms]))
        fname = "{}-{}".format(app_id, hashlib.md5(params).hexdigest())
        fpath = os.path.join(settings.SHARED_DRIVE_CONF.transfer_dir, fname)
    else:
        # mkstemp returns an open OS-level descriptor; close it right away so
        # it is not leaked (the file is reopened by path below).
        fd, fpath = tempfile.mkstemp()
        os.close(fd)

    if not (os.path.isfile(fpath) and use_transfer):  # Don't rebuild the file if it is already there
        with open(fpath, "wb") as zfile:
            with zipfile.ZipFile(zfile, "w") as z:
                for form_number, form_info in enumerate(forms_info):
                    f = XFormInstance.wrap(form_info["form"])
                    form_info["cases"] = {case_id_to_name[case_id] for case_id in form_info["cases"]}
                    for a in form_info["attachments"]:
                        fname = filename(form_info, a["question_id"], a["extension"])
                        zi = zipfile.ZipInfo(fname, a["timestamp"])
                        z.writestr(zi, f.fetch_attachment(a["name"], stream=True).read(), zipfile.ZIP_STORED)
                    DownloadBase.set_progress(build_form_multimedia_zip, form_number + 1, num_forms)

    common_kwargs = dict(
        mimetype="application/zip",
        content_disposition='attachment; filename="{fname}.zip"'.format(fname=zip_name),
        download_id=download_id,
    )

    if use_transfer:
        expose_file_download(fpath, use_transfer=use_transfer, **common_kwargs)
    else:
        # the zip is binary data, so read it back in binary mode
        expose_cached_download(
            FileWrapper(open(fpath, "rb")),
            expiry=(1 * 60 * 60),
            file_extension=file_extention_from_filename(fpath),
            **common_kwargs
        )

    DownloadBase.set_progress(build_form_multimedia_zip, num_forms, num_forms)
Beispiel #56
0
 def _clear_docs(self):
     """Delete every form doc returned by the test export configuration."""
     form_db = XFormInstance.get_db()
     export_tag = [DOMAIN, "http://www.commcarehq.org/export/test"]
     config = ExportConfiguration(form_db, export_tag)
     for raw_form in config.get_docs():
         wrapped = XFormInstance.wrap(raw_form)
         wrapped.delete()
 def form_wrapper(row):
     """Wrap ``row["doc"]`` as an XFormInstance after dropping ``_attachments``."""
     raw_doc = row["doc"]
     # remove the attachments entry in place, if the doc has one
     raw_doc.pop("_attachments", None)
     wrapped = XFormInstance.wrap(raw_doc)
     return wrapped
Beispiel #58
0
def build_form_multimedia_zip(domain, xmlns, startdate, enddate, app_id, export_id, zip_name, download_id):
    """Zip the non-XML attachments of matching forms and expose the zip as a download.

    Form ids are read from the "attachments/attachments" view using the key
    ``[domain, app_id, xmlns]`` bounded by ``startdate`` and ``enddate``.
    Every attachment whose content type is not ``text/xml`` is written to a
    zip file, which is then handed to ``expose_file_download`` (when the
    shared-drive transfer is enabled) or ``expose_cached_download``.
    Progress is reported through ``DownloadBase.set_progress``.

    :param domain: first component of the view key.
    :param xmlns: third component of the view key.
    :param startdate: lower bound appended to the view start key.
    :param enddate: upper bound appended to the view end key.
    :param app_id: second component of the view key; when falsy the zip is
        named 'Unrelated Form'.
    :param export_id: when truthy, the FormExportSchema with this id is
        loaded and only attachments whose question id matches one of its
        column display names (with '.' replaced by '-') are included.
    :param zip_name: name used in the Content-Disposition header; when falsy
        it is taken from the '@name' of the first form that is iterated.
    :param download_id: passed through to the download-exposing helper.
    """

    def find_question_id(form, value):
        """Return the list of nested keys leading to ``value`` in ``form``, or None."""
        for k, v in form.iteritems():
            if isinstance(v, dict):
                ret = find_question_id(v, value)
                if ret:
                    return [k] + ret
            else:
                if v == value:
                    return [k]

        return None

    def filename(form_info, question_id, extension):
        """Build the in-zip file name for one attachment of ``form_info``."""
        fname = u"%s-%s-%s-%s%s"
        if form_info['cases']:
            # Prefix with the (already resolved) case names when there are any.
            fname = u'-'.join(form_info['cases']) + u'-' + fname
        return fname % (form_info['name'],
                        unidecode(question_id),
                        form_info['user'],
                        form_info['id'], extension)

    case_ids = set()

    # ``case_ids`` is bound as a default argument so that the in-place ``|=``
    # below updates the outer set; without the parameter the augmented
    # assignment would make the name local to the helper.
    def extract_form_info(form, properties=None, case_ids=case_ids):
        """Collect name, user, case ids and eligible attachments of one form doc."""
        unknown_number = 0
        meta = form['form'].get('meta', dict())
        # get case ids
        case_blocks = extract_case_blocks(form)
        cases = {c['@case_id'] for c in case_blocks}
        case_ids |= cases

        form_info = {
            'form': form,
            'attachments': list(),
            'name': form['form'].get('@name', 'unknown form'),
            'user': meta.get('username', 'unknown_user'),
            'cases': cases,
            'id': form['_id']
        }
        for k, v in form['_attachments'].iteritems():
            if v['content_type'] == 'text/xml':
                continue
            try:
                question_id = unicode(u'-'.join(find_question_id(form['form'], k)))
            except TypeError:
                # find_question_id returned None: the attachment name is not
                # the value of any question, so give it a numbered placeholder.
                question_id = unicode(u'unknown' + unicode(unknown_number))
                unknown_number += 1

            if not properties or question_id in properties:
                extension = unicode(os.path.splitext(k)[1])
                form_info['attachments'].append({
                    'size': v['length'],
                    'name': k,
                    'question_id': question_id,
                    'extension': extension,
                    'timestamp': parse(form['received_on']).timetuple(),
                })

        return form_info

    key = [domain, app_id, xmlns]
    form_ids = {f['id'] for f in XFormInstance.get_db().view("attachments/attachments",
                                                             start_key=key + [startdate],
                                                             end_key=key + [enddate, {}],
                                                             reduce=False)}

    properties = set()
    if export_id:
        schema = FormExportSchema.get(export_id)
        for table in schema.tables:
            # - in question id is replaced by . in excel exports
            properties |= {c.display.replace('.', '-') for c in table.columns}

    if not app_id:
        zip_name = 'Unrelated Form'
    forms_info = list()
    for form in iter_docs(XFormInstance.get_db(), form_ids):
        if not zip_name:
            zip_name = unidecode(form['form'].get('@name', 'unknown form'))
        forms_info.append(extract_form_info(form, properties))

    num_forms = len(forms_info)
    DownloadBase.set_progress(build_form_multimedia_zip, 0, num_forms)

    # get case names; fall back to the case id when a case has no name
    case_id_to_name = {c: c for c in case_ids}
    for case in iter_docs(CommCareCase.get_db(), case_ids):
        if case['name']:
            case_id_to_name[case['_id']] = case['name']

    use_transfer = settings.SHARED_DRIVE_CONF.transfer_enabled
    if use_transfer:
        # The file name is derived from the query parameters so that an
        # identical request can reuse a zip that is already on disk.
        params = '_'.join(map(str, [xmlns, startdate, enddate, export_id, num_forms]))
        fname = '{}-{}'.format(app_id, hashlib.md5(params).hexdigest())
        fpath = os.path.join(settings.SHARED_DRIVE_CONF.transfer_dir, fname)
    else:
        fd, fpath = tempfile.mkstemp()
        # mkstemp returns an already-open OS-level descriptor; the file is
        # reopened by path below, so close the descriptor to avoid leaking it.
        os.close(fd)

    if not (os.path.isfile(fpath) and use_transfer):  # Don't rebuild the file if it is already there
        with open(fpath, 'wb') as zfile:
            with zipfile.ZipFile(zfile, 'w') as z:
                for form_number, form_info in enumerate(forms_info):
                    f = XFormInstance.wrap(form_info['form'])
                    # Swap case ids for display names before building file names.
                    form_info['cases'] = {case_id_to_name[case_id] for case_id in form_info['cases']}
                    for a in form_info['attachments']:
                        fname = filename(form_info, a['question_id'], a['extension'])
                        zi = zipfile.ZipInfo(fname, a['timestamp'])
                        z.writestr(zi, f.fetch_attachment(a['name'], stream=True).read(), zipfile.ZIP_STORED)
                    DownloadBase.set_progress(build_form_multimedia_zip, form_number + 1, num_forms)

    common_kwargs = dict(
        mimetype='application/zip',
        content_disposition='attachment; filename="{fname}.zip"'.format(fname=zip_name),
        download_id=download_id,
    )

    if use_transfer:
        expose_file_download(
            fpath,
            use_transfer=use_transfer,
            **common_kwargs
        )
    else:
        expose_cached_download(
            # The zip is binary data, so it must be read back in binary mode.
            FileWrapper(open(fpath, 'rb')),
            expiry=(1 * 60 * 60),
            file_extension=file_extention_from_filename(fpath),
            **common_kwargs
        )

    DownloadBase.set_progress(build_form_multimedia_zip, num_forms, num_forms)
Beispiel #59
0
 def form_wrapper(row):
     """Wrap the document carried by a view row as an XFormInstance.

     The '_attachments' and 'external_blobs' entries, if present, are
     removed from the row's document (in place) before wrapping.
     """
     payload = row['doc']
     # Drop both entries in the original order; missing keys are ignored.
     for discarded_key in ('_attachments', 'external_blobs'):
         payload.pop(discarded_key, None)
     wrapped = XFormInstance.wrap(payload)
     return wrapped