def _try_to_process_form(self, wrapped_form, pool):
    """Spawn a migration greenlet for the form if the queue accepts it.

    The case ids referenced by ``wrapped_form`` are offered to
    ``self.queues`` together with the form. When ``try_obj`` returns a
    truthy value the form is handed to ``pool``; otherwise, if the queue
    reports itself full, this greenlet sleeps briefly.
    """
    touched_case_ids = get_case_ids_from_form(wrapped_form)
    if self.queues.try_obj(touched_case_ids, wrapped_form):
        pool.spawn(self._migrate_form_and_associated_models_async, wrapped_form)
        return

    if self.queues.full:
        # swap greenlets
        gevent.sleep(0.01)
def tag_forms_as_deleted_rebuild_associated_cases(user_id, domain, form_id_list, deletion_id,
                                                  deletion_date, deleted_cases=None):
    """
    Upon user deletion, mark associated forms as deleted and prep cases
    for a rebuild.
    - 2 saves/sec for cloudant slowness (rate_limit)

    Forms not yet marked deleted get the deleted doc-type suffix plus the
    deletion id/date and are bulk saved. Every case referenced by any of
    the fetched forms, minus ``deleted_cases``, is queued for a rebuild.
    Raises ``AssertionError`` if a fetched form belongs to another domain.
    """
    skipped_cases = set() if deleted_cases is None else deleted_cases

    touched_cases = set()
    newly_deleted = []
    for doc in get_docs(XFormInstance.get_db(), form_id_list):
        assert doc['domain'] == domain
        if not is_deleted(doc):
            doc['doc_type'] += DELETED_SUFFIX
            doc['-deletion_id'] = deletion_id
            doc['-deletion_date'] = deletion_date
            newly_deleted.append(doc)

        # Collected even for forms that were already deleted: a previous
        # run may have been killed before the rebuilds were queued.
        touched_cases.update(get_case_ids_from_form(doc))

    XFormInstance.get_db().bulk_save(newly_deleted)

    rebuild_detail = UserArchivedRebuild(user_id=user_id)
    for case_id in touched_cases - skipped_cases:
        _rebuild_case_with_retries.delay(domain, case_id, rebuild_detail)
def _archive_unarchive_form(form, user_id, archive):
    """Run the two SQL functions that archive or unarchive a form.

    First ``archive_unarchive_form`` is called for the form, then
    ``revoke_restore_case_transactions_for_form`` is called with the case
    ids referenced by the form. Both run on the same cursor.
    """
    from casexml.apps.case.xform import get_case_ids_from_form

    target_form_id = form.form_id
    touched_case_ids = list(get_case_ids_from_form(form))
    statements = (
        (
            'SELECT archive_unarchive_form(%s, %s, %s)',
            [target_form_id, user_id, archive],
        ),
        (
            'SELECT revoke_restore_case_transactions_for_form(%s, %s, %s)',
            [touched_case_ids, target_form_id, archive],
        ),
    )
    with get_cursor(XFormInstanceSQL) as cursor:
        for sql, params in statements:
            cursor.execute(sql, params)
def get_payload(self, repeat_record, form):
    """Return a JSON string with the form id, received-on date and case ids.

    ``repeat_record`` is accepted but not used by this implementation.
    """
    touched_case_ids = list(get_case_ids_from_form(form))
    payload = {
        'form_id': form.form_id,
        'received_on': json_format_datetime(form.received_on),
        'case_ids': touched_case_ids,
    }
    return json.dumps(payload)
def is_case_updated(self, submission, method="couch"):
    """Report which case ids referenced by a submission are missing from a store.

    ``method`` selects the lookup: ``"couch"`` queries the
    ``case/by_xform_id`` view, ``"es"`` searches the ``hqcases`` index.
    Any other value raises ``KeyError``.

    Returns ``(case_ids_in_form, missing_case_ids, has_missing)``: two
    lists followed by a bool.
    """
    # use the same case processing utilities the case code does
    def _lookup_couch(doc):
        rows = CommCareCase.get_db().view(
            'case/by_xform_id', key=doc['_id'], reduce=False
        ).all()
        return [row['id'] for row in rows]

    def _lookup_es(doc):
        # NOTE(review): "size" is 1, so at most one hit is requested even
        # when the form touches several cases - confirm this is intended.
        es_query = {
            "filter": {
                "and": [
                    {"term": {"xform_ids": doc['_id']}}
                ]
            },
            "from": 0,
            "size": 1
        }
        response = self.es['hqcases'].post('_search', data=es_query)
        if not response['hits']['hits']:
            return []
        return [hit['_source']['_id'] for hit in response['hits']['hits']]

    expected_ids = get_case_ids_from_form(submission)
    lookups = {
        "couch": _lookup_couch,
        "es": _lookup_es,
    }
    found_ids = set(lookups[method](submission))
    absent_ids = expected_ids - found_ids
    return list(expected_ids), list(absent_ids), bool(absent_ids)
def is_case_updated(self, submission, method="couch"):
    """Compare the case ids in a submission against those stored for it.

    The stored ids come from couch (``case/by_xform_id`` view) when
    ``method`` is ``"couch"`` and from the ``hqcases`` elasticsearch index
    when it is ``"es"``; an unknown ``method`` raises ``KeyError``.

    Returns a 3-tuple: list of case ids in the form, list of those not
    found in the store, and whether any are missing.
    """
    # use the same case processing utilities the case code does
    def _stored_in_couch(doc):
        view_rows = CommCareCase.get_db().view(
            'case/by_xform_id', key=doc['_id'], reduce=False
        ).all()
        return [view_row['id'] for view_row in view_rows]

    def _stored_in_es(doc):
        # NOTE(review): only one hit is requested ("size": 1); forms that
        # touch several cases may need a larger size - confirm.
        term_filter = {"term": {"xform_ids": doc['_id']}}
        es_query = {
            "filter": {"and": [term_filter]},
            "from": 0,
            "size": 1,
        }
        response = self.es['hqcases'].post('_search', data=es_query)
        if response['hits']['hits']:
            return [hit['_source']['_id'] for hit in response['hits']['hits']]
        return []

    form_case_ids = get_case_ids_from_form(submission)
    fetchers = {
        "couch": _stored_in_couch,
        "es": _stored_in_es,
    }
    stored_case_ids = set(fetchers[method](submission))
    not_stored = form_case_ids - stored_case_ids
    return list(form_case_ids), list(not_stored), bool(not_stored)
def tag_forms_as_deleted_rebuild_associated_cases(user_id, domain, form_id_list, deletion_id,
                                                  deletion_date, deleted_cases=None):
    """
    Upon user deletion, mark associated forms as deleted and prep cases
    for a rebuild.
    - 2 saves/sec for cloudant slowness (rate_limit)

    Only forms in ``domain`` whose ``is_normal`` flag is truthy contribute
    case ids. All ids in ``form_id_list`` are soft deleted, and every
    collected case not in ``deleted_cases`` is queued for a rebuild.
    """
    already_deleted = deleted_cases or set()

    touched_cases = set()
    for form in FormAccessors(domain).iter_forms(form_id_list):
        if form.domain == domain and form.is_normal:
            # rebuild all cases anyways since we don't know if this has run
            # or not if the task was killed
            touched_cases.update(get_case_ids_from_form(form))

    # do this after getting case_id's since iter_forms won't return deleted forms
    FormAccessors(domain).soft_delete_forms(list(form_id_list), deletion_date, deletion_id)

    rebuild_detail = UserArchivedRebuild(user_id=user_id)
    for case_id in touched_cases - already_deleted:
        _rebuild_case_with_retries.delay(domain, case_id, rebuild_detail)
def set_archived_state(self, form, archive, user_id):
    """Archive or unarchive ``form`` in SQL and update its in-memory state.

    Calls ``archive_unarchive_form`` and then
    ``revoke_restore_case_transactions_for_form`` on a plproxy cursor, and
    finally sets ``form.state`` to the model's ``ARCHIVED`` or ``NORMAL``
    constant depending on ``archive``.
    """
    from casexml.apps.case.xform import get_case_ids_from_form

    target_form_id = form.form_id
    touched_case_ids = list(get_case_ids_from_form(form))
    with self.model.get_plproxy_cursor() as cursor:
        cursor.execute(
            'SELECT archive_unarchive_form(%s, %s, %s)',
            [target_form_id, user_id, archive],
        )
        cursor.execute(
            'SELECT revoke_restore_case_transactions_for_form(%s, %s, %s)',
            [touched_case_ids, target_form_id, archive],
        )

    if archive:
        form.state = self.model.ARCHIVED
    else:
        form.state = self.model.NORMAL
def rebuild_form_cases(sender, xform, *args, **kwargs):
    """Rebuild every case referenced by ``xform``.

    Each rebuild is tagged with a ``FormArchiveRebuild`` detail carrying
    the form's xmlns, id and archived flag. ``sender`` and the extra
    positional/keyword arguments are accepted but unused.
    """
    from casexml.apps.case.xform import get_case_ids_from_form
    from casexml.apps.case.cleanup import rebuild_case_from_forms

    domain = xform.domain
    touched_case_ids = get_case_ids_from_form(xform)
    rebuild_detail = FormArchiveRebuild(
        xmlns=xform.xmlns,
        form_id=xform.form_id,
        archived=xform.is_archived,
    )
    for touched_id in touched_case_ids:
        rebuild_case_from_forms(domain, touched_id, rebuild_detail)
def test_payload(self):
    """The first repeat record's payload holds the form's id, date and case ids."""
    submitted = self.post_xml(self.xform_xml, self.domain).xform
    first_record = self.repeat_records(self.domain).all()[0]
    actual = json.loads(first_record.get_payload())
    expected = {
        'received_on': json_format_datetime(submitted.received_on),
        'form_id': submitted.form_id,
        'case_ids': list(get_case_ids_from_form(submitted)),
    }
    self.assertEqual(actual, expected)
def test_basic(self):
    """A single top-level case block yields exactly its case id."""
    expected_id = uuid4().hex
    form_json = {
        'data': {
            'some': 'stuff'
        },
        'case': case_block(expected_id),
    }
    extracted = get_case_ids_from_form(FakeForm(form_json))
    self.assertEqual(extracted, {expected_id})
def references_case(form_id):
    """Return whether the form with ``form_id`` references ``case_id``.

    ``case_id`` is a free variable that must come from the surrounding
    scope. Returns True when the form cannot be found or when its XML is
    missing, i.e. the case is assumed to be referenced.
    """
    try:
        form = FormAccessorCouch.get_form(form_id)
    except XFormNotFound:
        # assume case is referenced if form not found
        return True

    try:
        referenced_ids = get_case_ids_from_form(form)
    except MissingFormXml:
        # assume case is referenced if form XML is missing
        return True
    return case_id in referenced_ids
def _get_case_ids(form_ids):
    """Yield ``(form_id, case_ids)`` pairs for the given form ids.

    Forms are fetched in chunks of 100. If a bulk fetch raises, the error
    is logged and the chunk is retried one form at a time; a form that
    still cannot be fetched yields ``(form_id, [error message])`` instead
    of case ids.
    """
    for raw_chunk in chunked(form_ids, 100):
        chunk_ids = list(raw_chunk)
        try:
            bulk_forms = XFormInstance.objects.get_forms_with_attachments_meta(chunk_ids)
        except Exception:
            logger.exception("Error fetching bulk forms")
        else:
            for bulk_form in bulk_forms:
                yield bulk_form.form_id, get_case_ids_from_form(bulk_form)
            continue

        # Bulk fetch failed: fall back to loading each form individually.
        for single_id in chunk_ids:
            try:
                single_form = XFormInstance.objects.get_form(single_id)
            except Exception as e:
                yield single_id, [f"Unable to get form: {e}"]
            else:
                yield single_form.form_id, get_case_ids_from_form(single_form)
def rebuild_form_cases(sender, xform, *args, **kwargs):
    """Rebuild every case referenced by ``xform`` or its stock transactions.

    Each rebuild is tagged with a ``FormArchiveRebuild`` detail carrying
    the form id and archived flag. ``sender`` and the extra arguments are
    accepted but unused.
    """
    from casexml.apps.case.xform import get_case_ids_from_form
    from casexml.apps.case.cleanup import rebuild_case_from_forms
    from corehq.form_processor.parsers.ledgers.form import get_case_ids_from_stock_transactions

    domain = xform.domain
    form_case_ids = get_case_ids_from_form(xform)
    ledger_case_ids = get_case_ids_from_stock_transactions(xform)
    rebuild_detail = FormArchiveRebuild(form_id=xform.form_id, archived=xform.is_archived)
    for touched_id in form_case_ids | ledger_case_ids:
        rebuild_case_from_forms(domain, touched_id, rebuild_detail)
def _is_safe_to_modify(form):
    """Return True when deleting/undeleting ``form`` would not change any case's state.

    ``domain``, ``modified_cases`` and ``is_deletion`` are free variables
    that must be supplied by the surrounding scope.

    Returns False when the form belongs to another domain, or when any
    case it touches (ignoring ``modified_cases``) has an ``is_deleted``
    flag different from ``is_deletion``.
    """
    if form.domain != domain:
        return False

    case_ids = get_case_ids_from_form(form)
    # all cases touched by the form and not already modified
    for case in CaseAccessors(domain).iter_cases(case_ids - modified_cases):
        if case.is_deleted != is_deletion:
            # we can't delete/undelete this form - this would change the state of `case`
            return False

    # Every remaining case already matches the target state. Without this
    # explicit return the function fell off the end and returned None, so a
    # safe form was reported as falsy, the same as an unsafe one.
    return True
def rebuild_form_cases(sender, xform, *args, **kwargs):
    """Rebuild every case referenced by ``xform`` or by its stock transactions.

    Case ids from ``StockTransaction`` rows whose report points at this
    form are merged into the set returned by ``get_case_ids_from_form``.
    ``sender`` and the extra arguments are accepted but unused.
    """
    from casexml.apps.case.xform import get_case_ids_from_form
    from casexml.apps.case.cleanup import rebuild_case_from_forms

    touched_case_ids = get_case_ids_from_form(xform)
    form_transactions = StockTransaction.objects.filter(report__form_id=xform.form_id)
    touched_case_ids.update(
        form_transactions.values_list('case_id', flat=True).distinct()
    )
    for touched_id in touched_case_ids:
        rebuild_case_from_forms(touched_id)
def get_case_ids(form):
    """Get a set of case ids referenced in form

    Gracefully handles missing XML, but will omit case ids referenced in
    ledger updates if XML is missing.
    """
    try:
        referenced_ids = get_case_ids_from_form(form)
    except MissingFormXml:
        # Fall back to the ids of the form's case updates.
        referenced_ids = set(case_update.id for case_update in get_case_updates(form))
    return referenced_ids
def _process_main_forms(self): last_received_on = datetime.min # process main forms (including cases and ledgers) changes = _get_main_form_iterator(self.domain).iter_all_changes() # form_id needs to be on self to release appropriately self.queues = PartiallyLockingQueue("form_id", max_size=10000) pool = Pool(15) for change in self._with_progress(['XFormInstance'], changes): self.log_debug('Processing doc: {}({})'.format( 'XFormInstance', change.id)) form = change.get_document() if form.get('problem', None): self.errors_with_normal_doc_type.append(change.id) continue wrapped_form = XFormInstance.wrap(form) form_received = wrapped_form.received_on assert last_received_on <= form_received last_received_on = form_received case_ids = get_case_ids_from_form(wrapped_form) if case_ids: # if this form involves a case check if we can process it if self.queues.try_obj(case_ids, wrapped_form): pool.spawn(self._migrate_form_and_associated_models_async, wrapped_form) elif self.queues.full: sleep(0.01) # swap greenlets else: # if not, just go ahead and process it pool.spawn(self._migrate_form_and_associated_models_async, wrapped_form) # regularly check if we can empty the queues while True: new_wrapped_form = self.queues.get_next() if not new_wrapped_form: break pool.spawn(self._migrate_form_and_associated_models_async, new_wrapped_form) # finish up the queues once all changes have been iterated through while self.queues.has_next(): wrapped_form = self.queues.get_next() if wrapped_form: pool.spawn(self._migrate_form_and_associated_models_async, wrapped_form) else: sleep(0.01) # swap greenlets remaining_items = self.queues.remaining_items + len(pool) if remaining_items % 10 == 0: self.log_info('Waiting on {} docs'.format(remaining_items)) while not pool.join(timeout=10): self.log_info('Waiting on {} docs'.format(len(pool)))
def _is_safe_to_delete(form):
    """Return True when every case touched by ``form`` is deleted.

    ``domain`` and ``deleted_cases`` are free variables from the
    surrounding scope; cases in ``deleted_cases`` are not looked up.
    Forms from another domain are never safe to delete.
    """
    if form.domain != domain:
        return False

    unchecked_case_ids = get_case_ids_from_form(form) - deleted_cases
    touched_cases = CaseAccessors(domain).iter_cases(unchecked_case_ids)
    # Safe only when all cases touched by this form are deleted.
    return all(touched.is_deleted for touched in touched_cases)
def test_blocks_in_list(self):
    """Case blocks held in a nested list are all discovered."""
    expected_ids = set(uuid4().hex for _ in range(3))
    block_list = [case_block(expected_id) for expected_id in expected_ids]
    form_json = {
        'data': {
            'parent': {
                'parent': {
                    'case': block_list
                }
            }
        }
    }
    extracted = get_case_ids_from_form(FakeForm(form_json))
    self.assertEqual(extracted, expected_ids)
def rebuild_form_cases(sender, xform, *args, **kwargs):
    """Rebuild every case referenced by ``xform`` or by its stock transactions.

    Case ids from ``StockTransaction`` rows whose report points at this
    form are merged into the form's own case ids. Each rebuild is tagged
    with a ``FormArchiveRebuild`` detail. ``sender`` and the extra
    arguments are accepted but unused.
    """
    from casexml.apps.case.xform import get_case_ids_from_form
    from casexml.apps.case.cleanup import rebuild_case_from_forms

    domain = xform.domain
    touched_case_ids = get_case_ids_from_form(xform)
    form_transactions = StockTransaction.objects.filter(report__form_id=xform.form_id)
    touched_case_ids.update(
        form_transactions.values_list('case_id', flat=True).distinct()
    )
    rebuild_detail = FormArchiveRebuild(form_id=xform.form_id, archived=xform.is_archived)
    for touched_id in touched_case_ids:
        rebuild_case_from_forms(domain, touched_id, rebuild_detail)
def _is_safe_to_modify(form):
    """Return True when deleting/undeleting ``form`` would not change any case's state.

    ``domain``, ``modified_cases`` and ``is_deletion`` are free variables
    from the surrounding scope. Forms from another domain are never safe.
    """
    if form.domain != domain:
        return False

    # all cases touched by the form and not already modified
    pending_case_ids = get_case_ids_from_form(form) - modified_cases
    # A case whose deleted flag differs from the requested operation means we
    # can't delete/undelete this form - it would change the state of that case.
    state_would_change = any(
        touched.is_deleted != is_deletion
        for touched in CaseAccessors(domain).iter_cases(pending_case_ids)
    )
    return not state_would_change
def tag_forms_as_deleted_rebuild_associated_cases(formlist, deletion_id, deleted_cases=None):
    """Mark every form in ``formlist`` as deleted and rebuild the cases they touch.

    Each form document gets the deleted doc-type suffix and the deletion
    id, the whole list is bulk saved, and every referenced case not in
    ``deleted_cases`` is rebuilt.
    """
    from casexml.apps.case.cleanup import rebuild_case

    skipped_cases = set() if deleted_cases is None else deleted_cases

    touched_cases = set()
    for doc in formlist:
        doc['doc_type'] += DELETED_SUFFIX
        doc['-deletion_id'] = deletion_id
        touched_cases.update(get_case_ids_from_form(doc))

    XFormInstance.get_db().bulk_save(formlist)

    for case_id in touched_cases - skipped_cases:
        rebuild_case(case_id)
def change_transform(self, doc_dict, include_props=True):
    """Return a cleaned deep copy of ``doc_dict``, or None when it has no domain.

    Invalid ``timeEnd``/``timeStart`` meta values become None, a dict
    ``appVersion`` is replaced by its ``#text`` entry, and case blocks are
    sanitised. The copy gains ``__retrieved_case_ids`` and, when
    ``include_props`` is truthy, ``__props_for_querying``.
    """
    if self.get_domain(doc_dict) is None:
        # If the domain is still None (especially when doing updates via the
        # _changes feed) skip and do nothing. Changes on the xform instance
        # do not necessarily add domain to it, so we need to wait until the
        # domain is at least populated before going through with indexing
        # this xform.
        return None

    doc_ret = copy.deepcopy(doc_dict)

    form_data = doc_ret['form']
    if 'meta' in form_data:
        meta = form_data['meta']
        for time_key in ('timeEnd', 'timeStart'):
            if not is_valid_date(meta.get(time_key, None)):
                meta[time_key] = None

        # Some docs have their @xmlns and #text here
        app_version = meta.get('appVersion')
        if isinstance(app_version, dict):
            meta['appVersion'] = app_version.get('#text')

    for case_dict in extract_case_blocks(doc_ret):
        for date_key in ('date_modified', '@date_modified'):
            if is_valid_date(case_dict.get(date_key, None)):
                continue
            # Empty strings are nulled; other invalid values are dropped.
            if case_dict.get(date_key) == '':
                case_dict[date_key] = None
            else:
                case_dict.pop(date_key, None)

        # Any mapped property that is present but not a dict becomes None.
        for object_key in ('index', 'attachment', 'create', 'update'):
            if object_key not in case_dict:
                continue
            if not isinstance(case_dict[object_key], dict):
                case_dict[object_key] = None

    doc_ret["__retrieved_case_ids"] = list(get_case_ids_from_form(doc_dict))
    if include_props:
        flat_form = flatten(doc_ret['form'])
        doc_ret["__props_for_querying"] = [
            "%s:%s" % (k, v) for k, v in flat_form.iteritems()
        ]
    return doc_ret
def rebuild_case_changes(form, rebuild_reason=None):
    """
    Hard-rebuilds every case touched by the form and returns how many
    cases that was.

    Cases for which ledger values exist are appended, together with the
    domain, to ``case_ids_with_ledgers.csv``. ``rebuild_reason`` is
    accepted but not used.
    """
    domain = form.domain
    touched_case_ids = get_case_ids_from_form(form)
    for touched_id in touched_case_ids:
        rebuild_detail = FormReprocessRebuild(form_id=form.form_id)
        FormProcessorInterface(domain).hard_rebuild_case(touched_id, rebuild_detail)
        if not LedgerAccessors(domain).get_ledger_values_for_case(touched_id):
            continue
        with open('case_ids_with_ledgers.csv', 'a+') as f:
            print("{}, {}".format(domain, touched_id), file=f)
    return len(touched_case_ids)
def iterate_forms_and_collect_case_ids(self):
    """Collect case ids from forms built with an app version below ``self.version_number``.

    Matching forms are printed, validated and recorded in
    ``self.filtered_xform_ids``; their case ids are merged into
    ``self.case_ids``. Other forms are reported as skipped. Case details
    are printed at the end if any case ids were collected.
    """
    print("Iterating Through %s XForms and Collecting Case Ids" % len(self.xform_ids))
    for xform in self.forms_accessor.iter_forms(self.xform_ids):
        # Get app version by fetching app corresponding to xform build_id since xform.form
        # does not have updated app version unless form was updated for that version
        built_version = self.get_xform_build_version(xform)
        if not (built_version and built_version < self.version_number):
            print('skipping xform id: %s' % xform.form_id)
            continue

        _print_form_details(xform, self.xform_writer, built_version)
        self.ensure_valid_xform(xform)
        self.filtered_xform_ids.append(xform.form_id)
        self.case_ids = self.case_ids.union(get_case_ids_from_form(xform))

    if self.case_ids:
        self.print_case_details()
def test_blocks_in_repeat(self):
    """Case blocks nested inside repeat groups are all discovered."""
    expected_ids = set(uuid4().hex for _ in range(3))
    repeats = []
    for expected_id in expected_ids:
        repeats.append({
            'group': {
                'case': case_block(expected_id)
            }
        })
    form_json = {
        'data': {
            'parent': {
                'repeats': repeats
            }
        }
    }
    extracted = get_case_ids_from_form(FakeForm(form_json))
    self.assertEqual(extracted, expected_ids)
def iterate_forms_and_collect_case_ids(self):
    """Gather case ids from forms whose build version is older than ``self.version_number``.

    Each qualifying form is printed, validated and added to
    ``self.filtered_xform_ids``, and its case ids are unioned into
    ``self.case_ids``. Non-qualifying forms are reported as skipped. When
    any case ids were gathered, the case details are printed.
    """
    print("Iterating Through %s XForms and Collecting Case Ids" % len(self.xform_ids))
    for xform in self.forms_accessor.iter_forms(self.xform_ids):
        # Get app version by fetching app corresponding to xform build_id since xform.form
        # does not have updated app version unless form was updated for that version
        form_build_version = self.get_xform_build_version(xform)
        is_older_build = form_build_version and form_build_version < self.version_number
        if not is_older_build:
            print('skipping xform id: %s' % xform.form_id)
            continue

        _print_form_details(xform, self.xform_writer, form_build_version)
        self.ensure_valid_xform(xform)
        self.filtered_xform_ids.append(xform.form_id)
        form_case_ids = get_case_ids_from_form(xform)
        self.case_ids = self.case_ids.union(form_case_ids)

    if self.case_ids:
        self.print_case_details()
def transform_xform_for_elasticsearch(doc_dict, include_props=True):
    """
    Given an XFormInstance, return a copy that is ready to be sent to
    elasticsearch, or None, if the form should not be saved to
    elasticsearch (here: when it has no domain).

    The copy gains ``__retrieved_case_ids`` and, when ``include_props`` is
    truthy, ``__props_for_querying``.
    """
    if doc_dict.get('domain', None) is None:
        # if there is no domain don't bother processing it
        return None

    doc_ret = copy.deepcopy(doc_dict)

    form_data = doc_ret['form']
    if 'meta' in form_data:
        meta = form_data['meta']
        for time_key in ('timeEnd', 'timeStart'):
            if not is_valid_date(meta.get(time_key, None)):
                meta[time_key] = None

        # Some docs have their @xmlns and #text here
        app_version = meta.get('appVersion')
        if isinstance(app_version, dict):
            meta['appVersion'] = app_version.get('#text')

    for case_dict in extract_case_blocks(doc_ret):
        for date_key in ('date_modified', '@date_modified'):
            if is_valid_date(case_dict.get(date_key, None)):
                continue
            # Empty strings are nulled; other invalid values are dropped.
            if case_dict.get(date_key) == '':
                case_dict[date_key] = None
            else:
                case_dict.pop(date_key, None)

        # Any mapped property that is present but not a dict becomes None.
        for object_key in ('index', 'attachment', 'create', 'update'):
            if object_key not in case_dict:
                continue
            if not isinstance(case_dict[object_key], dict):
                case_dict[object_key] = None

    doc_ret["__retrieved_case_ids"] = list(get_case_ids_from_form(doc_dict))
    if include_props:
        flat_form = flatten(doc_ret['form'])
        doc_ret["__props_for_querying"] = [
            "%s:%s" % (k, v) for k, v in flat_form.iteritems()
        ]
    return doc_ret
def change_transform(self, doc_dict, include_props=True):
    """Produce a sanitised deep copy of ``doc_dict`` for indexing.

    Returns None when ``self.get_domain(doc_dict)`` is None. Otherwise the
    copy has invalid meta dates nulled, a dict ``appVersion`` flattened to
    its ``#text`` value, case blocks cleaned, and the
    ``__retrieved_case_ids`` key set (plus ``__props_for_querying`` when
    ``include_props`` is truthy).
    """
    if self.get_domain(doc_dict) is None:
        # If the domain is still None (especially when doing updates via the
        # _changes feed) skip and do nothing. Changes on the xform instance
        # do not necessarily add domain to it, so we need to wait until the
        # domain is at least populated before going through with indexing
        # this xform.
        return None

    doc_ret = copy.deepcopy(doc_dict)

    if 'meta' in doc_ret['form']:
        form_meta = doc_ret['form']['meta']
        if not is_valid_date(form_meta.get('timeEnd', None)):
            form_meta['timeEnd'] = None
        if not is_valid_date(form_meta.get('timeStart', None)):
            form_meta['timeStart'] = None

        # Some docs have their @xmlns and #text here
        if isinstance(form_meta.get('appVersion'), dict):
            form_meta['appVersion'] = form_meta['appVersion'].get('#text')

    for block in extract_case_blocks(doc_ret):
        for modified_key in ('date_modified', '@date_modified'):
            if is_valid_date(block.get(modified_key, None)):
                continue
            # Empty strings are nulled; other invalid values are dropped.
            if block.get(modified_key) == '':
                block[modified_key] = None
            else:
                block.pop(modified_key, None)

        # Any mapped property that is present but not a dict becomes None.
        for mapped_key in ('index', 'attachment', 'create', 'update'):
            if mapped_key in block and not isinstance(block[mapped_key], dict):
                block[mapped_key] = None

    doc_ret["__retrieved_case_ids"] = list(get_case_ids_from_form(doc_dict))
    if not include_props:
        return doc_ret

    doc_ret["__props_for_querying"] = [
        "%s:%s" % (k, v) for k, v in flatten(doc_ret['form']).iteritems()
    ]
    return doc_ret
def undo_form_edits(forms):
    """Revert form edits and return the case ids to rebuild, keyed by domain.

    Deprecated forms get a new id, the normal state and cleared edit
    fields; other forms lose their deprecated-form link and take
    ``edited_on`` as their ``received_on``. Every form has a
    ``UUID_DATA_FIX`` operation tracked and is saved.
    """
    rebuild_by_domain = defaultdict(set)
    operation_date = datetime.utcnow()
    for form in forms:
        # undo corehq.form_processor.parsers.form.apply_deprecation
        if not form.is_deprecated:
            form.deprecated_form_id = None
            form.received_on = form.edited_on
            form.edited_on = None
        else:
            form.form_id = new_id_in_same_dbalias(form.form_id)
            form.state = XFormInstanceSQL.NORMAL
            form.orig_id = None
            form.edited_on = None
            form.date = operation_date

        fix_operation = XFormOperationSQL(
            user_id='system',
            operation=XFormOperationSQL.UUID_DATA_FIX,
        )
        form.track_create(fix_operation)
        rebuild_by_domain[form.domain].update(get_case_ids_from_form(form))
        form.save()
    return rebuild_by_domain
def tag_forms_as_deleted_rebuild_associated_cases(user_id, domain, form_id_list, deletion_id,
                                                  deletion_date, deleted_cases=None):
    """
    Upon user deletion, mark associated forms as deleted and prep cases
    for a rebuild.
    - 2 saves/sec for cloudant slowness (rate_limit)

    Forms outside ``domain`` are ignored when collecting case ids. All ids
    in ``form_id_list`` are soft deleted, and every collected case not in
    ``deleted_cases`` is queued for a rebuild.
    """
    already_deleted = deleted_cases or set()

    touched_cases = set()
    for form in FormAccessors(domain).iter_forms(form_id_list):
        if form.domain == domain:
            # rebuild all cases anyways since we don't know if this has run
            # or not if the task was killed
            touched_cases.update(get_case_ids_from_form(form))

    # do this after getting case_id's since iter_forms won't return deleted forms
    FormAccessors(domain).soft_delete_forms(list(form_id_list), deletion_date, deletion_id)

    rebuild_detail = UserArchivedRebuild(user_id=user_id)
    for case_id in touched_cases - already_deleted:
        _rebuild_case_with_retries.delay(domain, case_id, rebuild_detail)
def get_cases_from_form(domain, form):
    """Fetch the cases referenced by ``form``'s case blocks or stock transactions."""
    from corehq.form_processor.parsers.ledgers.form import get_case_ids_from_stock_transactions

    block_case_ids = get_case_ids_from_form(form)
    ledger_case_ids = get_case_ids_from_stock_transactions(form)
    combined_ids = block_case_ids | ledger_case_ids
    return CaseAccessors(domain).get_cases(list(combined_ids))
def rebuild_form_cases(sender, xform, *args, **kwargs):
    """Rebuild every case referenced by ``xform``.

    ``sender`` and the extra positional/keyword arguments are accepted but
    unused.
    """
    from casexml.apps.case.xform import get_case_ids_from_form
    from casexml.apps.case.cleanup import rebuild_case

    touched_case_ids = get_case_ids_from_form(xform)
    for touched_id in touched_case_ids:
        rebuild_case(touched_id)
def _get_cases_from_form(domain, form):
    """Fetch the cases referenced by ``form``'s case blocks or stock transactions."""
    from corehq.form_processor.parsers.ledgers.form import get_case_ids_from_stock_transactions

    combined_ids = get_case_ids_from_form(form)
    combined_ids = combined_ids | get_case_ids_from_stock_transactions(form)
    accessor = CaseAccessors(domain)
    return accessor.get_cases(list(combined_ids))
def transform_xform_for_elasticsearch(doc_dict):
    """
    Given an XFormInstance, return a deep copy that is ready to be sent to
    elasticsearch.

    When the form has a ``meta`` section, invalid ``timeEnd``/``timeStart``
    values are nulled, a dict ``appVersion`` is replaced by its ``#text``
    entry, and ``commcare_version``, ``app_build_version`` and
    ``geo_point`` are added to it. The copy also gains ``user_type``,
    ``inserted_at`` and ``__retrieved_case_ids``, and its case blocks are
    sanitised.
    """
    doc_ret = copy.deepcopy(doc_dict)

    if 'meta' in doc_ret['form']:
        meta = doc_ret['form']['meta']
        for time_key in ('timeEnd', 'timeStart'):
            if not is_valid_date(meta.get(time_key, None)):
                meta[time_key] = None

        # Some docs have their @xmlns and #text here
        app_version = meta.get('appVersion')
        if isinstance(app_version, dict):
            meta['appVersion'] = app_version.get('#text')

        app_version_info = get_app_version_info(
            doc_ret['domain'],
            doc_ret.get('build_id'),
            doc_ret.get('version'),
            meta,
        )
        meta['commcare_version'] = app_version_info.commcare_version
        meta['app_build_version'] = app_version_info.build_version

        # A missing or unparseable location leaves geo_point as None.
        try:
            geo_point = GeoPointProperty().wrap(meta['location'])
            meta['geo_point'] = geo_point.lat_lon
        except (KeyError, BadValueError):
            meta['geo_point'] = None

    # KeyError covers both a missing meta section and a missing userID.
    try:
        user_id = doc_ret['form']['meta']['userID']
    except KeyError:
        user_id = None
    doc_ret['user_type'] = get_user_type(user_id)
    doc_ret['inserted_at'] = datetime.datetime.utcnow().isoformat()

    for case_dict in extract_case_blocks(doc_ret):
        for date_key in ('date_modified', '@date_modified'):
            if is_valid_date(case_dict.get(date_key, None)):
                continue
            # Empty strings are nulled; other invalid values are dropped.
            if case_dict.get(date_key) == '':
                case_dict[date_key] = None
            else:
                case_dict.pop(date_key, None)

        # Any mapped property that is present but not a dict becomes None.
        for object_key in ('index', 'attachment', 'create', 'update'):
            if object_key not in case_dict:
                continue
            if not isinstance(case_dict[object_key], dict):
                case_dict[object_key] = None

    doc_ret["__retrieved_case_ids"] = list(get_case_ids_from_form(doc_dict))
    return doc_ret