def iter_ledger_diffs(case_ids, dd_count):
    """Yield a ``("stock state", ledger_id, diffs)`` tuple per ledger.

    Each SQL ledger value of the given cases is compared with the Couch
    ``StockState`` that shares its ledger reference; when no such state
    exists the Couch side is obtained from ``get_stock_state_json``.
    Couch states without any SQL counterpart are then diffed against an
    empty document.

    :param case_ids: ids of the cases whose ledgers are compared.
    :param dd_count: callable invoked with a metric name for every
        counted event.
    """
    couch_states = {}
    for state in StockState.objects.filter(case_id__in=case_ids):
        couch_states[state.ledger_reference] = state

    seen_refs = set()
    for sql_ledger in LedgerAccessorSQL.get_ledger_values_for_cases(case_ids):
        ref = sql_ledger.ledger_reference
        seen_refs.add(ref)
        couch_state = couch_states.get(ref)
        if couch_state is not None:
            couch_json = couch_state.to_json()
        else:
            # No Couch state for this reference: use the helper's JSON.
            couch_json = get_stock_state_json(sql_ledger)
            dd_count("commcare.couchsqlmigration.ledger.rebuild")
        dd_count("commcare.couchsqlmigration.ledger.diffed")
        diffs = filter_ledger_diffs(
            json_diff(couch_json, sql_ledger.to_json(), track_list_indices=False)
        )
        if diffs:
            dd_count("commcare.couchsqlmigration.ledger.has_diff")
        yield "stock state", ref.as_id(), diffs

    # Couch states that never showed up on the SQL side.
    for ref, couch_state in couch_states.items():
        if ref in seen_refs:
            continue
        couch_only = json_diff(couch_state.to_json(), {}, track_list_indices=False)
        dd_count("commcare.couchsqlmigration.ledger.diffed")
        dd_count("commcare.couchsqlmigration.ledger.has_diff")
        yield "stock state", ref.as_id(), filter_ledger_diffs(couch_only)
def check_domains(case_id, couch_json, sql_json):
    """Return diffs describing a domain mismatch, or ``[]`` if none.

    The Couch document's domain is checked against ``_diff_state.domain``
    first; the SQL document's domain is only inspected when the Couch one
    matches. A warning is logged for whichever side is wrong.

    :raises AssertionError: if a mismatch was detected but ``json_diff``
        produced no diffs.
    """
    couch_domain = couch_json["domain"]
    expected_domain = _diff_state.domain
    if couch_domain != expected_domain:
        log.warning("couch case %s has wrong domain: %s", case_id, couch_domain)
        diffs = json_diff({"domain": couch_domain}, {"domain": expected_domain})
    else:
        sql_domain = sql_json["domain"]
        if sql_domain == expected_domain:
            return []
        log.warning("sql case %s has wrong domain: %s", case_id, sql_domain)
        diffs = json_diff({"domain": expected_domain}, {"domain": sql_domain})
    assert diffs, "expected domain diff"
    return diffs
def test_case_name_missing(self):
    """A name present only on the SQL side is kept as a 'missing' diff."""
    couch_doc = {'doc_type': 'CommCareCase'}
    sql_doc = {'doc_type': 'CommCareCase', 'name': 'thing'}
    raw_diffs = json_diff(couch_doc, sql_doc, track_list_indices=False)
    remaining = filter_case_diffs(couch_doc, sql_doc, raw_diffs)
    expected = [FormJsonDiff('missing', ('name', ), MISSING, 'thing')]
    self.assertEqual(remaining, expected)
def test_single_case_indices_real_diff(self):
    """A changed ``relationship`` on a lone index survives filtering.

    The SQL index also lacks ``doc_type``; only the relationship change
    is expected in the filtered result.
    """
    couch_index = {
        "doc_type": "CommCareCaseIndex",
        "case_id": "fb698d47-4832-42b2-b28c-86d13adb45a2",
        "identifier": "parent",
        "referenced_id": "7ab03ccc-e5b7-4c8f-b88f-43ee3b0543a5",
        "referenced_type": "Patient",
        "relationship": "child"
    }
    couch_doc = {'doc_type': 'CommCareCase', 'indices': [couch_index]}

    sql_indices = deepcopy(couch_doc['indices'])
    del sql_indices[0]['doc_type']
    sql_indices[0]['relationship'] = 'extension'
    sql_doc = {'doc_type': 'CommCareCase', 'indices': sql_indices}

    expected = [
        FormJsonDiff(
            diff_type='diff',
            path=('indices', '[*]', 'relationship'),
            old_value='child',
            new_value='extension',
        )
    ]
    raw_diffs = json_diff(couch_doc, sql_doc, track_list_indices=False)
    self.assertEqual(expected, filter_case_diffs(couch_doc, sql_doc, raw_diffs))
def _filter_case_index_diffs(couch_case, sql_case, diffs):
    """Re-diff case indices after sorting both sides by identifier.

    Indices may be stored in a different order on each side. When the
    Couch case has more than one index and ``diffs`` contains entries
    under ``indices``, those entries are replaced with the diffs of the
    identifier-sorted lists (after partial-match filtering). In every
    other situation ``diffs`` is returned unchanged.
    """
    if 'indices' not in couch_case:
        return diffs

    other_diffs = [d for d in diffs if d.path[0] != 'indices']
    if len(other_diffs) == len(diffs):
        # Nothing under 'indices' to reconsider.
        return diffs

    couch_indices = couch_case['indices']
    sql_indices = sql_case['indices']
    if len(couch_indices) <= 1:
        return diffs

    def by_identifier(index):
        return index['identifier']

    sorted_couch = sorted(couch_indices, key=by_identifier)
    sorted_sql = sorted(sql_indices, key=by_identifier)

    index_diffs = []
    for index_diff in json_diff(sorted_couch, sorted_sql, track_list_indices=False):
        # convert the path back to what it should be
        fields = dict(
            index_diff._asdict(),
            path=('indices',) + tuple(index_diff.path),
        )
        index_diffs.append(FormJsonDiff(**fields))

    index_diffs = _filter_partial_matches(index_diffs, PARTIAL_DIFFS['CommCareCaseIndex'])
    return other_diffs + index_diffs
def _filter_case_attachment_diffs(couch_case, sql_case, diffs):
    """Replace raw ``case_attachments`` diffs with per-attachment diffs.

    Attachment JSON format is different between Couch and SQL, so when
    ``diffs`` contains entries under ``case_attachments`` they are dropped
    and each Couch attachment is compared with the SQL attachment of the
    same name, applying the ``case_attachment`` partial-match and
    renamed-field filters.

    :param couch_case: Couch case JSON (dict).
    :param sql_case: SQL case JSON (dict).
    :param diffs: diffs previously computed for the two documents.
    :returns: the non-attachment diffs followed by the recomputed
        attachment diffs.
    """
    remaining_diffs = [diff for diff in diffs if diff.path[0] != 'case_attachments']
    if len(remaining_diffs) == len(diffs):
        # No attachment diffs were present; nothing to recompute.
        return remaining_diffs

    couch_attachments = couch_case.get('case_attachments', {})
    sql_attachments = sql_case.get('case_attachments', {})
    # NOTE(review): only names present on the Couch side are visited;
    # attachments that exist only in SQL are not reported here -- confirm
    # that this is intended.
    for name, couch_att in couch_attachments.items():
        sql_att = sql_attachments.get(name, Ellipsis)
        # Ellipsis is a sentinel, so test identity rather than equality.
        if sql_att is Ellipsis:
            remaining_diffs.append(FormJsonDiff(
                diff_type='missing', path=('case_attachments', name),
                old_value=couch_att, new_value=sql_att
            ))
            continue

        att_diffs = json_diff(couch_att, sql_att)
        filtered = _filter_partial_matches(att_diffs, PARTIAL_DIFFS['case_attachment'])
        filtered = _filter_renamed_fields(filtered, couch_att, sql_att, 'case_attachment')
        for diff in filtered:
            # convert the path back to what it should be
            full_path = ('case_attachments', name) + tuple(diff.path)
            remaining_diffs.append(diff._replace(path=full_path))

    return remaining_diffs
def case_attachments(old_obj, new_obj, rule, original_diff):
    """Handle diffs under ``case_attachments``.

    Attachment JSON format is different between Couch and SQL. Returns
    ``False`` when ``original_diff`` is not an attachment diff. Otherwise
    the attachments are compared name by name; if any diffs remain they
    are raised via ``ReplaceDiff``, else ``True`` is returned.

    :raises ReplaceDiff: carrying the recomputed attachment diffs.
    """
    if original_diff.path[0] != "case_attachments":
        return False

    old_by_name = old_obj.get("case_attachments", {})
    new_by_name = new_obj.get("case_attachments", {})
    replacements = []
    for name in set(old_by_name) | set(new_by_name):
        prefix = ('case_attachments', name)
        old_att = old_by_name.get(name, MISSING)
        new_att = new_by_name.get(name, MISSING)
        if old_att is not MISSING and new_att is not MISSING:
            raw = json_diff(old_att, new_att)
            for kept in _filter_ignored(old_att, new_att, raw, ['case_attachment']):
                # convert the path back to what it should be
                replacements.append(kept._replace(path=prefix + kept.path))
        else:
            # The attachment exists on one side only.
            replacements.append(FormJsonDiff(
                diff_type='missing',
                path=prefix,
                old_value=old_att,
                new_value=new_att,
            ))

    if replacements:
        raise ReplaceDiff(replacements)
    return True
def test_case_indices_order(self):
    """Indices that differ only in order produce no filtered diffs.

    The raw diff of the reversed list is expected to contain six
    entries, all of which the filter must remove.
    """
    parent_index = {
        "case_id": "fb698d47-4832-42b2-b28c-86d13adb45a2",
        "identifier": "parent",
        "referenced_id": "7ab03ccc-e5b7-4c8f-b88f-43ee3b0543a5",
        "referenced_type": "Patient",
        "relationship": "child"
    }
    goal_index = {
        "case_id": "fb698d47-4832-42b2-b28c-86d13adb45a2",
        "identifier": "goal",
        "referenced_id": "c2e938d9-7406-4fdf-87ab-67d92296705e",
        "referenced_type": "careplan_goal",
        "relationship": "child"
    }
    couch_doc = {
        'doc_type': 'CommCareCase',
        'indices': [parent_index, goal_index],
    }
    sql_doc = {
        'doc_type': 'CommCareCase',
        'indices': list(reversed(couch_doc['indices'])),
    }

    raw_diffs = json_diff(couch_doc, sql_doc, track_list_indices=False)
    self.assertEqual(6, len(raw_diffs))
    self.assertEqual([], filter_case_diffs(couch_doc, sql_doc, raw_diffs))
def test_multiple_case_indices_real_diff(self):
    """A changed identifier among several indices survives filtering."""
    parent_index = {
        "case_id": "fb698d47-4832-42b2-b28c-86d13adb45a2",
        "identifier": "parent",
        "referenced_id": "7ab03ccc-e5b7-4c8f-b88f-43ee3b0543a5",
        "referenced_type": "Patient",
        "relationship": "child"
    }
    goal_index = {
        "case_id": "fb698d47-4832-42b2-b28c-86d13adb45a2",
        "identifier": "goal",
        "referenced_id": "c2e938d9-7406-4fdf-87ab-67d92296705e",
        "referenced_type": "careplan_goal",
        "relationship": "child"
    }
    couch_doc = {
        'doc_type': 'CommCareCase',
        'indices': [parent_index, goal_index],
    }

    # The SQL side is an independent copy whose first index is renamed.
    sql_indices = deepcopy(couch_doc['indices'])
    sql_indices[0]['identifier'] = 'mother'
    sql_doc = {'doc_type': 'CommCareCase', 'indices': sql_indices}

    expected = [
        FormJsonDiff(diff_type='diff',
                     path=('indices', '[*]', 'identifier'),
                     old_value='parent',
                     new_value='mother')
    ]
    raw_diffs = json_diff(couch_doc, sql_doc, track_list_indices=False)
    self.assertEqual(expected, filter_case_diffs(couch_doc, sql_doc, raw_diffs))
def test_multiple_case_indices_real_diff(self):
    """Renaming one of two indices on the SQL side is a reported diff."""
    couch_indices = [
        {
            "case_id": "fb698d47-4832-42b2-b28c-86d13adb45a2",
            "identifier": "parent",
            "referenced_id": "7ab03ccc-e5b7-4c8f-b88f-43ee3b0543a5",
            "referenced_type": "Patient",
            "relationship": "child"
        },
        {
            "case_id": "fb698d47-4832-42b2-b28c-86d13adb45a2",
            "identifier": "goal",
            "referenced_id": "c2e938d9-7406-4fdf-87ab-67d92296705e",
            "referenced_type": "careplan_goal",
            "relationship": "child"
        },
    ]
    couch_doc = {'doc_type': 'CommCareCase', 'indices': couch_indices}

    sql_doc = {'doc_type': 'CommCareCase', 'indices': deepcopy(couch_doc['indices'])}
    sql_doc['indices'][0]['identifier'] = 'mother'

    identifier_diff = FormJsonDiff(
        diff_type='diff', path=('indices', '[*]', 'identifier'),
        old_value=u'parent', new_value=u'mother')

    raw_diffs = json_diff(couch_doc, sql_doc, track_list_indices=False)
    remaining = filter_case_diffs(couch_doc, sql_doc, raw_diffs)
    self.assertEqual([identifier_diff], remaining)
def test_case_name_diff(self):
    """Differing case names are kept as a plain 'diff' entry."""
    couch_doc = {'doc_type': 'CommCareCase', 'name': 'shorter'}
    sql_doc = {'doc_type': 'CommCareCase', 'name': 'short'}
    raw_diffs = json_diff(couch_doc, sql_doc, track_list_indices=False)
    remaining = filter_case_diffs(couch_doc, sql_doc, raw_diffs)
    expected = [FormJsonDiff('diff', ('name', ), 'shorter', 'short')]
    self.assertEqual(remaining, expected)
def test_single_case_indices_real_diff(self):
    """With one index, a relationship change is the only diff kept.

    The SQL index additionally drops ``doc_type``, which is not expected
    to appear in the filtered output.
    """
    only_index = {
        "doc_type": "CommCareCaseIndex",
        "case_id": "fb698d47-4832-42b2-b28c-86d13adb45a2",
        "identifier": "parent",
        "referenced_id": "7ab03ccc-e5b7-4c8f-b88f-43ee3b0543a5",
        "referenced_type": "Patient",
        "relationship": "child"
    }
    couch_doc = {'doc_type': 'CommCareCase', 'indices': [only_index]}

    sql_doc = {'doc_type': 'CommCareCase', 'indices': deepcopy(couch_doc['indices'])}
    sql_index = sql_doc['indices'][0]
    del sql_index['doc_type']
    sql_index['relationship'] = 'extension'

    relationship_diff = FormJsonDiff(
        diff_type='diff', path=('indices', '[*]', 'relationship'),
        old_value=u'child', new_value=u'extension')

    raw_diffs = json_diff(couch_doc, sql_doc, track_list_indices=False)
    remaining = filter_case_diffs(couch_doc, sql_doc, raw_diffs)
    self.assertEqual([relationship_diff], remaining)
def _diff_cases(self, couch_cases):
    """Diff Couch cases against their SQL versions and record the diffs.

    For every SQL case returned for the given ids the filtered diffs are
    computed; if any remain and the SQL case is not deleted, the Couch
    case is rebuilt and diffed again. Non-empty results are stored in
    ``self.diff_db``. Ledgers are diffed afterwards and the processed
    counter is advanced.

    :param couch_cases: mapping of case id to Couch case JSON.
    """
    from corehq.apps.tzmigration.timezonemigration import json_diff

    self.log_debug('Calculating case diffs for {} cases'.format(
        len(couch_cases)))
    case_ids = list(couch_cases)
    for sql_case in CaseAccessorSQL.get_cases(case_ids):
        couch_case = couch_cases[sql_case.case_id]
        sql_json = sql_case.to_json()
        diffs = filter_case_diffs(
            couch_case,
            sql_json,
            json_diff(couch_case, sql_json, track_list_indices=False),
            self.forms_that_touch_cases_without_actions,
        )
        if not diffs:
            continue
        if not sql_case.is_deleted:
            # Rebuilding the Couch case may make the diffs go away.
            couch_case, diffs = self._rebuild_couch_case_and_re_diff(
                couch_case, sql_json)
            if not diffs:
                continue
        self.diff_db.add_diffs(couch_case['doc_type'], sql_case.case_id, diffs)

    self._diff_ledgers(case_ids)
    self.processed_docs += len(case_ids)
    self._log_case_diff_count(throttled=True)
def _diff_cases(self, couch_cases):
    """Compute and store diffs between Couch cases and their SQL twins.

    :param couch_cases: mapping of case id to Couch case JSON; the SQL
        cases are fetched for its keys.

    Diffs that survive filtering trigger a rebuild-and-re-diff of the
    Couch case unless the SQL case is deleted. Whatever diffs remain are
    added to ``self.diff_db``; ledgers are diffed once all cases are done.
    """
    from corehq.apps.tzmigration.timezonemigration import json_diff

    case_count = len(couch_cases)
    self.log_debug('Calculating case diffs for {} cases'.format(case_count))
    case_ids = list(couch_cases)
    ignored_forms = self.forms_that_touch_cases_without_actions
    for sql_case in CaseAccessorSQL.get_cases(case_ids):
        case_id = sql_case.case_id
        couch_json = couch_cases[case_id]
        sql_json = sql_case.to_json()
        raw_diffs = json_diff(couch_json, sql_json, track_list_indices=False)
        diffs = filter_case_diffs(couch_json, sql_json, raw_diffs, ignored_forms)
        needs_rebuild = bool(diffs) and not sql_case.is_deleted
        if needs_rebuild:
            couch_json, diffs = self._rebuild_couch_case_and_re_diff(couch_json, sql_json)
        if diffs:
            self.diff_db.add_diffs(couch_json['doc_type'], sql_case.case_id, diffs)

    self._diff_ledgers(case_ids)
    self.processed_docs += len(case_ids)
    self._log_case_diff_count(throttled=True)
def _test_form_diff_filter(self, couch_form, sql_form, diffs=None, expected=REAL_DIFFS):
    """Assert that filtering the form diffs yields ``expected``.

    When ``diffs`` is omitted they are computed with ``json_diff``,
    required to be non-empty, and extended with ``REAL_DIFFS`` before
    filtering. Explicitly supplied diffs are filtered as given.
    """
    if diffs is not None:
        candidates = diffs
    else:
        candidates = json_diff(couch_form, sql_form, track_list_indices=False)
        self.assertTrue(candidates)
        candidates += REAL_DIFFS
    self.assertEqual(filter_form_diffs(couch_form, sql_form, candidates), expected)
def test_weird_case_attributes(self):
    """Couch-only ``@``-prefixed attributes leave no filtered diffs.

    The Couch document carries ``@user_id`` and ``@date_modified`` and
    its ``modified_on`` differs from the SQL value; the filter is
    expected to remove every resulting diff.
    """
    couch_doc = {
        "case_id": "eca7a8",
        "actions": [{"action_type": "create", "user_id": "person-2"}],
        "doc_type": "CommCareCase",
        "@user_id": "person-1",
        "user_id": "person-2",
        "@date_modified": "2016-02-12",
        "modified_on": "2016-02-12T00:00:00.000000Z",
    }
    sql_doc = {
        "case_id": "eca7a8",
        "actions": [{"action_type": "create", "user_id": "person-2"}],
        "doc_type": "CommCareCase",
        "user_id": "person-2",
        "modified_on": "2016-02-12T10:00:00.000000Z",
    }
    raw_diffs = json_diff(couch_doc, sql_doc, track_list_indices=False)
    remaining = filter_case_diffs(couch_doc, sql_doc, raw_diffs)
    self.assertEqual(remaining, [])
def test_non_user_owner_mapping_case_with_opened_and_user_diffs(self):
    """Blank Couch ``opened_by``/``user_id`` vs. SQL values stay as diffs."""
    couch_doc = {
        "case_id": "eca7a8",
        "actions": [{"action_type": "create", "user_id": "somebody"}],
        "doc_type": "CommCareCase",
        "opened_by": "",
        "user_id": "",
    }
    sql_doc = {
        "case_id": "eca7a8",
        "actions": [{"action_type": "create", "user_id": "somebody"}],
        "doc_type": "CommCareCase",
        "opened_by": "somebody",
        "user_id": "somebody",
    }
    raw_diffs = json_diff(couch_doc, sql_doc, track_list_indices=False)
    remaining = filter_case_diffs(couch_doc, sql_doc, raw_diffs)
    expected = [
        FormJsonDiff(diff_type='diff', path=('opened_by', ),
                     old_value='', new_value='somebody'),
        FormJsonDiff(diff_type='diff', path=('user_id', ),
                     old_value='', new_value='somebody'),
    ]
    self.assertEqual(remaining, expected)
def test_case_indices_order(self):
    """Reversing the index list yields six raw diffs and none filtered."""
    couch_indices = [
        {
            "case_id": "fb698d47-4832-42b2-b28c-86d13adb45a2",
            "identifier": "parent",
            "referenced_id": "7ab03ccc-e5b7-4c8f-b88f-43ee3b0543a5",
            "referenced_type": "Patient",
            "relationship": "child"
        },
        {
            "case_id": "fb698d47-4832-42b2-b28c-86d13adb45a2",
            "identifier": "goal",
            "referenced_id": "c2e938d9-7406-4fdf-87ab-67d92296705e",
            "referenced_type": "careplan_goal",
            "relationship": "child"
        },
    ]
    couch_doc = {'doc_type': 'CommCareCase', 'indices': couch_indices}
    # Same index dicts, opposite order.
    sql_doc = {'doc_type': 'CommCareCase', 'indices': couch_doc['indices'][::-1]}

    raw_diffs = json_diff(couch_doc, sql_doc, track_list_indices=False)
    self.assertEqual(6, len(raw_diffs))
    remaining = filter_case_diffs(couch_doc, sql_doc, raw_diffs)
    self.assertEqual([], remaining)
def test_filter_case_xform_id_diffs_good(self):
    """Reordered ``xform_ids`` are filtered out; ``REAL_DIFFS`` remain."""
    couch_doc = {'doc_type': 'CommCareCase', 'xform_ids': ['123', '456']}
    sql_doc = {'doc_type': 'CommCareCase', 'xform_ids': ['456', '123']}
    order_diffs = json_diff(couch_doc, sql_doc, track_list_indices=False)
    self.assertEqual(2, len(order_diffs))
    remaining = filter_case_diffs(couch_doc, sql_doc, order_diffs + REAL_DIFFS)
    self.assertEqual(remaining, REAL_DIFFS)
def _save_diffs(self, couch_form, sql_form):
    """Diff the two forms' JSON and store the filtered diffs.

    The result is recorded in ``self.diff_db`` under the Couch form's
    ``doc_type`` and ``form_id``.
    """
    from corehq.apps.tzmigration.timezonemigration import json_diff

    couch_json = couch_form.to_json()
    sql_json = sql_form.to_json()
    raw_diffs = json_diff(couch_json, sql_json, track_list_indices=False)
    doc_type = couch_form.doc_type
    form_id = couch_form.form_id
    remaining = filter_form_diffs(couch_json, sql_json, raw_diffs)
    self.diff_db.add_diffs(doc_type, form_id, remaining)
def save_form_diffs(self, couch_json, sql_json):
    """Store the filtered diffs of a form and bump the related metrics.

    The ``form.diffed`` counter is always incremented; ``form.has_diff``
    only when diffs remain after filtering. Both are tagged with
    ``self.domain``.
    """
    remaining = filter_form_diffs(
        couch_json,
        sql_json,
        json_diff(couch_json, sql_json, track_list_indices=False),
    )
    count_metric = partial(metrics_counter, tags={"domain": self.domain})
    count_metric("commcare.couchsqlmigration.form.diffed")
    self.add_diffs(couch_json["doc_type"], couch_json["_id"], remaining)
    if remaining:
        count_metric("commcare.couchsqlmigration.form.has_diff")
def test_case_obsolete_location_field(self):
    """A Couch-only ``location_`` list is diffed but then filtered out."""
    couch_doc = {'doc_type': 'CommCareCase', 'location_': ['abc', 'def']}
    sql_doc = {'doc_type': 'CommCareCase'}
    raw_diffs = json_diff(couch_doc, sql_doc, track_list_indices=False)
    self.assertTrue(raw_diffs)
    remaining = filter_case_diffs(couch_doc, sql_doc, raw_diffs)
    self.assertEqual(remaining, [])
def test_filter_missing_case_deletion_id(self):
    """A ``deletion_id`` of None present only in SQL is filtered out."""
    couch_doc = {'doc_type': 'CommCareCase-Deleted'}
    sql_doc = {'doc_type': 'CommCareCase-Deleted', 'deletion_id': None}
    raw_diffs = json_diff(couch_doc, sql_doc, track_list_indices=False)
    remaining = filter_case_diffs(couch_doc, sql_doc, raw_diffs)
    self.assertEqual(remaining, [])
def test_case_with_empty_text_node(self):
    """An empty ``#text`` property on the Couch case is filtered out."""
    couch_doc = {"doc_type": "CommCareCase", "#text": ""}
    sql_doc = {"doc_type": "CommCareCase"}
    raw_diffs = json_diff(couch_doc, sql_doc, track_list_indices=False)
    remaining = filter_case_diffs(couch_doc, sql_doc, raw_diffs)
    self.assertEqual(remaining, [])
def test_case_time_value_diff(self):
    """``11:49:00`` vs. ``11:49:00.000`` is not reported after filtering."""
    couch_doc = {"doc_type": "CommCareCase", "some_property": "11:49:00"}
    sql_doc = {"doc_type": "CommCareCase", "some_property": "11:49:00.000"}
    raw_diffs = json_diff(couch_doc, sql_doc, track_list_indices=False)
    remaining = filter_case_diffs(couch_doc, sql_doc, raw_diffs)
    self.assertEqual(remaining, [])
def test_filter_form_rename_fields_good(self):
    """``deprecated_date`` -> ``edited_on`` with equal values is filtered.

    The raw diff must contain exactly two entries; they are passed with
    ``REAL_DIFFS`` appended to the shared form-filter check.
    """
    couch_doc = {'doc_type': 'XFormDeprecated', 'deprecated_date': 'abc'}
    sql_doc = {'doc_type': 'XFormDeprecated', 'edited_on': 'abc'}
    rename_diffs = json_diff(couch_doc, sql_doc, track_list_indices=False)
    assert len(rename_diffs) == 2, rename_diffs
    self._test_form_diff_filter(couch_doc, sql_doc, rename_diffs + REAL_DIFFS)
def test_filter_case_deleted_on_in_sql(self):
    """A SQL-only ``deleted_on`` is filtered out; ``REAL_DIFFS`` remain."""
    couch_doc = {'doc_type': 'CommCareCase-Deleted'}
    sql_doc = {'doc_type': 'CommCareCase-Deleted', 'deleted_on': '123'}
    raw_diffs = json_diff(couch_doc, sql_doc, track_list_indices=False)
    self.assertTrue(raw_diffs)
    remaining = filter_case_diffs(couch_doc, sql_doc, raw_diffs + REAL_DIFFS)
    self.assertEqual(remaining, REAL_DIFFS)
def diff_case(sql_case, couch_case, statedb):
    """Return ``(couch_case, diffs)`` for a SQL case and its Couch JSON.

    If filtered diffs remain and the SQL case is not deleted, the Couch
    case is rebuilt and diffed again. Any exception raised by the rebuild
    is logged as a warning, and the pre-rebuild case and diffs are
    returned.
    """
    sql_json = sql_case.to_json()
    raw_diffs = json_diff(couch_case, sql_json, track_list_indices=False)
    diffs = filter_case_diffs(couch_case, sql_json, raw_diffs, statedb)
    if not diffs or sql_case.is_deleted:
        return couch_case, diffs

    try:
        couch_case, diffs = rebuild_couch_case_and_re_diff(
            couch_case, sql_json, statedb)
    except Exception as err:
        # Best effort: keep the original diffs if the rebuild fails.
        log.warning('Case {} rebuild -> {}: {}'.format(
            sql_case.case_id, type(err).__name__, err))
    return couch_case, diffs
def _rebuild_couch_case_and_re_diff(self, couch_case, sql_case_json):
    """Rebuild the Couch case without saving and diff it against SQL.

    :returns: ``(rebuilt_case_json, diffs)`` where ``diffs`` are the
        filtered diffs between the rebuilt case and ``sql_case_json``.
    """
    from corehq.form_processor.backends.couch.processor import FormProcessorCouch
    from corehq.apps.tzmigration.timezonemigration import json_diff

    rebuilt = FormProcessorCouch.hard_rebuild_case(
        self.domain, couch_case['_id'], None, save=False, lock=False)
    rebuilt_json = rebuilt.to_json()
    raw_diffs = json_diff(rebuilt_json, sql_case_json, track_list_indices=False)
    remaining = filter_case_diffs(
        rebuilt_json,
        sql_case_json,
        raw_diffs,
        self.forms_that_touch_cases_without_actions,
    )
    return rebuilt_json, remaining
def test_filter_modified_on(self):
    """``modified_on`` values differing only in sub-seconds are filtered."""
    couch_doc = {
        'doc_type': 'CommCareCase',
        'modified_on': '2015-03-23T14:36:53Z',
    }
    sql_doc = {
        'doc_type': 'CommCareCase',
        'modified_on': '2015-03-23T14:36:53.073000Z',
    }
    raw_diffs = json_diff(couch_doc, sql_doc)
    self.assertEqual(1, len(raw_diffs))
    remaining = filter_case_diffs(couch_doc, sql_doc, raw_diffs)
    self.assertEqual(remaining, [])
def test_filter_combo_fields(self):
    """``@date_modified`` vs. ``modified_on`` with extra precision is filtered.

    The two raw diffs produced by the differently named fields must both
    be removed.
    """
    couch_doc = {
        'doc_type': 'CommCareCase',
        '@date_modified': '2015-03-23T14:36:53Z',
    }
    sql_doc = {
        'doc_type': 'CommCareCase',
        'modified_on': '2015-03-23T14:36:53.073000Z',
    }
    raw_diffs = json_diff(couch_doc, sql_doc)
    self.assertEqual(2, len(raw_diffs))
    remaining = filter_case_diffs(couch_doc, sql_doc, raw_diffs)
    self.assertEqual(remaining, [])
def diff_ledgers(case_ids, statedb):
    """Diff the ledgers of the given cases and store the results.

    Each SQL ledger value is compared with the Couch ``StockState`` that
    has the same ledger reference, and the filtered diffs are added to
    ``statedb`` under the ``'stock state'`` kind.

    :param case_ids: ids of the cases whose ledgers are compared.
    :param statedb: object exposing ``add_diffs(kind, doc_id, diffs)``.
    """
    log.debug('Calculating ledger diffs for {} cases'.format(len(case_ids)))
    couch_state_map = {
        state.ledger_reference: state
        for state in StockState.objects.filter(case_id__in=case_ids)
    }
    for ledger_value in LedgerAccessorSQL.get_ledger_values_for_cases(case_ids):
        ref = ledger_value.ledger_reference
        couch_state = couch_state_map.get(ref)
        # A ledger may exist in SQL without a Couch stock state. Calling
        # ``to_json()`` on None would raise AttributeError, so diff against
        # an empty document and let the whole ledger show up as a diff.
        couch_json = couch_state.to_json() if couch_state is not None else {}
        diffs = json_diff(couch_json, ledger_value.to_json(), track_list_indices=False)
        statedb.add_diffs('stock state', ref.as_id(), filter_ledger_diffs(diffs))
def rebuild_couch_case_and_re_diff(couch_case, sql_case_json, statedb):
    """Rebuild the Couch case (unsaved, unlocked) and diff it against SQL.

    :returns: ``(rebuilt_case_json, diffs)`` with the filtered diffs.
    :raises AssertionError: if the two documents have different domains.
    """
    couch_domain = couch_case["domain"]
    sql_domain = sql_case_json["domain"]
    assert couch_domain == sql_domain, (couch_domain, sql_domain)

    rebuilt = FormProcessorCouch.hard_rebuild_case(
        couch_case["domain"], couch_case['_id'], None, save=False, lock=False)
    rebuilt_json = rebuilt.to_json()
    raw_diffs = json_diff(rebuilt_json, sql_case_json, track_list_indices=False)
    return rebuilt_json, filter_case_diffs(rebuilt_json, sql_case_json, raw_diffs, statedb)
def test_case_with_location_and_referrals(self):
    """Empty lists vs. the string ``'[]'`` are not reported as diffs."""
    couch_doc = {
        "doc_type": "CommCareCase",
        "location_": [],
        "referrals": [],
    }
    sql_doc = {
        "doc_type": "CommCareCase",
        "location_": '[]',
        "referrals": '[]',
    }
    raw_diffs = json_diff(couch_doc, sql_doc, track_list_indices=False)
    remaining = filter_case_diffs(couch_doc, sql_doc, raw_diffs)
    self.assertEqual(remaining, [])
def _diff_cases(self, couch_cases):
    """Store the filtered diffs of each Couch case and its SQL version.

    ``add_diffs`` is called for every SQL case that is found, whether or
    not any diffs remain; ledgers are diffed afterwards.

    :param couch_cases: mapping of case id to Couch case JSON.
    """
    from corehq.apps.tzmigration.timezonemigration import json_diff

    case_count = len(couch_cases)
    self.log_debug('Calculating case diffs for {} cases'.format(case_count))
    case_ids = list(couch_cases)
    for sql_case in CaseAccessorSQL.get_cases(case_ids):
        couch_json = couch_cases[sql_case.case_id]
        sql_json = sql_case.to_json()
        raw_diffs = json_diff(couch_json, sql_json, track_list_indices=False)
        doc_type = couch_json['doc_type']
        case_id = sql_case.case_id
        remaining = filter_case_diffs(
            couch_json, sql_json, raw_diffs,
            self.forms_that_touch_cases_without_actions)
        self.diff_db.add_diffs(doc_type, case_id, remaining)

    self._diff_ledgers(case_ids)
def test_filter_case_xform_id_diffs_good(self):
    """Reordered ``xform_ids`` collapse into a single ``list_order`` diff.

    The filtered result, compared as a set, must equal that diff plus
    ``REAL_DIFFS``.
    """
    couch_doc = {'doc_type': 'CommCareCase', 'xform_ids': ['123', '456']}
    sql_doc = {'doc_type': 'CommCareCase', 'xform_ids': ['456', '123']}

    order_diffs = json_diff(couch_doc, sql_doc, track_list_indices=False)
    self.assertEqual(2, len(order_diffs))

    remaining = filter_case_diffs(couch_doc, sql_doc, order_diffs + REAL_DIFFS)
    list_order_diff = FormJsonDiff(
        diff_type='list_order', path=('xform_ids', '[*]'),
        old_value=None, new_value=None)
    expected = [list_order_diff] + REAL_DIFFS
    self.assertEqual(set(remaining), set(expected))
def test_filter_case_user_id(self):
    """Couch-only ``@user_id`` / ``@case_id`` attributes are filtered out."""
    couch_doc = {
        'doc_type': 'CommCareCase',
        'user_id': u'e7ad965c70802884a7a67add763939e8',
        '@user_id': u'e7ad965c70802884a7a67add763939e8',
        '@case_id': u'5ac45838-da5b-49f5-b236-0675ff924e9f',
    }
    sql_doc = {
        'doc_type': 'CommCareCase',
        'user_id': u'e7ad965c70802884a7a67add763939e8',
        'case_id': u'5ac45838-da5b-49f5-b236-0675ff924e9f',
    }
    raw_diffs = json_diff(couch_doc, sql_doc, track_list_indices=False)
    remaining = filter_case_diffs(couch_doc, sql_doc, raw_diffs)
    self.assertEqual(remaining, [])
def _diff_ledgers(self, case_ids):
    """Diff the ledgers of the given cases and store the results.

    Each SQL ledger value is compared with the Couch ``StockState`` that
    has the same ledger reference; the filtered diffs are added to
    ``self.diff_db`` under the ``'stock state'`` kind.

    :param case_ids: ids of the cases whose ledgers are compared.
    """
    from corehq.apps.tzmigration.timezonemigration import json_diff
    from corehq.apps.commtrack.models import StockState

    couch_state_map = {
        state.ledger_reference: state
        for state in StockState.objects.filter(case_id__in=case_ids)
    }

    self.log_debug('Calculating ledger diffs for {} cases'.format(len(case_ids)))

    for ledger_value in LedgerAccessorSQL.get_ledger_values_for_cases(case_ids):
        ref = ledger_value.ledger_reference
        couch_state = couch_state_map.get(ref)
        # A ledger may exist in SQL without a Couch stock state. Calling
        # ``to_json()`` on None would raise AttributeError, so diff against
        # an empty document and let the whole ledger show up as a diff.
        couch_json = couch_state.to_json() if couch_state is not None else {}
        diffs = json_diff(couch_json, ledger_value.to_json(), track_list_indices=False)
        self.diff_db.add_diffs(
            'stock state', ref.as_id(),
            filter_ledger_diffs(diffs)
        )
def test_filter_form_rename_fields_bad(self):
    """A renamed field whose values differ becomes one 'complex' diff."""
    couch_doc = {'doc_type': 'XFormDeprecated', 'deprecated_date': 'abc'}
    sql_doc = {'doc_type': 'XFormDeprecated', 'edited_on': '123'}
    raw_diffs = json_diff(couch_doc, sql_doc, track_list_indices=False)
    expected = [
        FormJsonDiff(
            diff_type='complex',
            path=('deprecated_date', 'edited_on'),
            old_value='abc',
            new_value='123',
        )
    ]
    self._test_form_diff_filter(couch_doc, sql_doc, raw_diffs, expected)
def case_index_order(old_obj, new_obj, rule, diff):
    """Handle ``indices`` diffs caused only by differing index order.

    Case indices may be stored in a different order in Couch and SQL.
    When ``diff`` lies under ``indices`` and the old object has at least
    two indices, both index lists are sorted by ``identifier`` and diffed
    again.

    :param old_obj: old (Couch) case JSON.
    :param new_obj: new (SQL) case JSON.
    :param rule: not used by this function.
    :param diff: the diff being examined.
    :returns: ``False`` if the diff is not handled here, ``True`` if the
        sorted index lists are identical.
    :raises ReplaceDiff: carrying the diffs of the sorted index lists when
        any remain.
    """
    if diff.path[0] != "indices" or len(old_obj['indices']) < 2:
        return False

    def key(index):
        return index['identifier']

    diffs = []
    old_indices = sorted(old_obj['indices'], key=key)
    new_indices = sorted(new_obj['indices'], key=key)
    # Use a distinct loop variable so the ``diff`` parameter is not shadowed.
    for index_diff in json_diff(old_indices, new_indices, track_list_indices=False):
        # convert the path back to what it should be
        diffs.append(index_diff._replace(path=('indices',) + index_diff.path))
    if diffs:
        raise ReplaceDiff(diffs)
    return True
def test_filter_usercase_diff(self):
    """``hq_user_id`` in Couch matching SQL ``external_id`` is filtered.

    Only ``REAL_DIFFS`` are expected to remain.
    """
    couch_doc = {
        'doc_type': 'CommCareCase',
        'hq_user_id': '123',
        'external_id': '',
        'type': 'commcare-user',
    }
    sql_doc = {
        'doc_type': 'CommCareCase',
        'external_id': '123',
        'type': 'commcare-user',
    }
    raw_diffs = json_diff(couch_doc, sql_doc)
    self.assertEqual(2, len(raw_diffs))
    remaining = filter_case_diffs(couch_doc, sql_doc, raw_diffs + REAL_DIFFS)
    self.assertEqual(remaining, REAL_DIFFS)
def test_filter_case_xform_id_diffs_bad(self):
    """Different ``xform_ids`` contents become a ``set_mismatch`` diff.

    The computed diff is only checked for length; a hand-written diff is
    what gets passed to the filter together with ``REAL_DIFFS``.
    """
    couch_doc = {'doc_type': 'CommCareCase', 'xform_ids': ['123', '456']}
    sql_doc = {'doc_type': 'CommCareCase', 'xform_ids': ['123', 'abc']}

    computed = json_diff(couch_doc, sql_doc, track_list_indices=False)
    self.assertEqual(1, len(computed))

    id_diffs = [
        FormJsonDiff(diff_type=u'diff', path=('xform_ids', '[*]'),
                     old_value=u'456', new_value=u'abc')
    ]
    mismatch = FormJsonDiff(diff_type='set_mismatch', path=('xform_ids', '[*]'),
                            old_value='456', new_value='abc')
    expected = REAL_DIFFS + [mismatch]

    remaining = filter_case_diffs(couch_doc, sql_doc, id_diffs + REAL_DIFFS)
    self.assertEqual(remaining, expected)
def test_filter_usercase_diff_bad(self):
    """``hq_user_id`` without a SQL ``external_id`` is a 'complex' diff."""
    couch_doc = {
        'doc_type': 'CommCareCase',
        'hq_user_id': '123',
        'type': 'commcare-user',
    }
    sql_doc = {
        'doc_type': 'CommCareCase',
        'type': 'commcare-user',
    }
    raw_diffs = json_diff(couch_doc, sql_doc)
    self.assertEqual(1, len(raw_diffs))
    remaining = filter_case_diffs(couch_doc, sql_doc, raw_diffs)
    expected = [
        FormJsonDiff(
            diff_type='complex',
            path=('hq_user_id', 'external_id'),
            old_value='123',
            new_value=Ellipsis,
        )
    ]
    self.assertEqual(remaining, expected)
def test_case_attachments(self):
    """Only unexpected attachment differences remain after filtering.

    Expected leftovers: the Couch-only ``unexpected`` key and the
    differing ``properties`` value.
    """
    couch_attachment = {
        'doc_type': 'ignored',
        'attachment_properties': 'ignored',
        'attachment_from': 'ignored',
        'attachment_src': 'ignored',
        'server_mime': 'ignored',
        'attachment_name': 'ignored',
        'server_md5': 'ignored',
        'identifier': 'xyz',
        'attachment_size': 123,
        'unexpected': 'value',
        'properties': 'value',
    }
    sql_attachment = {
        'name': 'xyz',
        'content_length': 123,
        'content_type': 'ignored-sql',
        # for testing only, not an expected transformation
        'properties': 'eulav',
    }
    couch_doc = {
        'doc_type': 'CommCareCase',
        'case_attachments': {'xyz': couch_attachment},
    }
    sql_doc = {
        'doc_type': 'CommCareCase',
        'case_attachments': {'xyz': sql_attachment},
    }

    raw_diffs = json_diff(couch_doc, sql_doc, track_list_indices=False)
    remaining = filter_case_diffs(couch_doc, sql_doc, raw_diffs)
    expected = [
        FormJsonDiff('missing', ('case_attachments', 'xyz', 'unexpected'), 'value', MISSING),
        FormJsonDiff('diff', ('case_attachments', 'xyz', 'properties'), 'value', 'eulav'),
    ]
    self.assertEqual(remaining, expected)
def case_attachments(old_obj, new_obj, rule, original_diff):
    """Recompute diffs for ``case_attachments`` attachment by attachment.

    Attachment JSON format is different between Couch and SQL. A diff
    outside ``case_attachments`` is not handled (returns ``False``). For
    attachment diffs, every attachment name from either side is compared;
    remaining diffs are raised through ``ReplaceDiff``, and ``True`` is
    returned when none remain.

    :raises ReplaceDiff: carrying the recomputed attachment diffs.
    """
    if original_diff.path[0] != "case_attachments":
        return False

    old_attachments = old_obj.get("case_attachments", {})
    new_attachments = new_obj.get("case_attachments", {})
    names = set(old_attachments) | set(new_attachments)

    collected = []
    for name in names:
        old_att = old_attachments.get(name, MISSING)
        new_att = new_attachments.get(name, MISSING)
        one_side_only = old_att is MISSING or new_att is MISSING
        if one_side_only:
            missing_diff = FormJsonDiff(
                diff_type='missing',
                path=('case_attachments', name),
                old_value=old_att,
                new_value=new_att,
            )
            collected.append(missing_diff)
            continue

        att_diffs = json_diff(old_att, new_att)
        for att_diff in _filter_ignored(old_att, new_att, att_diffs, ['case_attachment']):
            # convert the path back to what it should be
            full_path = ('case_attachments', name) + att_diff.path
            collected.append(att_diff._replace(path=full_path))

    if collected:
        raise ReplaceDiff(collected)
    return True