def _filter_xform_id_diffs(couch_case, sql_case, diffs):
    """Collapse per-element ``xform_ids`` diffs into a single summary diff.

    Some couch docs have the xform IDs out of order, so assume that if both
    docs contain the same set of xform IDs then they are the same.

    :param couch_case: couch case JSON; ``couch_case['xform_ids']`` is read
        when at least one ``('xform_ids', '[*]')`` diff is present.
    :param sql_case: SQL case JSON; ``sql_case['xform_ids']`` is read likewise.
    :param diffs: list of diffs; it is not modified.
    :returns: ``diffs`` itself when it holds no ``('xform_ids', '[*]')`` diff,
        otherwise a new list with those diffs removed and one summary diff
        appended: ``set_mismatch`` (comma-joined couch-only / SQL-only IDs)
        when the ID sets differ, ``list_order`` when only the order differs.
    """
    xform_ids_path = ('xform_ids', '[*]')
    # Filter on the path rather than on set membership: membership hashes
    # every diff and fails for diffs that carry unhashable values.
    remaining_diffs = [diff for diff in diffs if diff.path != xform_ids_path]
    if len(remaining_diffs) == len(diffs):
        return diffs

    ids_in_couch = set(couch_case['xform_ids'])
    ids_in_sql = set(sql_case['xform_ids'])
    if ids_in_couch != ids_in_sql:
        # Sort so the joined values do not depend on set iteration order.
        summary = FormJsonDiff(
            diff_type='set_mismatch',
            path=xform_ids_path,
            old_value=','.join(sorted(ids_in_couch - ids_in_sql)),
            new_value=','.join(sorted(ids_in_sql - ids_in_couch)),
        )
    else:
        summary = FormJsonDiff(
            diff_type='list_order',
            path=xform_ids_path,
            old_value=None,
            new_value=None,
        )
    # Appended after filtering so the summary is kept even when an equal
    # diff was already present in the input.
    remaining_diffs.append(summary)
    return remaining_diffs
def test_non_user_owner_mapping_case_with_opened_and_user_diffs(self):
    """Blank couch ``opened_by``/``user_id`` vs. 'somebody' in SQL stay as diffs."""

    def build_case(owner):
        # Both cases are identical apart from the two owner fields.
        return {
            "case_id": "eca7a8",
            "actions": [{"action_type": "create", "user_id": "somebody"}],
            "doc_type": "CommCareCase",
            "opened_by": owner,
            "user_id": owner,
        }

    couch_case = build_case("")
    sql_case = build_case("somebody")
    raw_diffs = json_diff(couch_case, sql_case, track_list_indices=False)
    filtered = filter_case_diffs(couch_case, sql_case, raw_diffs)
    expected = [
        FormJsonDiff(diff_type='diff', path=(field,), old_value='', new_value='somebody')
        for field in ('opened_by', 'user_id')
    ]
    self.assertEqual(filtered, expected)
def _filter_case_attachment_diffs(diffs):
    """Re-filter ``case_attachments`` diffs one attachment at a time.

    Diffs whose path starts with ``'case_attachments'`` are grouped by
    attachment name (``path[1]``), rewritten to single-element paths, run
    through the ``'case_attachment'`` partial-match and renamed-field filters,
    and whatever survives is added back under its full
    ``('case_attachments', name, field)`` path.

    :param diffs: list of diffs; it is not modified.
    :returns: ``diffs`` itself when it contains no attachment diffs, otherwise
        a new list holding the non-attachment diffs followed by the surviving
        attachment diffs.
    """
    # Local import: only this function needs it.
    from collections import OrderedDict

    attachment_diffs = [
        diff for diff in diffs if diff.path[0] == 'case_attachments'
    ]
    if not attachment_diffs:
        return diffs

    diffs = [diff for diff in diffs if diff.path[0] != 'case_attachments']

    # Group explicitly instead of using itertools.groupby, which only merges
    # *adjacent* items with equal keys. An ordered mapping keeps the groups in
    # first-seen order.
    diffs_by_name = OrderedDict()
    for diff in attachment_diffs:
        diffs_by_name.setdefault(diff.path[1], []).append(diff)

    for name, group in diffs_by_name.items():
        # The filters below receive paths relative to the attachment.
        normalized_diffs = [
            FormJsonDiff(
                diff_type=diff.diff_type,
                path=(diff.path[-1],),
                old_value=diff.old_value,
                new_value=diff.new_value,
            )
            for diff in group
        ]
        filtered = _filter_partial_matches(normalized_diffs, PARTIAL_DIFFS['case_attachment'])
        filtered = _filter_renamed_fields(filtered, 'case_attachment')
        if filtered:
            # Restore the full path for the diffs that were not filtered out.
            diffs.extend([
                FormJsonDiff(
                    diff_type=diff.diff_type,
                    path=(u'case_attachments', name, diff.path[-1]),
                    old_value=diff.old_value,
                    new_value=diff.new_value,
                )
                for diff in filtered
            ])
    return diffs
def test_filter_form_rename_fields(self):
    """A ``deprecated_date`` -> ``edited_on`` pair is dropped only when the values match."""

    def rename_pair(edited_on_value):
        # 'deprecated_date' missing on the new side, 'edited_on' missing on the old.
        return [
            FormJsonDiff(diff_type='missing', path=('deprecated_date',),
                         old_value='abc', new_value=Ellipsis),
            FormJsonDiff(diff_type='missing', path=('edited_on',),
                         old_value=Ellipsis, new_value=edited_on_value),
        ]

    matching_values = rename_pair('abc')
    differing_values = rename_pair('123')
    self._test_form_diff_filter(
        'XFormDeprecated',
        matching_values + differing_values + REAL_DIFFS,
        differing_values + REAL_DIFFS,
    )
def _filter_case_attachment_diffs(couch_case, sql_case, diffs):
    """Attachment JSON format is different between Couch and SQL.

    When ``diffs`` contains any diff under ``'case_attachments'``, those diffs
    are discarded and recomputed per attachment from the two case documents:
    an attachment present on only one side yields a ``missing`` diff, and one
    present on both sides is diffed with ``json_diff`` and passed through the
    ``'case_attachment'`` partial-match and renamed-field filters.

    :param couch_case: couch case JSON (``case_attachments`` is optional).
    :param sql_case: SQL case JSON (``case_attachments`` is optional).
    :param diffs: list of diffs; it is not modified.
    :returns: a new list with the non-attachment diffs followed by the
        recomputed attachment diffs.
    """
    remaining_diffs = [diff for diff in diffs if diff.path[0] != 'case_attachments']
    if len(remaining_diffs) == len(diffs):
        return remaining_diffs

    couch_attachments = couch_case.get('case_attachments', {})
    sql_attachments = sql_case.get('case_attachments', {})

    # Visit couch attachments first, then the ones that exist only in SQL,
    # so SQL-only attachments are reported too.
    names = list(couch_attachments)
    names.extend(name for name in sql_attachments if name not in couch_attachments)

    for name in names:
        couch_att = couch_attachments.get(name, Ellipsis)
        sql_att = sql_attachments.get(name, Ellipsis)
        if couch_att is Ellipsis or sql_att is Ellipsis:
            remaining_diffs.append(FormJsonDiff(
                diff_type='missing',
                path=('case_attachments', name),
                old_value=couch_att,
                new_value=sql_att,
            ))
            continue

        att_diffs = json_diff(couch_att, sql_att)
        filtered = _filter_partial_matches(att_diffs, PARTIAL_DIFFS['case_attachment'])
        filtered = _filter_renamed_fields(filtered, couch_att, sql_att, 'case_attachment')
        for diff in filtered:
            diff_dict = diff._asdict()
            # convert the path back to what it should be
            diff_dict['path'] = ('case_attachments', name) + tuple(diff.path)
            remaining_diffs.append(FormJsonDiff(**diff_dict))
    return remaining_diffs
def _get_partial_diffs(doc_type):
    """Build one full ``FormJsonDiff`` per ``PARTIAL_DIFFS[doc_type]`` entry.

    Fields a partial entry does not specify are taken from a placeholder diff
    (``diff_type='type'``, ``path=None``, ``old_value=0``, ``new_value=1``).
    """
    placeholder = FormJsonDiff(diff_type='type', path=None, old_value=0, new_value=1)
    base_fields = placeholder._asdict()
    partial_diffs = []
    for partial in PARTIAL_DIFFS[doc_type]:
        # The partial's own fields override the placeholder values.
        merged_fields = dict(base_fields, **partial)
        partial_diffs.append(FormJsonDiff(**merged_fields))
    return partial_diffs
def show_diffs(self):
    """Print every planning-db diff except a few that are skipped.

    Skipped are ``diff``-type entries whose old and new values are both
    datetime strings, and the two ``type`` diffs on ``external_id`` and
    ``closed_by`` that go from ``''`` to ``None``.
    """
    for record in self.planning_db.get_diffs():
        entry = record.json_diff
        if (entry.diff_type == 'diff'
                and is_datetime_string(entry.old_value)
                and is_datetime_string(entry.new_value)):
            continue
        skipped = (
            FormJsonDiff(diff_type='type', path=['external_id'], old_value='', new_value=None),
            FormJsonDiff(diff_type='type', path=['closed_by'], old_value='', new_value=None),
        )
        if entry in skipped:
            continue
        print('[{}] {}'.format(record.doc_id, entry))
def test_filter_ledger_diffs(self):
    """Ignored ledger diffs are removed and only ``REAL_DIFFS`` remain."""
    ignored_diffs = list(_make_ignored_diffs('LedgerValue'))
    ignored_diffs.append(FormJsonDiff(
        diff_type='diff',
        path=('last_modified',),
        old_value='2016-04-01T00:00:00.000000Z',
        new_value='2016-04-01T15:39:19.711333Z',
    ))
    ignored_diffs.append(FormJsonDiff(
        diff_type='type',
        path=('last_modified_form_id',),
        old_value=None,
        new_value='7ab03ccc-e5b7-4c8f-b88f-43ee3b0543a5',
    ))
    self.assertEqual(filter_ledger_diffs(ignored_diffs + REAL_DIFFS), REAL_DIFFS)
def test_filter_combo_fields(self):
    """A ``@date_modified`` / ``modified_on`` missing pair is filtered out."""
    couch_case = {'doc_type': 'CommCareCase'}
    couch_only_field = FormJsonDiff(
        diff_type='missing', path=('@date_modified',),
        old_value='2015-03-23T14:36:53Z', new_value=Ellipsis)
    sql_only_field = FormJsonDiff(
        diff_type='missing', path=('modified_on',),
        old_value=Ellipsis, new_value='2015-03-23T14:36:53.073000Z')
    all_diffs = [couch_only_field, sql_only_field] + REAL_DIFFS
    self.assertEqual(filter_case_diffs(couch_case, {}, all_diffs), REAL_DIFFS)
def test_xform_ids_diff(self):
    """Dropping a form ID from the case's ``xform_ids`` yields a ``set_mismatch`` diff."""
    case_id = uuid.uuid4().hex

    # First form creates the case, second form updates it.
    create_block = CaseBlock(case_id, case_type='migrate', create=True, update={'p1': 1})
    submit_case_blocks(create_block.as_string(), self.domain_name)
    update_block = CaseBlock(case_id, update={'p2': 2})
    submit_case_blocks(update_block.as_string(), self.domain_name)

    # Remove the second form's ID from the saved case.
    case = CaseAccessors(self.domain_name).get_case(case_id)
    removed_form_id = case.xform_ids.pop(1)
    case.save()

    self.assertEqual(1, len(self._get_case_ids()))
    self._do_migration_and_assert_flags(self.domain_name)
    self.assertEqual(1, len(self._get_case_ids()))

    expected_diff = FormJsonDiff(
        diff_type=u'set_mismatch',
        path=[u'xform_ids', u'[*]'],
        old_value=u'',
        new_value=removed_form_id,
    )
    self._compare_diffs([(u'CommCareCase', expected_diff)])
def show_diffs(self):
    """Print every planning-db diff except a few that are skipped.

    Skipped are ``diff``-type entries whose old and new values both satisfy
    ``_is_datetime``, and the two ``type`` diffs on ``external_id`` and
    ``closed_by`` that go from ``u''`` to ``None``.
    """
    for form_id, json_diff in self.planning_db.get_diffs():
        if json_diff.diff_type == 'diff':
            if _is_datetime(json_diff.old_value) and _is_datetime(json_diff.new_value):
                continue
        if json_diff in (
                FormJsonDiff(diff_type=u'type', path=[u'external_id'],
                             old_value=u'', new_value=None),
                FormJsonDiff(diff_type=u'type', path=[u'closed_by'],
                             old_value=u'', new_value=None)):
            continue
        # Parenthesised single-argument form is valid on both Python 2 and 3.
        print('[{}] {}'.format(form_id, json_diff))
def _filter_forms_touch_case(diffs, forms_that_touch_cases_without_actions):
    """Drop known form IDs from ``xform_ids`` ``set_mismatch`` diffs.

    Legacy bug in case processing would not add the form ID to the list of
    xform_ids for the case if the case block had no actions.

    ``diffs`` is modified in place and returned. Each matching diff is removed
    and, if any IDs in its comma-separated ``new_value`` are not in
    ``forms_that_touch_cases_without_actions``, re-added at the end with only
    those IDs.
    """
    if not forms_that_touch_cases_without_actions:
        return diffs

    mismatches = [
        diff for diff in diffs
        if diff.diff_type == 'set_mismatch' and diff.path[0] == 'xform_ids'
    ]
    for mismatch in mismatches:
        diffs.remove(mismatch)
        unexplained_ids = [
            form_id for form_id in mismatch.new_value.split(',')
            if form_id not in forms_that_touch_cases_without_actions
        ]
        if not unexplained_ids:
            continue
        fields = mismatch._asdict()
        fields['new_value'] = ','.join(unexplained_ids)
        diffs.append(FormJsonDiff(**fields))
    return diffs
def test_case_name_missing(self):
    """A ``name`` present only on the SQL case is kept as a ``missing`` diff."""
    couch_case = {'doc_type': 'CommCareCase'}
    sql_case = dict(couch_case, name='thing')
    raw_diffs = json_diff(couch_case, sql_case, track_list_indices=False)
    filtered = filter_case_diffs(couch_case, sql_case, raw_diffs)
    expected = [FormJsonDiff('missing', ('name',), MISSING, 'thing')]
    self.assertEqual(filtered, expected)
def test_single_case_indices_real_diff(self):
    """With one index, a changed ``relationship`` is the only diff that remains."""
    couch_index = {
        "doc_type": "CommCareCaseIndex",
        "case_id": "fb698d47-4832-42b2-b28c-86d13adb45a2",
        "identifier": "parent",
        "referenced_id": "7ab03ccc-e5b7-4c8f-b88f-43ee3b0543a5",
        "referenced_type": "Patient",
        "relationship": "child",
    }
    couch_case = {'doc_type': 'CommCareCase', 'indices': [couch_index]}

    # SQL side: same index without 'doc_type' and with a different relationship.
    sql_index = deepcopy(couch_index)
    del sql_index['doc_type']
    sql_index['relationship'] = 'extension'
    sql_case = {'doc_type': 'CommCareCase', 'indices': [sql_index]}

    expected_diffs = [
        FormJsonDiff(
            diff_type='diff',
            path=('indices', '[*]', 'relationship'),
            old_value='child',
            new_value='extension',
        ),
    ]
    raw_diffs = json_diff(couch_case, sql_case, track_list_indices=False)
    filtered_diffs = filter_case_diffs(couch_case, sql_case, raw_diffs)
    self.assertEqual(expected_diffs, filtered_diffs)
def _filter_case_index_diffs(couch_case, sql_case, diffs):
    """Indices may be in different order - re-sort and compare again.

    Returns ``diffs`` unchanged when the couch case has no ``indices`` key,
    when no diff path starts with ``'indices'``, or when the couch case has at
    most one index. Otherwise the ``indices`` diffs are replaced by diffs
    recomputed on both index lists sorted by ``identifier`` and filtered with
    the ``'CommCareCaseIndex'`` partial matches.
    """
    if 'indices' not in couch_case:
        return diffs

    non_index_diffs = [diff for diff in diffs if diff.path[0] != 'indices']
    if len(non_index_diffs) == len(diffs):
        return diffs

    couch_indices = couch_case['indices']
    sql_indices = sql_case['indices']
    if len(couch_indices) <= 1:
        return diffs

    def by_identifier(index):
        return index['identifier']

    sorted_couch = sorted(couch_indices, key=by_identifier)
    sorted_sql = sorted(sql_indices, key=by_identifier)

    recomputed = []
    for diff in json_diff(sorted_couch, sorted_sql, track_list_indices=False):
        fields = diff._asdict()
        # convert the path back to what it should be
        fields['path'] = tuple(['indices'] + list(diff.path))
        recomputed.append(FormJsonDiff(**fields))

    recomputed = _filter_partial_matches(recomputed, PARTIAL_DIFFS['CommCareCaseIndex'])
    non_index_diffs.extend(recomputed)
    return non_index_diffs
def test_multiple_case_indices_real_diff(self):
    """With two indices, a changed ``identifier`` is reported as a diff."""
    parent_index = {
        "case_id": "fb698d47-4832-42b2-b28c-86d13adb45a2",
        "identifier": "parent",
        "referenced_id": "7ab03ccc-e5b7-4c8f-b88f-43ee3b0543a5",
        "referenced_type": "Patient",
        "relationship": "child",
    }
    goal_index = {
        "case_id": "fb698d47-4832-42b2-b28c-86d13adb45a2",
        "identifier": "goal",
        "referenced_id": "c2e938d9-7406-4fdf-87ab-67d92296705e",
        "referenced_type": "careplan_goal",
        "relationship": "child",
    }
    couch_case = {'doc_type': 'CommCareCase', 'indices': [parent_index, goal_index]}

    # SQL side: identical indices except the first one's identifier.
    sql_indices = deepcopy(couch_case['indices'])
    sql_indices[0]['identifier'] = 'mother'
    sql_case = {'doc_type': 'CommCareCase', 'indices': sql_indices}

    expected_diffs = [
        FormJsonDiff(diff_type='diff', path=('indices', '[*]', 'identifier'),
                     old_value='parent', new_value='mother'),
    ]
    raw_diffs = json_diff(couch_case, sql_case, track_list_indices=False)
    filtered_diffs = filter_case_diffs(couch_case, sql_case, raw_diffs)
    self.assertEqual(expected_diffs, filtered_diffs)
def test_case_name_diff(self):
    """Differing ``name`` values ('shorter' vs 'short') are kept as a diff."""
    couch_case = {'doc_type': 'CommCareCase', 'name': 'shorter'}
    sql_case = dict(couch_case, name='short')
    raw_diffs = json_diff(couch_case, sql_case, track_list_indices=False)
    filtered = filter_case_diffs(couch_case, sql_case, raw_diffs)
    expected = [FormJsonDiff('diff', ('name',), 'shorter', 'short')]
    self.assertEqual(filtered, expected)
def case_attachments(old_obj, new_obj, rule, original_diff):
    """Attachment JSON format is different between Couch and SQL.

    Returns ``False`` when ``original_diff`` is not under
    ``'case_attachments'``. Otherwise every attachment named on either side is
    compared: one present on only one side gives a ``missing`` diff, and one
    present on both is diffed with ``json_diff`` and filtered with the
    ``'case_attachment'`` ignore rules. If any diffs result, ``ReplaceDiff``
    is raised carrying them; if none do, ``True`` is returned.

    ``rule`` is accepted but not used here.
    """
    if original_diff.path[0] != "case_attachments":
        return False

    old_attachments = old_obj.get("case_attachments", {})
    new_attachments = new_obj.get("case_attachments", {})
    replacement_diffs = []
    for name in set(old_attachments) | set(new_attachments):
        prefix = ('case_attachments', name)
        old_att = old_attachments.get(name, MISSING)
        new_att = new_attachments.get(name, MISSING)
        if old_att is MISSING or new_att is MISSING:
            replacement_diffs.append(FormJsonDiff(
                diff_type='missing',
                path=prefix,
                old_value=old_att,
                new_value=new_att,
            ))
            continue
        att_diffs = json_diff(old_att, new_att)
        kept = _filter_ignored(old_att, new_att, att_diffs, ['case_attachment'])
        # convert the path back to what it should be
        replacement_diffs.extend(
            diff._replace(path=prefix + diff.path) for diff in kept
        )

    if replacement_diffs:
        raise ReplaceDiff(replacement_diffs)
    return True
def test_form_with_number_with_extra_leading_zero(self):
    """'6.2' vs '006.2' is not expected as a diff; 'fab' vs '00fab' is."""

    def build_form(floating, not_a_number):
        return {
            "doc_type": "XFormInstance",
            "form": {
                "case": {
                    "update": {
                        "floating": floating,
                        "NaN": not_a_number,
                    },
                },
            },
        }

    couch_form = build_form("6.2", "fab")
    sql_form = build_form("006.2", "00fab")
    nan_diff = FormJsonDiff(
        diff_type='diff',
        path=('form', 'case', 'update', 'NaN'),
        old_value='fab',
        new_value='00fab',
    )
    self._test_form_diff_filter(couch_form, sql_form, expected=[nan_diff] + REAL_DIFFS)
def test_filter_case_xform_id_diffs_good(self):
    """Same xform IDs in a different order leave only ``REAL_DIFFS``."""
    xform_path = ('xform_ids', '[*]')
    couch_case = {'doc_type': 'CommCareCase', 'xform_ids': ['123', '456']}
    sql_case = {'doc_type': 'CommCareCase', 'xform_ids': ['456', '123']}
    # NOTE(review): u'455' does not appear in either case's xform_ids; it may
    # be a typo for u'456' - confirm before changing the fixture.
    order_diffs = [
        FormJsonDiff(diff_type=u'diff', path=xform_path, old_value=u'123', new_value=u'456'),
        FormJsonDiff(diff_type=u'diff', path=xform_path, old_value=u'455', new_value=u'123'),
    ]
    filtered = filter_case_diffs(couch_case, sql_case, order_diffs + REAL_DIFFS)
    self.assertEqual(filtered, REAL_DIFFS)
def test_missing_rules():
    """Yield checks that rules with ``MISSING`` old/new values match such diffs."""
    old_obj = {"flag": True}
    new_obj = {"is_val": True}

    def match(rule, diff):
        assert rule.matches(diff, old_obj, new_obj), (rule, diff)

    # (field, old value, new value) for each yielded check
    cases = [
        ("flag", True, MISSING),
        ("is_val", MISSING, True),
    ]
    for field, old, new in cases:
        yield (
            match,
            Ignore("diff", field, old=old, new=new),
            FormJsonDiff("diff", (field,), old, new),
        )
def __init__(self, diffs=None, **kw):
    """Store replacement diffs given directly or built from keyword fields.

    With ``diffs`` given, it is stored as-is and no keyword arguments are
    allowed. Without it, a single ``FormJsonDiff`` is built from ``kw``, with
    ``diff_type`` defaulting to ``"complex"`` and ``old_value``/``new_value``
    defaulting to ``None``.
    """
    if diffs is not None:
        assert not kw, 'diffs and kw not allowed together'
        self.diffs = diffs
        return

    defaults = (("diff_type", "complex"), ("old_value", None), ("new_value", None))
    for field, value in defaults:
        kw.setdefault(field, value)
    self.diffs = [FormJsonDiff(**kw)]
def test_filter_case_xform_id_diffs_bad(self):
    """Different xform ID sets are reported as one ``set_mismatch`` diff."""
    xform_path = ('xform_ids', '[*]')
    couch_case = {'doc_type': 'CommCareCase', 'xform_ids': ['123', '456']}
    sql_case = {'doc_type': 'CommCareCase', 'xform_ids': ['123', 'abc']}
    element_diff = FormJsonDiff(
        diff_type=u'diff', path=xform_path, old_value=u'456', new_value=u'abc')
    mismatch_diff = FormJsonDiff(
        diff_type='set_mismatch', path=xform_path, old_value='456', new_value='abc')

    filtered = filter_case_diffs(couch_case, sql_case, [element_diff] + REAL_DIFFS)
    self.assertEqual(filtered, REAL_DIFFS + [mismatch_diff])
def _make_ignored_diffs(doc_type):
    """Build one diff per check-less ignore rule for ``doc_type``.

    Rules are looked up under both ``doc_type`` and ``doc_type + "*"``. Rules
    for which ``_has_check`` is true are skipped. Asserts that at least one
    diff was produced.
    """
    diff_defaults = {
        'diff_type': 'type',
        'path': ('data',),
        'old_value': 0,
        'new_value': 1,
    }
    diffs = []
    for rule_key in [doc_type, doc_type + "*"]:
        for rule in load_ignore_rules().get(rule_key, []):
            if _has_check(rule):
                continue
            diffs.append(FormJsonDiff(**_diff_args(rule, diff_defaults)))
    assert diffs, "expected diffs for %s" % doc_type
    return diffs
def json_diff(self):
    """Return this record as a ``FormJsonDiff``.

    ``path`` is JSON-decoded. ``old_value`` and ``new_value`` are JSON-decoded
    too, except that a stored ``None`` becomes ``MISSING``.
    """
    from corehq.apps.tzmigration.timezonemigration import FormJsonDiff, MISSING

    def decode(raw):
        return MISSING if raw is None else json.loads(raw)

    path = json.loads(self.path)
    old_value = decode(self.old_value)
    new_value = decode(self.new_value)
    return FormJsonDiff(self.diff_type, path, old_value, new_value)
def json_diff(self):
    """Return this record as a ``FormJsonDiff``.

    ``path`` is JSON-decoded. ``old_value`` and ``new_value`` are JSON-decoded
    too, except that a stored ``None`` becomes ``Ellipsis``.
    """
    from corehq.apps.tzmigration.timezonemigration import FormJsonDiff

    def decode(raw):
        return Ellipsis if raw is None else json.loads(raw)

    path = json.loads(self.path)
    old_value = decode(self.old_value)
    new_value = decode(self.new_value)
    return FormJsonDiff(self.diff_type, path, old_value, new_value)
def test_case_attachments(self):
    """Only the unexpected field and the changed ``properties`` survive filtering."""
    couch_attachment = {
        'doc_type': 'ignored',
        'attachment_properties': 'ignored',
        'attachment_from': 'ignored',
        'attachment_src': 'ignored',
        'server_mime': 'ignored',
        'attachment_name': 'ignored',
        'server_md5': 'ignored',
        'identifier': 'xyz',
        'attachment_size': 123,
        'unexpected': 'value',
        'properties': 'value',
    }
    sql_attachment = {
        'name': 'xyz',
        'content_length': 123,
        'content_type': 'ignored-sql',
        # for testing only, not an expected transformation
        'properties': 'eulav',
    }
    couch_case = {
        'doc_type': 'CommCareCase',
        'case_attachments': {'xyz': couch_attachment},
    }
    sql_case = {
        'doc_type': 'CommCareCase',
        'case_attachments': {'xyz': sql_attachment},
    }

    raw_diffs = json_diff(couch_case, sql_case, track_list_indices=False)
    filtered = filter_case_diffs(couch_case, sql_case, raw_diffs)
    expected = [
        FormJsonDiff('missing', ('case_attachments', 'xyz', 'unexpected'), 'value', MISSING),
        FormJsonDiff('diff', ('case_attachments', 'xyz', 'properties'), 'value', 'eulav'),
    ]
    self.assertEqual(filtered, expected)
def test_rules():
    """Yield match / no-match checks for ``Ignore`` rules against one diff."""
    diff = FormJsonDiff("diff", ("node",), "old", "new")
    old_obj = {"node": "old", "flag": True}
    new_obj = {"node": "new", "is_val": True}

    def match(rule):
        assert rule.matches(diff, old_obj, new_obj), (rule, diff)

    def no_match(rule):
        assert not rule.matches(diff, old_obj, new_obj), (rule, diff)

    matching_rules = [
        Ignore("diff", ("node",), old="old", new="new"),
        Ignore("diff", "node", old="old", new="new"),
        Ignore(ANY, "node", old="old", new="new"),
        Ignore("diff", ANY, old="old", new="new"),
        Ignore("diff", (ANY,), old="old", new="new"),
        Ignore("diff", "node", old=ANY, new="new"),
        Ignore("diff", "node", old="old", new=ANY),
        Ignore("diff", "node", old="old"),
        Ignore("diff", "node"),
        Ignore("diff"),
        Ignore(type="diff"),
        Ignore(path="node"),
        Ignore(old="old"),
        Ignore(new="new"),
        Ignore(),
    ]
    for rule in matching_rules:
        yield match, rule

    non_matching_rules = [
        Ignore(type="miss"),
        Ignore(path=("key",)),
        Ignore(path="key"),
        Ignore(old=1),
        Ignore(new=2),
        Ignore(old=MISSING),
        Ignore(new=MISSING),
    ]
    for rule in non_matching_rules:
        yield no_match, rule

    def is_flagged(old, new, rule, diff_):
        # The check callback receives the same objects passed to matches().
        assert old is old_obj, old
        assert new is new_obj, new
        assert rule is check_rule, rule
        assert diff_ is diff, diff_
        return old["flag"]

    check_rule = Ignore("diff", "node", old="old", new="new", check=is_flagged)
    yield match, check_rule

    def nope(old_obj, new_obj, rule, diff):
        return False

    yield no_match, Ignore("diff", "node", old="old", new="new", check=nope)
def test_filter_case_xform_id_diffs_good(self):
    """Same xform IDs in a different order collapse into one ``list_order`` diff."""
    couch_case = {'doc_type': 'CommCareCase', 'xform_ids': ['123', '456']}
    sql_case = {'doc_type': 'CommCareCase', 'xform_ids': ['456', '123']}

    raw_diffs = json_diff(couch_case, sql_case, track_list_indices=False)
    self.assertEqual(2, len(raw_diffs))

    filtered = filter_case_diffs(couch_case, sql_case, raw_diffs + REAL_DIFFS)
    order_diff = FormJsonDiff(
        diff_type='list_order', path=('xform_ids', '[*]'),
        old_value=None, new_value=None)
    # Compared as sets, so the order of the filtered diffs is not checked.
    self.assertEqual(set(filtered), set([order_diff] + REAL_DIFFS))
def get_diffs(self):
    """Yield ``(doc_id, FormJsonDiff)`` for every ``PlanningDiff`` row.

    ``path`` is JSON-decoded. ``old_value`` and ``new_value`` are JSON-decoded
    too, except that a stored ``None`` becomes ``Ellipsis``.
    """
    from corehq.apps.tzmigration.timezonemigration import FormJsonDiff

    session = self.Session()

    def decode(raw):
        return Ellipsis if raw is None else json.loads(raw)

    for row in session.query(PlanningDiff).all():
        doc_id = row.doc_id
        diff = FormJsonDiff(
            row.diff_type,
            json.loads(row.path),
            decode(row.old_value),
            decode(row.new_value),
        )
        yield doc_id, diff