def test_merge_dict_with_keep_longest():
    """Merge two dicts of strings using the ``keep_longest`` dict strategy.

    ``merge()`` is expected to raise ``MergeError``; the merged result is
    still read from ``merged_root`` afterwards and must hold, for each key
    present in both inputs, the longer of the two strings.
    """
    root = {}
    head = {
        'a': 'A short string',
        'b': 'An extremely long string',
    }
    update = {
        'a': 'A much longer string',
        'b': 'Another short string',
        'c': 'Other string',
    }
    merger = Merger(
        root,
        head,
        update,
        DictMergerOps.keep_longest,
        UnifierOps.KEEP_ONLY_UPDATE_ENTITIES,
    )

    with pytest.raises(MergeError):
        merger.merge()

    # 'a' is taken from update, 'b' from head, 'c' exists only in update.
    assert merger.merged_root == {
        'a': 'A much longer string',
        'b': 'An extremely long string',
        'c': 'Other string',
    }
def test_merge_list_with_keep_longest():
    """Merge a list field with ``keep_longest`` when update is empty.

    Root and head both carry a one-element list under ``'a'``; update is an
    empty dict. ``merge()`` is expected to raise ``MergeError`` and the
    merged result must equal head's content.
    """
    root = {'a': [{'b': 'One string'}]}
    head = {'a': [{'b': 'A different string'}]}
    update = {}

    merger = Merger(
        root,
        head,
        update,
        DictMergerOps.keep_longest,
        UnifierOps.KEEP_ONLY_UPDATE_ENTITIES,
    )

    with pytest.raises(MergeError):
        merger.merge()

    # Head's version of 'a' is expected to survive: update has no 'a' key
    # at all, and the dict strategy in use is keep_longest.
    assert merger.merged_root == {'a': [{'b': 'A different string'}]}
def test_merge_bare_int_lists():
    """Merge three top-level lists of ints; the result must equal update."""
    root = [1, 2, 3]
    head = [1, 2, 3, 4]
    update = [1, 2, 5]

    dict_op = DictMergerOps.FALLBACK_KEEP_HEAD
    list_op = UnifierOps.KEEP_ONLY_UPDATE_ENTITIES
    merger = Merger(root, head, update, dict_op, list_op)

    # No MergeError is expected here, so merge() is called unguarded.
    merger.merge()

    assert merger.merged_root == [1, 2, 5]
def test_merge_bare_str_lists():
    """Merge three top-level lists of strings; the result must equal update."""
    root = ['1', '2', '3']
    head = ['1', '2', '3', '4']
    update = ['1', '2', '5']

    dict_op = DictMergerOps.FALLBACK_KEEP_HEAD
    list_op = UnifierOps.KEEP_ONLY_UPDATE_ENTITIES
    merger = Merger(root, head, update, dict_op, list_op)

    # No MergeError is expected here, so merge() is called unguarded.
    merger.merge()

    assert merger.merged_root == ['1', '2', '5']
def test_merge_nested_lists():
    """Merge three top-level lists of lists; the result must equal update."""
    root = [[1], [2], [3]]
    head = [[1], [2], [3], [4]]
    update = [[1], [2], [5]]

    dict_op = DictMergerOps.FALLBACK_KEEP_HEAD
    list_op = UnifierOps.KEEP_ONLY_UPDATE_ENTITIES
    merger = Merger(root, head, update, dict_op, list_op)

    # No MergeError is expected here, so merge() is called unguarded.
    merger.merge()

    assert merger.merged_root == [[1], [2], [5]]
def test_merge_root_is_not_list():
    """Merge identical nested lists when root is a plain string.

    Head and update are equal lists of lists while root is an unrelated
    string; the merge must complete without error and return that list.
    """
    root = 'randomstring'
    head = [[1], [2, 3], [5]]
    update = [[1], [2, 3], [5]]

    dict_op = DictMergerOps.FALLBACK_KEEP_HEAD
    list_op = UnifierOps.KEEP_ONLY_UPDATE_ENTITIES
    merger = Merger(root, head, update, dict_op, list_op)
    merger.merge()

    # Original author's note: the lists are aligned here as entities and
    # lists of entities.
    assert merger.merged_root == [[1], [2, 3], [5]]
def test_merge_str_with_keep_longest():
    """Merge two bare strings using the ``keep_longest`` dict strategy.

    ``merge()`` is expected to raise ``MergeError``; the merged result read
    afterwards must be the longer string, which here is the update value.
    """
    root = {}
    head = 'A short string'
    update = 'A much longer string'

    merger = Merger(
        root,
        head,
        update,
        DictMergerOps.keep_longest,
        UnifierOps.KEEP_ONLY_UPDATE_ENTITIES,
    )

    with pytest.raises(MergeError):
        merger.merge()

    assert merger.merged_root == 'A much longer string'
def test_merge_list_with_string():
    """Merge a list head with a string update and inspect the conflict.

    ``merge()` must raise ``MergeError``. The merged result must equal
    head, and the error must carry exactly one conflict: a ``SET_FIELD``
    at the empty path whose body is the update string.
    """
    root = 'somerandomvalue'
    head = [1, 2, 3]
    update = 'a given string'

    merger = Merger(
        root,
        head,
        update,
        DictMergerOps.FALLBACK_KEEP_HEAD,
        UnifierOps.KEEP_ONLY_UPDATE_ENTITIES,
    )

    # excinfo is only usable once the with-block has exited.
    with pytest.raises(MergeError) as excinfo:
        merger.merge()

    assert merger.merged_root == [1, 2, 3]

    expected_conflict = Conflict(ConflictType.SET_FIELD, (), 'a given string')
    assert len(excinfo.value.content) == 1
    assert excinfo.value.content[0] == expected_conflict
def test_keep_head_conflict_on_new_update():
    """Check ``KEEP_HEAD_ENTITIES_CONFLICT_ON_NEW_UPDATE`` on bare lists.

    ``merge()`` must raise ``MergeError``. The merged result must equal
    head, and the element that exists only in update (``3``) must be
    reported on ``merger.conflicts`` as an ``INSERT`` at path ``(0,)``.
    """
    root = [1]
    head = [1, 2]
    update = [3]

    merger = Merger(
        root,
        head,
        update,
        DictMergerOps.keep_longest,
        UnifierOps.KEEP_HEAD_ENTITIES_CONFLICT_ON_NEW_UPDATE,
    )

    with pytest.raises(MergeError):
        merger.merge()

    assert merger.merged_root == [1, 2]
    assert merger.conflicts == [('INSERT', (0, ), 3)]
def show_fixture(dirname, fixture):
    """Merge one fixture and render the resulting diffs with ``diff.html``.

    The fixture's ``root.json``, ``head.json`` and ``update.json`` are
    loaded and merged. The list and dict operations are taken from the
    ``listop`` / ``dictop`` request arguments, falling back to
    ``KEEP_ONLY_UPDATE_ENTITIES`` and ``FALLBACK_KEEP_HEAD``.

    Params
        dirname: passed through to ``_read_fixture`` to locate the fixture.
        fixture: passed through to ``_read_fixture`` to locate the fixture.

    Return
        The value returned by ``render_template`` for ``diff.html``.
    """
    def _load_json(filename):
        # Read one file of the fixture and decode it as JSON.
        return json.loads(_read_fixture(dirname, fixture, filename))

    def _public_op_names(ops_class):
        # Names defined directly on the class, minus private ones and the
        # bookkeeping attributes whose name contains "allowed".
        return [
            name for name in vars(ops_class)
            if not (name.startswith('_') or 'allowed' in name.lower())
        ]

    root = _load_json('root.json')
    head = _load_json('head.json')
    update = _load_json('update.json')
    description = _read_fixture(dirname, fixture, 'description.txt')

    query = request.args
    listop = query.get('listop', UnifierOps.KEEP_ONLY_UPDATE_ENTITIES)
    dictop = query.get('dictop', DictMergerOps.FALLBACK_KEEP_HEAD)

    merger = Merger(
        root,
        head,
        update,
        dictop,
        listop,
        comparators=COMPARATORS,
        list_merge_ops=LIST_MERGE_OPS,
    )

    # A MergeError does not abort rendering: its conflicts are decoded and
    # shown next to the merged result. Without an error, conflicts is None.
    try:
        merger.merge()
    except MergeError as error:
        conflicts = [json.loads(item.to_json()) for item in error.content]
    else:
        conflicts = None

    merged = merger.merged_root

    rh_root, rh_head = build_root_diff(
        merger.aligned_root, merger.aligned_head, merger.head_stats)
    ru_root, ru_update = build_root_diff(
        merger.aligned_root, merger.aligned_update, merger.update_stats)
    hm_merged, hm_head = build_merged_diff(
        merged, merger.aligned_head, merger.head_stats)

    merge_info = {
        'root': root,
        'head': head,
        'update': update,
        'rhRoot': rh_root,
        'rhHead': rh_head,
        'ruRoot': ru_root,
        'ruUpdate': ru_update,
        'hmMerged': hm_merged,
        'hmHead': hm_head,
        'conflicts': conflicts,
    }

    # Offer every available operation name to the template.
    dictops = _public_op_names(DictMergerOps)
    listops = _public_op_names(UnifierOps)

    return render_template(
        'diff.html',
        description=description,
        listops=listops,
        dictops=dictops,
        dictop=dictop,
        listop=listop,
        merge_info=merge_info,
    )
def merge(root, head, update, head_source=None):
    """Merge ``head`` and ``update`` against their common ``root``.

    A configuration is selected with ``get_configuration`` from head,
    update and ``head_source``. The three records are passed through the
    configuration's pre-filters, merged with a ``Merger`` built from that
    configuration, and the resulting conflicts are filtered and flattened.

    Params
        root(dict): the last common parent json of head and update
        head(dict): the last version of a record in INSPIRE
        update(dict): the update coming from outside INSPIRE to merge
        head_source(string): the source of the head record. If ``None``,
            heuristics are used to derive it from the metadata. This is
            useful if the HEAD came from legacy and the acquisition_source
            does not reflect the state of the record.

    Return
        A tuple containing the merged record in json format and a flat
        list of all generated conflicts, each decoded from json.
    """
    configuration = get_configuration(head, update, head_source)

    root, head, update = filter_records(
        root, head, update, filters=configuration.pre_filters)

    merger = Merger(
        root=root,
        head=head,
        update=update,
        default_dict_merge_op=configuration.default_dict_merge_op,
        default_list_merge_op=configuration.default_list_merge_op,
        list_dict_ops=configuration.list_dict_ops,
        list_merge_ops=configuration.list_merge_ops,
        comparators=configuration.comparators,
    )

    # A MergeError is how the merger reports conflicts; the merged record
    # is still read from the merger afterwards.
    try:
        merger.merge()
    except MergeError as error:
        raw_conflicts = error.content
    else:
        raw_conflicts = []

    kept_conflicts = filter_conflicts(
        raw_conflicts, configuration.conflict_filters)

    # Each conflict's json decodes to an iterable of entries; chain them
    # into one flat list.
    decoded = [json.loads(conflict.to_json()) for conflict in kept_conflicts]
    flat_conflicts_as_json = list(itertools.chain.from_iterable(decoded))

    return merger.merged_root, flat_conflicts_as_json
def json_merger_arxiv_to_arxiv(root, head, update):
    """Merge ``head`` and ``update`` against ``root`` with a fixed setup.

    The merger falls back to the update value for dict fields, keeps only
    update entities for lists, and uses the module-level ``COMPARATORS``,
    ``LIST_MERGE_OPS`` and ``FIELD_MERGE_OPS``.

    Return
        A tuple ``(merged, conflicts)``. ``conflicts`` is ``None`` when the
        merge raised no ``MergeError``; otherwise it is the list of the
        error's conflicts, each decoded from json.
    """
    merger = Merger(
        root,
        head,
        update,
        DictMergerOps.FALLBACK_KEEP_UPDATE,  # Most common operation
        UnifierOps.KEEP_ONLY_UPDATE_ENTITIES,
        comparators=COMPARATORS,
        list_merge_ops=LIST_MERGE_OPS,
        list_dict_ops=FIELD_MERGE_OPS,
    )

    # Conflicts arrive as a MergeError; the merged record stays readable.
    try:
        merger.merge()
    except MergeError as error:
        conflicts = [json.loads(item.to_json()) for item in error.content]
    else:
        conflicts = None

    return merger.merged_root, conflicts
def inspire_json_merge(root, head, update):
    """Merge ``head`` and ``update`` against their common ``root``.

    A ``Merger`` is instantiated with the configuration selected from the
    ``source`` values of head and update, then run on the three records.

    Params
        root(dict): the last common parent json of head and update
        head(dict): the last version of a record in INSPIRE
        update(dict): the update coming from outside INSPIRE to merge

    Return
        A tuple containing the merged record in json format and the
        generated conflicts: ``None`` when the merge raised no
        ``MergeError``, otherwise the result of ``sort_conflicts`` applied
        to the json-decoded conflicts.
    """
    head_source = get_source(head)
    update_source = get_source(update)
    configuration = _get_configuration(head_source, update_source)

    merger = Merger(
        root=root,
        head=head,
        update=update,
        default_dict_merge_op=configuration.default_dict_merge_op,
        default_list_merge_op=configuration.default_list_merge_op,
        list_dict_ops=configuration.list_dict_ops,
        list_merge_ops=configuration.list_merge_ops,
        comparators=configuration.comparators,
    )

    # Conflicts arrive as a MergeError; the merged record stays readable.
    try:
        merger.merge()
    except MergeError as error:
        decoded = [json.loads(item.to_json()) for item in error.content]
        conflicts = sort_conflicts(decoded)
    else:
        conflicts = None

    return merger.merged_root, conflicts