def test_merge_dict_with_keep_longest():
    """Merge two flat dicts using ``DictMergerOps.keep_longest``.

    ``merge()`` is expected to raise ``MergeError``; afterwards the merged
    root must hold, for keys 'a' and 'b', the longer of the head/update
    strings, plus the update-only key 'c'.
    """
    root = {}
    head = {'a': 'A short string', 'b': 'An extremely long string'}
    update = {
        'a': 'A much longer string',
        'b': 'Another short string',
        'c': 'Other string',
    }
    merger = Merger(root, head, update,
                    DictMergerOps.keep_longest,
                    UnifierOps.KEEP_ONLY_UPDATE_ENTITIES)

    with pytest.raises(MergeError):
        merger.merge()

    assert merger.merged_root == {
        'a': 'A much longer string',
        'b': 'An extremely long string',
        'c': 'Other string',
    }
Exemple #2
0
def test_merge_dict_with_keep_longest():
    """Check the ``keep_longest`` dict strategy on two flat dictionaries.

    The merge must raise ``MergeError`` and still leave a merged root that
    contains the longer string for each shared key and the key that only
    exists in the update.
    """
    root_doc = {}
    head_doc = {'a': 'A short string', 'b': 'An extremely long string'}
    update_doc = {'a': 'A much longer string',
                  'b': 'Another short string',
                  'c': 'Other string'}
    wanted = {'a': 'A much longer string',
              'b': 'An extremely long string',
              'c': 'Other string'}

    merger = Merger(root_doc, head_doc, update_doc,
                    DictMergerOps.keep_longest,
                    UnifierOps.KEEP_ONLY_UPDATE_ENTITIES)
    with pytest.raises(MergeError):
        merger.merge()

    assert merger.merged_root == wanted
def test_merge_list_with_keep_longest():
    """Merge a one-element list of dicts using ``keep_longest``.

    Root and head both carry a single-entry list under 'a' while the update
    is empty.  ``merge()`` is expected to raise ``MergeError`` and the merged
    root must equal the head document.
    """
    root = {'a': [{'b': 'One string'}]}
    head = {'a': [{'b': 'A different string'}]}
    update = {}

    merger = Merger(root, head, update,
                    DictMergerOps.keep_longest,
                    UnifierOps.KEEP_ONLY_UPDATE_ENTITIES)

    with pytest.raises(MergeError):
        merger.merge()

    # The expected result mirrors head's 'a' list; update supplies no 'a'.
    assert merger.merged_root == {'a': [{'b': 'A different string'}]}
Exemple #4
0
def test_merge_list_with_keep_longest():
    """Check ``keep_longest`` when only root and head contain the list.

    The update document is empty.  The merge must raise ``MergeError`` and
    the merged root must be identical to the head document.
    """
    root_doc = {'a': [{'b': 'One string'}]}
    head_doc = {'a': [{'b': 'A different string'}]}
    update_doc = {}
    # Same content as the head document; the update has no 'a' entry.
    wanted = {'a': [{'b': 'A different string'}]}

    merger = Merger(root_doc, head_doc, update_doc,
                    DictMergerOps.keep_longest,
                    UnifierOps.KEEP_ONLY_UPDATE_ENTITIES)
    with pytest.raises(MergeError):
        merger.merge()

    assert merger.merged_root == wanted
Exemple #5
0
def test_merge_bare_int_lists():
    """Merge top-level lists of ints, keeping only the update's entities.

    With ``UnifierOps.KEEP_ONLY_UPDATE_ENTITIES`` the merged root is expected
    to equal the update list and no exception is expected.
    """
    root = [1, 2, 3]
    head = [1, 2, 3, 4]
    update = [1, 2, 5]

    merger = Merger(root, head, update,
                    DictMergerOps.FALLBACK_KEEP_HEAD,
                    UnifierOps.KEEP_ONLY_UPDATE_ENTITIES)
    merger.merge()

    assert merger.merged_root == [1, 2, 5]
Exemple #6
0
def test_merge_bare_str_lists():
    """Merge top-level lists of strings, keeping only the update's entities.

    With ``UnifierOps.KEEP_ONLY_UPDATE_ENTITIES`` the merged root is expected
    to equal the update list and no exception is expected.
    """
    root = ['1', '2', '3']
    head = ['1', '2', '3', '4']
    update = ['1', '2', '5']

    merger = Merger(root, head, update,
                    DictMergerOps.FALLBACK_KEEP_HEAD,
                    UnifierOps.KEEP_ONLY_UPDATE_ENTITIES)
    merger.merge()

    assert merger.merged_root == ['1', '2', '5']
def test_merge_bare_int_lists():
    """Check that bare integer lists merge to the update's content.

    The merge runs with ``FALLBACK_KEEP_HEAD`` for dicts and
    ``KEEP_ONLY_UPDATE_ENTITIES`` for lists and must complete without
    raising.
    """
    root_items = [1, 2, 3]
    head_items = [1, 2, 3, 4]
    update_items = [1, 2, 5]
    wanted = [1, 2, 5]

    merger = Merger(root_items, head_items, update_items,
                    DictMergerOps.FALLBACK_KEEP_HEAD,
                    UnifierOps.KEEP_ONLY_UPDATE_ENTITIES)
    merger.merge()
    assert merger.merged_root == wanted
Exemple #8
0
def test_merge_nested_lists():
    """Merge top-level lists whose elements are single-item lists.

    With ``UnifierOps.KEEP_ONLY_UPDATE_ENTITIES`` the merged root is expected
    to equal the update list and no exception is expected.
    """
    root = [[1], [2], [3]]
    head = [[1], [2], [3], [4]]
    update = [[1], [2], [5]]

    merger = Merger(root, head, update,
                    DictMergerOps.FALLBACK_KEEP_HEAD,
                    UnifierOps.KEEP_ONLY_UPDATE_ENTITIES)
    merger.merge()

    assert merger.merged_root == [[1], [2], [5]]
Exemple #9
0
def test_merge_root_is_not_list():
    """Merge two identical nested lists when the root is a plain string.

    Head and update hold the same content, so the merge is expected to
    complete without raising and to reproduce that content.
    """
    root = 'randomstring'
    head = [[1], [2, 3], [5]]
    update = [[1], [2, 3], [5]]

    merger = Merger(root, head, update,
                    DictMergerOps.FALLBACK_KEEP_HEAD,
                    UnifierOps.KEEP_ONLY_UPDATE_ENTITIES)
    merger.merge()

    # Per the original author: the lists are aligned as entities and as
    # lists of entities.
    assert merger.merged_root == [[1], [2, 3], [5]]
Exemple #10
0
def test_merge_bare_str_lists():
    """Check that bare string lists merge to the update's content.

    The merge runs with ``FALLBACK_KEEP_HEAD`` for dicts and
    ``KEEP_ONLY_UPDATE_ENTITIES`` for lists and must complete without
    raising.
    """
    root_items = ['1', '2', '3']
    head_items = ['1', '2', '3', '4']
    update_items = ['1', '2', '5']
    wanted = ['1', '2', '5']

    merger = Merger(root_items, head_items, update_items,
                    DictMergerOps.FALLBACK_KEEP_HEAD,
                    UnifierOps.KEEP_ONLY_UPDATE_ENTITIES)
    merger.merge()
    assert merger.merged_root == wanted
Exemple #11
0
def test_merge_nested_lists():
    """Check that lists of single-item lists merge to the update's content.

    The merge runs with ``FALLBACK_KEEP_HEAD`` for dicts and
    ``KEEP_ONLY_UPDATE_ENTITIES`` for lists and must complete without
    raising.
    """
    root_items = [[1], [2], [3]]
    head_items = [[1], [2], [3], [4]]
    update_items = [[1], [2], [5]]
    wanted = [[1], [2], [5]]

    merger = Merger(root_items, head_items, update_items,
                    DictMergerOps.FALLBACK_KEEP_HEAD,
                    UnifierOps.KEEP_ONLY_UPDATE_ENTITIES)
    merger.merge()
    assert merger.merged_root == wanted
Exemple #12
0
def test_merge_root_is_not_list():
    """Check merging identical nested lists against a string root.

    The root is a bare string while head and update are equal lists of
    lists; the merge must not raise and must yield that same structure.
    """
    root_value = 'randomstring'
    head_items = [[1], [2, 3], [5]]
    update_items = [[1], [2, 3], [5]]
    wanted = [[1], [2, 3], [5]]

    merger = Merger(root_value, head_items, update_items,
                    DictMergerOps.FALLBACK_KEEP_HEAD,
                    UnifierOps.KEEP_ONLY_UPDATE_ENTITIES)
    merger.merge()
    # Per the original author: the lists are aligned as entities and as
    # lists of entities.
    assert merger.merged_root == wanted
Exemple #13
0
def test_merge_str_with_keep_longest():
    """Merge two bare strings using ``DictMergerOps.keep_longest``.

    ``merge()`` is expected to raise ``MergeError`` and the merged root must
    be the longer (update) string.
    """
    root = {}
    head = 'A short string'
    update = 'A much longer string'

    merger = Merger(root, head, update,
                    DictMergerOps.keep_longest,
                    UnifierOps.KEEP_ONLY_UPDATE_ENTITIES)

    with pytest.raises(MergeError):
        merger.merge()

    assert merger.merged_root == 'A much longer string'
Exemple #14
0
def test_merge_str_with_keep_longest():
    """Check ``keep_longest`` when head and update are plain strings.

    The merge must raise ``MergeError`` and leave the update string, which
    is the longer of the two, as the merged root.
    """
    root_doc = {}
    head_text = 'A short string'
    update_text = 'A much longer string'
    wanted = 'A much longer string'

    merger = Merger(root_doc, head_text, update_text,
                    DictMergerOps.keep_longest,
                    UnifierOps.KEEP_ONLY_UPDATE_ENTITIES)
    with pytest.raises(MergeError):
        merger.merge()

    assert merger.merged_root == wanted
Exemple #15
0
def test_merge_list_with_string():
    """Merge a list head with a string update.

    ``merge()`` is expected to raise ``MergeError`` carrying exactly one
    ``SET_FIELD`` conflict for the update string at the empty path, while
    the merged root keeps the head list.
    """
    root = 'somerandomvalue'
    head = [1, 2, 3]
    update = 'a given string'

    merger = Merger(root, head, update,
                    DictMergerOps.FALLBACK_KEEP_HEAD,
                    UnifierOps.KEEP_ONLY_UPDATE_ENTITIES)
    with pytest.raises(MergeError) as excinfo:
        merger.merge()

    assert merger.merged_root == [1, 2, 3]

    reported = excinfo.value.content
    assert len(reported) == 1
    assert reported[0] == Conflict(
        ConflictType.SET_FIELD, (), 'a given string')
Exemple #16
0
def test_keep_head_conflict_on_new_update():
    """Merge lists with ``KEEP_HEAD_ENTITIES_CONFLICT_ON_NEW_UPDATE``.

    ``merge()`` is expected to raise ``MergeError``.  The merged root must
    equal the head list, and the merger must report one 'INSERT' conflict at
    path ``(0,)`` for the update-only value ``3``.
    """
    root = [1]
    head = [1, 2]
    update = [3]

    merger = Merger(root, head, update,
                    DictMergerOps.keep_longest,
                    UnifierOps.KEEP_HEAD_ENTITIES_CONFLICT_ON_NEW_UPDATE)
    with pytest.raises(MergeError):
        merger.merge()

    assert merger.merged_root == [1, 2]
    assert merger.conflicts == [('INSERT', (0, ), 3)]
def show_fixture(dirname, fixture):
    """Merge one fixture's root/head/update and render it with ``diff.html``.

    The three JSON documents and the description text are read through
    ``_read_fixture``.  The list and dict merge operations are taken from
    the ``listop`` / ``dictop`` request arguments and fall back to
    ``UnifierOps.KEEP_ONLY_UPDATE_ENTITIES`` and
    ``DictMergerOps.FALLBACK_KEEP_HEAD`` respectively.

    Params
        dirname: first argument forwarded to ``_read_fixture``
        fixture: second argument forwarded to ``_read_fixture``

    Return
        The result of ``render_template('diff.html', ...)``.
    """

    def _selectable_ops(ops_class):
        # Names from the class namespace that are neither underscore-prefixed
        # nor contain 'allowed' (case-insensitive).
        return [name for name in ops_class.__dict__
                if not (name.startswith('_') or 'allowed' in name.lower())]

    root = json.loads(_read_fixture(dirname, fixture, 'root.json'))
    head = json.loads(_read_fixture(dirname, fixture, 'head.json'))
    update = json.loads(_read_fixture(dirname, fixture, 'update.json'))
    description = _read_fixture(dirname, fixture, 'description.txt')

    listop = request.args.get('listop', UnifierOps.KEEP_ONLY_UPDATE_ENTITIES)
    dictop = request.args.get('dictop', DictMergerOps.FALLBACK_KEEP_HEAD)

    merger = Merger(root, head, update, dictop, listop,
                    comparators=COMPARATORS,
                    list_merge_ops=LIST_MERGE_OPS)
    try:
        merger.merge()
    except MergeError as error:
        # Conflicts are converted to plain JSON data for the template.
        conflicts = [json.loads(item.to_json()) for item in error.content]
    else:
        conflicts = None
    merged = merger.merged_root

    # Pairwise diffs: root vs head, root vs update, merged vs head.
    rh_root, rh_head = build_root_diff(
        merger.aligned_root, merger.aligned_head, merger.head_stats)
    ru_root, ru_update = build_root_diff(
        merger.aligned_root, merger.aligned_update, merger.update_stats)
    hm_merged, hm_head = build_merged_diff(
        merged, merger.aligned_head, merger.head_stats)

    merge_info = {
        'root': root,
        'head': head,
        'update': update,
        'rhRoot': rh_root,
        'rhHead': rh_head,
        'ruRoot': ru_root,
        'ruUpdate': ru_update,
        'hmMerged': hm_merged,
        'hmHead': hm_head,
        'conflicts': conflicts,
    }

    # The selectable operation names are discovered by introspection.
    dictops = _selectable_ops(DictMergerOps)
    listops = _selectable_ops(UnifierOps)

    return render_template('diff.html',
                           description=description,
                           listops=listops,
                           dictops=dictops,
                           dictop=dictop,
                           listop=listop,
                           merge_info=merge_info)
Exemple #18
0
def test_merge_list_with_string():
    """Check the conflict reported when a list meets a string update.

    The merge must raise ``MergeError`` whose content is a single
    ``SET_FIELD`` conflict for 'a given string' at the empty path; the
    merged root must remain the head list.
    """
    root_value = 'somerandomvalue'
    head_items = [1, 2, 3]
    update_text = 'a given string'

    merger = Merger(root_value, head_items, update_text,
                    DictMergerOps.FALLBACK_KEEP_HEAD,
                    UnifierOps.KEEP_ONLY_UPDATE_ENTITIES)
    with pytest.raises(MergeError) as excinfo:
        merger.merge()

    assert merger.merged_root == [1, 2, 3]

    found = excinfo.value.content
    assert len(found) == 1
    wanted_conflict = Conflict(ConflictType.SET_FIELD, (), 'a given string')
    assert found[0] == wanted_conflict
Exemple #19
0
def merge(root, head, update, head_source=None):
    """
    Run a three-way merge of ``root``, ``head`` and ``update``.

    A configuration is obtained from ``get_configuration``; its pre-filters
    are applied to the three records, a ``Merger`` is built from its merge
    operations and comparators, and the conflicts raised by the merger are
    passed through its conflict filters.

    Params
        root(dict): the last common parent json of head and update
        head(dict): the last version of a record in INSPIRE
        update(dict): the update coming from outside INSPIRE to merge
        head_source(string): the source of the head record. If ``None``,
            heuristics are used to derive it from the metadata. This is
            useful if the HEAD came from legacy and the acquisition_source
            does not reflect the state of the record.

    Return
        A tuple ``(merged, conflicts)``: the merged record and a flat list
        built by chaining together the JSON-decoded form of every conflict
        that survived filtering (empty when the merge raised no
        ``MergeError``).
    """
    configuration = get_configuration(head, update, head_source)

    root, head, update = filter_records(
        root, head, update, filters=configuration.pre_filters)

    merger = Merger(
        root=root,
        head=head,
        update=update,
        default_dict_merge_op=configuration.default_dict_merge_op,
        default_list_merge_op=configuration.default_list_merge_op,
        list_dict_ops=configuration.list_dict_ops,
        list_merge_ops=configuration.list_merge_ops,
        comparators=configuration.comparators,
    )

    try:
        merger.merge()
    except MergeError as error:
        raw_conflicts = error.content
    else:
        raw_conflicts = []

    kept_conflicts = filter_conflicts(
        raw_conflicts, configuration.conflict_filters)
    decoded = [json.loads(conflict.to_json()) for conflict in kept_conflicts]
    # Each decoded conflict is itself iterable; chain them into one list.
    flattened = [entry for group in decoded for entry in group]

    return merger.merged_root, flattened
Exemple #20
0
def json_merger_arxiv_to_arxiv(root, head, update):
    """Three-way merge with ``FALLBACK_KEEP_UPDATE`` as the dict default.

    Lists default to ``UnifierOps.KEEP_ONLY_UPDATE_ENTITIES``; comparators
    and per-field operations come from the module-level ``COMPARATORS``,
    ``LIST_MERGE_OPS`` and ``FIELD_MERGE_OPS``.

    Return
        A tuple ``(merged, conflicts)`` where ``conflicts`` is ``None`` when
        the merge raised no ``MergeError``, otherwise the list of
        JSON-decoded conflicts carried by the error.
    """
    merger = Merger(
        root, head, update,
        DictMergerOps.FALLBACK_KEEP_UPDATE,  # Most common operation
        UnifierOps.KEEP_ONLY_UPDATE_ENTITIES,
        comparators=COMPARATORS,
        list_merge_ops=LIST_MERGE_OPS,
        list_dict_ops=FIELD_MERGE_OPS,
    )

    try:
        merger.merge()
    except MergeError as error:
        conflicts = [json.loads(item.to_json()) for item in error.content]
    else:
        conflicts = None

    return merger.merged_root, conflicts
def inspire_json_merge(root, head, update):
    """
    Run a three-way merge configured by the sources of head and update.

    The configuration is looked up with ``_get_configuration`` from
    ``get_source(head)`` and ``get_source(update)``, and supplies the merge
    operations and comparators handed to the ``Merger``.

    Params
        root(dict): the last common parent json of head and update
        head(dict): the last version of a record in INSPIRE
        update(dict): the update coming from outside INSPIRE to merge

    Return
        A tuple ``(merged, conflicts)``: the merged record and either
        ``None`` (no ``MergeError`` was raised) or the JSON-decoded
        conflicts after ``sort_conflicts``.
    """
    head_source = get_source(head)
    update_source = get_source(update)
    configuration = _get_configuration(head_source, update_source)

    merger = Merger(
        root=root,
        head=head,
        update=update,
        default_dict_merge_op=configuration.default_dict_merge_op,
        default_list_merge_op=configuration.default_list_merge_op,
        list_dict_ops=configuration.list_dict_ops,
        list_merge_ops=configuration.list_merge_ops,
        comparators=configuration.comparators,
    )

    try:
        merger.merge()
    except MergeError as error:
        decoded = [json.loads(item.to_json()) for item in error.content]
        conflicts = sort_conflicts(decoded)
    else:
        conflicts = None

    return merger.merged_root, conflicts