예제 #1
0
def test_filter_documents_same_source_multiple_sources_in_update():
    """Expect root, head and update back unchanged when the update's
    documents come from more than one source ('arXiv' and 'publisher')."""
    old_arxiv_document = {
        'source': 'arXiv',
        'key': 'old_file.pdf',
        'url': '/files/5678-5678-5678-5678/old_file.pdf',
    }
    first_arxiv_document = {
        'source': 'arXiv',
        'key': 'file1.pdf',
        'url': '/files/1234-1234-1234-1234/file1.pdf',
    }
    second_arxiv_document = {
        'source': 'arXiv',
        'key': 'file2.pdf',
        'url': '/files/1234-1234-1234-1234/file2.pdf',
    }
    publisher_document = {
        'source': 'publisher',
        'key': 'file3.pdf',
        'url': '/files/1234-1234-1234-1234/file3.pdf',
    }

    root = {}
    head = {'documents': [old_arxiv_document]}
    update = {
        'documents': [
            first_arxiv_document,
            second_arxiv_document,
            publisher_document,
        ],
    }

    filtered = filter_records(
        root, head, update, filters=[filter_documents_same_source]
    )

    # The expectation is made of the very objects that were passed in.
    assert filtered == (root, head, update)
예제 #2
0
def test_filter_publisher_references_keeps_update_if_no_refs_in_head():
    """Expect the update's references to survive the filter when the head
    record is empty (and therefore has no references)."""
    def record_with_single_reference():
        # Built anew on each call so the input and the expectation never
        # share a dict.
        return {
            'references': [
                {'reference': {'arxiv_eprint': '1810.56789'}},
            ],
        }

    root = {}
    head = {}
    update = record_with_single_reference()
    expected_update = record_with_single_reference()

    filtered = filter_records(
        root, head, update, filters=[filter_publisher_references]
    )

    assert filtered == (root, head, expected_update)
예제 #3
0
def test_filter_curated_references_takes_update_if_not_curated():
    """Expect the head to be emptied and the update's references kept when
    the head's references carry no curation marker."""
    def record_citing(eprint):
        # Returns a fresh record with exactly one arXiv reference.
        return {
            'references': [
                {'reference': {'arxiv_eprint': eprint}},
            ],
        }

    root = {}
    head = record_citing('1810.12345')
    update = record_citing('1810.56789')

    filtered = filter_records(
        root, head, update, filters=[filter_curated_references]
    )

    expected_head = {}
    expected_update = record_citing('1810.56789')
    assert filtered == (root, expected_head, expected_update)
예제 #4
0
def test_filter_curated_references_keeps_update_if_head_almost_equal_to_root():
    """Expect root and head to be emptied and the update kept when the head
    reference differs from the root one only by the fields exercised here
    ('misc', 'authors', 'raw_refs', 'curated_relation')."""
    root_reference = {
        'reference': {
            'arxiv_eprint': '1810.12345',
        },
        'curated_relation': False,
    }
    head_reference = {
        'reference': {
            'arxiv_eprint': '1810.12345',
            'misc': ['foo'],
            'authors': ['Smith, J.'],
        },
        'raw_refs': [
            {
                'source': 'arXiv',
                'schema': 'text',
                'value': 'foo 1810.12345',
            },
        ],
    }

    root = {'references': [root_reference]}
    head = {'references': [head_reference]}
    update = {
        'references': [
            {'reference': {'arxiv_eprint': '1810.56789'}},
        ],
    }

    filtered = filter_records(
        root, head, update, filters=[filter_curated_references]
    )

    expected_root = {}
    expected_head = {}
    # Built separately from ``update`` so the two never share a dict.
    expected_update = {
        'references': [
            {'reference': {'arxiv_eprint': '1810.56789'}},
        ],
    }
    assert filtered == (expected_root, expected_head, expected_update)
예제 #5
0
def merge(root, head, update, head_source=None):
    """
    Merge ``head`` and ``update`` against their common ancestor ``root``.

    A configuration is selected through ``get_configuration`` from ``head``,
    ``update`` and ``head_source``. The three records are first passed
    through the configuration's pre-filters, then a ``Merger`` built from
    that configuration is run on them.

    Params
        root(dict): the last common parent json of head and update
        head(dict): the last version of a record in INSPIRE
        update(dict): the update coming from outside INSPIRE to merge
        head_source(string): the source of the head record. If ``None``,
            heuristics are used to derive it from the metadata. This is useful
            if the HEAD came from legacy and the acquisition_source does not
            reflect the state of the record.

    Return
        A tuple ``(merged, conflicts)``: the merged record taken from the
        merger, and a flat list with the JSON form of the conflicts that
        remain after the configuration's conflict filters are applied.
    """
    configuration = get_configuration(head, update, head_source)

    filtered_root, filtered_head, filtered_update = filter_records(
        root, head, update, filters=configuration.pre_filters
    )

    merger = Merger(
        root=filtered_root,
        head=filtered_head,
        update=filtered_update,
        default_dict_merge_op=configuration.default_dict_merge_op,
        default_list_merge_op=configuration.default_list_merge_op,
        list_dict_ops=configuration.list_dict_ops,
        list_merge_ops=configuration.list_merge_ops,
        comparators=configuration.comparators,
    )

    # A MergeError is not fatal: it carries the conflicts found by the merge.
    try:
        merger.merge()
    except MergeError as error:
        raw_conflicts = error.content
    else:
        raw_conflicts = []

    relevant_conflicts = filter_conflicts(
        raw_conflicts, configuration.conflict_filters
    )

    # Each conflict deserializes to an iterable of entries; concatenate them
    # all into a single flat list.
    flattened = []
    for conflict in relevant_conflicts:
        flattened.extend(json.loads(conflict.to_json()))

    return merger.merged_root, flattened
예제 #6
0
def test_filter_missing_figures_on_update_are_properly_handled():
    """Expect the 'arxiv'-sourced figures to be removed from root and head,
    and the update left as is, when the update has no figures at all."""
    arxiv_figure_in_root = {
        'caption': 'CC',
        'key': 'w0_bflow.png',
        'label': 'fig:bflow',
        'material': 'preprint',
        'source': 'arxiv',
        'url': '/api/files/8e2b4d59-6870-4517-8580-35822bf12edb/w0_bflow.png'
    }
    other_figure = {
        'caption': 'CC2',
        'key': 'w1_bflow.png',
        'label': 'fig2:bflow',
        'material': 'preprint',
        'source': 'other',
        'url': '/api/files/8e2b4d59-6870-4517-8888-35822bf12edb/w1_bflow.png'
    }
    # Same figure as the root's arxiv one, except for its key.
    arxiv_figure_in_head = {
        'caption': 'CC',
        'key': '627d2caea8059d8875281ebed455a714',
        'label': 'fig:bflow',
        'material': 'preprint',
        'source': 'arxiv',
        'url': '/api/files/8e2b4d59-6870-4517-8580-35822bf12edb/w0_bflow.png'
    }

    root = {"figures": [arxiv_figure_in_root, other_figure]}
    head = {"figures": [other_figure, arxiv_figure_in_head]}
    update = {'acquisition_source': {'source': 'arXiv'}}

    filtered_root, filtered_head, filtered_update = filter_records(
        root, head, update, filters=[filter_figures_same_source]
    )

    assert filtered_root == {"figures": [other_figure]}
    assert filtered_head == {"figures": [other_figure]}
    # The update is compared against the very object that was passed in.
    assert filtered_update == update
예제 #7
0
def test_filter_curated_references_keeps_head_if_differs_from_root():
    """Expect the head's references to be kept, and root and update emptied,
    when the head reference has a 'dois' field the root reference lacks."""
    def head_like_record():
        # Fresh copy on each call: used both as input and as expectation.
        return {
            'references': [
                {
                    'reference': {
                        'arxiv_eprint': '1810.12345',
                        'dois': ['10.1234/5678'],
                    },
                },
            ],
        }

    root = {
        'references': [
            {'reference': {'arxiv_eprint': '1810.12345'}},
        ],
    }
    head = head_like_record()
    update = {
        'references': [
            {'reference': {'arxiv_eprint': '1810.56789'}},
        ],
    }

    filtered = filter_records(
        root, head, update, filters=[filter_curated_references]
    )

    expected_root = {}
    expected_head = head_like_record()
    expected_update = {}
    assert filtered == (expected_root, expected_head, expected_update)
예제 #8
0
def test_filter_documents_same_source_is_case_insensitive_on_source():
    """Expect the head's 'arXiv' documents to be dropped when the update's
    only document has source 'arxiv' (different case); the head document
    without a source stays."""
    def sourceless_document():
        # Fresh dict on each call so head and expectation stay independent.
        return {
            'key': 'file3.pdf',
            'url': '/files/1234-1234-1234-1234/file3.pdf',
        }

    first_arxiv_document = {
        'source': 'arXiv',
        'key': 'file1.pdf',
        'url': '/files/1234-1234-1234-1234/file1.pdf',
    }
    second_arxiv_document = {
        'source': 'arXiv',
        'key': 'file2.pdf',
        'url': '/files/1234-1234-1234-1234/file2.pdf',
    }
    lowercase_arxiv_document = {
        'source': 'arxiv',
        'key': 'new_file.pdf',
        'url': '/files/5678-5678-5678-5678/new_file.pdf',
    }

    root = {}
    head = {
        'documents': [
            first_arxiv_document,
            second_arxiv_document,
            sourceless_document(),
        ],
    }
    update = {'documents': [lowercase_arxiv_document]}

    filtered = filter_records(
        root, head, update, filters=[filter_documents_same_source]
    )

    expected_head = {'documents': [sourceless_document()]}
    # Root and update are compared against the objects that were passed in.
    assert filtered == (root, expected_head, update)
예제 #9
0
def test_filter_curated_references_keeps_head_if_legacy_curated():
    """Expect the head's references to be kept and the update emptied when
    the head reference is flagged with 'legacy_curated'."""
    def legacy_curated_record():
        # Fresh copy on each call: used both as input and as expectation.
        return {
            'references': [
                {
                    'legacy_curated': True,
                    'reference': {
                        'arxiv_eprint': '1810.12345',
                    },
                },
            ],
        }

    root = {}
    head = legacy_curated_record()
    update = {
        'references': [
            {'reference': {'arxiv_eprint': '1810.56789'}},
        ],
    }

    filtered = filter_records(
        root, head, update, filters=[filter_curated_references]
    )

    expected_head = legacy_curated_record()
    expected_update = {}
    assert filtered == (root, expected_head, expected_update)