def test_comparing_publication_info_with_cnum():
    """Conference entries matching on cnum/acronym merge without conflict.

    The update adds a ``conference_record`` ref to an otherwise identical
    publication_info entry; the merged record should keep the enriched
    entry (expected_merged = update).
    """
    root = {}
    head = {
        'publication_info': [{
            "artid": "WEPAB127",
            "cnum": "C21-05-24.3",
            "conf_acronym": "IPAC2021",
            "year": 2021
        }]
    }
    update = {
        'publication_info': [{
            "artid": "WEPAB127",
            "cnum": "C21-05-24.3",
            "conf_acronym": "IPAC2021",
            "conference_record": {
                "$ref": "https://inspirehep.net/api/conferences/1853162"
            },
            "year": 2021
        }]
    }
    expected_conflict = []
    expected_merged = update
    root, head, update, expected_merged = add_arxiv_source(
        root, head, update, expected_merged)
    merged, conflict = merge(root, head, update, head_source='arxiv')
    merged = add_arxiv_source(merged)
    assert merged == expected_merged
    assert_ordered_conflicts(conflict, expected_conflict)
    validate_subschema(merged)
def test_merging_full_name_field_keeps_longest_name():
    """The most complete author name (the curated head's) survives the merge."""
    root = {
        'authors': [{
            'full_name': 'Pitts Kevin',
        }]
    }
    head = {
        'authors': [{
            'full_name': 'Pitts, Kevin John',
        }]
    }
    update = {
        'authors': [{
            'full_name': 'Pitts, Kevin',
        }]
    }
    expected_merged = head
    expected_conflict = []
    merged, conflict = merge(root, head, update, head_source='arxiv')
    assert merged == expected_merged
    assert_ordered_conflicts(conflict, expected_conflict)
    validate_subschema(merged)
def test_comparing_authors_unicode_name():
    """Accented and ASCII-transliterated author names are matched as the same
    person; the head's (accented) spelling is kept."""
    root = {}
    head = {
        'authors': [
            {
                'full_name': 'Ortín, Tomás'
            },
        ],
    }
    update = {
        'authors': [
            {
                'full_name': 'Ortin, Tomas'
            },
        ],
    }
    expected_conflict = []
    expected_merged = head
    root, head, update, expected_merged = add_arxiv_source(
        root, head, update, expected_merged)
    merged, conflict = merge(root, head, update, head_source='arxiv')
    merged = add_arxiv_source(merged)
    assert merged == expected_merged
    assert_ordered_conflicts(conflict, expected_conflict)
    validate_subschema(merged)
def merge_records(obj, eng):
    """Perform a manual merge.

    Reads the two records stored under the ``head`` and ``update`` keys of
    ``obj.extra_data``, merges them, and writes the result into
    ``obj.data``. Any conflicts produced by the merger are stored in
    ``obj.extra_data['conflicts']``.

    Because a manual merge has no common ancestor, ``root`` is the empty
    dictionary.

    Args:
        obj: a workflow object.
        eng: a workflow engine.

    Returns:
        None
    """
    extra = obj.extra_data
    merged, conflicts = merge(
        root={},
        head=extra['head'],
        update=extra['update'],
        head_source=extra['head_source'],
    )
    obj.data = merged
    obj.extra_data['conflicts'] = conflicts
    obj.save()
def test_merging_acquisition_source_publisher_on_arxiv(fake_get_config):
    """A newer publisher acquisition_source replaces the arXiv one wholesale."""
    root = {
        "acquisition_source": {
            "datetime": "2021-05-11T02:35:43.387350",
            "method": "hepcrawl",
            "source": "arXiv",
            "submission_number": "c8a0e3e0b20011eb8d930a580a6402c0"
        }
    }
    head = {
        "acquisition_source": {
            "datetime": "2021-05-11T02:35:43.387350",
            "method": "hepcrawl",
            "source": "arXiv",
            "submission_number": "c8a0e3e0b20011eb8d930a580a6402c0"
        }
    }
    update = {
        "acquisition_source": {
            "datetime": "2021-05-12T02:35:43.387350",
            "method": "beard",
            "source": "other source",
            "submission_number": "c8a0e3e0b20011eb8d930a580a6402c1"
        }
    }
    expected_merged = update
    expected_conflict = []
    merged, conflict = merge(root, head, update)
    assert merged == expected_merged
    assert_ordered_conflicts(conflict, expected_conflict)
    validate_subschema(merged)
def merge_article_with_crossref_data(article):
    """Enhance ``article`` with Crossref metadata fetched via its first DOI.

    Returns the merged record, or the original ``article`` unchanged when
    there is no DOI, when the Crossref import fails, or when the merged
    result does not validate against the ``hep`` schema. Merge conflicts
    are logged at debug level and otherwise ignored.
    """
    doi = get_value(article, "dois[0].value")
    if not doi:
        return article
    try:
        crossref_data = import_doi(doi)
    except (ImportConnectionError, ImportParsingError):
        LOGGER.exception("Cannot merge submission with %r,", doi)
        return article
    merged, conflicts = merge(root={}, head=article, update=crossref_data)
    try:
        # Never let an invalid merge result replace a valid submission.
        validate(merged, "hep")
    except ValidationError:
        LOGGER.exception(
            "Merger returned invalid data while merging imported arxiv with crossref",
            doi=doi,
            arxiv=get_value(article, "arxiv_eprints[0].value"),
        )
        return article
    if conflicts:
        LOGGER.debug("Ignoring conflicts while enhancing submission.\n%r", conflicts)
    return merged
def test_merging_same_documents_arxiv_on_arxiv(fake_get_config):
    """Merging identical document lists is a no-op with no conflicts."""
    root = {
        "documents": [
            {
                "key": "pdf1.pdf",
                "description": "paper",
                "source": "arXiv",
                "fulltext": True,
                "url": "http://example.com/files/1234-1234-1234-1234/pdf1.pdf",
            },
            {
                "key": "pdf.tex",
                "description": "latex version",
                "source": "arXiv",
                "url": "http://example.com/files/1234-1234-1234-1234/pdf.tex",
            },
        ]
    }
    # All three merge inputs are deliberately the very same record.
    head = root
    update = root
    expected_merged = head
    expected_conflict = []
    merged, conflict = merge(root, head, update)
    assert merged == expected_merged
    assert_ordered_conflicts(conflict, expected_conflict)
    validate_subschema(merged)
def test_merging_cleans_acquisition_source_for_arxiv_on_publisher(
        fake_get_config):
    """An arXiv update on a publisher head ends up with 'arXiv' as source."""
    root = {
        "acquisition_source": {
            "datetime": "2021-05-11T02:35:43.387350",
            "method": "arXiv",
            "source": "arXiv",
            "submission_number": "c8a0e3e0b20011eb8d930a580a6402c0"
        }
    }
    head = {
        "acquisition_source": {
            "datetime": "2021-05-11T02:35:43.387350",
            "method": "hepcrawl",
            "source": "desy",
            "submission_number": "c8a0e3e0b20011eb8d930a580a6402c0"
        }
    }
    update = {
        "acquisition_source": {
            "datetime": "2021-05-12T02:35:43.387350",
            "method": "hepcrawl",
            "source": "arXiv",
            "submission_number": "c8a0e3e0b20011eb8d930a580a6402c1"
        }
    }
    merged, conflict = merge(root, head, update)
    assert merged['acquisition_source']['source'] == 'arXiv'
def test_merging_inspire_categories_field():
    """Curator-assigned categories on the head win over arXiv ones."""
    root = {
        'inspire_categories': [{
            'source': 'INSPIRE',
            'term': 'Theory-HEP'
        }]
    }
    head = {
        'inspire_categories': [{
            'source': 'curator',
            'term': 'Theory-HEP'
        }, {
            'source': 'curator',
            'term': 'Theory-Nucl'
        }]
    }
    update = {
        'inspire_categories': [{
            'source': 'arxiv',
            'term': 'Computing'
        }, {
            'source': 'arxiv',
            'term': 'Other'
        }]
    }
    expected_merged = head
    expected_conflict = []
    merged, conflict = merge(root, head, update, head_source='arxiv')
    assert merged == expected_merged
    assert_ordered_conflicts(conflict, expected_conflict)
    validate_subschema(merged)
def test_merging_dois_field_handles_repeated_values():
    """DOIs with the same value but different material/source coexist in
    the merged list rather than being collapsed."""
    root = {
        'dois': [{
            'material': 'preprint',
            'value': '10.1023/A:1026654312961'
        }]
    }
    head = {
        'dois': [
            {
                'material': 'publication',
                'value': '10.1023/A:1026654312961'
            },
            {
                'source': 'nowhere',
                'value': '10.1023/B:1026654312961'
            },
        ]
    }
    update = {
        'dois': [
            {
                'material': 'erratum',
                'value': '10.1023/A:1026654312961'
            },
            {
                'material': 'erratum',
                'source': 'nowhere',
                'value': '10.1023/B:1026654312961'
            },
        ]
    }
    expected_merged = {
        'dois': [
            {
                'material': 'publication',
                'value': '10.1023/A:1026654312961'
            },
            {
                'source': 'nowhere',
                'value': '10.1023/B:1026654312961'
            },
            {
                'material': 'erratum',
                'value': '10.1023/A:1026654312961'
            },
            {
                'material': 'erratum',
                'source': 'nowhere',
                'value': '10.1023/B:1026654312961'
            },
        ]
    }
    expected_conflict = []
    merged, conflict = merge(root, head, update, head_source='arxiv')
    assert merged == expected_merged
    assert_ordered_conflicts(conflict, expected_conflict)
    validate_subschema(merged)
def test_comparing_publication_info():
    """Matching journal entries merge; the update's extra ``artid`` is kept."""
    root = {}
    head = {
        'publication_info': [
            {
                'journal_title': 'J. Testing',
                'journal_volume': '42',
            }
        ]
    }
    update = {
        'publication_info': [
            {
                'journal_title': 'J. Testing',
                'journal_volume': '42',
                'artid': 'foo',
            }
        ]
    }
    expected_conflict = []
    expected_merged = update
    root, head, update, expected_merged = add_arxiv_source(root, head, update,
                                                           expected_merged)
    merged, conflict = merge(root, head, update, head_source='arxiv')
    merged = add_arxiv_source(merged)
    assert merged == expected_merged
    assert_ordered_conflicts(conflict, expected_conflict)
    validate_subschema(merged)
def test_merging_titles_field():
    """A curated subtitle on the head is kept; the new arXiv title cannot be
    auto-merged and surfaces as an INSERT conflict instead."""
    root = {
        'titles': [{
            'source': 'arXiv',
            'title': 'ANTARES: An observatory at the seabed '
                     'to the confines of the Universe'
        }  # record: 1519935
        ]
    }
    head = {
        'titles': [{
            'source': 'arXiv',
            'subtitle': 'this subtitle has been added by a curator',
            'title': 'ANTARES: An observatory at the seabed '
                     'to the confines of the Universe'
        }]
    }
    update = {
        'titles': [
            {
                'source': 'arXiv',
                'title': 'ANTARES: Un osservatorio foo bar'
            },
        ]
    }
    expected_merged = {
        'titles': [
            {
                'source': 'arXiv',
                'subtitle': 'this subtitle has been added by a curator',
                'title': 'ANTARES: An observatory at the seabed '
                         'to the confines of the Universe'
            },
        ]
    }
    expected_conflict = [{
        'path': '/titles/0',
        'op': 'add',
        'value': {
            'source': 'arXiv',
            'title': 'ANTARES: Un osservatorio foo bar'
        },
        '$type': 'INSERT'
    }]
    merged, conflict = merge(root, head, update, head_source='arxiv')
    assert merged == expected_merged
    assert_ordered_conflicts(conflict, expected_conflict)
    validate_subschema(merged)
def test_grobid_on_arxiv_operations_keeps_authors_from_head():
    """With GrobidOnArxivAuthorsOperations, GROBID may enrich existing
    (head) authors but must not add authors the head does not have."""
    root = {}
    authors_arxiv = {
        "authors": [
            {
                "full_name": "Kowal, Michal",
                "raw_affiliations": [{
                    "value": "Warsaw U."
                }],
                "emails": [
                    "*****@*****.**"
                ],
            }
        ]
    }
    authors_grobid = {
        "authors": [
            {
                "full_name": "Kowal, Michal",
                "raw_affiliations": [{
                    "value": "Warsaw U., Faculty of Physics, Pastuera 7, Warsaw, Poland"
                }],
                "emails": [
                    "*****@*****.**"
                ],
            },
            {
                "full_name": "Sułkowski, Piotr Andrzej",
                "raw_affiliations": [{
                    "value": "Warsaw U., Faculty of Physics, Pastuera 7, Warsaw, Poland"
                }],
                "emails": [
                    "*****@*****.**"
                ],
            }
        ]
    }
    expected_merged = {
        "authors": [
            {
                "raw_affiliations": [{
                    "value": "Warsaw U., Faculty of Physics, Pastuera 7, Warsaw, Poland"
                }],
                "emails": [
                    "*****@*****.**"
                ],
                "full_name": "Kowal, Michal"
            }
        ]
    }
    merged, conflicts = merge(root, authors_arxiv, authors_grobid,
                              configuration=GrobidOnArxivAuthorsOperations)
    assert not conflicts
    assert merged == expected_merged
def test_merger_handles_authors_with_correct_ordering():
    """Authors from head and update are interleaved in a stable order and
    the ambiguous merged-name entry is reported as SET_FIELD conflicts.
    """
    root = {}
    head = {
        "authors": [
            {
                'full_name': 'Janeway, Kathryn',
                'age': 44
            },
            {
                'full_name': 'Picard, Jean-Luc',
                'age': 55
            },
            {
                "full_name": "Archer, Jonathan",
            }
        ],
    }
    update = {
        "authors": [
            {
                "full_name": "Kirk, James"
            },
            {
                'full_name': 'Janeway Kathryn, Picard Jean-Luc',
                'age': 66
            },
            {
                "full_name": "Archer, Jonathan",
            }
        ],
    }
    expected_conflict = [
        {
            'path': '/authors/1',
            'op': 'replace',
            'value': {'full_name': 'Janeway Kathryn, Picard Jean-Luc',
                      'age': 66},
            '$type': 'SET_FIELD'
        },
        {
            'path': '/authors/2',
            'op': 'replace',
            'value': {'full_name': 'Janeway Kathryn, Picard Jean-Luc',
                      'age': 66},
            '$type': 'SET_FIELD'
        },
    ]
    expected_merged = {'authors': [
        {"full_name": "Kirk, James"},
        {'age': 44, 'full_name': 'Janeway, Kathryn'},
        {'age': 55, 'full_name': 'Picard, Jean-Luc'},
        {"full_name": "Archer, Jonathan"}
    ]}
    merged, conflict = merge(root, head, update, head_source='arxiv')
    assert merged == expected_merged
    # Fix: the original asserted ``conflict.sort(...) == expected_conflict.sort(...)``.
    # ``list.sort()`` returns None, so that compared None == None and always
    # passed regardless of the conflicts' contents. Compare sorted copies
    # instead (same pattern as test_merger_handles_list_deletions).
    assert sorted(conflict, key=itemgetter('path')) == sorted(
        expected_conflict, key=itemgetter('path'))
def test_comparing_references_field_different_dois():
    """References with different DOIs are treated as distinct and both
    survive in the merged record, without conflicts."""
    root = {}
    head = {
        'references': [
            {
                'reference': {
                    'dois': [
                        '10.1099/bar',
                    ],
                }
            }
        ]
    }
    update = {
        'references': [
            {
                'reference': {
                    'dois': [
                        '10.1099/foo',
                    ],
                    'document_type': 'article',
                }
            }
        ]
    }
    expected_conflict = []
    expected_merged = {
        'references': [
            {
                'reference': {
                    'dois': [
                        '10.1099/bar',
                    ],
                }
            },
            {
                'reference': {
                    'dois': [
                        '10.1099/foo',
                    ],
                    'document_type': 'article',
                }
            }
        ]
    }
    root, head, update, expected_merged = add_arxiv_source(root, head, update,
                                                           expected_merged)
    merged, conflict = merge(root, head, update, head_source='arxiv')
    merged = add_arxiv_source(merged)
    assert merged == expected_merged
    assert_ordered_conflicts(conflict, expected_conflict)
    validate_subschema(merged)
def test_merger_handles_list_deletions():
    """Curator deletions in the head conflict with the update's surviving
    and changed entries; the head (empty) wins in the merged output."""
    root = {
        'book_series': [
            {
                'title': 'IEEE Nucl.Sci.Symp.Conf.Rec.',
                'volume': '1'
            },
            {
                'title': 'CMS Web-Based Monitoring',
                'volume': '2'
            },
            {
                'title': 'Lectures in Testing',
                'volume': '3',
            },
        ]
    }
    head = {}
    update = {
        'book_series': [
            {
                'title': 'Lectures in Testing',
                'volume': '3',
            },
        ]
    }
    expected_merged = head
    expected_conflict = [{
        'path': '/book_series/0/volume',
        'op': 'replace',
        'value': '3',
        '$type': 'SET_FIELD'
    }, {
        'path': '/book_series/0/title',
        'op': 'replace',
        'value': 'Lectures in Testing',
        '$type': 'SET_FIELD'
    }, {
        'path': '/book_series/2',
        'op': 'remove',
        'value': None,
        '$type': 'REMOVE_FIELD'
    }, {
        'path': '/book_series/1',
        'op': 'remove',
        'value': None,
        '$type': 'REMOVE_FIELD'
    }]
    merged, conflict = merge(root, head, update, head_source='arxiv')
    assert merged == expected_merged
    # Conflict order is not guaranteed, so compare sorted-by-path copies.
    assert sorted(conflict, key=operator.itemgetter("path")) == sorted(
        expected_conflict, key=operator.itemgetter("path"))
def test_figures():
    """A fresh set of arXiv figures in the update replaces the head's set."""
    root = {}
    head = {
        'figures': [{
            'key': 'figure1.png',
            'caption': 'Figure 1',
            'source': 'arXiv',
            # NOTE(review): missing '/' after example.com — looks like a
            # fixture typo; harmless here since expected_merged == update,
            # but confirm it is not meant to exercise URL matching.
            'url': 'http://example.comfiles/1234-1234-1234-1234/figure1.png',
        }, {
            'key': 'figure2.png',
            'caption': 'Figure 2',
            'source': 'arXiv',
            'url': 'http://example.com/files/1234-1234-1234-1234/figure2.png',
        }]
    }
    update = {
        'figures': [{
            'key': 'new_figure1.png',
            'caption': 'Figure 1',
            'source': 'arXiv',
            'url': 'http://example.com/files/5678-5678-5678-5678/figure1.png',
        }, {
            'key': 'new_figure2.png',
            'caption': 'Figure 2',
            'source': 'arXiv',
            'url': 'http://example.com/files/5678-5678-5678-5678/figure2.png',
        }]
    }
    expected_merged = update
    expected_conflict = []
    merged, conflict = merge(root, head, update, head_source='arxiv')
    assert merged == expected_merged
    assert_ordered_conflicts(conflict, expected_conflict)
    validate_subschema(merged)
def test_ordering_conflicts():
    """Regression test on JSON fixtures for broken input with duplicated
    authors: the merged author list and the conflicts must be stable."""
    # This test is actually for broken input.
    # Where authors are duplicated.
    root = load_test_data("test_data/root.json")
    head = load_test_data("test_data/head.json")
    update = load_test_data("test_data/update.json")
    expected_conflicts = load_test_data("test_data/conflicts.json")
    expected_merged = load_test_data("test_data/merged.json")
    merged, conflicts = merge(root, head, update)
    # Author order is not deterministic for this input; compare by uuid.
    assert sorted(merged['authors'], key=itemgetter('uuid')) == sorted(
        expected_merged['authors'], key=itemgetter('uuid'))
    assert_ordered_conflicts(conflicts, expected_conflicts)
def merge_articles(obj, eng):
    """Merge two articles.

    The workflow payload is overwritten by the merged record, the
    conflicts are stored in ``extra_data.conflicts``.

    Also, it adds a ``callback_url`` which contains the endpoint which
    resolves the merge conflicts.

    Note:
        When the feature flag ``FEATURE_FLAG_ENABLE_MERGER`` is ``False``
        it will skip the merge.
    """
    if not current_app.config.get('FEATURE_FLAG_ENABLE_MERGER'):
        return None

    matched_control_number = obj.extra_data['matches']['approved']
    head_uuid = PersistentIdentifier.get('lit',
                                         matched_control_number).object_uuid
    head_record = InspireRecord.get_record(head_uuid)
    update = obj.data
    update_source = LiteratureReader(obj.data).source
    # The root is the last version of the head previously received from
    # this same source; empty if we never got one.
    head_root = read_wf_record_source(record_uuid=head_record.id,
                                      source=update_source.lower())
    head_root = head_root.json if head_root else {}

    # Bookkeeping needed later to detect concurrent edits of the head.
    obj.extra_data['head_uuid'] = str(head_uuid)
    obj.extra_data['head_version_id'] = head_record.model.version_id
    obj.extra_data['merger_head_revision'] = head_record.revision_id
    obj.extra_data['merger_original_root'] = deepcopy(head_root)

    merged, conflicts = merge(
        head=head_record.to_dict(),
        root=head_root,
        update=update,
    )

    obj.data = merged

    if conflicts:
        obj.extra_data['conflicts'] = conflicts
        obj.extra_data['conflicts_metadata'] = {
            'datetime': datetime.now().strftime("%b %d, %Y, %H:%M:%S %p"),
            'update_source': update_source,
        }
        obj.extra_data['callback_url'] = \
            get_resolve_merge_conflicts_callback_url()
    obj.save()
def test_documents():
    """A fresh set of arXiv documents in the update replaces the head's set."""
    root = {}
    head = {
        'documents': [
            {
                'key': 'pdf1.pdf',
                'description': 'paper',
                'source': 'arXiv',
                'fulltext': True,
                'url': 'http://example.com/files/1234-1234-1234-1234/pdf1.pdf',
            },
            {
                'key': 'pdf.tex',
                'description': 'latex version',
                'source': 'arXiv',
                'url': 'http://example.com/files/1234-1234-1234-1234/pdf.tex',
            },
        ]
    }
    update = {
        'documents': [{
            'key': 'pdf.pdf',
            'description': 'paper',
            'source': 'arXiv',
            'url': 'http://example.com/files/5678-5678-5678-5678/pdf.pdf',
        }, {
            'key': 'foo.xml',
            'description': 'some xml files',
            'source': 'arXiv',
            'url': 'http://example.com/files/5678-5678-5678-5678/foo.xml',
        }]
    }
    expected_merged = update
    expected_conflict = []
    merged, conflict = merge(root, head, update, head_source='arxiv')
    assert merged == expected_merged
    assert_ordered_conflicts(conflict, expected_conflict)
    validate_subschema(merged)
def test_head_curates_author_no_duplicate():
    """A curated name variant on the head yields both author entries in the
    merged record plus a REMOVE_FIELD conflict for the curator to resolve."""
    # https://labs.inspirehep.net/api/holdingpen/1268973
    root = {
        'authors': [
            {
                "full_name": "Li, Zhengxiang"
            },
        ]
    }
    head = {
        "authors": [{
            "affiliations": [{
                "value": "Beijing Normal U."
            }],
            "full_name": "Li, Zheng-Xiang",
        }]
    }
    update = {
        'authors': [
            {
                "full_name": "Li, Zhengxiang"
            },
        ]
    }
    expected_merged = {
        'authors': [{
            'full_name': 'Li, Zhengxiang'
        }, {
            'full_name': 'Li, Zheng-Xiang',
            'affiliations': [{
                'value': 'Beijing Normal U.'
            }]
        }]
    }
    expected_conflict = [{
        'path': '/authors/1',
        'op': 'remove',
        'value': None,
        '$type': 'REMOVE_FIELD'
    }]
    merged, conflict = merge(root, head, update, head_source='arxiv')
    assert merged == expected_merged
    assert conflict == expected_conflict
    validate_subschema(merged)
def test_grobid_on_arxiv_operations_without_conflict():
    """GROBID's richer raw_affiliations replace the arXiv head's for a
    matching author, with no conflicts."""
    root = {}
    authors_arxiv = {
        "authors": [
            {
                "full_name": "Sułkowski, Piotr",
                "raw_affiliations": [{
                    "value": "Warsaw U."
                }],
                "emails": [
                    "*****@*****.**"
                ],
            }
        ]
    }
    authors_grobid = {
        "authors": [
            {
                "full_name": "Sułkowski, Piotr",
                "raw_affiliations": [{
                    "value": "Warsaw U., Faculty of Physics, Pastuera 7, Warsaw, Poland"
                }],
                "emails": [
                    "*****@*****.**"
                ],
            }
        ]
    }
    expected_merged = {
        "authors": [
            {
                "raw_affiliations": [{
                    "value": "Warsaw U., Faculty of Physics, Pastuera 7, Warsaw, Poland"
                }],
                "emails": [
                    "*****@*****.**"
                ],
                "full_name": "Sułkowski, Piotr"
            }
        ]
    }
    merged, conflicts = merge(root, authors_arxiv, authors_grobid,
                              configuration=GrobidOnArxivAuthorsOperations)
    assert not conflicts
    assert merged == expected_merged
def merge_article_with_crossref_data(article):
    """Enrich *article* with metadata imported from Crossref via its DOI.

    Returns the merged record; falls back to returning *article* untouched
    when no DOI is present or the Crossref import fails. Merge conflicts
    are logged at debug level and otherwise ignored.
    """
    doi = get_value(article, "dois[0].value")
    if not doi:
        return article

    try:
        update_record = import_doi(doi)
    except (ImportConnectionError, ImportParsingError):
        LOGGER.exception("Cannot merge submission with %r,", doi)
        return article

    merged, conflicts = merge(root={}, head=article, update=update_record)
    if conflicts:
        LOGGER.debug("Ignoring conflicts while enhancing submission.\n%r", conflicts)
    return merged
def test_merging_acquisition_source_field(): root = {} # record_id: 1517095 head = {'acquisition_source': {'method': 'submitter', 'source': 'arxiv'}} update = { 'acquisition_source': { 'method': 'batchuploader', 'source': 'arxiv' } } expected_merged = update expected_conflict = [] merged, conflict = merge(root, head, update, head_source='arxiv') assert merged == expected_merged assert_ordered_conflicts(conflict, expected_conflict) validate_subschema(merged)
def test_merging_license_field():
    """Updated publisher license text replaces the old one while the
    head-only arXiv license entry is preserved."""
    root = {
        'license': [{
            'imposing': 'Elsevier',
            'url': 'http://creativecommons.org/licenses/by/4.0/',
            'license': 'elsevier foo bar'
        }]
    }
    head = {
        'license': [{
            'imposing': 'Elsevier',
            'url': 'http://creativecommons.org/licenses/by/4.0/',
            'license': 'elsevier foo bar'
        }, {
            'imposing': 'arXiv',
            'url': 'http://creativecommons.org/licenses/by/4.0/',
            'license': 'arxiv foo bar'
        }]
    }
    update = {
        'license': [{
            'imposing': 'Elsevier',
            'url': 'http://creativecommons.org/licenses/by/4.0/',
            'license': 'elsevier foo bar updated!'
        }]
    }
    expected_merged = {
        'license': [{
            'imposing': 'Elsevier',
            'url': 'http://creativecommons.org/licenses/by/4.0/',
            'license': 'elsevier foo bar updated!'
        }, {
            'imposing': 'arXiv',
            'url': 'http://creativecommons.org/licenses/by/4.0/',
            'license': 'arxiv foo bar'
        }]
    }
    expected_conflict = []
    merged, conflict = merge(root, head, update, head_source='arxiv')
    assert merged == expected_merged
    assert_ordered_conflicts(conflict, expected_conflict)
    validate_subschema(merged)
def test_merging_publication_info_for_arxiv_on_publisher(fake_get_config):
    """Matching publication_info entries collapse to one and the head's
    (curated) journal_title wins over both root and update."""
    root = {
        "publication_info": [{
            "year": 2021,
            "artid": "051701",
            "material": "publication",
            "journal_issue": "5",
            "journal_title": "root title",
            "journal_record": {
                "$ref": "https://inspirehep.net/api/journals/1613970"
            },
            "journal_volume": "104",
        }]
    }
    head = {
        "publication_info": [{
            "year": 2021,
            "artid": "051701",
            "material": "publication",
            "journal_issue": "5",
            "journal_title": "head title",
            "journal_record": {
                "$ref": "https://inspirehep.net/api/journals/1613970"
            },
            "journal_volume": "104",
        }]
    }
    update = {
        "publication_info": [{
            "year": 2021,
            "artid": "051701",
            "material": "publication",
            "journal_issue": "5",
            "journal_title": "update title",
            "journal_record": {
                "$ref": "https://inspirehep.net/api/journals/1613970"
            },
            "journal_volume": "104",
        }]
    }
    merged, conflict = merge(root, head, update)
    assert len(merged['publication_info']) == 1
    assert merged['publication_info'][0]['journal_title'] == 'head title'
def test_comparing_keywords():
    """Keywords are matched per (value, schema) pair: the shared JACOW
    'shielding' entry is kept once, and the rest are unioned."""
    root = {}
    head = {
        'keywords': [{
            'value': 'shielding',
            'schema': 'JACOW',
        }, {
            'value': 'test',
            'schema': 'JACOW',
        }]
    }
    update = {
        'keywords': [{
            'value': 'shielding',
            'schema': 'INSPIRE',
        }, {
            'value': 'shielding',
            'schema': 'JACOW',
        }]
    }
    expected_conflict = []
    expected_merged = {
        'keywords': [
            {
                'value': 'shielding',
                'schema': 'INSPIRE',
            },
            {
                'value': 'shielding',
                'schema': 'JACOW',
            },
            {
                'value': 'test',
                'schema': 'JACOW',
            },
        ]
    }
    merged, conflict = merge(root, head, update, head_source='arxiv')
    assert merged == expected_merged
    assert_ordered_conflicts(conflict, expected_conflict)
    validate_subschema(merged)
def merge_articles(obj, eng):
    """Merge two articles.

    The workflow payload is overwritten by the merged record, the
    conflicts are stored in ``extra_data.conflicts``.

    Also, it adds a ``callback_url`` which contains the endpoint which
    resolves the merge conflicts.

    Note:
        When the feature flag ``FEATURE_FLAG_ENABLE_MERGER`` is ``False``
        it will skip the merge.
    """
    if not current_app.config.get('FEATURE_FLAG_ENABLE_MERGER'):
        return None

    matched_control_number = obj.extra_data['matches']['approved']
    head_uuid = PersistentIdentifier.get('lit',
                                         matched_control_number).object_uuid
    obj.extra_data['head_uuid'] = str(head_uuid)
    head = InspireRecord.get_record(head_uuid)
    update = obj.data
    update_source = get_source(update).lower()
    # The root is the last revision previously received from this same
    # source; empty if none was ever stored.
    head_root = read_wf_record_source(record_uuid=head.id,
                                      source=update_source)
    head_root = head_root.json if head_root else {}

    merged, conflicts = merge(
        head=head.dumps(),
        root=head_root,
        update=update,
    )

    obj.data = merged

    if conflicts:
        obj.extra_data['conflicts'] = conflicts
        obj.extra_data['callback_url'] = \
            get_resolve_merge_conflicts_callback_url()
    obj.save()
def test_merging_raw_affiliations_field():
    """The update's revised/extended raw_affiliations replace the head's."""
    root = {}
    head = {
        'authors': [{
            'full_name': 'Pitts, Kevin T',
            'raw_affiliations': [{
                'source': 'arxiv',
                'value': 'Department of Physics, Indiana University, Bloomington, IN 47405, USA'
            }]
        }]
    }
    update = {
        'authors': [{
            'full_name': 'Pitts, Kevin T',
            'raw_affiliations': [{
                'source': 'arxiv',
                'value': 'Department of Physics, Indiana University, Bloomington, IN 47405, US'
            }, {
                'source': 'arxiv',
                'value': 'Padua U',
            }]
        }]
    }
    expected_merged = update
    expected_conflict = []
    merged, conflict = merge(root, head, update, head_source='arxiv')
    assert merged == expected_merged
    assert_ordered_conflicts(conflict, expected_conflict)
    validate_subschema(merged)
def merge_articles(obj, eng):
    """Merge two articles.

    The workflow payload is overwritten by the merged record, the
    conflicts are stored in ``extra_data.conflicts``.

    Also, it adds a ``callback_url`` which contains the endpoint which
    resolves the merge conflicts.

    Note:
        For the time being the ``root`` will be ignored, and we'll rely
        only on the ``head``, hence it is a rootless implementation.

        Also when the feature flag ``FEATURE_FLAG_ENABLE_MERGER`` is
        ``False`` it will skip the merge.
    """
    if not current_app.config.get('FEATURE_FLAG_ENABLE_MERGER'):
        return None

    matched_control_number = obj.extra_data['matches']['approved']
    head_uuid = PersistentIdentifier.get('lit',
                                         matched_control_number).object_uuid
    obj.extra_data['head_uuid'] = str(head_uuid)
    head = InspireRecord.get_record(head_uuid)
    root = {}  # rootless merge: no common ancestor is used
    update = obj.data

    merged, conflicts = merge(head=head.dumps(), root=root, update=update)

    obj.data = merged

    if conflicts:
        obj.extra_data['conflicts'] = conflicts
        obj.extra_data['callback_url'] = \
            get_resolve_merge_conflicts_callback_url()
    obj.save()