def setUp(self):
    """Load the MARCXML fixtures and define the expected CV LaTeX values.

    Reads ``fixtures/test_hep_formats.xml`` and
    ``fixtures/test_hep_publi_info.xml`` from the ``tests`` package,
    converts both through ``create_record`` and ``hep.do``, and stores the
    expected output dictionaries on the test instance.
    """
    self.marcxml = pkg_resources.resource_string(
        'tests', os.path.join('fixtures', 'test_hep_formats.xml'))
    self.marcxml_publi_info = pkg_resources.resource_string(
        'tests', os.path.join('fixtures', 'test_hep_publi_info.xml'))

    record = create_record(self.marcxml)
    record_publi_info = create_record(self.marcxml_publi_info)

    self.hep_record = hep.do(record)
    self.hep_record_publi_info = hep.do(record_publi_info)

    # Every literal backslash is written as "\\": sequences such as "\s"
    # and "\ " are not valid escapes and trigger a warning on recent
    # Python versions. The resulting string values are unchanged.
    self.sample_cv_latex = {
        'author': 'G.~Aad',
        'title': (
            "{\\bf ``\nSearch for supersymmetry in events containing a "
            "same-flavour opposite-sign dilepton pair, jets, and large "
            "missing transverse momentum in $\\sqrt{s}=8$ TeV $pp$ "
            "collisions with the ATLAS detector\n''}"
        ),
        'publi_info': [
            'Eur.\\ Phys.\\ J.\\ C {\\bf 75}, no. 7, 318 (2015)',
            '[Eur.\\ Phys.\\ J.\\ C {\\bf 75}, no. 10, 463 (2015)]',
        ],
        'url': cfg['CFG_SITE_URL'] + '/record/1351762',
        'date': 'Mar 11, 2015',
    }

    self.sample_cv_latex_publi_info = {
        'publi_info': ['Class.\\ Quant.\\ Grav.\\ {\\bf 15}, 2153 (1998)'],
    }
def setUp(self):
    """Load the MARCXML fixtures and define the expected LaTeX (EU/US) values.

    Reads ``fixtures/test_hep_formats.xml`` and
    ``fixtures/test_hep_publi_info.xml`` from the ``tests`` package,
    converts both through ``create_record`` and ``hep.do``, builds the
    ``Latex`` objects under test and stores the expected output
    dictionaries on the test instance.
    """
    self.marcxml = pkg_resources.resource_string(
        'tests', os.path.join('fixtures', 'test_hep_formats.xml'))
    self.marcxml_publi_info = pkg_resources.resource_string(
        'tests', os.path.join('fixtures', 'test_hep_publi_info.xml'))

    record = create_record(self.marcxml)
    record_publi_info = create_record(self.marcxml_publi_info)

    self.hep_record = hep.do(record)
    self.hep_record_publi_info = hep.do(record_publi_info)

    self.latex_eu = Latex(self.hep_record, 'latex_eu')
    self.latex_us = Latex(self.hep_record, 'latex_us')
    self.latex_eu_publi_info = Latex(self.hep_record_publi_info, 'latex_eu')

    # Every literal backslash is written as "\\": sequences such as "\s"
    # and "\ " are not valid escapes and trigger a warning on recent
    # Python versions. The resulting string values are unchanged.
    expected_title = (
        '\nSearch for supersymmetry in events containing a same-flavour '
        'opposite-sign dilepton pair, jets, and large missing transverse '
        'momentum in $\\sqrt{s}=8$ TeV pp collisions with the ATLAS '
        'detector\n'
    )

    self.sample_latex_eu = {
        'citation_key': 'Aad:2015wqa',
        'author': 'G.~Aad',
        'title': expected_title,
        'publi_info': [
            'Eur.\\ Phys.\\ J.\\ C {\\bf 75} (2015) 7, 318',
            '[Eur.\\ Phys.\\ J.\\ C {\\bf 75} (2015) 10, 463]',
        ],
        'arxiv': 'arXiv:1503.03290 [hep-ex]',
        'report_number': '',
        'SLACcitation': '%%CITATION = ARXIV:1503.03290;%%',
    }

    self.sample_latex_publi_info = {
        'publi_info': ['Class.\\ Quant.\\ Grav.\\ {\\bf 15} (1998) 2153'],
    }

    self.sample_latex_us = {
        'citation_key': 'Aad:2015wqa',
        'author': 'G.~Aad',
        'title': expected_title,
        'publi_info': [
            'Eur.\\ Phys.\\ J.\\ C {\\bf 75}, no. 7, 318 (2015)',
            '[Eur.\\ Phys.\\ J.\\ C {\\bf 75}, no. 10, 463 (2015)]',
        ],
        'arxiv': 'arXiv:1503.03290 [hep-ex]',
        'report_number': '',
        'SLACcitation': '%%CITATION = ARXIV:1503.03290;%%',
    }
def test_hidden_notes_from_595__a_9_and_595__double_a_9():
    """Two 595 fields (one with a repeated $a) yield three hidden notes."""
    marcxml = (
        '<record>'
        ' <datafield tag="595" ind1=" " ind2=" ">'
        ' <subfield code="9">SPIRES-HIDDEN</subfield>'
        ' <subfield code="a">Title changed from ALLCAPS</subfield>'
        ' </datafield>'
        ' <datafield tag="595" ind1=" " ind2=" ">'
        ' <subfield code="9">SPIRES-HIDDEN</subfield>'
        ' <subfield code="a">TeXtitle from script</subfield>'
        ' <subfield code="a">no affiliation (not clear pn the fulltext)</subfield>'
        ' </datafield>'
        '</record>'
    )  # record/109310

    # Each $a value becomes its own note, all sharing the $9 source.
    note_values = (
        'Title changed from ALLCAPS',
        'TeXtitle from script',
        'no affiliation (not clear pn the fulltext)',
    )
    expected_notes = [
        {'source': 'SPIRES-HIDDEN', 'value': note} for note in note_values
    ]

    record = clean_record(hep.do(create_record(marcxml)))

    assert expected_notes == record['hidden_notes']
def test_urls_from_marcxml_multiple_8564():
    """Two 8564 fields with $u/$y are converted into two ``urls`` entries."""
    marcxml = (
        '<record>'
        ' <datafield tag="856" ind1="4" ind2="">'
        ' <subfield code="u">http://www.physics.unlv.edu/labastro/</subfield>'
        ' <subfield code="y">Conference web page</subfield>'
        ' </datafield>'
        ' <datafield tag="856" ind1="4" ind2="">'
        ' <subfield code="u">http://www.cern.ch/</subfield>'
        ' <subfield code="y">CERN web page</subfield>'
        ' </datafield>'
        '</record>'
    )

    conference_url = {
        'description': 'Conference web page',
        'value': 'http://www.physics.unlv.edu/labastro/',
    }
    cern_url = {
        'description': 'CERN web page',
        'value': 'http://www.cern.ch/',
    }
    expected_urls = [conference_url, cern_url]

    record = clean_record(hep.do(create_record(marcxml)))

    assert expected_urls == record['urls']
def test_arxiv_eprints_from_037__a_c_9():
    """037 $9/$a/$c maps to ``arxiv_eprints`` and back to 037."""
    arxiv_eprints_schema = load_schema('hep')['properties']['arxiv_eprints']

    marcxml = (
        '<datafield tag="037" ind1=" " ind2=" ">'
        ' <subfield code="9">arXiv</subfield>'
        ' <subfield code="a">arXiv:1505.01843</subfield>'
        ' <subfield code="c">hep-ph</subfield>'
        '</datafield>'
    )  # record/1368891

    # MARC -> JSON
    expected_json = [{'categories': ['hep-ph'], 'value': '1505.01843'}]
    record = hep.do(create_record(marcxml))

    assert validate(record['arxiv_eprints'], arxiv_eprints_schema) is None
    assert expected_json == record['arxiv_eprints']

    # JSON -> MARC
    expected_marc = [{'9': 'arXiv', 'a': 'arXiv:1505.01843', 'c': 'hep-ph'}]
    marc = hep2marc.do(record)

    assert expected_marc == marc['037']
def test_texkeys_from_035__a_9():
    """035 with $9 INSPIRETeX maps to ``texkeys`` and back to 035."""
    texkeys_schema = load_schema('hep')['properties']['texkeys']

    marcxml = (
        '<datafield tag="035" ind1=" " ind2=" ">'
        ' <subfield code="9">INSPIRETeX</subfield>'
        ' <subfield code="a">Hagedorn:1963hdh</subfield>'
        '</datafield>'
    )  # record/1403324

    # MARC -> JSON
    expected_json = ['Hagedorn:1963hdh']
    record = hep.do(create_record(marcxml))

    assert validate(record['texkeys'], texkeys_schema) is None
    assert expected_json == record['texkeys']

    # JSON -> MARC
    expected_marc = [{'9': 'INSPIRETeX', 'a': 'Hagedorn:1963hdh'}]
    marc = hep2marc.do(record)

    assert expected_marc == marc['035']
def record_not_yet_deleted(app):
    """Insert and index a HEP record with control number 333, then remove it.

    Generator: the code before ``yield`` is the setup and the code after it
    is the teardown. Presumably decorated as a pytest fixture; the decorator
    is not visible in this view.

    NOTE(review): the ``with`` nesting below was reconstructed from a
    whitespace-collapsed source; confirm against the original file.

    :param app: application object providing ``app_context()``.
    """
    snippet = (
        '<record>'
        ' <controlfield tag="001">333</controlfield>'
        ' <controlfield tag="005">20160913214552.0</controlfield>'
        ' <datafield tag="980" ind1=" " ind2=" ">'
        ' <subfield code="a">HEP</subfield>'
        ' </datafield>'
        '</record>'
    )

    with app.app_context():
        # Convert the MARCXML to a HEP JSON record and tag it with its schema.
        json_record = hep.do(create_record(snippet))
        json_record['$schema'] = 'http://localhost:5000/schemas/records/hep.json'

        with db.session.begin_nested():
            record = record_upsert(json_record)
            # Only index when the upsert actually returned a record.
            if record:
                ri = RecordIndexer()
                ri.index(record)

        db.session.commit()

    # Hand control to the consumer while the record exists.
    yield

    # Teardown: remove the record created above (recid 333).
    with app.app_context():
        _delete_record_from_everywhere('literature', 333)
def test_report_numbers_from_037__z_9():
    """037 with $9 and a report number maps to ``report_numbers`` and back.

    Note: despite the ``z`` in the test name, the fixture below carries the
    report number in subfield ``a``.
    """
    report_numbers_schema = load_schema('hep')['properties']['report_numbers']

    marcxml = (
        '<datafield tag="037" ind1=" " ind2=" ">'
        ' <subfield code="9">SLAC</subfield>'
        ' <subfield code="a">SLAC-PUB-16140</subfield>'
        '</datafield>'
    )  # record/1326454

    # MARC -> JSON
    expected_json = [{'source': 'SLAC', 'value': 'SLAC-PUB-16140'}]
    record = hep.do(create_record(marcxml))

    assert validate(record['report_numbers'], report_numbers_schema) is None
    assert expected_json == record['report_numbers']

    # JSON -> MARC
    expected_marc = [{'9': 'SLAC', 'a': 'SLAC-PUB-16140'}]
    marc = hep2marc.do(record)

    assert expected_marc == marc['037']
def test_thesis_supervisors_from_701__a_double_u():
    """701 with one $a and two $u yields a supervisor with two affiliations."""
    marcxml = (
        '<datafield tag="701" ind1=" " ind2=" ">'
        ' <subfield code="a">Mnich, Joachim</subfield>'
        ' <subfield code="u">DESY</subfield>'
        ' <subfield code="u">U. Hamburg (main)</subfield>'
        '</datafield>'
    )  # record/1462486

    expected_affiliations = [
        {'curated_relation': False, 'value': 'DESY'},
        {'curated_relation': False, 'value': 'U. Hamburg (main)'},
    ]
    expected_supervisors = [
        {
            'affiliations': expected_affiliations,
            'full_name': 'Mnich, Joachim',
        },
    ]

    record = clean_record(hep.do(create_record(marcxml)))

    assert expected_supervisors == record['thesis_supervisors']
def test_external_system_identifiers_from_035__z_9_handles_cernkey():
    """035 $9 CERNKEY with $z maps to an external identifier and back."""
    identifiers_schema = (
        load_schema('hep')['properties']['external_system_identifiers'])

    marcxml = (
        '<datafield tag="035" ind1=" " ind2=" ">'
        ' <subfield code="9">CERNKEY</subfield>'
        ' <subfield code="z">0263439</subfield>'
        '</datafield>'
    )  # record/451647

    # MARC -> JSON
    expected_json = [{'schema': 'CERNKEY', 'value': '0263439'}]
    record = hep.do(create_record(marcxml))

    assert validate(
        record['external_system_identifiers'], identifiers_schema) is None
    assert expected_json == record['external_system_identifiers']

    # JSON -> MARC
    expected_marc = [{'9': 'CERNKEY', 'z': '0263439'}]
    marc = hep2marc.do(record)

    assert expected_marc == marc['035']
def test_dois_from_2472_a_2_and_247_a_2_9():
    """Two 0247 DOI fields, one carrying a $9 source, yield two ``dois``."""
    marcxml = (
        '<record>'
        ' <datafield tag="024" ind1="7" ind2=" ">'
        ' <subfield code="2">DOI</subfield>'
        ' <subfield code="a">10.1103/PhysRevD.89.072002</subfield>'
        ' </datafield>'
        ' <datafield tag="024" ind1="7" ind2=" ">'
        ' <subfield code="2">DOI</subfield>'
        ' <subfield code="9">bibmatch</subfield>'
        ' <subfield code="a">10.1103/PhysRevD.91.019903</subfield>'
        ' </datafield>'
        '</record>'
    )

    doi_without_source = {'value': '10.1103/PhysRevD.89.072002'}
    doi_with_source = {
        'source': 'bibmatch',
        'value': '10.1103/PhysRevD.91.019903',
    }
    expected_dois = [doi_without_source, doi_with_source]

    record = hep.do(create_record(marcxml))

    assert expected_dois == record['dois']
def convert_marcxml(source):
    """Convert MARC XML to JSON.

    Reads ``source`` in full, splits it into individual records and, for
    each one, yields the output of the dojson model matching the record's
    collection, with empty values stripped. Records matching none of the
    known collections are converted with the ``hep`` model.

    :param source: object exposing ``read()`` that returns the MARC XML.
    """
    from dojson.contrib.marc21.utils import create_record, split_blob

    from inspirehep.dojson.utils import strip_empty_values
    from inspirehep.dojson.hep import hep
    from inspirehep.dojson.institutions import institutions
    from inspirehep.dojson.journals import journals
    from inspirehep.dojson.experiments import experiments
    from inspirehep.dojson.hepnames import hepnames
    from inspirehep.dojson.jobs import jobs
    from inspirehep.dojson.conferences import conferences

    # Ordered (collection names, model) rules: the first rule with a
    # matching collection wins.
    rules = (
        (('institution',), institutions),
        (('experiment',), experiments),
        (('journals',), journals),
        (('hepnames',), hepnames),
        (('job', 'jobhidden'), jobs),
        (('conferences',), conferences),
    )

    for blob in split_blob(source.read()):
        marc_record = create_record(blob)

        model = hep  # fallback when no rule matches
        for collection_names, candidate in rules:
            if any(_collection_in_record(marc_record, name)
                   for name in collection_names):
                model = candidate
                break

        yield strip_empty_values(model.do(marc_record))
def test_languages_from_double_041__a():
    """Two 041 fields map to two language codes and back to two 041 $a."""
    languages_schema = load_schema('hep')['properties']['languages']

    marcxml = (
        '<record>'
        ' <datafield tag="041" ind1=" " ind2=" ">'
        ' <subfield code="a">French</subfield>'
        ' </datafield>'
        ' <datafield tag="041" ind1=" " ind2=" ">'
        ' <subfield code="a">German</subfield>'
        ' </datafield>'
        '</record>'
    )  # record/1231408

    # MARC -> JSON
    expected_json = ['fr', 'de']
    record = hep.do(create_record(marcxml))

    assert validate(record['languages'], languages_schema) is None
    assert expected_json == record['languages']

    # JSON -> MARC
    expected_marc = [{'a': 'french'}, {'a': 'german'}]
    marc = hep2marc.do(record)

    assert expected_marc == marc['041']
def test_report_numbers_from_two_037__a():
    """Two 037 $a fields map to two ``report_numbers`` and back."""
    report_numbers_schema = load_schema('hep')['properties']['report_numbers']

    marcxml = (
        '<record>'
        ' <datafield tag="037" ind1=" " ind2=" ">'
        ' <subfield code="a">UTPT-89-27</subfield>'
        ' </datafield>'
        ' <datafield tag="037" ind1=" " ind2=" ">'
        ' <subfield code="a">CALT-68-1585</subfield>'
        ' </datafield>'
        '</record>'
    )  # record/26564

    # MARC -> JSON
    expected_json = [{'value': 'UTPT-89-27'}, {'value': 'CALT-68-1585'}]
    record = hep.do(create_record(marcxml))

    assert validate(record['report_numbers'], report_numbers_schema) is None
    assert expected_json == record['report_numbers']

    # JSON -> MARC
    expected_marc = [{'a': 'UTPT-89-27'}, {'a': 'CALT-68-1585'}]
    marc = hep2marc.do(record)

    assert expected_marc == marc['037']
def test_titles_from_245__a_9():
    """245 $a/$9 maps to a sourced entry in ``titles`` and back to 245."""
    titles_schema = load_schema('hep')['properties']['titles']

    marcxml = (
        '<datafield tag="245" ind1=" " ind2=" ">'
        ' <subfield code="a">Exact Form of Boundary Operators Dual to '
        'Interacting Bulk Scalar Fields in the AdS/CFT Correspondence</subfield>'
        ' <subfield code="9">arXiv</subfield>'
        '</datafield>'
    )  # record/001511698

    # The same title text is expected in both directions.
    title_text = (
        'Exact Form of Boundary Operators Dual to Interacting '
        'Bulk Scalar Fields in the AdS/CFT Correspondence'
    )

    # MARC -> JSON
    expected_json = [{'title': title_text, 'source': 'arXiv'}]
    record = hep.do(create_record(marcxml))

    assert validate(record['titles'], titles_schema) is None
    assert expected_json == record['titles']

    # JSON -> MARC
    expected_marc = [{'a': title_text, '9': 'arXiv'}]
    marc = hep2marc.do(record)

    assert expected_marc == marc['245']
def test_title_translations_from_242__a(stable_langdetect):
    """242 $a maps to a ``title_translations`` entry and back to 242."""
    translations_schema = (
        load_schema('hep')['properties']['title_translations'])

    marcxml = (
        '<datafield tag="242" ind1=" " ind2=" ">'
        ' <subfield code="a">The redshift of extragalactic nebulae</subfield>'
        '</datafield>'
    )  # record/8352

    translated_title = 'The redshift of extragalactic nebulae'

    # MARC -> JSON
    expected_json = [{'language': 'en', 'title': translated_title}]
    record = hep.do(create_record(marcxml))

    assert validate(record['title_translations'], translations_schema) is None
    assert expected_json == record['title_translations']

    # JSON -> MARC
    expected_marc = [{'a': translated_title}]
    marc = hep2marc.do(record)

    assert expected_marc == marc['242']
def test_isbns_from_020__a_b_normalizes_ebook():
    """020 $b ``eBook`` is normalized to the ``online`` medium."""
    isbns_schema = load_schema('hep')['properties']['isbns']

    marcxml = (
        '<datafield tag="020" ind1=" " ind2=" ">'
        ' <subfield code="a">9783319259017</subfield>'
        ' <subfield code="b">eBook</subfield>'
        '</datafield>'
    )  # record/1430829

    # MARC -> JSON
    expected_json = [{'value': '9783319259017', 'medium': 'online'}]
    record = hep.do(create_record(marcxml))

    assert validate(record['isbns'], isbns_schema) is None
    assert expected_json == record['isbns']

    # JSON -> MARC
    expected_marc = [{'a': '9783319259017', 'b': 'online'}]
    marc = hep2marc.do(record)

    assert expected_marc == marc['020']
def test_isbns_from_020__a_b_normalizes_electronic():
    """020 $b ``electronic version`` produces an ISBN without a medium."""
    isbns_schema = load_schema('hep')['properties']['isbns']

    marcxml = (
        '<datafield tag="020" ind1=" " ind2=" ">'
        ' <subfield code="a">9783319006260</subfield>'
        ' <subfield code="b">electronic version</subfield>'
        '</datafield>'
    )  # record/1292006

    # MARC -> JSON
    expected_json = [{'value': '9783319006260'}]
    record = hep.do(create_record(marcxml))

    assert validate(record['isbns'], isbns_schema) is None
    assert expected_json == record['isbns']

    # JSON -> MARC
    expected_marc = [{'a': '9783319006260'}]
    marc = hep2marc.do(record)

    assert expected_marc == marc['020']
def test_isbns_from_020__a_b_normalizes_online():
    """020 $b ``Online`` becomes ``online`` and the ISBN loses its hyphens."""
    isbns_schema = load_schema('hep')['properties']['isbns']

    marcxml = (
        '<datafield tag="020" ind1=" " ind2=" ">'
        ' <subfield code="a">978-94-024-0999-4</subfield>'
        ' <subfield code="b">Online</subfield>'
        '</datafield>'
    )  # record/1504286

    # MARC -> JSON
    expected_json = [{'value': '9789402409994', 'medium': 'online'}]
    record = hep.do(create_record(marcxml))

    assert validate(record['isbns'], isbns_schema) is None
    assert expected_json == record['isbns']

    # JSON -> MARC
    expected_marc = [{'a': '9789402409994', 'b': 'online'}]
    marc = hep2marc.do(record)

    assert expected_marc == marc['020']
def test_isbns_from_020__a_b_normalizes_hardcover():
    """020 $b ``hardcover`` is kept and the ISBN loses its hyphens."""
    isbns_schema = load_schema('hep')['properties']['isbns']

    marcxml = (
        '<datafield tag="020" ind1=" " ind2=" ">'
        ' <subfield code="a">978-981-4571-66-1</subfield>'
        ' <subfield code="b">hardcover</subfield>'
        '</datafield>'
    )  # record/1351311

    # MARC -> JSON
    expected_json = [{'value': '9789814571661', 'medium': 'hardcover'}]
    record = hep.do(create_record(marcxml))

    assert validate(record['isbns'], isbns_schema) is None
    assert expected_json == record['isbns']

    # JSON -> MARC
    expected_marc = [{'a': '9789814571661', 'b': 'hardcover'}]
    marc = hep2marc.do(record)

    assert expected_marc == marc['020']
def test_dois_from_0247_a_2_double_9_ignores_curator_source():
    """Of two $9 values on a 0247 DOI, the ``CURATOR`` one is dropped."""
    dois_schema = load_schema('hep')['properties']['dois']

    marcxml = (
        '<datafield tag="024" ind1="7" ind2=" ">'
        ' <subfield code="2">DOI</subfield>'
        ' <subfield code="9">bibcheck</subfield>'
        ' <subfield code="9">CURATOR</subfield>'
        ' <subfield code="a">10.1590/S1806-11172008005000006</subfield>'
        '</datafield>'
    )  # record/1117362

    doi_value = '10.1590/S1806-11172008005000006'

    # MARC -> JSON (no roundtrip)
    expected_json = [{'source': 'bibcheck', 'value': doi_value}]
    record = hep.do(create_record(marcxml))

    assert validate(record['dois'], dois_schema) is None
    assert expected_json == record['dois']

    # JSON -> MARC
    expected_marc = [{'a': doi_value, '9': 'bibcheck', '2': 'DOI'}]
    marc = hep2marc.do(record)

    assert expected_marc == marc['0247']
def test_languages_from_041__a_handles_multiple_languages_in_one_a():
    """A single 041 $a listing two languages is split into two codes."""
    languages_schema = load_schema('hep')['properties']['languages']

    marcxml = (
        '<datafield tag="041" ind1=" " ind2=" ">'
        ' <subfield code="a">Russian / English</subfield>'
        '</datafield>'
    )  # record/116959

    # MARC -> JSON
    expected_json = ['ru', 'en']
    record = hep.do(create_record(marcxml))

    assert validate(record['languages'], languages_schema) is None
    assert expected_json == record['languages']

    # JSON -> MARC
    expected_marc = [{'a': 'russian'}, {'a': 'english'}]
    marc = hep2marc.do(record)

    assert expected_marc == marc['041']
def test_dois_from_0247_a_2():
    """0247 $2 DOI with $a maps to ``dois`` and back to 0247."""
    dois_schema = load_schema('hep')['properties']['dois']

    marcxml = (
        '<datafield tag="024" ind1="7" ind2=" ">'
        ' <subfield code="2">DOI</subfield>'
        ' <subfield code="a">10.1088/0264-9381/31/24/245004</subfield>'
        '</datafield>'
    )  # record/1302395

    doi_value = '10.1088/0264-9381/31/24/245004'

    # MARC -> JSON
    expected_json = [{'value': doi_value}]
    record = hep.do(create_record(marcxml))

    assert validate(record['dois'], dois_schema) is None
    assert expected_json == record['dois']

    # JSON -> MARC
    expected_marc = [{'a': doi_value, '2': 'DOI'}]
    marc = hep2marc.do(record)

    assert expected_marc == marc['0247']
def test_harvesting_arxiv_workflow_accepted(
    mocked, db_only_app, record_oai_arxiv_plots):
    """Test a full harvesting workflow.

    Converts the harvested arXiv record to HEP JSON, starts the ``article``
    workflow, checks that it halts with files and publication info attached,
    marks the object as approved and core, then resumes the workflow and
    checks that it completes.

    NOTE(review): the ``with`` nesting below was reconstructed from a
    whitespace-collapsed source; confirm against the original file.
    """
    from invenio_workflows import (
        start,
        WorkflowEngine,
        ObjectStatus,
        workflow_object_class
    )
    from dojson.contrib.marc21.utils import create_record
    from invenio_db import db
    from inspirehep.dojson.hep import hep
    from inspirehep.modules.converter.xslt import convert

    # Convert to MARCXML, then dict, then HEP JSON
    record_oai_arxiv_plots_marcxml = convert(
        record_oai_arxiv_plots,
        "oaiarXiv2marcxml.xsl"
    )
    record_marc = create_record(record_oai_arxiv_plots_marcxml)
    record_json = hep.do(record_marc)

    # Defined outside the context so the second block can reuse it.
    workflow_uuid = None
    with db_only_app.app_context():
        workflow_uuid = start('article', [record_json])

        eng = WorkflowEngine.from_uuid(workflow_uuid)
        obj = eng.processed_objects[0]

        assert obj.status == ObjectStatus.HALTED
        assert obj.data_type == "hep"

        # Files should have been attached (tarball + pdf)
        assert obj.files["1407.7587.pdf"]
        assert obj.files["1407.7587.tar.gz"]

        # A publication note should have been extracted
        pub_info = obj.data.get('publication_info')
        assert pub_info
        assert pub_info[0]
        assert pub_info[0].get('year') == "2014"
        assert pub_info[0].get('journal_title') == "J. Math. Phys."

        # This record should not have been touched yet
        assert "approved" not in obj.extra_data

        # Now let's resolve it as accepted and continue
        # FIXME Should be accept, but record validation prevents us.
        obj.remove_action()
        obj.extra_data["approved"] = True
        obj.extra_data["core"] = True
        obj.save()

        db.session.commit()

    with db_only_app.app_context():
        eng = WorkflowEngine.from_uuid(workflow_uuid)
        obj = eng.processed_objects[0]
        # Keep the id so the object can be re-fetched after continuing.
        obj_id = obj.id
        obj.continue_workflow()

        obj = workflow_object_class.get(obj_id)
        # It was accepted
        assert obj.status == ObjectStatus.COMPLETED
def test_isbns_from_020__a_b_normalizes_print():
    """020 $b ``Print`` is normalized to the ``print`` medium."""
    isbns_schema = load_schema('hep')['properties']['isbns']

    marcxml = (
        '<datafield tag="020" ind1=" " ind2=" ">'
        ' <subfield code="a">9781786341105</subfield>'
        ' <subfield code="b">Print</subfield>'
        '</datafield>'
    )  # record/1509456

    # MARC -> JSON
    expected_json = [{'value': '9781786341105', 'medium': 'print'}]
    record = hep.do(create_record(marcxml))

    assert validate(record['isbns'], isbns_schema) is None
    assert expected_json == record['isbns']

    # JSON -> MARC
    expected_marc = [{'a': '9781786341105', 'b': 'print'}]
    marc = hep2marc.do(record)

    assert expected_marc == marc['020']
def test_titles_from_246__a_9():
    """246 $a/$9 maps to ``titles``; hep2marc writes it back under 245."""
    titles_schema = load_schema('hep')['properties']['titles']

    marcxml = (
        '<datafield tag="246" ind1=" " ind2=" ">'
        ' <subfield code="a">Superintegrable relativistic systems in'
        ' spacetime-dependent background fields</subfield>'
        ' <subfield code="9">arXiv</subfield>'
        '</datafield>'
    )  # record/1511471

    # The same title text is expected in both directions.
    title_text = (
        'Superintegrable relativistic systems in '
        'spacetime-dependent background fields'
    )

    # MARC -> JSON
    expected_json = [{'source': 'arXiv', 'title': title_text}]
    record = hep.do(create_record(marcxml))

    assert validate(record['titles'], titles_schema) is None
    assert expected_json == record['titles']

    # JSON -> MARC
    expected_marc = [{'a': title_text, '9': 'arXiv'}]
    marc = hep2marc.do(record)

    assert expected_marc == marc['245']
def test_external_system_numbers_from_035__a_d_h_m_9():
    """035 $9/$a is kept as an identifier; $d, $h and $m are not exported."""
    identifiers_schema = (
        load_schema('hep')['properties']['external_system_identifiers'])

    marcxml = (
        '<datafield tag="035" ind1=" " ind2=" ">'
        ' <subfield code="9">http://cds.cern.ch/oai2d</subfield>'
        ' <subfield code="a">oai:cds.cern.ch:325030</subfield>'
        ' <subfield code="d">2015-06-05T13:24:42Z</subfield>'
        ' <subfield code="h">2015-11-09T16:22:48Z</subfield>'
        ' <subfield code="m">marcxml</subfield>'
        '</datafield>'
    )  # record/1403324

    oai_schema = 'http://cds.cern.ch/oai2d'
    oai_value = 'oai:cds.cern.ch:325030'

    # MARC -> JSON
    expected_json = [{'value': oai_value, 'schema': oai_schema}]
    record = hep.do(create_record(marcxml))

    assert validate(
        record['external_system_identifiers'], identifiers_schema) is None
    assert expected_json == record['external_system_identifiers']

    # JSON -> MARC
    expected_marc = [{'9': oai_schema, 'a': oai_value}]
    marc = hep2marc.do(record)

    assert expected_marc == marc['035']
def test_title_translations_from_242__a_b(stable_langdetect):
    """242 $a/$b maps to a translated title with subtitle and back."""
    translations_schema = (
        load_schema('hep')['properties']['title_translations'])

    marcxml = (
        '<datafield tag="242" ind1=" " ind2=" ">'
        ' <subfield code="a">Generalized Hamilton-Jacobi Formalism</subfield>'
        ' <subfield code="b">Field Theories with Upper-Order Derivatives</subfield>'
        '</datafield>'
    )  # record/1501064

    main_title = 'Generalized Hamilton-Jacobi Formalism'
    sub_title = 'Field Theories with Upper-Order Derivatives'

    # MARC -> JSON
    expected_json = [
        {'language': 'en', 'title': main_title, 'subtitle': sub_title},
    ]
    record = hep.do(create_record(marcxml))

    assert validate(record['title_translations'], translations_schema) is None
    assert expected_json == record['title_translations']

    # JSON -> MARC
    expected_marc = [{'a': main_title, 'b': sub_title}]
    marc = hep2marc.do(record)

    assert expected_marc == marc['242']
def test_report_numbers_hidden_from_037__z():
    """037 $z maps to a hidden report number and back to 037 $z."""
    report_numbers_schema = load_schema('hep')['properties']['report_numbers']

    marcxml = (
        '<datafield tag="037" ind1=" " ind2=" ">'
        ' <subfield code="z">FERMILAB-PUB-17-011-CMS</subfield>'
        '</datafield>'
    )  # record/1508174

    report_number = 'FERMILAB-PUB-17-011-CMS'

    # MARC -> JSON
    expected_json = [{'hidden': True, 'value': report_number}]
    record = hep.do(create_record(marcxml))

    assert validate(record['report_numbers'], report_numbers_schema) is None
    assert expected_json == record['report_numbers']

    # JSON -> MARC
    expected_marc = [{'z': report_number}]
    marc = hep2marc.do(record)

    assert expected_marc == marc['037']
def test_titles_from_245__a_b():
    """245 $a/$b maps to a title with subtitle and back to 245."""
    titles_schema = load_schema('hep')['properties']['titles']

    marcxml = (
        '<datafield tag="245" ind1=" " ind2=" ">'
        ' <subfield code="a">Proceedings, New Observables in Quarkonium Production</subfield>'
        ' <subfield code="b">Trento, Italy</subfield>'
        '</datafield>'
    )  # record/1510141

    main_title = 'Proceedings, New Observables in Quarkonium Production'
    sub_title = 'Trento, Italy'

    # MARC -> JSON
    expected_json = [{'title': main_title, 'subtitle': sub_title}]
    record = hep.do(create_record(marcxml))

    assert validate(record['titles'], titles_schema) is None
    assert expected_json == record['titles']

    # JSON -> MARC
    expected_marc = [{'a': main_title, 'b': sub_title}]
    marc = hep2marc.do(record)

    assert expected_marc == marc['245']
def test_single_doi():
    """A single 0247 DOI field yields exactly one ``dois`` entry."""
    marcxml = (
        '<record><datafield tag="024" ind1="7" ind2=" ">'
        '<subfield code="2">DOI</subfield>'
        '<subfield code="a">10.1088/0264-9381/31/24/245004</subfield>'
        '</datafield></record>'
    )
    expected_dois = [{'value': '10.1088/0264-9381/31/24/245004'}]

    marc_record = create_record(marcxml)
    hep_record = strip_empty_values(hep.do(marc_record))

    assert hep_record['dois'] == expected_dois
def test_thesis_from_502_b_double_c_d_double_z():
    """502 with two $c and two $z maps to a thesis with two institutions."""
    thesis_schema = load_schema('hep')['properties']['thesis']

    marcxml = (
        '<datafield tag="502" ind1=" " ind2=" ">'
        ' <subfield code="b">Thesis</subfield>'
        ' <subfield code="c">Nice U.</subfield>'
        ' <subfield code="c">Cote d\'Azur Observ., Nice</subfield>'
        ' <subfield code="d">2014</subfield>'
        ' <subfield code="z">903069</subfield>'
        ' <subfield code="z">904125</subfield>'
        '</datafield>'
    )  # record/1385648

    first_institution = 'Nice U.'
    second_institution = "Cote d'Azur Observ., Nice"

    # MARC -> JSON: each $c is paired with the $z at the same position.
    expected_json = {
        '_degree_type': 'Thesis',
        'date': '2014',
        'degree_type': 'Thesis',
        'institutions': [
            {
                'curated_relation': True,
                'name': first_institution,
                'record': {
                    '$ref': 'http://localhost:5000/api/institutions/903069',
                },
            },
            {
                'curated_relation': True,
                'name': second_institution,
                'record': {
                    '$ref': 'http://localhost:5000/api/institutions/904125',
                },
            },
        ],
    }
    record = hep.do(create_record(marcxml))

    assert validate(record['thesis'], thesis_schema) is None
    assert expected_json == record['thesis']

    # JSON -> MARC: the $z record ids are not written back.
    expected_marc = {
        'b': 'Thesis',
        'c': [first_institution, second_institution],
        'd': '2014',
    }
    marc = hep2marc.do(record)

    assert expected_marc == marc['502']
def test_arxiv_eprints_from_037__a_c_9_and_multiple_65017_a_2():
    """037 plus two 65017 arXiv categories merge into one ``arxiv_eprints``.

    The reverse conversion is compared against the complete hep2marc output.
    """
    arxiv_eprints_schema = load_schema('hep')['properties']['arxiv_eprints']

    # Adjacent literals concatenate to the exact same string as the source.
    marcxml = (
        ' <record> <datafield tag="037" ind1=" " ind2=" ">'
        ' <subfield code="9">arXiv</subfield>'
        ' <subfield code="a">arXiv:1702.00702</subfield>'
        ' <subfield code="c">math-ph</subfield>'
        ' </datafield>'
        ' <datafield tag="650" ind1="1" ind2="7">'
        ' <subfield code="a">math-ph</subfield>'
        ' <subfield code="2">arXiv</subfield>'
        ' </datafield><datafield tag="650" ind1="1" ind2="7">'
        ' <subfield code="a">gr-qc</subfield>'
        ' <subfield code="2">arXiv</subfield></datafield>'
        ' </record> '
    )  # record/1511862

    # MARC -> JSON
    expected_json = [
        {
            # the first element is the one in 037__c
            'categories': ['math-ph', 'gr-qc'],
            'value': '1702.00702',
        },
    ]
    record = hep.do(create_record(marcxml))

    assert validate(record['arxiv_eprints'], arxiv_eprints_schema) is None
    assert expected_json == record['arxiv_eprints']

    # JSON -> MARC
    expected_marc = {
        # 035 is discarded in hep.do, so it needs to be derived here
        '035': [{'9': 'arXiv', 'a': 'oai:arXiv.org:1702.00702'}],
        '037': [{'9': 'arXiv', 'a': 'arXiv:1702.00702', 'c': 'math-ph'}],
        '65017': [
            {'2': 'arXiv', 'a': 'math-ph'},
            {'2': 'arXiv', 'a': 'gr-qc'},
        ],
    }
    marc = hep2marc.do(record)

    assert expected_marc == marc
def test_authors_from_100__a_i_u_x_y():
    """100 $a/$i/$u/$x/$y maps to a curated author with a record reference."""
    authors_schema = load_schema('hep')['properties']['authors']

    marcxml = (
        '<datafield tag="100" ind1=" " ind2=" ">'
        ' <subfield code="a">Glashow, S.L.</subfield>'
        ' <subfield code="i">INSPIRE-00085173</subfield>'
        ' <subfield code="u">Copenhagen U.</subfield>'
        ' <subfield code="x">1008235</subfield>'
        ' <subfield code="y">1</subfield>'
        '</datafield>'
    )  # record/4328/export/xme

    # MARC -> JSON
    expected_author = {
        'affiliations': [{'value': 'Copenhagen U.'}],
        'curated_relation': True,
        'full_name': 'Glashow, S.L.',
        'ids': [{'type': 'INSPIRE ID', 'value': 'INSPIRE-00085173'}],
        'record': {'$ref': 'http://localhost:5000/api/authors/1008235'},
    }
    expected_json = [expected_author]
    record = hep.do(create_record(marcxml))

    assert validate(record['authors'], authors_schema) is None
    assert expected_json == record['authors']

    # JSON -> MARC: only $a, $i and $u are expected back.
    expected_marc = [
        {
            'a': 'Glashow, S.L.',
            'i': ['INSPIRE-00085173'],
            'u': ['Copenhagen U.'],
        },
    ]
    marc = hep2marc.do(record)

    assert expected_marc == marc['100']
def test_harvesting_arxiv_workflow_accepted(mocked, small_app, record_oai_arxiv_plots): """Test a full harvesting workflow.""" # Convert to MARCXML, then dict, then HEP JSON record_oai_arxiv_plots_marcxml = convert(record_oai_arxiv_plots, "oaiarXiv2marcxml.xsl") record_marc = create_record(record_oai_arxiv_plots_marcxml) record_json = hep.do(record_marc) workflow_uuid = None with small_app.app_context(): workflow_uuid = start('article', [record_json]) eng = WorkflowEngine.from_uuid(workflow_uuid) obj = eng.processed_objects[0] assert obj.status == ObjectStatus.HALTED assert obj.data_type == "hep" # Files should have been attached (tarball + pdf) assert obj.files["1407.7587.pdf"] assert obj.files["1407.7587.tar.gz"] # A publication note should have been extracted pub_info = obj.data.get('publication_info') assert pub_info assert pub_info[0] assert pub_info[0].get('year') == 2014 assert pub_info[0].get('journal_title') == "J. Math. Phys." # This record should not have been touched yet assert "approved" not in obj.extra_data # Now let's resolve it as accepted and continue # FIXME Should be accept, but record validation prevents us. obj.remove_action() obj.extra_data["approved"] = True obj.extra_data["core"] = True obj.save() db.session.commit() with small_app.app_context(): eng = WorkflowEngine.from_uuid(workflow_uuid) obj = eng.processed_objects[0] obj_id = obj.id obj.continue_workflow() obj = workflow_object_class.get(obj_id) # It was accepted assert obj.status == ObjectStatus.COMPLETED
def test_authors_from_100__a_u_x_w_y_z_with_malformed_x():
    """A non-numeric 100 $x produces no author ``record`` reference."""
    authors_schema = load_schema('hep')['properties']['authors']

    marcxml = (
        '<datafield tag="100" ind1=" " ind2=" ">'
        ' <subfield code="a">Bakhrushin, Iu.P.</subfield>'
        ' <subfield code="u">NIIEFA, St. Petersburg</subfield>'
        ' <subfield code="x">БАХРУШИН, Ю.П.</subfield>'
        ' <subfield code="w">I.P.Bakhrushin.1</subfield>'
        ' <subfield code="y">0</subfield>'
        ' <subfield code="z">903073</subfield>'
        '</datafield>'
    )  # record/931310/export/xme

    # MARC -> JSON (no roundtrip)
    expected_affiliation = {
        'record': {'$ref': 'http://localhost:5000/api/institutions/903073'},
        'value': 'NIIEFA, St. Petersburg',
    }
    expected_json = [
        {
            'affiliations': [expected_affiliation],
            'curated_relation': False,
            'full_name': 'Bakhrushin, Iu.P.',
            'ids': [{'type': 'INSPIRE BAI', 'value': 'I.P.Bakhrushin.1'}],
        },
    ]
    record = hep.do(create_record(marcxml))

    assert validate(record['authors'], authors_schema) is None
    assert expected_json == record['authors']

    # JSON -> MARC
    expected_marc = {
        'a': 'Bakhrushin, Iu.P.',
        'u': ['NIIEFA, St. Petersburg'],
        'w': 'I.P.Bakhrushin.1',
    }
    marc = hep2marc.do(record)

    assert expected_marc == marc['100']
def test_HEP_added_to_980__a():
    """Check that hep2marc adds ``HEP`` to 980 $a.

    HEP is not written explicitly in the record anymore, so it needs to be
    added in hep2marc.
    """
    # An empty input is enough: the 980 value must come from hep2marc itself.
    # (The schema was previously loaded into a local that was never used.)
    snippet = ''

    result = hep.do(create_record(snippet))
    expected = [{'a': 'HEP'}]
    result = hep2marc.do(result)

    assert expected == result['980']
def test_fft_from_FFT_a_d_f_n_o_t():
    """FFT subfields map to named ``fft`` keys and back to an FFT field."""
    marcxml = (
        '<datafield tag="FFT">'
        ' <subfield code="a">url</subfield>'
        ' <subfield code="t">docfile_type</subfield>'
        ' <subfield code="o">flag</subfield>'
        ' <subfield code="d">description</subfield>'
        ' <subfield code="n">filename</subfield>'
        ' <subfield code="f">filetype</subfield>'
        '</datafield>'
    )

    # MARC -> JSON: every subfield value equals the name of its JSON key.
    expected_json = [
        {
            'url': 'url',
            'docfile_type': 'docfile_type',
            'flag': 'flag',
            'description': 'description',
            'filename': 'filename',
            'filetype': 'filetype',
        },
    ]
    record = hep.do(create_record(marcxml))

    assert expected_json == record['fft']

    # JSON -> MARC, starting again from a fresh conversion of the snippet.
    expected_marc = [
        {
            'a': 'url',
            't': 'docfile_type',
            'o': 'flag',
            'd': 'description',
            'n': 'filename',
            'f': 'filetype',
        },
    ]
    fresh_record = hep.do(create_record(marcxml))
    marc = hep2marc.do(fresh_record)

    assert expected_marc == marc['FFT']
def test_accelerator_experiments(mock_get_record_ref, mock_get_recid_from_ref, test_name, xml_snippet, expected_json, expected_marc):
    """Convert ``xml_snippet`` both ways and compare with the expectations.

    The two mocks are given deterministic side effects, so record references
    become ``mocked_record_<recid>`` strings and can be mapped back to an
    integer recid.
    """
    def fake_record_ref(x, *_):
        # Falsy inputs are passed through unchanged.
        return x and 'mocked_record_%s' % x

    def fake_recid_from_ref(x, *_):
        # The recid is whatever follows the last underscore.
        return x and int(x.rsplit('_')[-1])

    mock_get_record_ref.side_effect = fake_record_ref
    mock_get_recid_from_ref.side_effect = fake_recid_from_ref

    # Bare datafields are wrapped in a <record> element before parsing.
    already_wrapped = xml_snippet.strip().startswith('<record>')
    wrapped_xml = (
        xml_snippet if already_wrapped else '<record>%s</record>' % xml_snippet
    )

    hep_record = hep.do(create_record(wrapped_xml))
    actual_json = hep_record['accelerator_experiments']
    actual_marc = hep2marc.do(hep_record)['693']

    assert actual_marc == expected_marc
    assert actual_json == expected_json
def merged_records(app):
    """Insert a merged record (111) and a deleted record (222), then clean up.

    Generator: the code before ``yield`` is the setup and the code after it
    is the teardown. Presumably decorated as a pytest fixture; the decorator
    is not visible in this view.

    NOTE(review): the ``with`` nesting below was reconstructed from a
    whitespace-collapsed source; confirm against the original file.

    :param app: application object providing ``app_context()``.
    """
    # Record 111 points at 222 through 981 $a.
    merged_snippet = ('<record>'
                      ' <controlfield tag="001">111</controlfield>'
                      ' <datafield tag="981" ind1=" " ind2=" ">'
                      ' <subfield code="a">222</subfield>'
                      ' </datafield>'
                      '</record>')

    # Record 222 is flagged DELETED (980 $c) and points at 111 through 970 $d.
    deleted_snippet = ('<record>'
                       ' <controlfield tag="001">222</controlfield>'
                       ' <datafield tag="970" ind1=" " ind2=" ">'
                       ' <subfield code="d">111</subfield>'
                       ' </datafield>'
                       ' <datafield tag="980" ind1=" " ind2=" ">'
                       ' <subfield code="c">DELETED</subfield>'
                       ' </datafield>'
                       '</record>')

    with app.app_context():
        merged_record = hep.do(create_record(merged_snippet))
        merged_record['$schema'] = 'http://localhost:5000/schemas/records/hep.json'

        deleted_record = hep.do(create_record(deleted_snippet))
        deleted_record['$schema'] = 'http://localhost:5000/schemas/records/hep.json'

        with db.session.begin_nested():
            # Keep the ids returned by the upserts for the teardown.
            merged_uuid = record_upsert(merged_record).id
            deleted_uuid = record_upsert(deleted_record).id
        db.session.commit()

    # Hand control to the consumer while both records exist.
    yield

    # Teardown: remove both records created above.
    with app.app_context():
        _delete_merged_records('lit', 111, 222, merged_uuid, deleted_uuid)
def test_document_type_article_is_default():
    """An empty snippet yields ``document_type == ['article']``, and that
    value is not written back as a ``980`` ``a`` subfield."""
    subschema = load_schema('hep')['properties']['document_type']

    snippet = ''

    json_record = hep.do(create_record(snippet))
    document_type = json_record['document_type']
    expected_json = ['article']

    assert validate(document_type, subschema) is None
    assert expected_json == document_type

    marc_record = hep2marc.do(json_record)
    unwanted_field = {'a': 'article'}

    assert unwanted_field not in marc_record['980']
def test_keywords_from_multiple_695__a_2():
    """Two 695 fields with ``a`` and ``2`` map to two ``keywords`` and back."""
    subschema = load_schema('hep')['properties']['keywords']

    snippet = (
        '<record>'
        ' <datafield tag="695" ind1=" " ind2=" ">'
        ' <subfield code="a">programming: Monte Carlo</subfield>'
        ' <subfield code="2">INSPIRE</subfield>'
        ' </datafield>'
        ' <datafield tag="695" ind1=" " ind2=" ">'
        ' <subfield code="a">electron positron: annihilation</subfield>'
        ' <subfield code="2">INSPIRE</subfield>'
        ' </datafield>'
        '</record>'
    )  # record/363605

    # MARC -> JSON
    json_record = hep.do(create_record(snippet))
    expected_json = [
        {
            'classification_scheme': 'INSPIRE',
            'keyword': 'programming: Monte Carlo',
        },
        {
            'classification_scheme': 'INSPIRE',
            'keyword': 'electron positron: annihilation',
        },
    ]

    assert validate(json_record['keywords'], subschema) is None
    assert expected_json == json_record['keywords']

    # JSON -> MARC
    marc_record = hep2marc.do(json_record)
    expected_marc = [
        {
            'a': 'programming: Monte Carlo',
            '2': 'INSPIRE',
        },
        {
            'a': 'electron positron: annihilation',
            '2': 'INSPIRE',
        },
    ]

    assert expected_marc == marc_record['695']
def test_control_number_from_001():
    """Controlfield 001 becomes the integer ``control_number`` and back."""
    subschema = load_schema('hep')['properties']['control_number']

    snippet = '<controlfield tag="001">1508668</controlfield>'  # record/1508668

    # MARC -> JSON
    json_record = hep.do(create_record(snippet))
    expected_json = 1508668

    assert validate(json_record['control_number'], subschema) is None
    assert expected_json == json_record['control_number']

    # JSON -> MARC
    marc_record = hep2marc.do(json_record)
    expected_marc = 1508668

    assert expected_marc == marc_record['001']
def test_authors_from_100__a_v_m_w_y():
    """A 100 field with ``a``, ``v``, ``m``, ``w`` and ``y`` subfields maps
    to ``authors`` and back to ``100``.

    The e-mail address is a placeholder: the original fixture carried a
    redacted address in the snippet (``[email protected]``) and a differently
    masked one in the expectations (``*****@*****.**``), so the input and
    the expected output could never match. One value is now shared by both.
    """
    schema = load_schema('hep')
    subschema = schema['properties']['authors']

    # Placeholder address used for both the input and the expectations.
    email = 'xu.gao@example.com'

    snippet = (
        '<datafield tag="100" ind1=" " ind2=" ">'
        ' <subfield code="a">Gao, Xu</subfield>'
        ' <subfield code="v">Chern Institute of Mathematics and LPMC, Nankai University, Tianjin, 300071, China</subfield>'
        ' <subfield code="m">{email}</subfield>'
        ' <subfield code="w">X.Gao.11</subfield>'
        ' <subfield code="y">0</subfield>'
        '</datafield>'
    ).format(email=email)  # record/1475380/export/xme

    expected = [
        {
            'curated_relation': False,
            'emails': [email],
            'full_name': 'Gao, Xu',
            'ids': [
                {
                    'type': 'INSPIRE BAI',
                    'value': 'X.Gao.11',
                },
            ],
        },
    ]
    result = hep.do(create_record(snippet))

    assert validate(result['authors'], subschema) is None
    assert expected == result['authors']

    expected = {
        'a': 'Gao, Xu',
        'v': [
            'Chern Institute of Mathematics and LPMC, Nankai University, Tianjin, 300071, China',
        ],
        'm': [
            email,
        ],
        'w': 'X.Gao.11',
    }
    result = hep2marc.do(result)

    assert expected == result['100']
def test_dois_from_0247_a_2():
    """A single 0247 DOI field ends up as one ``dois`` entry after cleaning."""
    snippet = (
        '<record>'
        ' <datafield tag="024" ind1="7" ind2=" ">'
        ' <subfield code="2">DOI</subfield>'
        ' <subfield code="a">10.1088/0264-9381/31/24/245004</subfield>'
        ' </datafield>'
        '</record>'
    )

    json_record = hep.do(create_record(snippet))
    cleaned = clean_record(json_record)
    expected_dois = [
        {
            'value': '10.1088/0264-9381/31/24/245004',
        },
    ]

    assert expected_dois == cleaned['dois']
def overdo_marc_dict(record):
    """Convert a MARC Groupable Ordered Dict into JSON.

    The converter is picked from the collections found in ``record``; when
    none of the known collections is present, the ``hep`` converter is used.
    """
    # Checked top to bottom: the first entry with a matching collection wins.
    converters_by_collection = (
        (('institution',), institutions),
        (('experiment',), experiments),
        (('journals',), journals),
        (('hepnames',), hepnames),
        (('job', 'jobhidden'), jobs),
        (('conferences',), conferences),
    )

    for collections, converter in converters_by_collection:
        if any(_collection_in_record(record, name) for name in collections):
            return converter.do(record)

    return hep.do(record)
def test_discard_035__9_arXiv():
    """035__9:arXiv is redundant with 037__9:arXiv, throw it away."""
    schema = load_schema('hep')

    snippet = (
        ' <datafield tag="035" ind1=" " ind2=" ">'
        ' <subfield code="9">arXiv</subfield>'
        ' <subfield code="a">oai:arXiv.org:1611.05079</subfield>'
        ' </datafield> '
    )  # record/1498308

    json_record = hep.do(create_record(snippet))
    expected = [{}]

    # The whole record is validated here, not just one property.
    assert validate(json_record, schema) is None
    assert expected == json_record['external_system_identifiers']
    assert expected == json_record['arxiv_eprints']
def test_urls_from_marcxml_856_with_single_u_no_y():
    """An 856 field with only a ``u`` subfield gives a URL with no description."""
    snippet = (
        '<record>'
        ' <datafield tag="856" ind1="4" ind2=" ">'
        ' <subfield code="u">http://www.physics.unlv.edu/labastro/</subfield>'
        ' </datafield>'
        '</record>'
    )

    json_record = hep.do(create_record(snippet))
    cleaned = clean_record(json_record)
    expected_urls = [
        {
            'value': 'http://www.physics.unlv.edu/labastro/',
        },
    ]

    assert expected_urls == cleaned['urls']
def test_dois_from_0247_a_2_9_and_0247_a_2():
    """The same DOI with and without a ``9`` source is kept as two entries."""
    subschema = load_schema('hep')['properties']['dois']

    snippet = (
        '<record>'
        ' <datafield tag="024" ind1="7" ind2=" ">'
        ' <subfield code="2">DOI</subfield>'
        ' <subfield code="9">bibmatch</subfield>'
        ' <subfield code="a">10.1088/1475-7516/2015/03/044</subfield>'
        ' </datafield>'
        ' <datafield tag="024" ind1="7" ind2=" ">'
        ' <subfield code="2">DOI</subfield>'
        ' <subfield code="a">10.1088/1475-7516/2015/03/044</subfield>'
        ' </datafield>'
        '</record>'
    )  # record/1286727

    # MARC -> JSON
    json_record = hep.do(create_record(snippet))
    expected_json = [
        {
            'source': 'bibmatch',
            'value': '10.1088/1475-7516/2015/03/044',
        },
        {
            'value': '10.1088/1475-7516/2015/03/044',
        },
    ]

    assert validate(json_record['dois'], subschema) is None
    assert expected_json == json_record['dois']

    # JSON -> MARC
    marc_record = hep2marc.do(json_record)
    expected_marc = [
        {
            'a': '10.1088/1475-7516/2015/03/044',
            '9': 'bibmatch',
            '2': 'DOI',
        },
        {
            'a': '10.1088/1475-7516/2015/03/044',
            '2': 'DOI',
        },
    ]

    assert expected_marc == marc_record['024']
def test_new_record_from_970__d():
    """970 ``d`` becomes a ``new_record`` ``$ref`` and converts back to ``d``."""
    subschema = load_schema('hep')['properties']['new_record']

    snippet = (
        '<datafield tag="970" ind1=" " ind2=" ">'
        ' <subfield code="d">361769</subfield>'
        '</datafield>'
    )  # record/37545

    # MARC -> JSON
    json_record = hep.do(create_record(snippet))
    expected_json = {'$ref': 'http://localhost:5000/api/record/361769'}

    assert validate(json_record['new_record'], subschema) is None
    assert expected_json == json_record['new_record']

    # JSON -> MARC
    marc_record = hep2marc.do(json_record)
    expected_marc = {'d': '361769'}

    assert expected_marc == marc_record['970']
def test_dois_from_0247_a_2_and_0247_a_2_9():
    """Two different DOIs, the second with a ``9`` source, convert both ways."""
    subschema = load_schema('hep')['properties']['dois']

    snippet = (
        '<record>'
        ' <datafield tag="024" ind1="7" ind2=" ">'
        ' <subfield code="2">DOI</subfield>'
        ' <subfield code="a">10.1103/PhysRevD.89.072002</subfield>'
        ' </datafield>'
        ' <datafield tag="024" ind1="7" ind2=" ">'
        ' <subfield code="2">DOI</subfield>'
        ' <subfield code="9">bibmatch</subfield>'
        ' <subfield code="a">10.1103/PhysRevD.91.019903</subfield>'
        ' </datafield>'
        '</record>'
    )  # record/1273665

    # MARC -> JSON
    json_record = hep.do(create_record(snippet))
    expected_json = [
        {
            'value': '10.1103/PhysRevD.89.072002',
        },
        {
            'source': 'bibmatch',
            'value': '10.1103/PhysRevD.91.019903',
        },
    ]

    assert validate(json_record['dois'], subschema) is None
    assert expected_json == json_record['dois']

    # JSON -> MARC
    marc_record = hep2marc.do(json_record)
    expected_marc = [
        {
            'a': '10.1103/PhysRevD.89.072002',
            '2': 'DOI',
        },
        {
            'a': '10.1103/PhysRevD.91.019903',
            '9': 'bibmatch',
            '2': 'DOI',
        },
    ]

    assert expected_marc == marc_record['024']
def test_public_notes_from_500__a_and_500__a_9():
    """Two 500 fields, one with a ``9`` source, map to ``public_notes`` and back."""
    subschema = load_schema('hep')['properties']['public_notes']

    snippet = (
        '<record>'
        ' <datafield tag="500" ind1=" " ind2=" ">'
        ' <subfield code="a">*Brief entry*</subfield>'
        ' </datafield>'
        ' <datafield tag="500" ind1=" " ind2=" ">'
        ' <subfield code="a">11 pages, 5 figures</subfield>'
        ' <subfield code="9">arXiv</subfield>'
        ' </datafield>'
        '</record>'
    )  # record/1450045

    # MARC -> JSON
    json_record = hep.do(create_record(snippet))
    expected_json = [
        {
            'value': '*Brief entry*',
        },
        {
            'source': 'arXiv',
            'value': '11 pages, 5 figures',
        },
    ]

    assert validate(json_record['public_notes'], subschema) is None
    assert expected_json == json_record['public_notes']

    # JSON -> MARC
    marc_record = hep2marc.do(json_record)
    expected_marc = [
        {
            'a': '*Brief entry*',
        },
        {
            '9': 'arXiv',
            'a': '11 pages, 5 figures',
        },
    ]

    assert expected_marc == marc_record['500']
def test_urls_from_marcxml_856_with_two_u_duplicates_single_y():
    """Two identical ``u`` subfields sharing one ``y`` give a single URL entry."""
    snippet = (
        '<record>'
        ' <datafield tag="856" ind1="4" ind2=" ">'
        ' <subfield code="u">http://www.physics.unlv.edu/labastro/</subfield>'
        ' <subfield code="u">http://www.physics.unlv.edu/labastro/</subfield>'
        ' <subfield code="y">Conference web page</subfield>'
        ' </datafield>'
        '</record>'
    )

    json_record = hep.do(create_record(snippet))
    cleaned = clean_record(json_record)
    expected_urls = [
        {
            'description': 'Conference web page',
            'value': 'http://www.physics.unlv.edu/labastro/',
        },
    ]

    assert expected_urls == cleaned['urls']
def test_publication_type_from_980__a():
    """980 ``a`` = ``Review`` becomes ``publication_type == ['review']``;
    converting back yields 980 fields for ``review`` and ``HEP``, in any
    order."""
    schema = load_schema('hep')
    subschema = schema['properties']['publication_type']

    snippet = (
        ' <datafield tag="980" ind1=" " ind2=" ">'
        ' <subfield code="a">Review</subfield>'
        ' </datafield> '
    )

    expected = ['review']
    result = hep.do(create_record(snippet))

    assert validate(result['publication_type'], subschema) is None
    assert expected == result['publication_type']

    expected = [{'a': 'review'}, {'a': 'HEP'}]
    result = hep2marc.do(result)

    # Dicts have no ordering on Python 3, so a bare sorted() on a list of
    # dicts raises TypeError there. Sorting by the fields' (code, value)
    # pairs keeps the comparison order-insensitive on both Python 2 and 3.
    def _subfields(field):
        return sorted(field.items())

    assert sorted(expected, key=_subfields) == sorted(result['980'], key=_subfields)
def test_core_from_980__a_noncore():
    """980 ``a`` = ``noncore`` becomes ``core == False``; converting back
    yields 980 fields for ``noncore`` and ``HEP``, in any order."""
    schema = load_schema('hep')
    subschema = schema['properties']['core']

    snippet = (
        ' <datafield tag="980" ind1=" " ind2=" ">'
        ' <subfield code="a">noncore</subfield>'
        ' </datafield> '
    )

    expected = False
    result = hep.do(create_record(snippet))

    assert validate(result['core'], subschema) is None
    assert expected == result['core']

    expected = [{'a': 'noncore'}, {'a': 'HEP'}]
    result = hep2marc.do(result)

    # Dicts have no ordering on Python 3, so a bare sorted() on a list of
    # dicts raises TypeError there. Sorting by the fields' (code, value)
    # pairs keeps the comparison order-insensitive on both Python 2 and 3.
    def _subfields(field):
        return sorted(field.items())

    assert sorted(expected, key=_subfields) == sorted(result['980'], key=_subfields)
def deleted_record(app):
    """Store record 111, marked DELETED in 980 ``c``.

    The record is upserted inside an application context before the
    ``yield`` and removed with ``_delete_record`` afterwards.
    """
    snippet = (
        '<record>'
        ' <controlfield tag="001">111</controlfield>'
        ' <datafield tag="980" ind1=" " ind2=" ">'
        ' <subfield code="c">DELETED</subfield>'
        ' </datafield>'
        '</record>'
    )

    with app.app_context():
        json_record = hep.do(create_record(snippet))
        json_record['$schema'] = 'http://localhost:5000/schemas/records/hep.json'

        with db.session.begin_nested():
            record_upsert(json_record)
        db.session.commit()

    yield

    # Teardown: remove the record again.
    with app.app_context():
        _delete_record('lit', 111)
def test_report_numbers_hidden_from_037__z():
    """037 ``z`` becomes a hidden report number and converts back to ``z``."""
    subschema = load_schema('hep')['properties']['report_numbers']

    snippet = (
        ' <datafield tag="037" ind1=" " ind2=" ">'
        ' <subfield code="z">FERMILAB-PUB-17-011-CMS</subfield>'
        ' </datafield> '
    )  # record/1508174

    # MARC -> JSON
    json_record = hep.do(create_record(snippet))
    expected_json = [{'hidden': True, 'value': 'FERMILAB-PUB-17-011-CMS'}]

    assert validate(json_record['report_numbers'], subschema) is None
    assert expected_json == json_record['report_numbers']

    # JSON -> MARC
    marc_record = hep2marc.do(json_record)
    expected_marc = [{'z': 'FERMILAB-PUB-17-011-CMS'}]

    assert expected_marc == marc_record['037']
def test_thesis_from_502__a_c_d_z():
    """A 502 field with ``a``, ``c``, ``d`` and ``z`` maps to ``thesis`` and
    back to ``502`` (without the ``z`` subfield)."""
    subschema = load_schema('hep')['properties']['thesis']

    snippet = (
        '<datafield tag="502" ind1=" " ind2=" ">'
        ' <subfield code="a">PhD</subfield>'
        ' <subfield code="c">IIT, Roorkee</subfield>'
        ' <subfield code="d">2011</subfield>'
        ' <subfield code="z">909554</subfield>'
        '</datafield>'
    )  # record/897773/export/xme

    # MARC -> JSON
    json_record = hep.do(create_record(snippet))
    expected_json = {
        'date': '2011',
        'defense_date': 'PhD',  # XXX: obviously wrong.
        'institutions': [
            {
                'curated_relation': True,
                'record': {
                    '$ref': 'http://localhost:5000/api/institutions/909554',
                },
                'name': 'IIT, Roorkee',
            },
        ],
    }

    assert validate(json_record['thesis'], subschema) is None
    assert expected_json == json_record['thesis']

    # JSON -> MARC
    marc_record = hep2marc.do(json_record)
    expected_marc = {
        'a': 'PhD',
        'c': [
            'IIT, Roorkee',
        ],
        'd': '2011',
    }

    assert expected_marc == marc_record['502']
def _author_list(obj, eng):
    """Extract an author list from the arXiv tarball of a workflow object.

    The tarball is taken from ``obj.files`` when already present, otherwise
    downloaded from ``ARXIV_TARBALL_URL``. It is unpacked next to the
    tarball, and the first ``.xml`` file whose content matches
    ``REGEXP_AUTHLIST`` is converted with ``stylesheet`` and merged into
    ``obj.data``.

    :param obj: workflow object; its ``data``, ``files`` and ``log``
        attributes are used.
    :param eng: not used by this function.
    :return: ``None``. Returns early, after logging an error, when the
        tarball is invalid.
    """
    from inspirehep.modules.converter import convert

    arxiv_id = get_clean_arXiv_id(obj.data)
    filename = secure_filename("{0}.tar.gz".format(arxiv_id))
    if filename not in obj.files:
        tarball = download_file_to_workflow(
            workflow=obj,
            name=filename,
            url=current_app.config['ARXIV_TARBALL_URL'].format(
                arxiv_id=arxiv_id
            )
        )
    else:
        tarball = obj.files[filename]

    sub_dir = os.path.abspath("{0}_files".format(tarball.file.uri))
    try:
        file_list = untar(tarball.file.uri, sub_dir)
    except InvalidTarball:
        obj.log.error("Invalid tarball {0}".format(tarball.file.uri))
        return
    obj.log.info("Extracted tarball to: {0}".format(sub_dir))

    xml_files_list = [path for path in file_list if path.endswith(".xml")]
    obj.log.info("Found xmlfiles: {0}".format(xml_files_list))

    for xml_file in xml_files_list:
        # The context manager closes the file even if read() raises.
        with open(xml_file, "r") as xml_file_fd:
            xml_content = xml_file_fd.read()

        if REGEXP_AUTHLIST.findall(xml_content):
            obj.log.info("Found a match for author extraction")
            # NOTE(review): `stylesheet` is not defined in this function;
            # presumably it comes from an enclosing scope -- confirm.
            authors_xml = convert(xml_content, stylesheet)
            authors_rec = create_record(authors_xml)
            authorlist_record = hep.do(authors_rec)
            obj.data.update(authorlist_record)
            # Only the first matching file is used.
            break
def test_hidden_notes_from_595__double_a_9():
    """A 595 field with two ``a`` subfields and one ``9`` gives two
    ``hidden_notes`` sharing the source, and two ``595`` fields back."""
    subschema = load_schema('hep')['properties']['hidden_notes']

    snippet = (
        '<datafield tag="595" ind1=" " ind2=" ">'
        ' <subfield code="9">SPIRES-HIDDEN</subfield>'
        ' <subfield code="a">TeXtitle from script</subfield>'
        ' <subfield code="a">no affiliation (not clear pn the fulltext)</subfield>'
        '</datafield>'
    )  # record/109310

    # MARC -> JSON
    json_record = hep.do(create_record(snippet))
    expected_json = [
        {
            'source': 'SPIRES-HIDDEN',
            'value': 'TeXtitle from script',
        },
        {
            'source': 'SPIRES-HIDDEN',
            'value': 'no affiliation (not clear pn the fulltext)',
        },
    ]

    assert validate(json_record['hidden_notes'], subschema) is None
    assert expected_json == json_record['hidden_notes']

    # JSON -> MARC
    marc_record = hep2marc.do(json_record)
    expected_marc = [
        {
            '9': 'SPIRES-HIDDEN',
            'a': 'TeXtitle from script',
        },
        {
            '9': 'SPIRES-HIDDEN',
            'a': 'no affiliation (not clear pn the fulltext)',
        },
    ]

    assert expected_marc == marc_record['595']