def setUp(self):
        """Load the MARCXML fixtures and prepare the expected CV LaTeX data.

        Reads ``test_hep_formats.xml`` and ``test_hep_publi_info.xml`` from
        the ``fixtures`` directory of the ``tests`` package, converts each
        with ``create_record`` followed by ``hep.do``, and stores the
        resulting records and the expected sample dictionaries on ``self``.
        """
        self.marcxml = pkg_resources.resource_string(
            'tests', os.path.join('fixtures', 'test_hep_formats.xml'))

        self.marcxml_publi_info = pkg_resources.resource_string(
            'tests', os.path.join('fixtures', 'test_hep_publi_info.xml'))

        record = create_record(self.marcxml)
        record_publi_info = create_record(self.marcxml_publi_info)

        self.hep_record = hep.do(record)
        self.hep_record_publi_info = hep.do(record_publi_info)

        # Every LaTeX backslash is written as ``\\`` so that no literal relies
        # on Python's handling of unrecognised escapes such as ``\ `` or
        # ``\s``; the runtime values are unchanged.
        self.sample_cv_latex = {
            'author': 'G.~Aad',
            'title': "{\\bf ``\nSearch for supersymmetry in events containing a same-flavour opposite-sign dilepton pair, jets, and large missing transverse momentum in $\\sqrt{s}=8$ TeV $pp$ collisions with the ATLAS detector\n''}",
            'publi_info': [
                'Eur.\\ Phys.\\ J.\\ C {\\bf 75}, no. 7, 318 (2015)',
                '[Eur.\\ Phys.\\ J.\\ C {\\bf 75}, no. 10, 463 (2015)]',
            ],
            'url': cfg['CFG_SITE_URL'] + '/record/1351762',
            'date': 'Mar 11, 2015',
        }

        self.sample_cv_latex_publi_info = {
            'publi_info': ['Class.\\ Quant.\\ Grav.\\  {\\bf 15}, 2153 (1998)'],
        }
Example #2
0
    def setUp(self):
        """Load the MARCXML fixtures and prepare the expected LaTeX data.

        Reads ``test_hep_formats.xml`` and ``test_hep_publi_info.xml`` from
        the ``fixtures`` directory of the ``tests`` package, converts each
        with ``create_record`` followed by ``hep.do``, builds ``Latex``
        objects for the ``latex_eu`` and ``latex_us`` formats, and stores the
        expected sample dictionaries on ``self``.
        """
        self.marcxml = pkg_resources.resource_string(
            'tests', os.path.join('fixtures', 'test_hep_formats.xml'))

        self.marcxml_publi_info = pkg_resources.resource_string(
            'tests', os.path.join('fixtures', 'test_hep_publi_info.xml'))

        record = create_record(self.marcxml)
        record_publi_info = create_record(self.marcxml_publi_info)

        self.hep_record = hep.do(record)
        self.hep_record_publi_info = hep.do(record_publi_info)

        self.latex_eu = Latex(self.hep_record, 'latex_eu')
        self.latex_us = Latex(self.hep_record, 'latex_us')
        self.latex_eu_publi_info = Latex(
            self.hep_record_publi_info, 'latex_eu')

        # Every LaTeX backslash is written as ``\\`` so that no literal relies
        # on Python's handling of unrecognised escapes such as ``\ `` or
        # ``\s``; the runtime values are unchanged.
        self.sample_latex_eu = {
            'citation_key': 'Aad:2015wqa',
            'author': 'G.~Aad',
            'title': '\nSearch for supersymmetry in events containing a same-flavour opposite-sign dilepton pair, jets, and large missing transverse momentum in $\\sqrt{s}=8$ TeV pp collisions with the ATLAS detector\n',
            'publi_info': [
                'Eur.\\ Phys.\\ J.\\ C {\\bf 75} (2015) 7,  318',
                '[Eur.\\ Phys.\\ J.\\ C {\\bf 75} (2015) 10,  463]',
            ],
            'arxiv': 'arXiv:1503.03290 [hep-ex]',
            'report_number': '',
            'SLACcitation': '%%CITATION = ARXIV:1503.03290;%%',
        }

        self.sample_latex_publi_info = {
            'publi_info': ['Class.\\ Quant.\\ Grav.\\  {\\bf 15} (1998) 2153'],
        }

        self.sample_latex_us = {
            'citation_key': 'Aad:2015wqa',
            'author': 'G.~Aad',
            'title': '\nSearch for supersymmetry in events containing a same-flavour opposite-sign dilepton pair, jets, and large missing transverse momentum in $\\sqrt{s}=8$ TeV pp collisions with the ATLAS detector\n',
            'publi_info': [
                'Eur.\\ Phys.\\ J.\\ C {\\bf 75}, no. 7, 318 (2015)',
                '[Eur.\\ Phys.\\ J.\\ C {\\bf 75}, no. 10, 463 (2015)]',
            ],
            'arxiv': 'arXiv:1503.03290 [hep-ex]',
            'report_number': '',
            'SLACcitation': '%%CITATION = ARXIV:1503.03290;%%',
        }
Example #3
0
def test_hidden_notes_from_595__a_9_and_595__double_a_9():
    """Two 595 fields, one with a repeated $a, give three hidden notes."""
    marcxml = (
        '<record>'
        '  <datafield tag="595" ind1=" " ind2=" ">'
        '    <subfield code="9">SPIRES-HIDDEN</subfield>'
        '    <subfield code="a">Title changed from ALLCAPS</subfield>'
        '  </datafield>'
        '  <datafield tag="595" ind1=" " ind2=" ">'
        '    <subfield code="9">SPIRES-HIDDEN</subfield>'
        '    <subfield code="a">TeXtitle from script</subfield>'
        '    <subfield code="a">no affiliation (not clear pn the fulltext)</subfield>'
        '  </datafield>'
        '</record>'
    )  # record/109310

    # One note per $a subfield, each carrying the $9 of its own datafield.
    expected_notes = [
        {'source': 'SPIRES-HIDDEN', 'value': 'Title changed from ALLCAPS'},
        {'source': 'SPIRES-HIDDEN', 'value': 'TeXtitle from script'},
        {
            'source': 'SPIRES-HIDDEN',
            'value': 'no affiliation (not clear pn the fulltext)',
        },
    ]
    cleaned = clean_record(hep.do(create_record(marcxml)))

    assert expected_notes == cleaned['hidden_notes']
def test_urls_from_marcxml_multiple_8564():
    """Two 8564 fields become two ``urls`` entries, in document order."""
    marcxml = (
        '<record>'
        '  <datafield tag="856" ind1="4" ind2="">'
        '    <subfield code="u">http://www.physics.unlv.edu/labastro/</subfield>'
        '    <subfield code="y">Conference web page</subfield>'
        '  </datafield>'
        '  <datafield tag="856" ind1="4" ind2="">'
        '    <subfield code="u">http://www.cern.ch/</subfield>'
        '    <subfield code="y">CERN web page</subfield>'
        '  </datafield>'
        '</record>'
    )

    # $y maps to 'description', $u maps to 'value'.
    expected_urls = [
        {
            'description': 'Conference web page',
            'value': 'http://www.physics.unlv.edu/labastro/',
        },
        {'description': 'CERN web page', 'value': 'http://www.cern.ch/'},
    ]
    cleaned = clean_record(hep.do(create_record(marcxml)))

    assert expected_urls == cleaned['urls']
def test_arxiv_eprints_from_037__a_c_9():
    """Round-trip an arXiv 037 field through ``hep`` and ``hep2marc``."""
    eprints_schema = load_schema('hep')['properties']['arxiv_eprints']

    marcxml = (
        '<datafield tag="037" ind1=" " ind2=" ">'
        '  <subfield code="9">arXiv</subfield>'
        '  <subfield code="a">arXiv:1505.01843</subfield>'
        '  <subfield code="c">hep-ph</subfield>'
        '</datafield>'
    )  # record/1368891

    # MARC -> JSON
    expected_json = [{'categories': ['hep-ph'], 'value': '1505.01843'}]
    json_record = hep.do(create_record(marcxml))

    assert validate(json_record['arxiv_eprints'], eprints_schema) is None
    assert expected_json == json_record['arxiv_eprints']

    # JSON -> MARC
    expected_marc = [{'9': 'arXiv', 'a': 'arXiv:1505.01843', 'c': 'hep-ph'}]
    marc_record = hep2marc.do(json_record)

    assert expected_marc == marc_record['037']
def test_texkeys_from_035__a_9():
    """Round-trip an INSPIRETeX 035 field through ``hep`` and ``hep2marc``."""
    texkeys_schema = load_schema('hep')['properties']['texkeys']

    marcxml = (
        '<datafield tag="035" ind1=" " ind2=" ">'
        '  <subfield code="9">INSPIRETeX</subfield>'
        '  <subfield code="a">Hagedorn:1963hdh</subfield>'
        '</datafield>'
    )  # record/1403324

    # MARC -> JSON
    expected_json = ['Hagedorn:1963hdh']
    json_record = hep.do(create_record(marcxml))

    assert validate(json_record['texkeys'], texkeys_schema) is None
    assert expected_json == json_record['texkeys']

    # JSON -> MARC
    expected_marc = [{'9': 'INSPIRETeX', 'a': 'Hagedorn:1963hdh'}]
    marc_record = hep2marc.do(json_record)

    assert expected_marc == marc_record['035']
Example #7
0
def record_not_yet_deleted(app):
    """Create and index literature record 333, yield, then delete it.

    Written as a generator with setup before the ``yield`` and cleanup
    after it (presumably consumed as a pytest fixture; the decorator is
    not visible here).
    """
    marcxml = (
        '<record>'
        '  <controlfield tag="001">333</controlfield>'
        '  <controlfield tag="005">20160913214552.0</controlfield>'
        '  <datafield tag="980" ind1=" " ind2=" ">'
        '    <subfield code="a">HEP</subfield>'
        '  </datafield>'
        '</record>'
    )

    with app.app_context():
        hep_json = hep.do(create_record(marcxml))
        hep_json['$schema'] = 'http://localhost:5000/schemas/records/hep.json'

        with db.session.begin_nested():
            stored = record_upsert(hep_json)
            # Only index when the upsert actually returned a record.
            if stored:
                indexer = RecordIndexer()
                indexer.index(stored)

        db.session.commit()

    yield

    # Cleanup: remove the record created above.
    with app.app_context():
        _delete_record_from_everywhere('literature', 333)
def test_report_numbers_from_037__z_9():
    """Round-trip a sourced report number through ``hep`` and ``hep2marc``.

    NOTE(review): the test name mentions ``$z``, but the snippet below uses
    ``$a`` together with ``$9`` -- confirm which one was intended.
    """
    report_numbers_schema = load_schema('hep')['properties']['report_numbers']

    marcxml = (
        '<datafield tag="037" ind1=" " ind2=" ">'
        '  <subfield code="9">SLAC</subfield>'
        '  <subfield code="a">SLAC-PUB-16140</subfield>'
        '</datafield>'
    )  # record/1326454

    # MARC -> JSON
    expected_json = [{'source': 'SLAC', 'value': 'SLAC-PUB-16140'}]
    json_record = hep.do(create_record(marcxml))

    assert validate(json_record['report_numbers'], report_numbers_schema) is None
    assert expected_json == json_record['report_numbers']

    # JSON -> MARC
    expected_marc = [{'9': 'SLAC', 'a': 'SLAC-PUB-16140'}]
    marc_record = hep2marc.do(json_record)

    assert expected_marc == marc_record['037']
Example #9
0
def test_thesis_supervisors_from_701__a_double_u():
    """A 701 field with two $u yields one supervisor with two affiliations."""
    marcxml = (
        '<datafield tag="701" ind1=" " ind2=" ">'
        '  <subfield code="a">Mnich, Joachim</subfield>'
        '  <subfield code="u">DESY</subfield>'
        '  <subfield code="u">U. Hamburg (main)</subfield>'
        '</datafield>'
    )  # record/1462486

    expected_affiliations = [
        {'curated_relation': False, 'value': 'DESY'},
        {'curated_relation': False, 'value': 'U. Hamburg (main)'},
    ]
    expected_supervisors = [
        {
            'affiliations': expected_affiliations,
            'full_name': 'Mnich, Joachim',
        },
    ]
    cleaned = clean_record(hep.do(create_record(marcxml)))

    assert expected_supervisors == cleaned['thesis_supervisors']
def test_external_system_identifiers_from_035__z_9_handles_cernkey():
    """Round-trip a CERNKEY 035 $z through ``hep`` and ``hep2marc``."""
    identifiers_schema = (
        load_schema('hep')['properties']['external_system_identifiers'])

    marcxml = (
        '<datafield tag="035" ind1=" " ind2=" ">'
        '  <subfield code="9">CERNKEY</subfield>'
        '  <subfield code="z">0263439</subfield>'
        '</datafield>'
    )  # record/451647

    # MARC -> JSON
    expected_json = [{'schema': 'CERNKEY', 'value': '0263439'}]
    json_record = hep.do(create_record(marcxml))

    assert validate(
        json_record['external_system_identifiers'], identifiers_schema) is None
    assert expected_json == json_record['external_system_identifiers']

    # JSON -> MARC: the value is expected back in $z, not $a.
    expected_marc = [{'9': 'CERNKEY', 'z': '0263439'}]
    marc_record = hep2marc.do(json_record)

    assert expected_marc == marc_record['035']
def test_dois_from_2472_a_2_and_247_a_2_9():
    """Two 0247 DOI fields, one with a $9 source, give two ``dois``."""
    marcxml = (
        '<record>'
        '  <datafield tag="024" ind1="7" ind2=" ">'
        '    <subfield code="2">DOI</subfield>'
        '    <subfield code="a">10.1103/PhysRevD.89.072002</subfield>'
        '  </datafield>'
        '  <datafield tag="024" ind1="7" ind2=" ">'
        '    <subfield code="2">DOI</subfield>'
        '    <subfield code="9">bibmatch</subfield>'
        '    <subfield code="a">10.1103/PhysRevD.91.019903</subfield>'
        '  </datafield>'
        '</record>'
    )

    # Only the second field has a $9, so only the second DOI has a source.
    expected_dois = [
        {'value': '10.1103/PhysRevD.89.072002'},
        {'source': 'bibmatch', 'value': '10.1103/PhysRevD.91.019903'},
    ]
    json_record = hep.do(create_record(marcxml))

    assert expected_dois == json_record['dois']
Example #12
0
def convert_marcxml(source):
    """Convert MARC XML to JSON.

    Reads ``source`` in full, splits it into records, and yields each one
    converted by the dojson model that matches its collection, with empty
    values stripped. Records matching no listed collection use ``hep``.
    """
    from dojson.contrib.marc21.utils import create_record, split_blob

    from inspirehep.dojson.utils import strip_empty_values
    from inspirehep.dojson.hep import hep
    from inspirehep.dojson.institutions import institutions
    from inspirehep.dojson.journals import journals
    from inspirehep.dojson.experiments import experiments
    from inspirehep.dojson.hepnames import hepnames
    from inspirehep.dojson.jobs import jobs
    from inspirehep.dojson.conferences import conferences

    # Ordered (collection names, model) pairs: the first match wins.
    dispatch = (
        (('institution',), institutions),
        (('experiment',), experiments),
        (('journals',), journals),
        (('hepnames',), hepnames),
        (('job', 'jobhidden'), jobs),
        (('conferences',), conferences),
    )

    for blob in split_blob(source.read()):
        marc_record = create_record(blob)

        model = hep  # fallback when no collection matches
        for collection_names, candidate in dispatch:
            if any(_collection_in_record(marc_record, name)
                   for name in collection_names):
                model = candidate
                break

        yield strip_empty_values(model.do(marc_record))
def test_languages_from_double_041__a():
    """Round-trip two 041 $a language names through ``hep`` and ``hep2marc``."""
    languages_schema = load_schema('hep')['properties']['languages']

    marcxml = (
        '<record>'
        '  <datafield tag="041" ind1=" " ind2=" ">'
        '    <subfield code="a">French</subfield>'
        '  </datafield>'
        '  <datafield tag="041" ind1=" " ind2=" ">'
        '    <subfield code="a">German</subfield>'
        '  </datafield>'
        '</record>'
    )  # record/1231408

    # MARC -> JSON: language names become two-letter codes.
    expected_json = ['fr', 'de']
    json_record = hep.do(create_record(marcxml))

    assert validate(json_record['languages'], languages_schema) is None
    assert expected_json == json_record['languages']

    # JSON -> MARC: codes come back as lowercase language names.
    expected_marc = [{'a': 'french'}, {'a': 'german'}]
    marc_record = hep2marc.do(json_record)

    assert expected_marc == marc_record['041']
def test_report_numbers_from_two_037__a():
    """Round-trip two 037 $a report numbers through ``hep`` and ``hep2marc``."""
    report_numbers_schema = load_schema('hep')['properties']['report_numbers']

    marcxml = (
        '<record>'
        '  <datafield tag="037" ind1=" " ind2=" ">'
        '    <subfield code="a">UTPT-89-27</subfield>'
        '  </datafield>'
        '  <datafield tag="037" ind1=" " ind2=" ">'
        '    <subfield code="a">CALT-68-1585</subfield>'
        '  </datafield>'
        '</record>'
    )  # record/26564

    # MARC -> JSON
    expected_json = [{'value': 'UTPT-89-27'}, {'value': 'CALT-68-1585'}]
    json_record = hep.do(create_record(marcxml))

    assert validate(json_record['report_numbers'], report_numbers_schema) is None
    assert expected_json == json_record['report_numbers']

    # JSON -> MARC
    expected_marc = [{'a': 'UTPT-89-27'}, {'a': 'CALT-68-1585'}]
    marc_record = hep2marc.do(json_record)

    assert expected_marc == marc_record['037']
def test_titles_from_245__a_9():
    """Round-trip a sourced 245 title through ``hep`` and ``hep2marc``."""
    titles_schema = load_schema('hep')['properties']['titles']

    marcxml = (
        '<datafield tag="245" ind1=" " ind2=" ">'
        '  <subfield code="a">Exact Form of Boundary Operators Dual to '
        'Interacting Bulk Scalar Fields in the AdS/CFT Correspondence</subfield>'
        '  <subfield code="9">arXiv</subfield>'
        '</datafield>'
    )  # record/001511698

    # The same title text is expected in both directions.
    title_text = (
        'Exact Form of Boundary Operators Dual to Interacting '
        'Bulk Scalar Fields in the AdS/CFT Correspondence'
    )

    # MARC -> JSON
    expected_json = [{'title': title_text, 'source': 'arXiv'}]
    json_record = hep.do(create_record(marcxml))

    assert validate(json_record['titles'], titles_schema) is None
    assert expected_json == json_record['titles']

    # JSON -> MARC
    expected_marc = [{'a': title_text, '9': 'arXiv'}]
    marc_record = hep2marc.do(json_record)

    assert expected_marc == marc_record['245']
def test_title_translations_from_242__a(stable_langdetect):
    """Round-trip a 242 $a translated title through ``hep`` and ``hep2marc``."""
    translations_schema = (
        load_schema('hep')['properties']['title_translations'])

    marcxml = (
        '<datafield tag="242" ind1=" " ind2=" ">'
        '  <subfield code="a">The redshift of extragalactic nebulae</subfield>'
        '</datafield>'
    )  # record/8352

    # MARC -> JSON: a 'language' key is expected although the snippet has none.
    expected_json = [
        {'language': 'en', 'title': 'The redshift of extragalactic nebulae'},
    ]
    json_record = hep.do(create_record(marcxml))

    assert validate(
        json_record['title_translations'], translations_schema) is None
    assert expected_json == json_record['title_translations']

    # JSON -> MARC
    expected_marc = [{'a': 'The redshift of extragalactic nebulae'}]
    marc_record = hep2marc.do(json_record)

    assert expected_marc == marc_record['242']
def test_isbns_from_020__a_b_normalizes_ebook():
    """An 020 $b of ``eBook`` is expected as medium ``online``."""
    isbns_schema = load_schema('hep')['properties']['isbns']

    marcxml = (
        '<datafield tag="020" ind1=" " ind2=" ">'
        '  <subfield code="a">9783319259017</subfield>'
        '  <subfield code="b">eBook</subfield>'
        '</datafield>'
    )  # record/1430829

    # MARC -> JSON
    expected_json = [{'value': '9783319259017', 'medium': 'online'}]
    json_record = hep.do(create_record(marcxml))

    assert validate(json_record['isbns'], isbns_schema) is None
    assert expected_json == json_record['isbns']

    # JSON -> MARC
    expected_marc = [{'a': '9783319259017', 'b': 'online'}]
    marc_record = hep2marc.do(json_record)

    assert expected_marc == marc_record['020']
def test_isbns_from_020__a_b_normalizes_electronic():
    """An 020 $b of ``electronic version`` is expected to yield no medium."""
    isbns_schema = load_schema('hep')['properties']['isbns']

    marcxml = (
        '<datafield tag="020" ind1=" " ind2=" ">'
        '  <subfield code="a">9783319006260</subfield>'
        '  <subfield code="b">electronic version</subfield>'
        '</datafield>'
    )  # record/1292006

    # MARC -> JSON: only the value is expected, without a 'medium' key.
    expected_json = [{'value': '9783319006260'}]
    json_record = hep.do(create_record(marcxml))

    assert validate(json_record['isbns'], isbns_schema) is None
    assert expected_json == json_record['isbns']

    # JSON -> MARC
    expected_marc = [{'a': '9783319006260'}]
    marc_record = hep2marc.do(json_record)

    assert expected_marc == marc_record['020']
def test_isbns_from_020__a_b_normalizes_online():
    """A hyphenated ISBN with $b ``Online`` is expected normalized."""
    isbns_schema = load_schema('hep')['properties']['isbns']

    marcxml = (
        '<datafield tag="020" ind1=" " ind2=" ">'
        '  <subfield code="a">978-94-024-0999-4</subfield>'
        '  <subfield code="b">Online</subfield>'
        '</datafield>'
    )  # record/1504286

    # MARC -> JSON: hyphens dropped from the ISBN, medium lowercased.
    expected_json = [{'value': '9789402409994', 'medium': 'online'}]
    json_record = hep.do(create_record(marcxml))

    assert validate(json_record['isbns'], isbns_schema) is None
    assert expected_json == json_record['isbns']

    # JSON -> MARC
    expected_marc = [{'a': '9789402409994', 'b': 'online'}]
    marc_record = hep2marc.do(json_record)

    assert expected_marc == marc_record['020']
def test_isbns_from_020__a_b_normalizes_hardcover():
    """A hyphenated ISBN with $b ``hardcover`` is expected normalized."""
    isbns_schema = load_schema('hep')['properties']['isbns']

    marcxml = (
        '<datafield tag="020" ind1=" " ind2=" ">'
        '  <subfield code="a">978-981-4571-66-1</subfield>'
        '  <subfield code="b">hardcover</subfield>'
        '</datafield>'
    )  # record/1351311

    # MARC -> JSON: hyphens dropped from the ISBN, medium kept as given.
    expected_json = [{'value': '9789814571661', 'medium': 'hardcover'}]
    json_record = hep.do(create_record(marcxml))

    assert validate(json_record['isbns'], isbns_schema) is None
    assert expected_json == json_record['isbns']

    # JSON -> MARC
    expected_marc = [{'a': '9789814571661', 'b': 'hardcover'}]
    marc_record = hep2marc.do(json_record)

    assert expected_marc == marc_record['020']
def test_dois_from_0247_a_2_double_9_ignores_curator_source():
    """With $9 ``bibcheck`` and ``CURATOR``, only ``bibcheck`` is expected."""
    dois_schema = load_schema('hep')['properties']['dois']

    marcxml = (
        '<datafield tag="024" ind1="7" ind2=" ">'
        '  <subfield code="2">DOI</subfield>'
        '  <subfield code="9">bibcheck</subfield>'
        '  <subfield code="9">CURATOR</subfield>'
        '  <subfield code="a">10.1590/S1806-11172008005000006</subfield>'
        '</datafield>'
    )  # record/1117362

    # MARC -> JSON
    expected_json = [
        {'source': 'bibcheck', 'value': '10.1590/S1806-11172008005000006'},
    ]
    json_record = hep.do(create_record(marcxml))  # no roundtrip

    assert validate(json_record['dois'], dois_schema) is None
    assert expected_json == json_record['dois']

    # JSON -> MARC: the CURATOR $9 is not expected back.
    expected_marc = [
        {'a': '10.1590/S1806-11172008005000006', '9': 'bibcheck', '2': 'DOI'},
    ]
    marc_record = hep2marc.do(json_record)

    assert expected_marc == marc_record['0247']
def test_languages_from_041__a_handles_multiple_languages_in_one_a():
    """A single 041 $a of ``Russian / English`` is expected as two languages."""
    languages_schema = load_schema('hep')['properties']['languages']

    marcxml = (
        '<datafield tag="041" ind1=" " ind2=" ">'
        '  <subfield code="a">Russian / English</subfield>'
        '</datafield>'
    )  # record/116959

    # MARC -> JSON
    expected_json = ['ru', 'en']
    json_record = hep.do(create_record(marcxml))

    assert validate(json_record['languages'], languages_schema) is None
    assert expected_json == json_record['languages']

    # JSON -> MARC: one field per language on the way back.
    expected_marc = [{'a': 'russian'}, {'a': 'english'}]
    marc_record = hep2marc.do(json_record)

    assert expected_marc == marc_record['041']
def test_dois_from_0247_a_2():
    """Round-trip a single 0247 DOI through ``hep`` and ``hep2marc``."""
    dois_schema = load_schema('hep')['properties']['dois']

    marcxml = (
        '<datafield tag="024" ind1="7" ind2=" ">'
        '  <subfield code="2">DOI</subfield>'
        '  <subfield code="a">10.1088/0264-9381/31/24/245004</subfield>'
        '</datafield>'
    )  # record/1302395

    # MARC -> JSON
    expected_json = [{'value': '10.1088/0264-9381/31/24/245004'}]
    json_record = hep.do(create_record(marcxml))

    assert validate(json_record['dois'], dois_schema) is None
    assert expected_json == json_record['dois']

    # JSON -> MARC
    expected_marc = [{'a': '10.1088/0264-9381/31/24/245004', '2': 'DOI'}]
    marc_record = hep2marc.do(json_record)

    assert expected_marc == marc_record['0247']
def test_harvesting_arxiv_workflow_accepted(
        mocked, db_only_app, record_oai_arxiv_plots):
    """Run the 'article' workflow on a harvested arXiv record and accept it.

    The workflow is expected to halt first; the object is then marked as
    approved and core, and continuing the workflow is expected to complete.
    """
    from invenio_workflows import (
        start, WorkflowEngine, ObjectStatus, workflow_object_class
    )
    from dojson.contrib.marc21.utils import create_record
    from invenio_db import db
    from inspirehep.dojson.hep import hep
    from inspirehep.modules.converter.xslt import convert

    # OAI arXiv XML -> MARCXML -> dict -> HEP JSON
    marcxml = convert(record_oai_arxiv_plots, "oaiarXiv2marcxml.xsl")
    hep_json = hep.do(create_record(marcxml))

    with db_only_app.app_context():
        workflow_uuid = start('article', [hep_json])

        engine = WorkflowEngine.from_uuid(workflow_uuid)
        wf_object = engine.processed_objects[0]

        assert wf_object.status == ObjectStatus.HALTED
        assert wf_object.data_type == "hep"

        # Files should have been attached (tarball + pdf)
        assert wf_object.files["1407.7587.pdf"]
        assert wf_object.files["1407.7587.tar.gz"]

        # A publication note should have been extracted
        publication_info = wf_object.data.get('publication_info')
        assert publication_info
        first_publication = publication_info[0]
        assert first_publication
        assert first_publication.get('year') == "2014"
        assert first_publication.get('journal_title') == "J. Math. Phys."

        # This record should not have been touched yet
        assert "approved" not in wf_object.extra_data

        # Now let's resolve it as accepted and continue
        # FIXME Should be accept, but record validation prevents us.
        wf_object.remove_action()
        wf_object.extra_data["approved"] = True
        wf_object.extra_data["core"] = True
        wf_object.save()

        db.session.commit()

    # Resume in a fresh application context.
    with db_only_app.app_context():
        engine = WorkflowEngine.from_uuid(workflow_uuid)
        wf_object = engine.processed_objects[0]
        object_id = wf_object.id
        wf_object.continue_workflow()

        # Reload the object to observe its final status: it was accepted.
        reloaded = workflow_object_class.get(object_id)
        assert reloaded.status == ObjectStatus.COMPLETED
def test_isbns_from_020__a_b_normalizes_print():
    """An 020 $b of ``Print`` is expected as medium ``print``."""
    isbns_schema = load_schema('hep')['properties']['isbns']

    marcxml = (
        '<datafield tag="020" ind1=" " ind2=" ">'
        '  <subfield code="a">9781786341105</subfield>'
        '  <subfield code="b">Print</subfield>'
        '</datafield>'
    )  # record/1509456

    # MARC -> JSON
    expected_json = [{'value': '9781786341105', 'medium': 'print'}]
    json_record = hep.do(create_record(marcxml))

    assert validate(json_record['isbns'], isbns_schema) is None
    assert expected_json == json_record['isbns']

    # JSON -> MARC
    expected_marc = [{'a': '9781786341105', 'b': 'print'}]
    marc_record = hep2marc.do(json_record)

    assert expected_marc == marc_record['020']
def test_titles_from_246__a_9():
    """A sourced 246 title is read into ``titles`` and expected back in 245."""
    titles_schema = load_schema('hep')['properties']['titles']

    marcxml = (
        '<datafield tag="246" ind1=" " ind2=" ">'
        '  <subfield code="a">Superintegrable relativistic systems in'
        ' spacetime-dependent background fields</subfield>'
        '  <subfield code="9">arXiv</subfield>'
        '</datafield>'
    )  # record/1511471

    # The same title text is expected in both directions.
    title_text = (
        'Superintegrable relativistic systems in '
        'spacetime-dependent background fields'
    )

    # MARC -> JSON
    expected_json = [{'source': 'arXiv', 'title': title_text}]
    json_record = hep.do(create_record(marcxml))

    assert validate(json_record['titles'], titles_schema) is None
    assert expected_json == json_record['titles']

    # JSON -> MARC: the assertion reads tag 245, not 246.
    expected_marc = [{'a': title_text, '9': 'arXiv'}]
    marc_record = hep2marc.do(json_record)

    assert expected_marc == marc_record['245']
def test_external_system_numbers_from_035__a_d_h_m_9():
    """An OAI 035 field keeps only $a and $9; $d, $h and $m are not expected."""
    identifiers_schema = (
        load_schema('hep')['properties']['external_system_identifiers'])

    marcxml = (
        '<datafield tag="035" ind1=" " ind2=" ">'
        '  <subfield code="9">http://cds.cern.ch/oai2d</subfield>'
        '  <subfield code="a">oai:cds.cern.ch:325030</subfield>'
        '  <subfield code="d">2015-06-05T13:24:42Z</subfield>'
        '  <subfield code="h">2015-11-09T16:22:48Z</subfield>'
        '  <subfield code="m">marcxml</subfield>'
        '</datafield>'
    )  # record/1403324

    # MARC -> JSON
    expected_json = [
        {
            'value': 'oai:cds.cern.ch:325030',
            'schema': 'http://cds.cern.ch/oai2d',
        },
    ]
    json_record = hep.do(create_record(marcxml))

    assert validate(
        json_record['external_system_identifiers'], identifiers_schema) is None
    assert expected_json == json_record['external_system_identifiers']

    # JSON -> MARC
    expected_marc = [
        {'9': 'http://cds.cern.ch/oai2d', 'a': 'oai:cds.cern.ch:325030'},
    ]
    marc_record = hep2marc.do(json_record)

    assert expected_marc == marc_record['035']
def test_title_translations_from_242__a_b(stable_langdetect):
    """Round-trip a 242 title with subtitle through ``hep`` and ``hep2marc``."""
    translations_schema = (
        load_schema('hep')['properties']['title_translations'])

    marcxml = (
        '<datafield tag="242" ind1=" " ind2=" ">'
        '  <subfield code="a">Generalized Hamilton-Jacobi Formalism</subfield>'
        '  <subfield code="b">Field Theories with Upper-Order Derivatives</subfield>'
        '</datafield>'
    )  # record/1501064

    # MARC -> JSON: $a maps to 'title', $b to 'subtitle'.
    expected_json = [
        {
            'language': 'en',
            'title': 'Generalized Hamilton-Jacobi Formalism',
            'subtitle': 'Field Theories with Upper-Order Derivatives',
        },
    ]
    json_record = hep.do(create_record(marcxml))

    assert validate(
        json_record['title_translations'], translations_schema) is None
    assert expected_json == json_record['title_translations']

    # JSON -> MARC
    expected_marc = [
        {
            'a': 'Generalized Hamilton-Jacobi Formalism',
            'b': 'Field Theories with Upper-Order Derivatives',
        },
    ]
    marc_record = hep2marc.do(json_record)

    assert expected_marc == marc_record['242']
def test_report_numbers_hidden_from_037__z():
    """A report number given in 037 $z is expected flagged as hidden."""
    report_numbers_schema = load_schema('hep')['properties']['report_numbers']

    marcxml = (
        '<datafield tag="037" ind1=" " ind2=" ">'
        '  <subfield code="z">FERMILAB-PUB-17-011-CMS</subfield>'
        '</datafield>'
    )  # record/1508174

    # MARC -> JSON
    expected_json = [{'hidden': True, 'value': 'FERMILAB-PUB-17-011-CMS'}]
    json_record = hep.do(create_record(marcxml))

    assert validate(json_record['report_numbers'], report_numbers_schema) is None
    assert expected_json == json_record['report_numbers']

    # JSON -> MARC: the value is expected back in $z.
    expected_marc = [{'z': 'FERMILAB-PUB-17-011-CMS'}]
    marc_record = hep2marc.do(json_record)

    assert expected_marc == marc_record['037']
def test_titles_from_245__a_b():
    """Round-trip a 245 title with subtitle through ``hep`` and ``hep2marc``."""
    titles_schema = load_schema('hep')['properties']['titles']

    marcxml = (
        '<datafield tag="245" ind1=" " ind2=" ">'
        '  <subfield code="a">Proceedings, New Observables in Quarkonium Production</subfield>'
        '  <subfield code="b">Trento, Italy</subfield>'
        '</datafield>'
    )  # record/1510141

    main_title = 'Proceedings, New Observables in Quarkonium Production'
    subtitle = 'Trento, Italy'

    # MARC -> JSON
    expected_json = [{'title': main_title, 'subtitle': subtitle}]
    json_record = hep.do(create_record(marcxml))

    assert validate(json_record['titles'], titles_schema) is None
    assert expected_json == json_record['titles']

    # JSON -> MARC
    expected_marc = [{'a': main_title, 'b': subtitle}]
    marc_record = hep2marc.do(json_record)

    assert expected_marc == marc_record['245']
Example #31
0
def test_single_doi():
    """A record with one 0247 DOI field is expected to give one ``dois`` entry."""
    marcxml = (
        '<record><datafield tag="024" ind1="7" ind2=" ">'
        '<subfield code="2">DOI</subfield>'
        '<subfield code="a">10.1088/0264-9381/31/24/245004</subfield>'
        '</datafield></record>')

    marc_record = create_record(marcxml)
    cleaned = strip_empty_values(hep.do(marc_record))

    assert cleaned['dois'] == [{'value': '10.1088/0264-9381/31/24/245004'}]
Example #32
0
def test_thesis_from_502_b_double_c_d_double_z():
    """A 502 field with two $c/$z pairs is expected as two institutions."""
    thesis_schema = load_schema('hep')['properties']['thesis']

    marcxml = (
        '<datafield tag="502" ind1=" " ind2=" ">'
        '  <subfield code="b">Thesis</subfield>'
        '  <subfield code="c">Nice U.</subfield>'
        '  <subfield code="c">Cote d\'Azur Observ., Nice</subfield>'
        '  <subfield code="d">2014</subfield>'
        '  <subfield code="z">903069</subfield>'
        '  <subfield code="z">904125</subfield>'
        '</datafield>'
    )  # record/1385648

    # Each $c name is paired with the $z record id at the same position.
    nice_university = {
        'curated_relation': True,
        'name': 'Nice U.',
        'record': {'$ref': 'http://localhost:5000/api/institutions/903069'},
    }
    cote_azur_observatory = {
        'curated_relation': True,
        'name': "Cote d'Azur Observ., Nice",
        'record': {'$ref': 'http://localhost:5000/api/institutions/904125'},
    }

    # MARC -> JSON
    expected_json = {
        '_degree_type': 'Thesis',
        'date': '2014',
        'degree_type': 'Thesis',
        'institutions': [nice_university, cote_azur_observatory],
    }
    json_record = hep.do(create_record(marcxml))

    assert validate(json_record['thesis'], thesis_schema) is None
    assert expected_json == json_record['thesis']

    # JSON -> MARC: the $z record ids are not expected back.
    expected_marc = {
        'b': 'Thesis',
        'c': ['Nice U.', "Cote d'Azur Observ., Nice"],
        'd': '2014',
    }
    marc_record = hep2marc.do(json_record)

    assert expected_marc == marc_record['502']
def test_arxiv_eprints_from_037__a_c_9_and_multiple_65017_a_2():
    """Convert an arXiv 037 plus two 65017 categories to ``arxiv_eprints`` and back."""
    eprints_schema = load_schema('hep')['properties']['arxiv_eprints']

    marcxml = '''
        <record>
          <datafield tag="037" ind1=" " ind2=" ">
            <subfield code="9">arXiv</subfield>
            <subfield code="a">arXiv:1702.00702</subfield>
            <subfield code="c">math-ph</subfield>
          </datafield>
          <datafield tag="650" ind1="1" ind2="7">
            <subfield code="a">math-ph</subfield>
            <subfield code="2">arXiv</subfield>
          </datafield><datafield tag="650" ind1="1" ind2="7">
            <subfield code="a">gr-qc</subfield>
            <subfield code="2">arXiv</subfield></datafield>
        </record>
    ''' # record/1511862

    expected_json = [
        {
            # the first element is the one in 037__c
            'categories': ['math-ph', 'gr-qc'],
            'value': '1702.00702',
        },
    ]
    record = hep.do(create_record(marcxml))

    assert validate(record['arxiv_eprints'], eprints_schema) is None
    assert expected_json == record['arxiv_eprints']

    expected_marc = {
        # 035 is discarded in hep.do, so it needs to be derived here
        '035': [
            {'9': 'arXiv', 'a': 'oai:arXiv.org:1702.00702'},
        ],
        '037': [
            {'9': 'arXiv', 'a': 'arXiv:1702.00702', 'c': 'math-ph'},
        ],
        '65017': [
            {'2': 'arXiv', 'a': 'math-ph'},
            {'2': 'arXiv', 'a': 'gr-qc'},
        ],
    }
    marc = hep2marc.do(record)

    # The whole MARC output is compared, not a single field.
    assert expected_marc == marc
Example #34
0
def test_authors_from_100__a_i_u_x_y():
    """Convert a 100 field with $a, $i, $u, $x and $y to ``authors`` and back."""
    authors_schema = load_schema('hep')['properties']['authors']

    marcxml = ('<datafield tag="100" ind1=" " ind2=" ">'
               '  <subfield code="a">Glashow, S.L.</subfield>'
               '  <subfield code="i">INSPIRE-00085173</subfield>'
               '  <subfield code="u">Copenhagen U.</subfield>'
               '  <subfield code="x">1008235</subfield>'
               '  <subfield code="y">1</subfield>'
               '</datafield>')  # record/4328/export/xme

    expected_json = [
        {
            'affiliations': [{'value': 'Copenhagen U.'}],
            'curated_relation': True,
            'full_name': 'Glashow, S.L.',
            'ids': [
                {'type': 'INSPIRE ID', 'value': 'INSPIRE-00085173'},
            ],
            'record': {
                '$ref': 'http://localhost:5000/api/authors/1008235',
            },
        },
    ]
    record = hep.do(create_record(marcxml))

    assert validate(record['authors'], authors_schema) is None
    assert expected_json == record['authors']

    # The expected MARC output carries neither $x nor $y.
    expected_marc = [
        {
            'a': 'Glashow, S.L.',
            'i': ['INSPIRE-00085173'],
            'u': ['Copenhagen U.'],
        },
    ]
    marc = hep2marc.do(record)

    assert expected_marc == marc['100']
Example #35
0
def test_harvesting_arxiv_workflow_accepted(mocked, small_app,
                                            record_oai_arxiv_plots):
    """Test a full harvesting workflow.

    The harvested record is converted to HEP JSON, pushed through the
    ``article`` workflow (which is expected to halt), then marked as approved
    and resumed, after which the workflow object must be completed.

    ``mocked`` is not referenced in the body; presumably it is injected by a
    patch decorator that is not visible here -- TODO confirm.
    """

    # Convert to MARCXML, then dict, then HEP JSON
    record_oai_arxiv_plots_marcxml = convert(record_oai_arxiv_plots,
                                             "oaiarXiv2marcxml.xsl")
    record_marc = create_record(record_oai_arxiv_plots_marcxml)
    record_json = hep.do(record_marc)
    workflow_uuid = None
    # First application context: run the workflow until it halts.
    with small_app.app_context():
        workflow_uuid = start('article', [record_json])

        eng = WorkflowEngine.from_uuid(workflow_uuid)
        obj = eng.processed_objects[0]

        assert obj.status == ObjectStatus.HALTED
        assert obj.data_type == "hep"

        # Files should have been attached (tarball + pdf)
        assert obj.files["1407.7587.pdf"]
        assert obj.files["1407.7587.tar.gz"]

        # A publication note should have been extracted
        pub_info = obj.data.get('publication_info')
        assert pub_info
        assert pub_info[0]
        assert pub_info[0].get('year') == 2014
        assert pub_info[0].get('journal_title') == "J. Math. Phys."

        # This record should not have been touched yet
        assert "approved" not in obj.extra_data

        # Now let's resolve it as accepted and continue
        # FIXME Should be accept, but record validation prevents us.
        obj.remove_action()
        obj.extra_data["approved"] = True
        obj.extra_data["core"] = True
        obj.save()

        db.session.commit()

    # Second application context: reload the engine from its UUID and resume.
    with small_app.app_context():
        eng = WorkflowEngine.from_uuid(workflow_uuid)
        obj = eng.processed_objects[0]
        obj_id = obj.id
        obj.continue_workflow()

        # Fetch the object again by id so the asserted status is the stored one.
        obj = workflow_object_class.get(obj_id)
        # It was accepted
        assert obj.status == ObjectStatus.COMPLETED
Example #36
0
def test_authors_from_100__a_u_x_w_y_z_with_malformed_x():
    """Convert a 100 field whose $x is not a record id; $x is absent from both outputs."""
    authors_schema = load_schema('hep')['properties']['authors']

    marcxml = ('<datafield tag="100" ind1=" " ind2=" ">'
               '  <subfield code="a">Bakhrushin, Iu.P.</subfield>'
               '  <subfield code="u">NIIEFA, St. Petersburg</subfield>'
               '  <subfield code="x">БАХРУШИН, Ю.П.</subfield>'
               '  <subfield code="w">I.P.Bakhrushin.1</subfield>'
               '  <subfield code="y">0</subfield>'
               '  <subfield code="z">903073</subfield>'
               '</datafield>')  # record/931310/export/xme

    expected_json = [
        {
            'affiliations': [
                {
                    'record': {
                        '$ref':
                        'http://localhost:5000/api/institutions/903073',
                    },
                    'value': 'NIIEFA, St. Petersburg',
                },
            ],
            'curated_relation': False,
            'full_name': 'Bakhrushin, Iu.P.',
            'ids': [
                {'type': 'INSPIRE BAI', 'value': 'I.P.Bakhrushin.1'},
            ],
        },
    ]
    record = hep.do(create_record(marcxml))  # no roundtrip

    assert validate(record['authors'], authors_schema) is None
    assert expected_json == record['authors']

    expected_marc = {
        'a': 'Bakhrushin, Iu.P.',
        'u': ['NIIEFA, St. Petersburg'],
        'w': 'I.P.Bakhrushin.1',
    }
    marc = hep2marc.do(record)

    assert expected_marc == marc['100']
def test_HEP_added_to_980__a():
    """Check that hep2marc emits ``980__a:HEP`` for an empty input record.

    HEP is not written explicitly in the record anymore,
    so it needs to be added in hep2marc.
    """
    # The schema is loaded as in the sibling tests, but nothing is validated
    # against it here.
    load_schema('hep')

    record = hep.do(create_record(''))
    marc = hep2marc.do(record)

    assert [{'a': 'HEP'}] == marc['980']
def test_fft_from_FFT_a_d_f_n_o_t():
    """Convert an FFT field with $a, $d, $f, $n, $o and $t to ``fft`` and back."""
    marcxml = ('<datafield tag="FFT">'
               '  <subfield code="a">url</subfield>'
               '  <subfield code="t">docfile_type</subfield>'
               '  <subfield code="o">flag</subfield>'
               '  <subfield code="d">description</subfield>'
               '  <subfield code="n">filename</subfield>'
               '  <subfield code="f">filetype</subfield>'
               '</datafield>')

    expected_json = [
        {
            'url': 'url',
            'docfile_type': 'docfile_type',
            'flag': 'flag',
            'description': 'description',
            'filename': 'filename',
            'filetype': 'filetype',
        },
    ]
    record = hep.do(create_record(marcxml))

    assert expected_json == record['fft']

    expected_marc = [
        {
            'a': 'url',
            't': 'docfile_type',
            'o': 'flag',
            'd': 'description',
            'n': 'filename',
            'f': 'filetype',
        },
    ]
    # The snippet is converted afresh for the way back instead of reusing
    # ``record``.
    marc = hep2marc.do(hep.do(create_record(marcxml)))

    assert expected_marc == marc['FFT']
def test_accelerator_experiments(mock_get_record_ref, mock_get_recid_from_ref,
                                 test_name, xml_snippet, expected_json,
                                 expected_marc):
    """Check ``accelerator_experiments`` JSON and MARC ``693`` for one snippet.

    Both mocks are given deterministic side effects: a falsy argument is
    passed through unchanged, otherwise a ``mocked_record_<x>`` string is built
    (or, in the other direction, the integer after the last underscore is
    returned).
    """
    def fake_get_record_ref(x, *_):
        return x and 'mocked_record_%s' % x

    def fake_get_recid_from_ref(x, *_):
        return x and int(x.rsplit('_')[-1])

    mock_get_record_ref.side_effect = fake_get_record_ref
    mock_get_recid_from_ref.side_effect = fake_get_recid_from_ref

    # Bare datafields are wrapped in a <record> element before parsing.
    already_wrapped = xml_snippet.strip().startswith('<record>')
    if not already_wrapped:
        xml_snippet = '<record>%s</record>' % xml_snippet

    record = hep.do(create_record(xml_snippet))
    actual_json = record['accelerator_experiments']
    actual_marc = hep2marc.do(record)['693']

    assert actual_marc == expected_marc
    assert actual_json == expected_json
Example #40
0
def merged_records(app):
    """Store record 111 and deleted record 222, yield, then remove both.

    Record 111 carries ``981__a:222``; record 222 carries ``970__d:111`` and
    ``980__c:DELETED``. Written as a generator: setup runs before the
    ``yield`` and cleanup after it.
    """
    hep_schema_url = 'http://localhost:5000/schemas/records/hep.json'

    merged_snippet = ('<record>'
                      '  <controlfield tag="001">111</controlfield>'
                      '  <datafield tag="981" ind1=" " ind2=" ">'
                      '    <subfield code="a">222</subfield>'
                      '  </datafield>'
                      '</record>')

    deleted_snippet = ('<record>'
                       '  <controlfield tag="001">222</controlfield>'
                       '  <datafield tag="970" ind1=" " ind2=" ">'
                       '    <subfield code="d">111</subfield>'
                       '  </datafield>'
                       '  <datafield tag="980" ind1=" " ind2=" ">'
                       '    <subfield code="c">DELETED</subfield>'
                       '  </datafield>'
                       '</record>')

    with app.app_context():
        merged_record = hep.do(create_record(merged_snippet))
        merged_record['$schema'] = hep_schema_url

        deleted_record = hep.do(create_record(deleted_snippet))
        deleted_record['$schema'] = hep_schema_url

        # Both upserts share one nested transaction and are committed together.
        with db.session.begin_nested():
            merged_uuid = record_upsert(merged_record).id
            deleted_uuid = record_upsert(deleted_record).id
        db.session.commit()

    yield

    with app.app_context():
        _delete_merged_records('lit', 111, 222, merged_uuid, deleted_uuid)
def test_document_type_article_is_default():
    """An empty record gets ``document_type == ['article']``, not echoed in 980."""
    document_type_schema = load_schema('hep')['properties']['document_type']

    record = hep.do(create_record(''))

    assert validate(record['document_type'], document_type_schema) is None
    assert ['article'] == record['document_type']

    marc = hep2marc.do(record)

    # The default document type must not appear as a 980__a value.
    assert {'a': 'article'} not in marc['980']
def test_keywords_from_multiple_695__a_2():
    """Convert two 695 fields with $a and $2 to ``keywords`` and back."""
    keywords_schema = load_schema('hep')['properties']['keywords']

    marcxml = (
        '<record>'
        '  <datafield tag="695" ind1=" " ind2=" ">'
        '    <subfield code="a">programming: Monte Carlo</subfield>'
        '    <subfield code="2">INSPIRE</subfield>'
        '  </datafield>'
        '  <datafield tag="695" ind1=" " ind2=" ">'
        '    <subfield code="a">electron positron: annihilation</subfield>'
        '    <subfield code="2">INSPIRE</subfield>'
        '  </datafield>'
        '</record>'
    )  # record/363605

    expected_json = [
        {
            'classification_scheme': 'INSPIRE',
            'keyword': 'programming: Monte Carlo',
        },
        {
            'classification_scheme': 'INSPIRE',
            'keyword': 'electron positron: annihilation',
        },
    ]
    record = hep.do(create_record(marcxml))

    assert validate(record['keywords'], keywords_schema) is None
    assert expected_json == record['keywords']

    expected_marc = [
        {'a': 'programming: Monte Carlo', '2': 'INSPIRE'},
        {'a': 'electron positron: annihilation', '2': 'INSPIRE'},
    ]
    marc = hep2marc.do(record)

    assert expected_marc == marc['695']
def test_control_number_from_001():
    """Controlfield 001 becomes an integer ``control_number`` and converts back to 001."""
    control_number_schema = load_schema('hep')['properties']['control_number']

    marcxml = '<controlfield tag="001">1508668</controlfield>'  # record/1508668
    recid = 1508668

    record = hep.do(create_record(marcxml))

    assert validate(record['control_number'], control_number_schema) is None
    assert recid == record['control_number']

    marc = hep2marc.do(record)

    # The MARC side is compared against the same integer value.
    assert recid == marc['001']
Example #44
0
def test_authors_from_100__a_v_m_w_y():
    """Convert a 100 field with $a, $v, $m, $w and $y to ``authors`` and back."""
    authors_schema = load_schema('hep')['properties']['authors']

    # NOTE(review): the e-mail literals in this test look like redaction
    # placeholders, and the input value differs from the expected ones --
    # confirm against the real record before relying on this test.
    marcxml = (
        '<datafield tag="100" ind1=" " ind2=" ">'
        '  <subfield code="a">Gao, Xu</subfield>'
        '  <subfield code="v">Chern Institute of Mathematics and LPMC, Nankai University, Tianjin, 300071, China</subfield>'
        '  <subfield code="m">[email protected]</subfield>'
        '  <subfield code="w">X.Gao.11</subfield>'
        '  <subfield code="y">0</subfield>'
        '</datafield>')  # record/1475380/export/xme

    expected_json = [
        {
            'curated_relation': False,
            'emails': ['*****@*****.**'],
            'full_name': 'Gao, Xu',
            'ids': [
                {'type': 'INSPIRE BAI', 'value': 'X.Gao.11'},
            ],
        },
    ]
    record = hep.do(create_record(marcxml))

    assert validate(record['authors'], authors_schema) is None
    assert expected_json == record['authors']

    expected_marc = {
        'a': 'Gao, Xu',
        'v': [
            'Chern Institute of Mathematics and LPMC, Nankai University, Tianjin, 300071, China',
        ],
        'm': ['*****@*****.**'],
        'w': 'X.Gao.11',
    }
    marc = hep2marc.do(record)

    assert expected_marc == marc['100']
def test_dois_from_0247_a_2():
    """A 0247 field with $2 DOI and $a yields one ``dois`` entry after cleaning."""
    marcxml = (
        '<record>'
        '  <datafield tag="024" ind1="7" ind2=" ">'
        '    <subfield code="2">DOI</subfield>'
        '    <subfield code="a">10.1088/0264-9381/31/24/245004</subfield>'
        '  </datafield>'
        '</record>')

    expected_dois = [{'value': '10.1088/0264-9381/31/24/245004'}]

    # The converted record goes through clean_record before comparison.
    cleaned = clean_record(hep.do(create_record(marcxml)))

    assert expected_dois == cleaned['dois']
Example #46
0
def overdo_marc_dict(record):
    """Convert MARC Groupable Ordered Dict into JSON.

    The record's collections are checked in a fixed order and the first match
    picks the converter; a record matching none of them is converted with
    ``hep``.
    """
    # Order matters: the first matching entry wins.
    converters_by_collection = (
        (('institution',), institutions),
        (('experiment',), experiments),
        (('journals',), journals),
        (('hepnames',), hepnames),
        (('job', 'jobhidden'), jobs),
        (('conferences',), conferences),
    )

    for collections, converter in converters_by_collection:
        if any(_collection_in_record(record, name) for name in collections):
            return converter.do(record)

    return hep.do(record)
def test_discard_035__9_arXiv():
    """035__9:arXiv is redundant with 037__9:arXiv, throw it away."""
    hep_schema = load_schema('hep')

    marcxml = '''
        <datafield tag="035" ind1=" " ind2=" ">
          <subfield code="9">arXiv</subfield>
          <subfield code="a">oai:arXiv.org:1611.05079</subfield>
        </datafield>
    ''' # record/1498308

    record = hep.do(create_record(marcxml))

    # The whole record is validated, not a single property.
    assert validate(record, hep_schema) is None
    # Both fields are compared against the same placeholder value.
    for field in ('external_system_identifiers', 'arxiv_eprints'):
        assert [{}] == record[field]
Example #48
0
def test_urls_from_marcxml_856_with_single_u_no_y():
    """An 856 field with one $u and no $y yields a URL entry with only ``value``."""
    marcxml = (
        '<record>'
        '  <datafield tag="856" ind1="4" ind2=" ">'
        '    <subfield code="u">http://www.physics.unlv.edu/labastro/</subfield>'
        '  </datafield>'
        '</record>'
    )

    expected_urls = [{'value': 'http://www.physics.unlv.edu/labastro/'}]

    cleaned = clean_record(hep.do(create_record(marcxml)))

    assert expected_urls == cleaned['urls']
def test_dois_from_0247_a_2_9_and_0247_a_2():
    """The same DOI with and without a $9 source is kept as two entries, both ways."""
    dois_schema = load_schema('hep')['properties']['dois']

    marcxml = (
        '<record>'
        '  <datafield tag="024" ind1="7" ind2=" ">'
        '    <subfield code="2">DOI</subfield>'
        '    <subfield code="9">bibmatch</subfield>'
        '    <subfield code="a">10.1088/1475-7516/2015/03/044</subfield>'
        '  </datafield>'
        '  <datafield tag="024" ind1="7" ind2=" ">'
        '    <subfield code="2">DOI</subfield>'
        '    <subfield code="a">10.1088/1475-7516/2015/03/044</subfield>'
        '  </datafield>'
        '</record>')  # record/1286727

    expected_json = [
        {'source': 'bibmatch', 'value': '10.1088/1475-7516/2015/03/044'},
        {'value': '10.1088/1475-7516/2015/03/044'},
    ]
    record = hep.do(create_record(marcxml))

    assert validate(record['dois'], dois_schema) is None
    assert expected_json == record['dois']

    expected_marc = [
        {'a': '10.1088/1475-7516/2015/03/044', '9': 'bibmatch', '2': 'DOI'},
        {'a': '10.1088/1475-7516/2015/03/044', '2': 'DOI'},
    ]
    marc = hep2marc.do(record)

    assert expected_marc == marc['024']
def test_new_record_from_970__d():
    """970__d becomes a ``new_record`` reference and converts back to 970__d."""
    new_record_schema = load_schema('hep')['properties']['new_record']

    marcxml = ('<datafield tag="970" ind1=" " ind2=" ">'
               '  <subfield code="d">361769</subfield>'
               '</datafield>')  # record/37545

    record = hep.do(create_record(marcxml))
    expected_ref = {'$ref': 'http://localhost:5000/api/record/361769'}

    assert validate(record['new_record'], new_record_schema) is None
    assert expected_ref == record['new_record']

    marc = hep2marc.do(record)

    # On the way back the id is a string again.
    assert {'d': '361769'} == marc['970']
def test_dois_from_0247_a_2_and_0247_a_2_9():
    """Two different DOIs, the second with a $9 source, convert both ways in order."""
    dois_schema = load_schema('hep')['properties']['dois']

    marcxml = ('<record>'
               '  <datafield tag="024" ind1="7" ind2=" ">'
               '    <subfield code="2">DOI</subfield>'
               '    <subfield code="a">10.1103/PhysRevD.89.072002</subfield>'
               '  </datafield>'
               '  <datafield tag="024" ind1="7" ind2=" ">'
               '    <subfield code="2">DOI</subfield>'
               '    <subfield code="9">bibmatch</subfield>'
               '    <subfield code="a">10.1103/PhysRevD.91.019903</subfield>'
               '  </datafield>'
               '</record>')  # record/1273665

    expected_json = [
        {'value': '10.1103/PhysRevD.89.072002'},
        {'source': 'bibmatch', 'value': '10.1103/PhysRevD.91.019903'},
    ]
    record = hep.do(create_record(marcxml))

    assert validate(record['dois'], dois_schema) is None
    assert expected_json == record['dois']

    expected_marc = [
        {'a': '10.1103/PhysRevD.89.072002', '2': 'DOI'},
        {'a': '10.1103/PhysRevD.91.019903', '9': 'bibmatch', '2': 'DOI'},
    ]
    marc = hep2marc.do(record)

    assert expected_marc == marc['024']
Example #52
0
def test_public_notes_from_500__a_and_500__a_9():
    """Convert two 500 fields, one with a $9 source, to ``public_notes`` and back."""
    public_notes_schema = load_schema('hep')['properties']['public_notes']

    marcxml = (
        '<record>'
        '  <datafield tag="500" ind1=" " ind2=" ">'
        '    <subfield code="a">*Brief entry*</subfield>'
        '  </datafield>'
        '  <datafield tag="500" ind1=" " ind2=" ">'
        '    <subfield code="a">11 pages, 5 figures</subfield>'
        '    <subfield code="9">arXiv</subfield>'
        '  </datafield>'
        '</record>'
    )  # record/1450045

    expected_json = [
        {'value': '*Brief entry*'},
        {'source': 'arXiv', 'value': '11 pages, 5 figures'},
    ]
    record = hep.do(create_record(marcxml))

    assert validate(record['public_notes'], public_notes_schema) is None
    assert expected_json == record['public_notes']

    expected_marc = [
        {'a': '*Brief entry*'},
        {'9': 'arXiv', 'a': '11 pages, 5 figures'},
    ]
    marc = hep2marc.do(record)

    assert expected_marc == marc['500']
Example #53
0
def test_urls_from_marcxml_856_with_two_u_duplicates_single_y():
    """A duplicated $u in one 856 field collapses into a single URL entry."""
    marcxml = (
        '<record>'
        '  <datafield tag="856" ind1="4" ind2=" ">'
        '    <subfield code="u">http://www.physics.unlv.edu/labastro/</subfield>'
        '    <subfield code="u">http://www.physics.unlv.edu/labastro/</subfield>'
        '    <subfield code="y">Conference web page</subfield>'
        '  </datafield>'
        '</record>'
    )

    expected_urls = [
        {
            'description': 'Conference web page',
            'value': 'http://www.physics.unlv.edu/labastro/',
        },
    ]

    cleaned = clean_record(hep.do(create_record(marcxml)))

    assert expected_urls == cleaned['urls']
def test_publication_type_from_980__a():
    """980__a:Review becomes ``publication_type`` and is written back to 980.

    The MARC output is compared without regard to the order of the 980
    entries.
    """
    def _subfield_items(field):
        # Dicts cannot be ordered on Python 3, so a bare sorted() over them
        # raises TypeError; sorting on their item lists works on 2 and 3.
        return sorted(field.items())

    schema = load_schema('hep')
    subschema = schema['properties']['publication_type']

    snippet = '''
        <datafield tag="980" ind1=" " ind2=" ">
          <subfield code="a">Review</subfield>
        </datafield>
    '''

    expected = ['review']
    result = hep.do(create_record(snippet))

    assert validate(result['publication_type'], subschema) is None
    assert expected == result['publication_type']

    expected = [{'a': 'review'}, {'a': 'HEP'}]
    result = hep2marc.do(result)

    assert (sorted(expected, key=_subfield_items) ==
            sorted(result['980'], key=_subfield_items))
def test_core_from_980__a_noncore():
    """980__a:noncore becomes ``core == False`` and is written back to 980.

    The MARC output is compared without regard to the order of the 980
    entries.
    """
    def _subfield_items(field):
        # Dicts cannot be ordered on Python 3, so a bare sorted() over them
        # raises TypeError; sorting on their item lists works on 2 and 3.
        return sorted(field.items())

    schema = load_schema('hep')
    subschema = schema['properties']['core']

    snippet = '''
        <datafield tag="980" ind1=" " ind2=" ">
          <subfield code="a">noncore</subfield>
        </datafield>
    '''

    expected = False
    result = hep.do(create_record(snippet))

    assert validate(result['core'], subschema) is None
    assert expected == result['core']

    expected = [{'a': 'noncore'}, {'a': 'HEP'}]
    result = hep2marc.do(result)

    assert (sorted(expected, key=_subfield_items) ==
            sorted(result['980'], key=_subfield_items))
Example #56
0
def deleted_record(app):
    """Store record 111 flagged ``980__c:DELETED``, yield, then remove it.

    Written as a generator: setup runs before the ``yield`` and cleanup after
    it.
    """
    marcxml = ('<record>'
               '  <controlfield tag="001">111</controlfield>'
               '  <datafield tag="980" ind1=" " ind2=" ">'
               '    <subfield code="c">DELETED</subfield>'
               '  </datafield>'
               '</record>')

    with app.app_context():
        hep_record = hep.do(create_record(marcxml))
        hep_record['$schema'] = (
            'http://localhost:5000/schemas/records/hep.json')

        # Upsert inside a nested transaction, then commit the outer session.
        with db.session.begin_nested():
            record_upsert(hep_record)
        db.session.commit()

    yield

    with app.app_context():
        _delete_record('lit', 111)
def test_report_numbers_hidden_from_037__z():
    """037__z becomes a hidden report number and converts back to 037__z."""
    report_numbers_schema = load_schema('hep')['properties']['report_numbers']

    marcxml = '''
        <datafield tag="037" ind1=" " ind2=" ">
          <subfield code="z">FERMILAB-PUB-17-011-CMS</subfield>
        </datafield>
    ''' # record/1508174

    record = hep.do(create_record(marcxml))
    expected_json = [{'hidden': True, 'value': 'FERMILAB-PUB-17-011-CMS'}]

    assert validate(record['report_numbers'], report_numbers_schema) is None
    assert expected_json == record['report_numbers']

    marc = hep2marc.do(record)
    expected_marc = [{'z': 'FERMILAB-PUB-17-011-CMS'}]

    assert expected_marc == marc['037']
Example #58
0
def test_thesis_from_502__a_c_d_z():
    """Convert a 502 field with $a, $c, $d and $z to ``thesis`` and back."""
    thesis_schema = load_schema('hep')['properties']['thesis']

    marcxml = (
        '<datafield tag="502" ind1=" " ind2=" ">'
        '  <subfield code="a">PhD</subfield>'
        '  <subfield code="c">IIT, Roorkee</subfield>'
        '  <subfield code="d">2011</subfield>'
        '  <subfield code="z">909554</subfield>'
        '</datafield>'
    )  # record/897773/export/xme

    institution = {
        'curated_relation': True,
        'record': {
            '$ref': 'http://localhost:5000/api/institutions/909554',
        },
        'name': 'IIT, Roorkee',
    }
    expected_json = {
        'date': '2011',
        'defense_date': 'PhD',  # XXX: obviously wrong.
        'institutions': [institution],
    }
    record = hep.do(create_record(marcxml))

    assert validate(record['thesis'], thesis_schema) is None
    assert expected_json == record['thesis']

    # The expected 502 carries no $z.
    expected_marc = {
        'a': 'PhD',
        'c': ['IIT, Roorkee'],
        'd': '2011',
    }
    marc = hep2marc.do(record)

    assert expected_marc == marc['502']
Example #59
0
    def _author_list(obj, eng):
        """Merge an author list found in the arXiv tarball into ``obj.data``.

        The tarball is taken from ``obj.files`` when already attached and
        downloaded otherwise, then extracted next to itself. Each extracted
        ``.xml`` file is read; the first whose content matches
        ``REGEXP_AUTHLIST`` is converted with ``stylesheet``, turned into a
        HEP record, and merged into ``obj.data``. Later files are not
        examined.

        ``stylesheet`` is a free variable, presumably bound by the enclosing
        function -- TODO confirm. ``eng`` is not used.

        :param obj: workflow object providing ``data``, ``files`` and ``log``.
        :param eng: workflow engine (unused).
        :return: ``None``; returns early when the tarball is invalid.
        """
        from inspirehep.modules.converter import convert

        arxiv_id = get_clean_arXiv_id(obj.data)
        filename = secure_filename("{0}.tar.gz".format(arxiv_id))
        if filename not in obj.files:
            tarball = download_file_to_workflow(
                workflow=obj,
                name=filename,
                url=current_app.config['ARXIV_TARBALL_URL'].format(
                    arxiv_id=arxiv_id
                )
            )
        else:
            tarball = obj.files[filename]

        sub_dir = os.path.abspath("{0}_files".format(tarball.file.uri))
        try:
            file_list = untar(tarball.file.uri, sub_dir)
        except InvalidTarball:
            # Best effort: a broken tarball is logged and the task is skipped.
            obj.log.error("Invalid tarball {0}".format(tarball.file.uri))
            return
        obj.log.info("Extracted tarball to: {0}".format(sub_dir))

        xml_files_list = [path for path in file_list
                          if path.endswith(".xml")]
        obj.log.info("Found xmlfiles: {0}".format(xml_files_list))

        for xml_file in xml_files_list:
            # The context manager closes the file even if read() raises.
            with open(xml_file, "r") as xml_file_fd:
                xml_content = xml_file_fd.read()

            match = REGEXP_AUTHLIST.findall(xml_content)
            if match:
                obj.log.info("Found a match for author extraction")
                authors_xml = convert(xml_content, stylesheet)
                authors_rec = create_record(authors_xml)
                authorlist_record = hep.do(authors_rec)
                obj.data.update(authorlist_record)
                break
Example #60
0
def test_hidden_notes_from_595__double_a_9():
    """A 595 field with one $9 and two $a yields two hidden notes sharing the source."""
    hidden_notes_schema = load_schema('hep')['properties']['hidden_notes']

    marcxml = (
        '<datafield tag="595" ind1=" " ind2=" ">'
        '  <subfield code="9">SPIRES-HIDDEN</subfield>'
        '  <subfield code="a">TeXtitle from script</subfield>'
        '  <subfield code="a">no affiliation (not clear pn the fulltext)</subfield>'
        '</datafield>'
    )  # record/109310

    expected_json = [
        {'source': 'SPIRES-HIDDEN', 'value': 'TeXtitle from script'},
        {
            'source': 'SPIRES-HIDDEN',
            'value': 'no affiliation (not clear pn the fulltext)',
        },
    ]
    record = hep.do(create_record(marcxml))

    assert validate(record['hidden_notes'], hidden_notes_schema) is None
    assert expected_json == record['hidden_notes']

    # On the way back each note is expected as its own 595 entry.
    expected_marc = [
        {'9': 'SPIRES-HIDDEN', 'a': 'TeXtitle from script'},
        {
            '9': 'SPIRES-HIDDEN',
            'a': 'no affiliation (not clear pn the fulltext)',
        },
    ]
    marc = hep2marc.do(record)

    assert expected_marc == marc['595']