Ejemplo n.º 1
0
def marcxml2record(marcxml):
    """Turn a MARCXML string into a JSON record.

    The set of rules is picked from the collections that
    ``_get_collections`` reports for the parsed record. Records recognised
    by ``_is_from_cds`` are first rewritten with ``cds2hep_marc`` and then
    converted with the HEP rules. When nothing matches, the HEP rules are
    the fallback, because records belonging to special collections logically
    belong to the Literature collection but don't have ``980__a:HEP``.

    Args:
        marcxml(str): a string containing MARCXML.

    Returns:
        dict: a JSON record converted from the string.

    """
    parsed = create_record(marcxml, keep_singletons=False)
    found = _get_collections(parsed)

    if _is_from_cds(parsed):
        return hep.do(create_record_from_dict(cds2hep_marc.do(parsed)))

    # Ordered on purpose: the first rule set whose collection is present wins.
    dispatch = (
        (('conferences',), conferences),
        (('data',), data),
        (('experiment',), experiments),
        (('hepnames',), hepnames),
        (('institution',), institutions),
        (('job', 'jobhidden'), jobs),
        (('journals', 'journalsnew'), journals),
    )
    for names, model in dispatch:
        if any(name in found for name in names):
            return model.do(parsed)

    return hep.do(parsed)
Ejemplo n.º 2
0
def test_accelerator_experiments_from_693__a_e():
    """A ``693`` field with ``a`` and ``e`` yields ``accelerator_experiments``.

    Checks the intermediate MARC produced by ``cds2hep_marc`` and then the
    schema-validated output of the ``hep`` rules.
    """
    subschema = load_schema('hep')['properties']['accelerator_experiments']

    marcxml = ('<datafield tag="693" ind1=" " ind2=" ">'
               '  <subfield code="a">CERN LHC</subfield>'
               '  <subfield code="e">ALICE</subfield>'
               '</datafield>')  # cds.cern.ch/record/2295080
    expected_marc = [{'a': 'CERN LHC', 'e': 'CERN-LHC-ALICE'}]
    expected_json = [{'legacy_name': 'CERN-LHC-ALICE'}]

    # Step 1: CDS MARC -> HEP-flavoured MARC.
    marc = cds2hep_marc.do(create_record(marcxml))
    assert expected_marc == marc['693__']

    # Step 2: HEP-flavoured MARC -> JSON record.
    record = hep.do(create_record_from_dict(marc))
    assert validate(record['accelerator_experiments'], subschema) is None
    assert expected_json == record['accelerator_experiments']
Ejemplo n.º 3
0
def test_abstracts_from_520__a():
    """Check that a ``520__a`` abstract is converted into ``abstracts``.

    The input is first passed through ``cds2hep_marc`` (which is expected to
    add ``9: CDS``) and then through the ``hep`` rules, whose output is
    validated against the ``abstracts`` subschema.

    The expected abstract is defined once and reused for both expectations so
    the two copies cannot drift apart.
    """
    schema = load_schema('hep')
    subschema = schema['properties']['abstracts']

    # NOTE(review): the break between the two paragraphs is spelled as an
    # explicit ``\n`` escape because a quoted literal cannot continue across
    # a physical line. Confirm the exact separator against the CDS record.
    abstract = (
        u'The underlying thesis on mathematical simulation methods in application and theory is structured into three parts. The first part sets up a mathematical model capable of predicting the performance and operation of an accelerator’s vacuum system based on analytical methods. A coupled species-balance equation system describes the distribution of the gas dynamics in an ultra-high vacuum system considering impacts of conductance limitations, beam induced effects (ion-, electron-, and photon-induced de- sorption), thermal outgassing and sticking probabilities of the chamber materials. A new solving algorithm based on sparse matrix representations, is introduced and presents a closed form solution of the equation system. The model is implemented in a Python environment, named PyVasco, and is supported by a graphical user interface to make it easy available for everyone. A sensitivity analysis, a cross-check with the Test-Particle Monte Carlo simulation program Molflow+ and a comparison of the simulation results to readings of the Large Hadron Colliders (LHC) pressure gauges validate the code. The computation of density profiles considering several effects (as men- tioned above) is performed within a short computation time for indefinitely long vacuum systems. This is in particular interesting for the design of a stable vacuum system for new large accelerat- ors like the Future Circular Colliders (FCC) with 100 km in circumference. A simulation of the FCC is shown at the end of this part. Additionally, PyVasco was presented twice at international conferences in Rome and Berlin and has been submitted in July with the title “Analytical vacuum simulations in high energy accelerators for future machines based on the LHC performance” to the Journal “Physical Review Accelerator and Beams”. \n'
        u'The second and third part of the thesis study properties of quasi-Monte Carlo (QMC) methods in the scope of the special research project “Quasi-Monte Carlo methods: Theory and Applications”. Instead of solving a complex integral analytically, its value is approximated by function evaluation at specific points. The choice of a good point set is critical for a good result. It turned out that continuous curves provide a good tool to define these point sets. So called “bounded remainder sets” (BRS) define a measure for the quality of the uniform distribution of a curve in the unit- square. The trajectory of a billiard path with an irrational slope is especially well distributed. Certain criteria to the BRS are defined and analysed in regard to the distribution error. The idea of the proofs is based on Diophantine approximations of irrational numbers and on the unfolding technique of the billiard path to a straight line in the plane. New results of the BRS for the billiard path are reported to the “Journal of Uniform Distribution”. The third part analyses the distribution of the energy levels of quantum systems. It was stated that the eigenvalues of the energy spectra for almost all integrable quantum systems are uncor- related and Poisson distributed. The harmonic oscillator presents already one counter example to this assertion. The particle in a box on the other hand obtains these properties. This thesis formulates a general statement that describes under which conditions the eigenvalues do not follow the poissonian property. The concept of the proofs is based on the analysis of the pair correlations of sequences. The former particle physicist Ian Sloan also exposed this topic and he became spe- cialized as a skilled mathematician in this field. To honour his achievements a Festschrift for his 80th birthday is written and the results of the work of this thesis are published there. The book will appear in 2018.'
    )

    # The snippet keeps its own plain (non-``u``) literal so that the type of
    # the parser input is the same as before.
    snippet = (
        '<datafield tag="520" ind1=" " ind2=" ">'
        '  <subfield code="a">The underlying thesis on mathematical simulation methods in application and theory is structured into three parts. The first part sets up a mathematical model capable of predicting the performance and operation of an accelerator’s vacuum system based on analytical methods. A coupled species-balance equation system describes the distribution of the gas dynamics in an ultra-high vacuum system considering impacts of conductance limitations, beam induced effects (ion-, electron-, and photon-induced de- sorption), thermal outgassing and sticking probabilities of the chamber materials. A new solving algorithm based on sparse matrix representations, is introduced and presents a closed form solution of the equation system. The model is implemented in a Python environment, named PyVasco, and is supported by a graphical user interface to make it easy available for everyone. A sensitivity analysis, a cross-check with the Test-Particle Monte Carlo simulation program Molflow+ and a comparison of the simulation results to readings of the Large Hadron Colliders (LHC) pressure gauges validate the code. The computation of density profiles considering several effects (as men- tioned above) is performed within a short computation time for indefinitely long vacuum systems. This is in particular interesting for the design of a stable vacuum system for new large accelerat- ors like the Future Circular Colliders (FCC) with 100 km in circumference. A simulation of the FCC is shown at the end of this part. Additionally, PyVasco was presented twice at international conferences in Rome and Berlin and has been submitted in July with the title “Analytical vacuum simulations in high energy accelerators for future machines based on the LHC performance” to the Journal “Physical Review Accelerator and Beams”. \n'
        'The second and third part of the thesis study properties of quasi-Monte Carlo (QMC) methods in the scope of the special research project “Quasi-Monte Carlo methods: Theory and Applications”. Instead of solving a complex integral analytically, its value is approximated by function evaluation at specific points. The choice of a good point set is critical for a good result. It turned out that continuous curves provide a good tool to define these point sets. So called “bounded remainder sets” (BRS) define a measure for the quality of the uniform distribution of a curve in the unit- square. The trajectory of a billiard path with an irrational slope is especially well distributed. Certain criteria to the BRS are defined and analysed in regard to the distribution error. The idea of the proofs is based on Diophantine approximations of irrational numbers and on the unfolding technique of the billiard path to a straight line in the plane. New results of the BRS for the billiard path are reported to the “Journal of Uniform Distribution”. The third part analyses the distribution of the energy levels of quantum systems. It was stated that the eigenvalues of the energy spectra for almost all integrable quantum systems are uncor- related and Poisson distributed. The harmonic oscillator presents already one counter example to this assertion. The particle in a box on the other hand obtains these properties. This thesis formulates a general statement that describes under which conditions the eigenvalues do not follow the poissonian property. The concept of the proofs is based on the analysis of the pair correlations of sequences. The former particle physicist Ian Sloan also exposed this topic and he became spe- cialized as a skilled mathematician in this field. To honour his achievements a Festschrift for his 80th birthday is written and the results of the work of this thesis are published there. The book will appear in 2018.</subfield>'
        '</datafield>')  # cds.cern.ch/record/2295265

    expected = [
        {
            '9': 'CDS',
            'a': abstract,
        },
    ]
    result = cds2hep_marc.do(create_record(snippet))

    assert expected == result['520__']

    expected = [
        {
            'source': 'CDS',
            'value': abstract,
        },
    ]
    result = hep.do(create_record_from_dict(result))

    assert validate(result['abstracts'], subschema) is None
    assert expected == result['abstracts']
Ejemplo n.º 4
0
def test_report_numbers_from_037__a():
    """A ``037__a`` value becomes a ``report_numbers`` entry sourced to CDS."""
    subschema = load_schema('hep')['properties']['report_numbers']

    marcxml = ('<datafield tag="037" ind1=" " ind2=" ">'
               '  <subfield code="a">CLICDP-PUB-2017-002</subfield>'
               '</datafield> ')  # cds.cern.ch/record/2270264
    expected_marc = [{'9': 'CDS', 'a': 'CLICDP-PUB-2017-002'}]
    expected_json = [{'source': 'CDS', 'value': 'CLICDP-PUB-2017-002'}]

    # Step 1: CDS MARC -> HEP-flavoured MARC.
    marc = cds2hep_marc.do(create_record(marcxml))
    assert expected_marc == marc['037__']

    # Step 2: HEP-flavoured MARC -> JSON record.
    record = hep.do(create_record_from_dict(marc))
    assert validate(record['report_numbers'], subschema) is None
    assert expected_json == record['report_numbers']
Ejemplo n.º 5
0
def test_report_numbers_from_088__9():
    """A ``088__9`` value becomes a hidden ``report_numbers`` entry.

    Also checks that no ``NOTE`` collection is added to ``980__``.
    """
    subschema = load_schema('hep')['properties']['report_numbers']

    marcxml = ('<datafield tag="088" ind1=" " ind2=" ">'
               '  <subfield code="9">ATL-COM-PHYS-2017-030</subfield>'
               '</datafield>')  # cds.cern.ch/record/2255823
    expected_marc = [{'9': 'CDS', 'z': 'ATL-COM-PHYS-2017-030'}]
    expected_json = [
        {
            'source': 'CDS',
            'value': 'ATL-COM-PHYS-2017-030',
            'hidden': True,
        },
    ]

    # Step 1: CDS MARC -> HEP-flavoured MARC.
    marc = cds2hep_marc.do(create_record(marcxml))
    assert expected_marc == marc['037__']
    assert {'a': 'NOTE'} not in marc['980__']

    # Step 2: HEP-flavoured MARC -> JSON record.
    record = hep.do(create_record_from_dict(marc))
    assert validate(record['report_numbers'], subschema) is None
    assert expected_json == record['report_numbers']
Ejemplo n.º 6
0
def test_document_type_from_multiple_980_a():
    """Two ``980__a`` values (ARTICLE, ConferencePaper) give one document type."""
    subschema = load_schema('hep')['properties']['document_type']

    marcxml = ('<record>'
               '  <datafield tag="980" ind1=" " ind2=" ">'
               '    <subfield code="a">ARTICLE</subfield>'
               '  </datafield>'
               '  <datafield tag="980" ind1=" " ind2=" ">'
               '    <subfield code="a">ConferencePaper</subfield>'
               '  </datafield>'
               '</record>')  # cds.cern.ch/record/1979225
    expected_marc = [
        {'a': 'ConferencePaper'},
        {'a': 'HEP'},
        {'a': 'CORE'},
    ]
    expected_json = ['conference paper']

    # Step 1: CDS MARC -> HEP-flavoured MARC.
    marc = cds2hep_marc.do(create_record(marcxml))
    assert expected_marc == marc['980__']

    # Step 2: HEP-flavoured MARC -> JSON record.
    record = hep.do(create_record_from_dict(marc))
    assert validate(record['document_type'], subschema) is None
    assert expected_json == record['document_type']
Ejemplo n.º 7
0
def test_external_sytem_identifiers_from_035__a_9():
    """A ``035`` field with ``9`` and ``a`` maps to an external identifier."""
    subschema = load_schema('hep')['properties']['external_system_identifiers']

    marcxml = ('<record>'
               '  <datafield tag="035" ind1=" " ind2=" ">'
               '    <subfield code="9">OSTI</subfield>'
               '    <subfield code="a">1358095</subfield>'
               '  </datafield>'
               '</record>')  # cds.cern.ch/record/2295073
    expected_marc = [{'9': 'OSTI', 'a': '1358095'}]
    expected_json = [{'schema': 'OSTI', 'value': '1358095'}]

    # Step 1: CDS MARC -> HEP-flavoured MARC.
    marc = cds2hep_marc.do(create_record(marcxml))
    assert expected_marc == marc['035__']

    # Step 2: HEP-flavoured MARC -> JSON record.
    record = hep.do(create_record_from_dict(marc))
    assert validate(record['external_system_identifiers'], subschema) is None
    assert expected_json == record['external_system_identifiers']
Ejemplo n.º 8
0
def test_dois_from_0247_a_2_9():
    """A ``0247_`` DOI that already has a ``9`` source keeps that source."""
    subschema = load_schema('hep')['properties']['dois']

    marcxml = ('<datafield tag="024" ind1="7" ind2=" ">'
               '  <subfield code="2">DOI</subfield>'
               '  <subfield code="9">submitter</subfield>'
               '  <subfield code="a">10.1098/rsta.2014.0044</subfield>'
               '</datafield>')  # cds.cern.ch/record/2295116
    expected_marc = [
        {
            '2': 'DOI',
            '9': 'submitter',
            'a': '10.1098/rsta.2014.0044',
        },
    ]
    expected_json = [
        {
            'source': 'submitter',
            'value': '10.1098/rsta.2014.0044',
        },
    ]

    # Step 1: CDS MARC -> HEP-flavoured MARC.
    marc = cds2hep_marc.do(create_record(marcxml))
    assert expected_marc == marc['0247_']

    # Step 2: HEP-flavoured MARC -> JSON record.
    record = hep.do(create_record_from_dict(marc))
    assert validate(record['dois'], subschema) is None
    assert expected_json == record['dois']
Ejemplo n.º 9
0
def test_document_type_from_962__b_k_n():
    """A ``962`` field with ``b``, ``n`` and ``k`` yields 'conference paper'."""
    subschema = load_schema('hep')['properties']['document_type']

    marcxml = ('<datafield tag="962" ind1=" " ind2=" ">'
               '  <subfield code="b">1075481</subfield>'
               '  <subfield code="n">lathuile20080301</subfield>'
               '  <subfield code="k">79-84</subfield>'
               '</datafield>')  # cds.cern.ch/record/2275456
    expected_marc = [
        {'a': 'ConferencePaper'},
        {'a': 'HEP'},
        {'a': 'CORE'},
    ]
    expected_json = ['conference paper']

    # Step 1: CDS MARC -> HEP-flavoured MARC.
    marc = cds2hep_marc.do(create_record(marcxml))
    assert expected_marc == marc['980__']

    # Step 2: HEP-flavoured MARC -> JSON record.
    record = hep.do(create_record_from_dict(marc))
    assert validate(record['document_type'], subschema) is None
    assert expected_json == record['document_type']
Ejemplo n.º 10
0
def test_urls_from_8564_u_y():
    """An ``8564_`` field with ``u`` and ``y`` maps to a ``urls`` entry."""
    subschema = load_schema('hep')['properties']['urls']

    marcxml = (
        '<datafield tag="856" ind1="4" ind2=" ">'
        '  <subfield code="u">http://pos.sissa.it/archive/conferences/209/007/Charged2014_007.pdf</subfield>'
        '  <subfield code="y">Published version from PoS</subfield>'
        '</datafield>')  # cds.cern.ch/record/2159118
    link = 'http://pos.sissa.it/archive/conferences/209/007/Charged2014_007.pdf'
    expected_marc = [{'u': link, 'y': 'Published version from PoS'}]
    expected_json = [
        {'value': link, 'description': 'Published version from PoS'},
    ]

    # Step 1: CDS MARC -> HEP-flavoured MARC.
    marc = cds2hep_marc.do(create_record(marcxml))
    assert expected_marc == marc['8564_']

    # Step 2: HEP-flavoured MARC -> JSON record.
    record = hep.do(create_record_from_dict(marc))
    assert validate(record['urls'], subschema) is None
    assert expected_json == record['urls']
Ejemplo n.º 11
0
def test_urls_from_8564_s_u_y_8_local_copy():
    """A 'local copy' description is rewritten to mention the CERN server.

    The ``s`` and ``8`` subfields of the input are expected to be dropped
    from the intermediate MARC.
    """
    subschema = load_schema('hep')['properties']['urls']

    # NOTE(review): the URL below points at record 1979225 while the trailing
    # comment cites record 2159118 -- confirm which record this was taken from.
    marcxml = (
        '<datafield tag="856" ind1="4" ind2=" ">'
        '  <subfield code="s">1119425</subfield>'
        '  <subfield code="u">http://cds.cern.ch/record/1979225/files/1748-0221_10_01_C01003.pdf</subfield>'
        '  <subfield code="y">Published version from IOP, local copy</subfield>'
        '  <subfield code="8">1053236</subfield>'
        '</datafield>')  # cds.cern.ch/record/2159118
    link = 'http://cds.cern.ch/record/1979225/files/1748-0221_10_01_C01003.pdf'
    label = 'Published version from IOP, on CERN Document Server'
    expected_marc = [{'u': link, 'y': label}]
    expected_json = [{'value': link, 'description': label}]

    # Step 1: CDS MARC -> HEP-flavoured MARC.
    marc = cds2hep_marc.do(create_record(marcxml))
    assert expected_marc == marc['8564_']

    # Step 2: HEP-flavoured MARC -> JSON record.
    record = hep.do(create_record_from_dict(marc))
    assert validate(record['urls'], subschema) is None
    assert expected_json == record['urls']
Ejemplo n.º 12
0
def test_keywords_from_6531_a_9():
    """A ``6531_`` field with ``9`` and ``a`` maps to a ``keywords`` entry."""
    subschema = load_schema('hep')['properties']['keywords']

    marcxml = ('<datafield tag="653" ind1="1" ind2=" ">'
               '  <subfield code="9">CERN</subfield>'
               '  <subfield code="a">QCD</subfield>'
               '</datafield>')  # cds.cern.ch/record/1123149
    expected_marc = [{'9': 'CERN', 'a': 'QCD'}]
    expected_json = [{'source': 'CERN', 'value': 'QCD'}]

    # Step 1: CDS MARC -> HEP-flavoured MARC.
    marc = cds2hep_marc.do(create_record(marcxml))
    assert expected_marc == marc['6531_']

    # Step 2: HEP-flavoured MARC -> JSON record.
    record = hep.do(create_record_from_dict(marc))
    assert validate(record['keywords'], subschema) is None
    assert expected_json == record['keywords']
Ejemplo n.º 13
0
def test_languages_from_multiple_041__a():
    """Two ``041__a`` codes (eng, fre) end up as ``['en', 'fr']``."""
    subschema = load_schema('hep')['properties']['languages']

    marcxml = ('<record>'
               '  <datafield tag="041" ind1=" " ind2=" ">'
               '    <subfield code="a">eng</subfield>'
               '  </datafield>'
               '  <datafield tag="041" ind1=" " ind2=" ">'
               '    <subfield code="a">fre</subfield>'
               '  </datafield>'
               '</record>')  # cds.cern.ch/record/2258299
    expected_marc = [{'a': 'English'}, {'a': 'French'}]
    expected_json = ['en', 'fr']

    # Step 1: CDS MARC -> HEP-flavoured MARC.
    marc = cds2hep_marc.do(create_record(marcxml))
    assert expected_marc == marc['041__']

    # Step 2: HEP-flavoured MARC -> JSON record.
    record = hep.do(create_record_from_dict(marc))
    assert validate(record['languages'], subschema) is None
    assert expected_json == record['languages']
Ejemplo n.º 14
0
def test_external_system_identifiers_from_001():
    """The ``001`` control number is exposed as a CDS external identifier."""
    subschema = load_schema('hep')['properties']['external_system_identifiers']

    marcxml = ('<controlfield tag="001">2270264</controlfield>'
               )  # cds.cern.ch/record/2270264
    expected_marc = [{'a': '2270264', '9': 'CDS'}]
    expected_json = [{'schema': 'CDS', 'value': '2270264'}]

    # Step 1: CDS MARC -> HEP-flavoured MARC.
    marc = cds2hep_marc.do(create_record(marcxml))
    assert expected_marc == marc['035__']

    # Step 2: HEP-flavoured MARC -> JSON record.
    record = hep.do(create_record_from_dict(marc))
    assert validate(record['external_system_identifiers'], subschema) is None
    assert expected_json == record['external_system_identifiers']
Ejemplo n.º 15
0
def test_dois_from_0247_a_2():
    """A ``0247_`` DOI without a ``9`` source gets ``CDS`` as its source."""
    subschema = load_schema('hep')['properties']['dois']

    marcxml = ('<datafield tag="024" ind1="7" ind2=" ">'
               '  <subfield code="2">DOI</subfield>'
               '  <subfield code="a">10.1016/j.nima.2017.11.093</subfield>'
               '</datafield>')  # cds.cern.ch/record/2297288
    expected_marc = [
        {
            '2': 'DOI',
            '9': 'CDS',
            'a': '10.1016/j.nima.2017.11.093',
        },
    ]
    expected_json = [
        {
            'source': 'CDS',
            'value': '10.1016/j.nima.2017.11.093',
        },
    ]

    # Step 1: CDS MARC -> HEP-flavoured MARC.
    marc = cds2hep_marc.do(create_record(marcxml))
    assert expected_marc == marc['0247_']

    # Step 2: HEP-flavoured MARC -> JSON record.
    record = hep.do(create_record_from_dict(marc))
    assert validate(record['dois'], subschema) is None
    assert expected_json == record['dois']
Ejemplo n.º 16
0
def test_imprints_from_260__a_b_c():
    """A ``260`` field with ``a``, ``b`` and ``c`` maps to an ``imprints`` entry."""
    subschema = load_schema('hep')['properties']['imprints']

    marcxml = ('<datafield tag="260" ind1=" " ind2=" ">'
               '  <subfield code="a">Hoboken, NJ</subfield>'
               '  <subfield code="b">Wiley</subfield>'
               '  <subfield code="c">2015</subfield>'
               '</datafield>')  # cds.cern.ch/record/1999859
    # The intermediate MARC value is compared against a single dict here,
    # not a list of dicts.
    expected_marc = {'a': 'Hoboken, NJ', 'b': 'Wiley', 'c': '2015'}
    expected_json = [
        {'place': 'Hoboken, NJ', 'publisher': 'Wiley', 'date': '2015'},
    ]

    # Step 1: CDS MARC -> HEP-flavoured MARC.
    marc = cds2hep_marc.do(create_record(marcxml))
    assert expected_marc == marc['260__']

    # Step 2: HEP-flavoured MARC -> JSON record.
    record = hep.do(create_record_from_dict(marc))
    assert validate(record['imprints'], subschema) is None
    assert expected_json == record['imprints']
Ejemplo n.º 17
0
def test_private_notes_from_001_and_980__c_hidden():
    """A record with ``980__c:Hidden`` gets a ``CDS-<001>`` private note."""
    subschema = load_schema('hep')['properties']['_private_notes']

    marcxml = ('<record>'
               '  <controlfield tag="001">1355275</controlfield>'
               '  <datafield tag="980" ind1=" " ind2=" ">'
               '    <subfield code="c">Hidden</subfield>'
               '  </datafield>'
               '</record>')  # cds.cern.ch/record/1355275
    expected_marc = [{'a': 'CDS-1355275', '9': 'CDS'}]
    expected_json = [{'source': 'CDS', 'value': 'CDS-1355275'}]

    # Step 1: CDS MARC -> HEP-flavoured MARC.
    marc = cds2hep_marc.do(create_record(marcxml))
    assert expected_marc == marc['595__']

    # Step 2: HEP-flavoured MARC -> JSON record.
    record = hep.do(create_record_from_dict(marc))
    assert validate(record['_private_notes'], subschema) is None
    assert expected_json == record['_private_notes']
Ejemplo n.º 18
0
def test_titles_from_245__a():
    """A non-ASCII ``245__a`` title maps to a ``titles`` entry sourced to CDS."""
    subschema = load_schema('hep')['properties']['titles']

    marcxml = (
        '<datafield tag="245" ind1=" " ind2=" ">'
        '  <subfield code="a">Reconstrucción de masas invariantes de bosones del Modelo Estándar usando datos públicos de ATLAS Open Data</subfield>'
        '</datafield>')  # cds.cern.ch/record/2293251
    title = u'Reconstrucción de masas invariantes de bosones del Modelo Estándar usando datos públicos de ATLAS Open Data'
    # The intermediate MARC value is compared against a single dict here,
    # not a list of dicts.
    expected_marc = {'9': 'CDS', 'a': title}
    expected_json = [{'source': 'CDS', 'title': title}]

    # Step 1: CDS MARC -> HEP-flavoured MARC.
    marc = cds2hep_marc.do(create_record(marcxml))
    assert expected_marc == marc['245__']

    # Step 2: HEP-flavoured MARC -> JSON record.
    record = hep.do(create_record_from_dict(marc))
    assert validate(record['titles'], subschema) is None
    assert expected_json == record['titles']
Ejemplo n.º 19
0
def test_titles_from_246__a_b():
    """A ``246`` field with ``a`` and ``b`` maps to a title plus subtitle."""
    subschema = load_schema('hep')['properties']['titles']

    marcxml = (
        '<datafield tag="246" ind1=" " ind2=" ">'
        '  <subfield code="a">v.2</subfield>'
        '  <subfield code="b">Advances and applications the deterministic case</subfield>'
        '</datafield>')  # cds.cern.ch/record/1999859
    expected_marc = [
        {
            '9': 'CDS',
            'a': 'v.2',
            'b': 'Advances and applications the deterministic case',
        },
    ]
    expected_json = [
        {
            'source': 'CDS',
            'title': 'v.2',
            'subtitle': 'Advances and applications the deterministic case',
        },
    ]

    # Step 1: CDS MARC -> HEP-flavoured MARC.
    marc = cds2hep_marc.do(create_record(marcxml))
    assert expected_marc == marc['246__']

    # Step 2: HEP-flavoured MARC -> JSON record.
    record = hep.do(create_record_from_dict(marc))
    assert validate(record['titles'], subschema) is None
    assert expected_json == record['titles']
Ejemplo n.º 20
0
def test_title_translations_from_242__a():
    """A ``242__a`` value maps to an English ``title_translations`` entry."""
    subschema = load_schema('hep')['properties']['title_translations']

    marcxml = (
        '<datafield tag="242" ind1=" " ind2=" ">'
        '  <subfield code="a">Reconstruction of the invariant masses of bosons of the Standard Model using public data from ATLAS Open Data</subfield>'
        '</datafield>')  # cds.cern.ch/record/2293251
    title = 'Reconstruction of the invariant masses of bosons of the Standard Model using public data from ATLAS Open Data'
    # The intermediate MARC value is compared against a single dict here,
    # not a list of dicts.
    expected_marc = {'9': 'CDS', 'a': title}
    expected_json = [{'source': 'CDS', 'language': 'en', 'title': title}]

    # Step 1: CDS MARC -> HEP-flavoured MARC.
    marc = cds2hep_marc.do(create_record(marcxml))
    assert expected_marc == marc['242__']

    # Step 2: HEP-flavoured MARC -> JSON record.
    record = hep.do(create_record_from_dict(marc))
    assert validate(record['title_translations'], subschema) is None
    assert expected_json == record['title_translations']
Ejemplo n.º 21
0
def test_inspire_categories_from_65017a_2():
    """A ``65017`` 'Engineering' subject is mapped to 'Instrumentation'."""
    subschema = load_schema('hep')['properties']['inspire_categories']

    marcxml = ('<datafield tag="650" ind1="1" ind2="7">'
               '  <subfield code="2">SzGeCERN</subfield>'
               '  <subfield code="a">Engineering</subfield>'
               '</datafield>')  # cds.cern.ch/record/2276097
    expected_marc = [
        {
            '2': 'INSPIRE',
            '9': 'CDS',
            'a': 'Instrumentation',
        },
    ]
    # The source is expected in lower case in the JSON record.
    expected_json = [{'source': 'cds', 'term': 'Instrumentation'}]

    # Step 1: CDS MARC -> HEP-flavoured MARC.
    marc = cds2hep_marc.do(create_record(marcxml))
    assert expected_marc == marc['65017']

    # Step 2: HEP-flavoured MARC -> JSON record.
    record = hep.do(create_record_from_dict(marc))
    assert validate(record['inspire_categories'], subschema) is None
    assert expected_json == record['inspire_categories']
Ejemplo n.º 22
0
def test_authors_from_100__a_normalizes_name():
    """The author name 'Tagliente, G' is expected to become 'Tagliente, G.'."""
    subschema = load_schema('hep')['properties']['authors']

    marcxml = ('<datafield tag="100" ind1=" " ind2=" ">'
               '  <subfield code="a">Tagliente, G</subfield>'
               '</datafield>')  # cds.cern.ch/record/1099557
    expected_marc = [{'a': 'Tagliente, G.'}]
    expected_json = [{'full_name': 'Tagliente, G.'}]

    # Step 1: CDS MARC -> HEP-flavoured MARC.
    marc = cds2hep_marc.do(create_record(marcxml))
    assert expected_marc == marc['100__']

    # Step 2: HEP-flavoured MARC -> JSON record.
    record = hep.do(create_record_from_dict(marc))
    assert validate(record['authors'], subschema) is None
    assert expected_json == record['authors']
Ejemplo n.º 23
0
def test_publication_info_from_773__c_w_0():
    """Convert ``773__`` subfields ``0``, ``c`` and ``w`` to ``publication_info``.

    The three subfields must pass through ``cds2hep_marc`` unchanged; ``hep``
    must then split the page range ``3-6``, expose ``w`` as ``cnum`` and turn
    ``0`` into a ``parent_record`` reference.
    """
    subschema = load_schema('hep')['properties']['publication_info']

    marcxml = (
        '<datafield tag="773" ind1=" " ind2=" ">'
        '  <subfield code="0">1217633</subfield>'
        '  <subfield code="c">3-6</subfield>'
        '  <subfield code="w">C07-03-17</subfield>'
        '</datafield>'
    )  # cds.cern.ch/record/2294664

    # Step 1: CDS MARC -> MARC consumed by the ``hep`` rules.
    expected_marc = [{'0': '1217633', 'c': '3-6', 'w': 'C07-03-17'}]
    hep_marc = cds2hep_marc.do(create_record(marcxml))

    assert expected_marc == hep_marc['773__']

    # Step 2: MARC -> JSON record, checked against the subschema.
    parent_reference = {'$ref': 'http://localhost:5000/api/literature/1217633'}
    expected_publication_info = [
        {
            'cnum': 'C07-03-17',
            'page_start': '3',
            'page_end': '6',
            'parent_record': parent_reference,
        },
    ]
    record = hep.do(create_record_from_dict(hep_marc))

    assert validate(record['publication_info'], subschema) is None
    assert expected_publication_info == record['publication_info']
Ejemplo n.º 24
0
def test_arxiv_eprints_from_037__a_b_9_and_695__a_9():
    """Build ``arxiv_eprints`` from an arXiv ``037__`` plus ``695__`` fields.

    ``cds2hep_marc`` must keep the ``037__`` field as it is and rewrite each
    ``695__`` field into a ``65017`` entry with ``2: arXiv``. ``hep`` must
    then produce a single eprint carrying both categories.
    """
    subschema = load_schema('hep')['properties']['arxiv_eprints']

    marcxml = (
        '<record>'
        '  <datafield tag="037" ind1=" " ind2=" ">'
        '    <subfield code="9">arXiv</subfield>'
        '    <subfield code="a">arXiv:1607.05039</subfield>'
        '    <subfield code="c">hep-ex</subfield>'
        '  </datafield>'
        '  <datafield tag="695" ind1=" " ind2=" ">'
        '    <subfield code="9">LANL EDS</subfield>'
        '    <subfield code="a">hep-ex</subfield>'
        '  </datafield>'
        '  <datafield tag="695" ind1=" " ind2=" ">'
        '    <subfield code="9">LANL EDS</subfield>'
        '    <subfield code="a">hep-ph</subfield>'
        '  </datafield>'
        '</record>'
    )  # cds.cern.ch/record/2270264

    # Step 1: CDS MARC -> MARC consumed by the ``hep`` rules.
    expected_037 = [
        {'9': 'arXiv', 'a': 'arXiv:1607.05039', 'c': 'hep-ex'},
    ]
    expected_65017 = [
        {'2': 'arXiv', 'a': 'hep-ex'},
        {'2': 'arXiv', 'a': 'hep-ph'},
    ]
    hep_marc = cds2hep_marc.do(create_record(marcxml))

    assert expected_037 == hep_marc['037__']
    assert expected_65017 == hep_marc['65017']

    # Step 2: MARC -> JSON record, checked against the subschema.
    expected_eprints = [
        {'value': '1607.05039', 'categories': ['hep-ex', 'hep-ph']},
    ]
    record = hep.do(create_record_from_dict(hep_marc))

    assert validate(record['arxiv_eprints'], subschema) is None
    assert expected_eprints == record['arxiv_eprints']
Ejemplo n.º 25
0
def test_authors_from_100_a_i_j_u_0_9_ignores_beard():
    """Convert a fully populated ``100__`` author carrying a ``#BEARD#`` marker.

    After ``cds2hep_marc`` the ``0`` subfield is expected to be gone while
    ``9``, ``a``, ``i``, ``j`` and ``u`` remain. The final ``authors`` entry
    must hold only the name, the INSPIRE and CERN ids and the affiliation,
    with nothing derived from the ``#BEARD#`` value.
    """
    subschema = load_schema('hep')['properties']['authors']

    marcxml = (
        '<datafield tag="100" ind1=" " ind2=" ">'
        '  <subfield code="0">AUTHOR|(CDS)2077287</subfield>'
        '  <subfield code="9">#BEARD#</subfield>'
        '  <subfield code="a">Dietz-Laursonn, Erik</subfield>'
        '  <subfield code="i">INSPIRE-00271239</subfield>'
        '  <subfield code="j">CCID-695565</subfield>'
        '  <subfield code="u">Aachen, Tech. Hochsch.</subfield>'
        '</datafield>'
    )  # cds.cern.ch/record/2285529

    # Step 1: CDS MARC -> MARC consumed by the ``hep`` rules.
    expected_marc = [
        {
            '9': '#BEARD#',
            'a': 'Dietz-Laursonn, Erik',
            'i': 'INSPIRE-00271239',
            'j': 'CCID-695565',
            'u': 'Aachen, Tech. Hochsch.',
        },
    ]
    hep_marc = cds2hep_marc.do(create_record(marcxml))

    assert expected_marc == hep_marc['100__']

    # Step 2: MARC -> JSON record, checked against the subschema.
    expected_ids = [
        {'schema': 'INSPIRE ID', 'value': 'INSPIRE-00271239'},
        {'schema': 'CERN', 'value': 'CERN-695565'},
    ]
    expected_authors = [
        {
            'full_name': 'Dietz-Laursonn, Erik',
            'ids': expected_ids,
            'affiliations': [{'value': 'Aachen, Tech. Hochsch.'}],
        },
    ]
    record = hep.do(create_record_from_dict(hep_marc))

    assert validate(record['authors'], subschema) is None
    assert expected_authors == record['authors']
Ejemplo n.º 26
0
def test_thesis_info_from_502__a_b_c_and_500__a():
    """Build ``thesis_info`` from a ``502__`` field and a ``500__`` note.

    ``cds2hep_marc`` must shift the ``502__`` subfields (``a``/``b``/``c``
    become ``b``/``c``/``d``) and add ``9: CDS`` to the ``500__`` note.
    ``hep`` must then yield the degree type, institution, date and the
    defense date taken from the ``Presented 2017`` note.
    """
    subschema = load_schema('hep')['properties']['thesis_info']

    marcxml = (
        '<record>'
        '  <datafield tag="500" ind1=" " ind2=" ">'
        '    <subfield code="a">Presented 2017</subfield>'
        '  </datafield>'
        '  <datafield tag="502" ind1=" " ind2=" ">'
        '    <subfield code="a">PhD</subfield>'
        '    <subfield code="b">Linz U.</subfield>'
        '    <subfield code="c">2017</subfield>'
        '  </datafield>'
        '</record>'
    )  # cds.cern.ch/record/2295265

    # Step 1: CDS MARC -> MARC consumed by the ``hep`` rules.
    expected_500 = [{'9': 'CDS', 'a': 'Presented 2017'}]
    expected_502 = {'b': 'PhD', 'c': 'Linz U.', 'd': '2017'}
    hep_marc = cds2hep_marc.do(create_record(marcxml))

    assert expected_500 == hep_marc['500__']
    assert expected_502 == hep_marc['502__']

    # Step 2: MARC -> JSON record, checked against the subschema.
    expected_thesis_info = {
        'institutions': [{'name': 'Linz U.'}],
        'degree_type': 'phd',
        'date': '2017',
        'defense_date': '2017',
    }
    record = hep.do(create_record_from_dict(hep_marc))

    assert validate(record['thesis_info'], subschema) is None
    assert expected_thesis_info == record['thesis_info']
Ejemplo n.º 27
0
def test_collaboration_from_710__g():
    """Convert a ``710__g`` collaboration name into ``collaborations``.

    ``cds2hep_marc`` must leave ``ATLAS Collaboration`` untouched, while
    ``hep`` is expected to store it as the value ``ATLAS``.
    """
    subschema = load_schema('hep')['properties']['collaborations']

    marcxml = (
        '<datafield tag="710" ind1=" " ind2=" ">'
        '  <subfield code="g">ATLAS Collaboration</subfield>'
        '</datafield>'
    )  # cds.cern.ch/2295739

    # Step 1: CDS MARC -> MARC consumed by the ``hep`` rules.
    expected_marc = [{'g': 'ATLAS Collaboration'}]
    hep_marc = cds2hep_marc.do(create_record(marcxml))

    assert expected_marc == hep_marc['710__']

    # Step 2: MARC -> JSON record, checked against the subschema.
    expected_collaborations = [{'value': 'ATLAS'}]
    record = hep.do(create_record_from_dict(hep_marc))

    assert validate(record['collaborations'], subschema) is None
    assert expected_collaborations == record['collaborations']
Ejemplo n.º 28
0
def test_number_of_pages_from_300__a():
    """Extract ``number_of_pages`` from a ``300__a`` page count.

    ``cds2hep_marc`` must reduce ``20 p`` to the string ``20``, and ``hep``
    must expose it as the integer ``20``.
    """
    subschema = load_schema('hep')['properties']['number_of_pages']

    marcxml = (
        '<datafield tag="300" ind1=" " ind2=" ">'
        '  <subfield code="a">20 p</subfield>'
        '</datafield>'
    )  # cds.cern.ch/record/2292558

    # Step 1: CDS MARC -> MARC consumed by the ``hep`` rules.
    expected_marc = {'a': '20'}
    hep_marc = cds2hep_marc.do(create_record(marcxml))

    assert expected_marc == hep_marc['300__']

    # Step 2: MARC -> JSON record, checked against the subschema.
    expected_number_of_pages = 20
    record = hep.do(create_record_from_dict(hep_marc))

    assert validate(record['number_of_pages'], subschema) is None
    assert expected_number_of_pages == record['number_of_pages']
Ejemplo n.º 29
0
def test_corporate_author_from_110__a():
    """Convert a ``110__a`` corporate author into ``corporate_author``.

    The name must pass through ``cds2hep_marc`` unchanged and end up as the
    only element of the ``corporate_author`` list produced by ``hep``.
    """
    subschema = load_schema('hep')['properties']['corporate_author']

    marcxml = (
        '<datafield tag="110" ind1=" " ind2=" ">'
        '  <subfield code="a">CERN. Geneva. Research Board Committee</subfield>'
        '</datafield>'
    )  # cds.cern.ch/record/2292626

    # Step 1: CDS MARC -> MARC consumed by the ``hep`` rules.
    expected_marc = [{'a': 'CERN. Geneva. Research Board Committee'}]
    hep_marc = cds2hep_marc.do(create_record(marcxml))

    assert expected_marc == hep_marc['110__']

    # Step 2: MARC -> JSON record, checked against the subschema.
    expected_corporate_author = ['CERN. Geneva. Research Board Committee']
    record = hep.do(create_record_from_dict(hep_marc))

    assert validate(record['corporate_author'], subschema) is None
    assert expected_corporate_author == record['corporate_author']
Ejemplo n.º 30
0
def test_documents_from_8564_s_u_y_8():
    """Convert an ``8564_`` fulltext link into a ``documents`` entry.

    ``cds2hep_marc`` is expected to emit an ``FFT__`` field holding the URL,
    description, file name and extension with ``t: CDS``. ``hep`` must then
    produce a fulltext document keyed by the file name with source ``CDS``.
    """
    subschema = load_schema('hep')['properties']['documents']

    marcxml = (
        '<datafield tag="856" ind1="4" ind2=" ">'
        '  <subfield code="8">1369908</subfield>'
        '  <subfield code="s">76482</subfield>'
        '  <subfield code="u">http://cds.cern.ch/record/2294664/files/James.pdf</subfield>'
        '  <subfield code="y">Fulltext</subfield>'
        '</datafield>'
    )  # cds.cern.ch/record/2294664

    # Step 1: CDS MARC -> MARC consumed by the ``hep`` rules.
    expected_marc = [
        {
            't': 'CDS',
            'a': 'http://cds.cern.ch/record/2294664/files/James.pdf',
            'd': 'Fulltext',
            'n': 'James.pdf',
            'f': '.pdf',
        },
    ]
    hep_marc = cds2hep_marc.do(create_record(marcxml))

    assert expected_marc == hep_marc['FFT__']

    # Step 2: MARC -> JSON record, checked against the subschema.
    expected_documents = [
        {
            'key': 'James.pdf',
            'fulltext': True,
            'source': 'CDS',
            'url': 'http://cds.cern.ch/record/2294664/files/James.pdf',
        },
    ]
    record = hep.do(create_record_from_dict(hep_marc))

    assert validate(record['documents'], subschema) is None
    assert expected_documents == record['documents']