def test_address_from_multiple_marcxml__111_c():
    """Two 111 datafields, each with a single $c, give two address entries."""
    marcxml = (
        '<record>'
        '  <datafield tag="111" ind1=" " ind2=" ">'
        '    <subfield code="c">Austin, Tex.</subfield>'
        '  </datafield>'
        '  <datafield tag="111" ind1=" " ind2=" ">'
        '    <subfield code="c">Den Haag, Nederlands</subfield>'
        '  </datafield>'
        '</record>'
    )

    converted = conferences.do(create_record(marcxml))
    record = clean_record(converted)

    # One entry per 111 field, in document order.
    austin = {
        'country_code': 'US',
        'state': 'US-TX',
        'original_address': 'Austin, Tex.',
    }
    den_haag = {
        'country_code': 'NL',
        'original_address': 'Den Haag, Nederlands',
    }
    assert [austin, den_haag] == record['address']
Esempio n. 2
0
def convert_marcxml(source):
    """Yield one JSON record for every MARCXML record read from ``source``.

    ``source`` must expose ``read()``; its content is passed to
    ``split_blob`` and each resulting chunk is parsed with ``create_record``.
    The dojson model is selected by probing the record's collections in a
    fixed order, with ``hep`` as the fallback when no probe matches. Every
    converted record goes through ``strip_empty_values`` before being yielded.
    """
    from dojson.contrib.marc21.utils import create_record, split_blob

    from inspirehep.dojson.utils import strip_empty_values
    from inspirehep.dojson.hep import hep
    from inspirehep.dojson.institutions import institutions
    from inspirehep.dojson.journals import journals
    from inspirehep.dojson.experiments import experiments
    from inspirehep.dojson.hepnames import hepnames
    from inspirehep.dojson.jobs import jobs
    from inspirehep.dojson.conferences import conferences

    # Probed top to bottom; the first entry with a matching collection wins.
    models_by_collection = (
        (('institution',), institutions),
        (('experiment',), experiments),
        (('journals',), journals),
        (('hepnames',), hepnames),
        (('job', 'jobhidden'), jobs),
        (('conferences',), conferences),
    )

    for blob in split_blob(source.read()):
        marc = create_record(blob)
        model = hep
        for collections, candidate in models_by_collection:
            if any(_collection_in_record(marc, name) for name in collections):
                model = candidate
                break
        yield strip_empty_values(model.do(marc))
def test_titles_from_marcxml_111_with_two_a():
    """A 111 field with two $a and one $b gives two titles sharing the subtitle."""
    titles_schema = load_schema('conferences')['properties']['titles']

    marcxml = (
        '<record>'
        '  <datafield tag="111" ind1=" " ind2=" ">'
        '    <subfield code="a">Conférence IAP 2013</subfield>'
        '    <subfield code="a">75 Anniversary Conference</subfield>'
        '    <subfield code="b">The origin of the Hubble sequence</subfield>'
        '  </datafield>'
        '</record>'
    )

    # The single $b is expected on both titles.
    subtitle = 'The origin of the Hubble sequence'
    expected_titles = [
        {'title': u'Conférence IAP 2013', 'subtitle': subtitle},
        {'title': '75 Anniversary Conference', 'subtitle': subtitle},
    ]

    record = conferences.do(create_record(marcxml))
    titles = record['titles']

    assert validate(titles, titles_schema) is None
    assert expected_titles == titles
def test_series_name_and_number_and_series_name_from_411__a_n_and_411__a():
    """A 411 with $a and $n followed by a 411 with only $a gives two series."""
    series_schema = load_schema('conferences')['properties']['series']

    marcxml = (
        '<record>'
        '  <datafield tag="411" ind1=" " ind2=" ">'
        '    <subfield code="a">Rencontres de Moriond</subfield>'
        '    <subfield code="n">51</subfield>'
        '  </datafield>'
        '  <datafield tag="411" ind1=" " ind2=" ">'
        '    <subfield code="a">Moriond EW</subfield>'
        '  </datafield>'
        '</record>'
    )  # record/1404073

    record = conferences.do(create_record(marcxml))
    series = record['series']

    assert validate(series, series_schema) is None
    # The number is expected as an integer, and only on the first series.
    assert [
        {'name': 'Rencontres de Moriond', 'number': 51},
        {'name': 'Moriond EW'},
    ] == series
def test_address_from_111__a_c_e_g_x_y_and_270__b():
    """Addresses are collected from both 111 $c and 270 $b of one record."""
    marcxml = ''.join([
        '<record>',
        '  <datafield tag="111" ind1=" " ind2=" ">',
        '    <subfield code="a">2017 International Workshop on Baryon and Lepton Number Violation: From the Cosmos to the LHC</subfield>',
        '    <subfield code="c">Cleveland, Ohio, USA</subfield>',
        '    <subfield code="e">BLV 2017</subfield>',
        '    <subfield code="g">C17-05-15</subfield>',
        '    <subfield code="x">2017-05-15</subfield>',
        '    <subfield code="y">2017-05-18</subfield>',
        '  </datafield>',
        '  <datafield tag="270" ind1=" " ind2=" ">',
        '    <subfield code="b">Case Western Reserve University</subfield>',
        '  </datafield>',
        '</record>',
    ])  # record/1353313

    from_111 = {
        'original_address': 'Cleveland, Ohio, USA',
        'country_code': 'US',
        'state': 'US-OH',
    }
    from_270 = {'original_address': 'Case Western Reserve University'}

    record = clean_record(conferences.do(create_record(marcxml)))

    assert [from_111, from_270] == record['address']
def test_double_series_name_from_double_411__a():
    """Two 411 fields with only $a give two name-only series entries."""
    series_schema = load_schema('conferences')['properties']['series']

    marcxml = (
        '<record>'
        '  <datafield tag="411" ind1=" " ind2=" ">'
        '    <subfield code="a">SNPS</subfield>'
        '  </datafield>'
        '  <datafield tag="411" ind1=" " ind2=" ">'
        '    <subfield code="a">NSS</subfield>'
        '  </datafield>'
        '</record>'
    )  # record/964177

    expected_series = [{'name': series_name} for series_name in ('SNPS', 'NSS')]

    record = conferences.do(create_record(marcxml))
    series = record['series']

    assert validate(series, series_schema) is None
    assert expected_series == series
def test_contact_details_from_multiple_marcxml_270():
    """Two 270 fields give two contact entries; the email comes from $m.

    The first 270 carries both $m (email) and $p (name), the second only $p,
    so only the first expected contact has an ``email`` key. The result is
    also validated against the ``contact_details`` subschema.
    """
    schema = load_schema('conferences')
    subschema = schema['properties']['contact_details']

    # The address previously appeared as two different redaction placeholders
    # in the input and in the expectation, which could never compare equal.
    # Use one well-formed placeholder address in both places.
    contact_email = 'manfred.lindner@example.org'

    snippet = (
        '<record> '
        '  <datafield tag="270" ind1=" " ind2=" ">'
        '    <subfield code="m">manfred.lindner@example.org</subfield>'
        '    <subfield code="p">Manfred Lindner</subfield>'
        '  </datafield>'
        '  <datafield tag="270" ind1=" " ind2=" ">'
        '    <subfield code="p">Wynton Marsalis</subfield>'
        '  </datafield>'
        '</record>'
    )

    expected = [
        {
            'name': 'Manfred Lindner',
            'email': contact_email,
        },
        {
            'name': 'Wynton Marsalis',
        },
    ]
    result = conferences.do(create_record(snippet))

    assert validate(result['contact_details'], subschema) is None
    assert expected == result['contact_details']
def test_address_from_111__a_c_d_g_x_y():
    """The $c of a fully populated 111 field becomes a single address entry."""
    address_schema = load_schema('conferences')['properties']['address']

    marcxml = (
        '<datafield tag="111" ind1=" " ind2=" ">'
        '  <subfield code="a">11th Texas Symposium on Relativistic Astrophysics</subfield>'
        '  <subfield code="c">Austin, Tex.</subfield>'
        '  <subfield code="d">13-17 Dec 1982</subfield>'
        '  <subfield code="g">C82-12-13</subfield>'
        '  <subfield code="x">1982-12-13</subfield>'
        '  <subfield code="y">1982-12-17</subfield>'
        '</datafield>'
    )  # record/965081

    record = conferences.do(create_record(marcxml))
    addresses = record['address']

    assert validate(addresses, address_schema) is None
    assert [
        {
            'country_code': 'US',
            'state': 'US-TX',
            'original_address': 'Austin, Tex.',
        },
    ] == addresses
def test_series_and_series_name_and_number_from_411__a_and_411__a_n():
    """A 411 with only $a followed by a 411 with $a and $n gives two series."""
    series_schema = load_schema('conferences')['properties']['series']

    marcxml = ''.join([
        '<record>',
        '  <datafield tag="411" ind1=" " ind2=" ">',
        '    <subfield code="a">CEC</subfield>',
        '  </datafield>',
        '  <datafield tag="411" ind1=" " ind2=" ">',
        '    <subfield code="a">ICMC</subfield>',
        '    <subfield code="n">2</subfield>',
        '  </datafield>',
        '</record>',
    ])  # record/964448

    name_only = {'name': 'CEC'}
    name_and_number = {'name': 'ICMC', 'number': 2}

    record = conferences.do(create_record(marcxml))
    series = record['series']

    assert validate(series, series_schema) is None
    assert [name_only, name_and_number] == series
def test_series_name_and_number_and_series_number_from_411__a_n_and_411__a():
    """A 411 with $a and $n followed by a 411 with only $a gives two series."""
    series_schema = load_schema('conferences')['properties']['series']

    marcxml = (
        '<record>'
        '  <datafield tag="411" ind1=" " ind2=" ">'
        '    <subfield code="a">EDS</subfield>'
        '    <subfield code="n">13</subfield>'
        '  </datafield>'
        '  <datafield tag="411" ind1=" " ind2=" ">'
        '     <subfield code="a">BLOIS</subfield>'
        '  </datafield>'
        '</record>'
    )  # record/980229

    expected_series = [
        {'name': 'EDS', 'number': 13},
        {'name': 'BLOIS'},
    ]

    record = conferences.do(create_record(marcxml))
    series = record['series']

    assert validate(series, series_schema) is None
    assert expected_series == series
def test_address_from_111__a_double_c_d_e_g_x_y():
    """Two $c subfields in one 111 field give two address entries.

    The first $c ('QCD 12') is expected without a country code; the second
    one is expected with 'FR'.
    """
    address_schema = load_schema('conferences')['properties']['address']

    marcxml = (
        '<datafield tag="111" ind1=" " ind2=" ">'
        '  <subfield code="a">16th High-Energy Physics International Conference in Quantum Chromodynamics</subfield>'
        '  <subfield code="c">QCD 12</subfield>'
        '  <subfield code="c">Montpellier, France</subfield>'
        '  <subfield code="d">2-7 Jul 2012</subfield>'
        '  <subfield code="e">QCD 12</subfield>'
        '  <subfield code="g">C12-07-02</subfield>'
        '  <subfield code="x">2012-07-02</subfield>'
        '  <subfield code="y">2012-07-07</subfield>'
        '</datafield>'
    )  # record/1085463

    first_c = {'original_address': 'QCD 12'}
    second_c = {
        'country_code': 'FR',
        'original_address': 'Montpellier, France',
    }

    record = conferences.do(create_record(marcxml))
    addresses = record['address']

    assert validate(addresses, address_schema) is None
    assert [first_c, second_c] == addresses
def test_contact_details_from_multiple_marcxml_270():
    """Two 270 fields give two contact entries; the email comes from $m.

    The first 270 carries both $m (email) and $p (name), the second only $p,
    so only the first expected contact has an ``email`` key. The converted
    record is passed through ``clean_record`` before the comparison.
    """
    # The address previously appeared as two different redaction placeholders
    # in the input and in the expectation, which could never compare equal.
    # Use one well-formed placeholder address in both places.
    contact_email = 'manfred.lindner@example.org'

    snippet = (
        '<record> '
        '  <datafield tag="270" ind1=" " ind2=" ">'
        '    <subfield code="m">manfred.lindner@example.org</subfield>'
        '    <subfield code="p">Manfred Lindner</subfield>'
        '  </datafield>'
        '  <datafield tag="270" ind1=" " ind2=" ">'
        '    <subfield code="p">Wynton Marsalis</subfield>'
        '  </datafield>'
        '</record>'
    )

    expected = [
        {
            'name': 'Manfred Lindner',
            'email': contact_email,
        },
        {
            'name': 'Wynton Marsalis',
        },
    ]
    result = clean_record(conferences.do(create_record(snippet)))

    assert expected == result['contact_details']
Esempio n. 13
0
def convert_marcxml(source):
    """Generate JSON records from the MARCXML content of ``source``.

    The content returned by ``source.read()`` is split with ``split_blob``;
    each chunk is parsed with ``create_record`` and converted with the first
    dojson model whose collection is found in the record, or with ``hep``
    when none is. Results are passed through ``strip_empty_values``.
    """
    from dojson.contrib.marc21.utils import create_record, split_blob

    from inspirehep.dojson.utils import strip_empty_values
    from inspirehep.dojson.hep import hep
    from inspirehep.dojson.institutions import institutions
    from inspirehep.dojson.journals import journals
    from inspirehep.dojson.experiments import experiments
    from inspirehep.dojson.hepnames import hepnames
    from inspirehep.dojson.jobs import jobs
    from inspirehep.dojson.conferences import conferences

    # Order matters: collections are tested in exactly this sequence.
    ordered_rules = [
        (['institution'], institutions),
        (['experiment'], experiments),
        (['journals'], journals),
        (['hepnames'], hepnames),
        (['job', 'jobhidden'], jobs),
        (['conferences'], conferences),
    ]

    for chunk in split_blob(source.read()):
        parsed = create_record(chunk)
        for wanted, model in ordered_rules:
            if any(_collection_in_record(parsed, name) for name in wanted):
                break
        else:
            # No collection matched: treat the record as literature.
            model = hep
        yield strip_empty_values(model.do(parsed))
def test_address_from_111__a_c_e_g_x_y_and_270__b():
    """One address is expected from 111 $c and a second one from 270 $b."""
    marcxml = (
        '<record>'
        '  <datafield tag="111" ind1=" " ind2=" ">'
        '    <subfield code="a">2017 International Workshop on Baryon and Lepton Number Violation: From the Cosmos to the LHC</subfield>'
        '    <subfield code="c">Cleveland, Ohio, USA</subfield>'
        '    <subfield code="e">BLV 2017</subfield>'
        '    <subfield code="g">C17-05-15</subfield>'
        '    <subfield code="x">2017-05-15</subfield>'
        '    <subfield code="y">2017-05-18</subfield>'
        '  </datafield>'
        '  <datafield tag="270" ind1=" " ind2=" ">'
        '    <subfield code="b">Case Western Reserve University</subfield>'
        '  </datafield>'
        '</record>'
    )  # record/1353313

    converted = conferences.do(create_record(marcxml))
    record = clean_record(converted)

    assert [
        {
            'original_address': 'Cleveland, Ohio, USA',
            'country_code': 'US',
            'state': 'US-OH',
        },
        {'original_address': 'Case Western Reserve University'},
    ] == record['address']
def test_address_from_111__a_c_d_g_x_y_and_111__c():
    """A full 111 field plus a second 111 with only $c give two addresses."""
    address_schema = load_schema('conferences')['properties']['address']

    marcxml = (
        '<record>'
        '  <datafield tag="111" ind1=" " ind2=" ">'
        '    <subfield code="a">Low dimensional physics and gauge principles</subfield>'
        '    <subfield code="c">Yerevan, Armenia</subfield>'
        '    <subfield code="d">21-29 Sep 2011</subfield>'
        '    <subfield code="g">C11-09-21.2</subfield>'
        '    <subfield code="x">2011-09-21</subfield>'
        '    <subfield code="y">2011-09-29</subfield>'
        '  </datafield>'
        '  <datafield tag="111" ind1=" " ind2=" ">'
        '    <subfield code="c">Tbilisi, Georgia</subfield>'
        '  </datafield>'
        '</record>'
    )  # record/1220831

    yerevan = {'country_code': 'AM', 'original_address': 'Yerevan, Armenia'}
    tbilisi = {'country_code': 'GE', 'original_address': 'Tbilisi, Georgia'}

    record = conferences.do(create_record(marcxml))
    addresses = record['address']

    assert validate(addresses, address_schema) is None
    assert [yerevan, tbilisi] == addresses
def test_acronym_from_111__a_c_d_double_e_g_x_y():
    """Both $e subfields of a 111 field are expected as acronyms, in order."""
    acronym_schema = load_schema('conferences')['properties']['acronym']

    marcxml = (
        '<datafield tag="111" ind1=" " ind2=" ">'
        '  <subfield code="a">11th international vacuum congress and 7th international conference on solid surfaces</subfield>'
        '  <subfield code="c">Cologne, Germany</subfield>'
        '  <subfield code="d">25 – 29 Sep 1989</subfield>'
        '  <subfield code="e">IVC-11</subfield>'
        '  <subfield code="e">ICSS-7</subfield>'
        '  <subfield code="g">C89-09-25.3</subfield>'
        '  <subfield code="x">1989-09-25</subfield>'
        '  <subfield code="y">1989-09-29</subfield>'
        '</datafield>'
    )  # record/1308774

    record = conferences.do(create_record(marcxml))
    acronyms = record['acronym']

    assert validate(acronyms, acronym_schema) is None
    assert ['IVC-11', 'ICSS-7'] == acronyms
def test_double_series_name_and_number_from_double_411__a_n():
    """Two 411 fields, each with $a and $n, give two complete series entries."""
    series_schema = load_schema('conferences')['properties']['series']

    marcxml = (
        '<record>'
        '  <datafield tag="411" ind1=" " ind2=" ">'
        '    <subfield code="a">ICHEP</subfield>'
        '    <subfield code="n">5</subfield>'
        '  </datafield>'
        '  <datafield tag="411" ind1=" " ind2=" ">'
        '    <subfield code="a">Rochester</subfield>'
        '    <subfield code="n">5</subfield>'
        '  </datafield>'
        '</record>'
    )  # record/974856

    # Both series carry the same number in this record.
    expected_series = [
        {'name': series_name, 'number': 5}
        for series_name in ('ICHEP', 'Rochester')
    ]

    record = conferences.do(create_record(marcxml))
    series = record['series']

    assert validate(series, series_schema) is None
    assert expected_series == series
Esempio n. 18
0
def test_series_number_from_411__n():
    """A 411 field carrying only $n must not produce a ``series`` key."""
    marcxml = (
        '<datafield tag="411" ind1=" " ind2=" ">'
        '  <subfield code="n">7</subfield>'
        '</datafield>'
    )  # record/1447029

    parsed = create_record(marcxml)
    record = conferences.do(parsed)

    assert 'series' not in record
def test_series_number_from_411__n():
    """The $n of a 411 field is expected as the integer ``series_number``."""
    marcxml = (
        '<datafield tag="411" ind1=" " ind2=" ">'
        '  <subfield code="n">7</subfield>'
        '</datafield>'
    )  # record/1447029

    converted = conferences.do(create_record(marcxml))
    record = clean_record(converted)

    assert 7 == record['series_number']
def test_series_number_from_411__n():
    """Without a $a, a 411 field is expected to leave ``series`` unset."""
    marcxml = ''.join([
        '<datafield tag="411" ind1=" " ind2=" ">',
        '  <subfield code="n">7</subfield>',
        '</datafield>',
    ])  # record/1447029

    record = conferences.do(create_record(marcxml))

    assert 'series' not in record
def test_series_number_from_411__n():
    """``series_number`` is expected to hold the 411 $n value as an int."""
    marcxml = ''.join([
        '<datafield tag="411" ind1=" " ind2=" ">',
        '  <subfield code="n">7</subfield>',
        '</datafield>',
    ])  # record/1447029

    expected_number = 7
    converted = conferences.do(create_record(marcxml))
    record = clean_record(converted)

    assert expected_number == record['series_number']
Esempio n. 22
0
def create_record(data, force=False, dry_run=False):
    """Convert one MARCXML record to JSON and optionally store it.

    ``data`` is parsed with ``marc_create_record``; the dojson model is chosen
    from the record's collections (``hep`` when none matches) and the result
    is passed through ``strip_empty_values``.

    :param data: MARCXML of a single record.
    :param force: when true and the JSON has a ``control_number`` or ``recid``
        key, create or update the ``Record`` with that control number.
    :param dry_run: when true, only convert; nothing is merged into the
        database session.
    :returns: ``(recid, json)`` on a dry run, ``(control_number, dict(record))``
        after storing, and ``None`` when neither path applies.
    :raises Exception: any conversion or storage error is re-raised after the
        failure has been recorded on the production record (if there is one)
        and logged (if the record has an ``001`` field).
    """
    record = marc_create_record(data)
    recid = None
    if '001' in record:
        recid = int(record['001'][0])

    # Bind the name unconditionally: the error handler below runs for dry
    # runs too, where no production record is created. Without this, a failed
    # dry run on a record with an 001 field raised UnboundLocalError from the
    # handler and hid the original exception.
    prod_record = None
    if not dry_run and recid:
        prod_record = InspireProdRecords(recid=recid)
        prod_record.marcxml = data
    try:
        if _collection_in_record(record, 'institution'):
            json = strip_empty_values(institutions.do(record))
        elif _collection_in_record(record, 'experiment'):
            json = strip_empty_values(experiments.do(record))
        elif _collection_in_record(record, 'journals'):
            json = strip_empty_values(journals.do(record))
        elif _collection_in_record(record, 'hepnames'):
            json = strip_empty_values(hepnames.do(record))
        elif _collection_in_record(record, 'job') or \
                _collection_in_record(record, 'jobhidden'):
            json = strip_empty_values(jobs.do(record))
        elif _collection_in_record(record, 'conferences'):
            json = strip_empty_values(conferences.do(record))
        else:
            json = strip_empty_values(hep.do(record))
        if dry_run:
            return recid, json

        if force and any(key in json for key in ('control_number', 'recid')):
            try:
                control_number = json['control_number']
            except KeyError:
                control_number = json['recid']
            control_number = int(control_number)
            # Searches if record already exists.
            record = Record.get_record(control_number)
            if record is None:
                # Adds the record to the db session.
                rec = RecordModel(id=control_number)
                db.session.merge(rec)
                record = Record.create(json)
            else:
                record = Record(json, model=record.model)
                record.commit()
            if prod_record is not None:
                prod_record.successful = True
                db.session.merge(prod_record)
            logger.info("Elaborated record {}".format(control_number))
            return control_number, dict(record)
    except Exception:
        if prod_record is not None:
            prod_record.successful = False
            db.session.merge(prod_record)
        if recid:
            logger.exception("Error in elaborating record ID {}".format(recid))
        raise
def test_series_from_411__a():
    """The $a of a 411 field is expected as the only entry of ``series``."""
    marcxml = (
        '<datafield tag="411" ind1=" " ind2=" ">'
        '  <subfield code="a">DPF Series</subfield>'
        '</datafield>'
    )  # record/1430017

    converted = conferences.do(create_record(marcxml))
    record = clean_record(converted)

    assert ['DPF Series'] == record['series']
def test_series_and_series_number_from_411__a_n():
    """A 411 with $a and $n fills both ``series`` and ``series_number``."""
    marcxml = (
        '<datafield tag="411" ind1=" " ind2=" ">'
        '  <subfield code="a">FPCP</subfield>'
        '  <subfield code="n">16</subfield>'
        '</datafield>'
    )  # record/1468357

    converted = conferences.do(create_record(marcxml))
    record = clean_record(converted)

    assert record['series_number'] == 16
    assert record['series'] == ['FPCP']
def test_note_from__500_a():
    """The $a of a 500 field is expected as the only entry of ``note``."""
    marcxml = (
        '<datafield tag="500" ind1=" " ind2=" ">'
        '  <subfield code="a">Same conf. as Kyoto 1975: none in intervening years</subfield>'
        '</datafield>'
    )  # record/963579

    converted = conferences.do(create_record(marcxml))
    record = clean_record(converted)

    assert [
        'Same conf. as Kyoto 1975: none in intervening years',
    ] == record['note']
def test_short_description_from_520__a():
    """The $a of a 520 field is expected under ``short_description[0].value``."""
    marcxml = (
        '<datafield tag="520" ind1=" " ind2=" ">'
        '  <subfield code="a">QNP2015 is the Seventh International Conference on Quarks and Nuclear Physics. It is anticipated that QCD practitioners, both experimentalists and theorists, will gather at the Universidad Técnica Federico Santa María, in Valparaíso, Chile during the week of March 2, 2015 to present and discuss the latest advances in the field. The following topics will be covered: quarks and gluons content of nucleons and nuclei, hadron spectroscopy, non-perturbative methods in QCD (including lattice calculations), effective field theories, nuclear matter under extreme conditions and nuclear medium. Participants should register at the conference website https://indico.cern.ch/event/304663/</subfield>'
        '</datafield>'
    )  # record/1326067

    description = u'QNP2015 is the Seventh International Conference on Quarks and Nuclear Physics. It is anticipated that QCD practitioners, both experimentalists and theorists, will gather at the Universidad Técnica Federico Santa María, in Valparaíso, Chile during the week of March 2, 2015 to present and discuss the latest advances in the field. The following topics will be covered: quarks and gluons content of nucleons and nuclei, hadron spectroscopy, non-perturbative methods in QCD (including lattice calculations), effective field theories, nuclear matter under extreme conditions and nuclear medium. Participants should register at the conference website https://indico.cern.ch/event/304663/'

    converted = conferences.do(create_record(marcxml))
    record = clean_record(converted)

    assert [{'value': description}] == record['short_description']
def test_series_from_double_411__a():
    """Two $a subfields in one 411 field give two ``series`` entries."""
    marcxml = (
        '<datafield tag="411" ind1=" " ind2=" ">'
        '  <subfield code="a">Tamm Theory Department</subfield>'
        '  <subfield code="a">Sakharov</subfield>'
        '</datafield>'
    )  # record/969879

    converted = conferences.do(create_record(marcxml))
    record = clean_record(converted)

    assert ['Tamm Theory Department', 'Sakharov'] == record['series']
def test_series_from_411__a():
    """A single 411 $a is expected to become a one-element ``series`` list."""
    marcxml = ''.join([
        '<datafield tag="411" ind1=" " ind2=" ">',
        '  <subfield code="a">DPF Series</subfield>',
        '</datafield>',
    ])  # record/1430017

    expected_series = ['DPF Series']
    converted = conferences.do(create_record(marcxml))
    record = clean_record(converted)

    assert expected_series == record['series']
def test_note_from__500_a():
    """A single 500 $a is expected to become a one-element ``note`` list."""
    marcxml = ''.join([
        '<datafield tag="500" ind1=" " ind2=" ">',
        '  <subfield code="a">Same conf. as Kyoto 1975: none in intervening years</subfield>',
        '</datafield>',
    ])  # record/963579

    expected_notes = ['Same conf. as Kyoto 1975: none in intervening years']
    converted = conferences.do(create_record(marcxml))
    record = clean_record(converted)

    assert expected_notes == record['note']
def test_series_and_series_number_from_411__a_n():
    """411 $n is expected in ``series_number`` and 411 $a in ``series``."""
    marcxml = ''.join([
        '<datafield tag="411" ind1=" " ind2=" ">',
        '  <subfield code="a">FPCP</subfield>',
        '  <subfield code="n">16</subfield>',
        '</datafield>',
    ])  # record/1468357

    parsed = create_record(marcxml)
    record = clean_record(conferences.do(parsed))

    assert record['series_number'] == 16
    assert record['series'] == ['FPCP']
Esempio n. 31
0
def create_record(recid, record, force=False, dry_run=False, validation=False):
    """Convert a MARC record to JSON and optionally store it.

    The dojson model is picked from the record's collections (``hep`` when
    none matches) and its output is passed through ``strip_empty_values``.

    :param recid: identifier used in the validation warning and returned
        unchanged on a dry run.
    :param record: MARC record handed to the dojson model.
    :param force: when true and the JSON has a ``control_number`` or ``recid``
        key, create or update the ``Record`` with that control number.
    :param dry_run: when true, return right after conversion (and optional
        validation) without touching the database session.
    :param validation: when true, run ``validate`` on the JSON; a
        ``ValidationError`` is logged as a warning and reported in the
        returned ``errors`` string instead of being raised.
    :returns: ``(errors, recid, json)`` on a dry run,
        ``(errors, control_number, dict(record))`` after storing, and ``None``
        when neither path applies.
    """
    # Probed top to bottom; the first entry with a matching collection wins.
    models_by_collection = (
        (('institution',), institutions),
        (('experiment',), experiments),
        (('journals',), journals),
        (('hepnames',), hepnames),
        (('job', 'jobhidden'), jobs),
        (('conferences',), conferences),
    )
    model = hep
    for collections, candidate in models_by_collection:
        if any(_collection_in_record(record, name) for name in collections):
            model = candidate
            break
    json = strip_empty_values(model.do(record))

    errors = ""
    if validation:
        try:
            validate(json)
        except ValidationError as err:
            errors = "ValidationError: Record {0}: {1}".format(recid, err)
            current_app.logger.warning(errors)

    if dry_run:
        return errors, recid, json

    has_identifier = force and any(
        key in json for key in ('control_number', 'recid')
    )
    if not has_identifier:
        return None

    # Prefer 'control_number'; fall back to 'recid'.
    if 'control_number' in json:
        control_number = int(json['control_number'])
    else:
        control_number = int(json['recid'])

    with db.session.begin_nested():
        # Searches if record already exists.
        existing = Record.get_record(control_number)
        if existing is None:
            # Adds the record to the db session.
            db.session.merge(RecordModel(id=control_number))
            stored = Record.create(json)
        else:
            stored = Record(json, model=existing.model)
            stored.commit()
    logger.info("Elaborated record {}".format(control_number))
    return errors, control_number, dict(stored)
def test_short_description_from_520__a():
    """A 520 $a is expected as the ``value`` of one short description."""
    marcxml = ''.join([
        '<datafield tag="520" ind1=" " ind2=" ">',
        '  <subfield code="a">QNP2015 is the Seventh International Conference on Quarks and Nuclear Physics. It is anticipated that QCD practitioners, both experimentalists and theorists, will gather at the Universidad Técnica Federico Santa María, in Valparaíso, Chile during the week of March 2, 2015 to present and discuss the latest advances in the field. The following topics will be covered: quarks and gluons content of nucleons and nuclei, hadron spectroscopy, non-perturbative methods in QCD (including lattice calculations), effective field theories, nuclear matter under extreme conditions and nuclear medium. Participants should register at the conference website https://indico.cern.ch/event/304663/</subfield>',
        '</datafield>',
    ])  # record/1326067

    expected_descriptions = [{
        'value': u'QNP2015 is the Seventh International Conference on Quarks and Nuclear Physics. It is anticipated that QCD practitioners, both experimentalists and theorists, will gather at the Universidad Técnica Federico Santa María, in Valparaíso, Chile during the week of March 2, 2015 to present and discuss the latest advances in the field. The following topics will be covered: quarks and gluons content of nucleons and nuclei, hadron spectroscopy, non-perturbative methods in QCD (including lattice calculations), effective field theories, nuclear matter under extreme conditions and nuclear medium. Participants should register at the conference website https://indico.cern.ch/event/304663/',
    }]

    parsed = create_record(marcxml)
    record = clean_record(conferences.do(parsed))

    assert expected_descriptions == record['short_description']
def test_alternative_titles_from_marcxml_711():
    """The $a of a 711 field is expected as one alternative title."""
    marcxml = (
        '<record>'
        '  <datafield tag="711" ind1=" " ind2=" ">'
        '    <subfield code="a">GCACSE16</subfield>'
        '  </datafield>'
        '</record>'
    )

    converted = conferences.do(create_record(marcxml))
    record = clean_record(converted)

    assert [{'title': 'GCACSE16'}] == record['alternative_titles']
Esempio n. 34
0
def test_series_name_and_number_from_411__a_n():
    """A 411 with $a and $n is expected as one series with name and number."""
    marcxml = (
        '<datafield tag="411" ind1=" " ind2=" ">'
        '  <subfield code="a">FPCP</subfield>'
        '  <subfield code="n">16</subfield>'
        '</datafield>'
    )  # record/1468357

    parsed = create_record(marcxml)
    record = conferences.do(parsed)

    assert [{'name': 'FPCP', 'number': 16}] == record['series']
def test_address_from_marcxml__111_c():
    """A single 111 $c is expected as one address with country and state."""
    marcxml = (
        '<record>'
        '  <datafield tag="111" ind1=" " ind2=" ">'
        '    <subfield code="c">Austin, Tex.</subfield>'
        '  </datafield>'
        '</record>'
    )

    austin = {
        'country_code': 'US',
        'state': 'US-TX',
        'original_address': 'Austin, Tex.',
    }

    converted = conferences.do(create_record(marcxml))
    record = clean_record(converted)

    assert [austin] == record['address']
def test_series_from_double_411__a():
    """Both $a subfields of one 411 field are expected in ``series``, in order."""
    marcxml = ''.join([
        '<datafield tag="411" ind1=" " ind2=" ">',
        '  <subfield code="a">Tamm Theory Department</subfield>',
        '  <subfield code="a">Sakharov</subfield>',
        '</datafield>',
    ])  # record/969879

    expected_series = ['Tamm Theory Department', 'Sakharov']
    parsed = create_record(marcxml)
    record = clean_record(conferences.do(parsed))

    assert expected_series == record['series']
def test_address_from_270__b():
    """A 270 $b is expected as an address; here it resolves to country 'CA'."""
    marcxml = (
        '<record>'
        '  <datafield tag="270" ind1=" " ind2=" ">'
        '    <subfield code="b">British Columbia</subfield>'
        '  </datafield>'
        '</record>'
    )

    british_columbia = {
        'country_code': 'CA',
        'original_address': 'British Columbia',
    }

    converted = conferences.do(create_record(marcxml))
    record = clean_record(converted)

    assert [british_columbia] == record['address']
def test_acronym_from_111__e():
    """The $e of a 111 field is expected as the only ``acronym`` entry."""
    marcxml = (
        '<datafield tag="111" ind1=" " ind2=" ">'
        '  <subfield code="a">16th Conference on Flavor Physics and CP Violation</subfield>'
        '  <subfield code="c">Hyderabad, INDIA</subfield>'
        '  <subfield code="e">FPCP 2018</subfield>'
        '  <subfield code="g">C18-07-09</subfield>'
        '  <subfield code="x">2018-07-09</subfield>'
        '  <subfield code="y">2018-07-12</subfield>'
        '</datafield>'
    )  # record/1468357

    converted = conferences.do(create_record(marcxml))
    record = clean_record(converted)

    assert ['FPCP 2018'] == record['acronym']
def test_alternative_titles_from_marcxml_711():
    """Expect a 711 ``a`` subfield to become one ``alternative_titles`` entry."""
    marc_xml = ('<record>'
                '  <datafield tag="711" ind1=" " ind2=" ">'
                '    <subfield code="a">GCACSE16</subfield>'
                '  </datafield>'
                '</record>')
    wanted = [{'title': 'GCACSE16'}]

    converted = clean_record(conferences.do(create_record(marc_xml)))

    assert wanted == converted['alternative_titles']
def test_public_notes_from__500_a():
    """Expect a 500 ``a`` subfield to give one schema-valid public note."""
    notes_schema = load_schema('conferences')['properties']['public_notes']

    marc_xml = ('<datafield tag="500" ind1=" " ind2=" ">'
                '  <subfield code="a">Same conf. as Kyoto 1975: none in intervening years</subfield>'
                '</datafield>')  # record/963579
    wanted = [
        {'value': 'Same conf. as Kyoto 1975: none in intervening years'},
    ]

    # No clean_record() here: the raw conversion output is checked directly.
    converted = conferences.do(create_record(marc_xml))
    notes = converted['public_notes']

    assert validate(notes, notes_schema) is None
    assert wanted == notes
# Example no. 41
# 0
def overdo_marc_dict(record):
    """Convert a MARC record with the model of its collection, then clean it.

    Collections are probed in a fixed order and the first match decides which
    model runs; a record matching none of them is converted with ``hep``.
    The converted record is passed through ``clean_record`` before returning.
    """
    def belongs_to(*collections):
        # True as soon as one of the given collections matches the record.
        return any(_collection_in_record(record, name) for name in collections)

    if belongs_to('institution'):
        model = institutions
    elif belongs_to('experiment'):
        model = experiments
    elif belongs_to('journals'):
        model = journals
    elif belongs_to('hepnames'):
        model = hepnames
    elif belongs_to('job', 'jobhidden'):
        model = jobs
    elif belongs_to('conferences'):
        model = conferences
    else:
        model = hep

    return clean_record(model.do(record))
def test_series_name_and_number_from_411__a_n():
    """Expect 411 ``a`` and ``n`` to form one series entry with an int number."""
    marc_xml = ('<datafield tag="411" ind1=" " ind2=" ">'
                '  <subfield code="a">FPCP</subfield>'
                '  <subfield code="n">16</subfield>'
                '</datafield>')  # record/1468357
    wanted = [{'name': 'FPCP', 'number': 16}]

    converted = conferences.do(create_record(marc_xml))

    assert wanted == converted['series']
# Example no. 43
# 0
def overdo_marc_dict(record):
    """Convert a MARC record with the model that matches its collection.

    The collections are tested one after another and the first hit returns
    immediately; records that match none fall through to the ``hep`` model.
    """
    if _collection_in_record(record, 'institution'):
        return institutions.do(record)

    if _collection_in_record(record, 'experiment'):
        return experiments.do(record)

    if _collection_in_record(record, 'journals'):
        return journals.do(record)

    if _collection_in_record(record, 'hepnames'):
        return hepnames.do(record)

    # Both the visible and the hidden job collections use the jobs model.
    is_job = (_collection_in_record(record, 'job') or
              _collection_in_record(record, 'jobhidden'))
    if is_job:
        return jobs.do(record)

    if _collection_in_record(record, 'conferences'):
        return conferences.do(record)

    return hep.do(record)
def test_contact_details_from_marcxml_270_single_p_single_m():
    """Expect one 270 field with one ``p`` and one ``m`` to give one contact.

    The e-mail in the input and in the expected value must be the same
    string. A reserved ``example.org`` placeholder is used for both, because
    the fixture previously carried two different redacted stand-ins that
    could never compare equal.
    """
    snippet = (
        '<record> '
        '  <datafield tag="270" ind1=" " ind2=" ">'
        '    <subfield code="m">lindner@example.org</subfield>'
        '    <subfield code="p">Manfred Lindner</subfield>'
        '  </datafield>'
        '</record>'
    )

    expected = [
        {
            'name': 'Manfred Lindner',
            'email': 'lindner@example.org',
        },
    ]
    result = clean_record(conferences.do(create_record(snippet)))

    assert expected == result['contact_details']
def test_acronym_from_111__e():
    """Expect the lone ``e`` subfield of a 111 field to be the only acronym."""
    marc_xml = ('<datafield tag="111" ind1=" " ind2=" ">'
                '  <subfield code="a">16th Conference on Flavor Physics and CP Violation</subfield>'
                '  <subfield code="c">Hyderabad, INDIA</subfield>'
                '  <subfield code="e">FPCP 2018</subfield>'
                '  <subfield code="g">C18-07-09</subfield>'
                '  <subfield code="x">2018-07-09</subfield>'
                '  <subfield code="y">2018-07-12</subfield>'
                '</datafield>')  # record/1468357
    wanted = ['FPCP 2018']

    converted = clean_record(conferences.do(create_record(marc_xml)))

    assert wanted == converted['acronym']
def test_series_and_series_number_from_411__a_n_and_411_a():
    """Expect two 411 fields to give both series names and series number 51."""
    marc_xml = (
        '<record>'
        '  <datafield tag="411" ind1=" " ind2=" ">'
        '    <subfield code="a">Rencontres de Moriond</subfield>'
        '    <subfield code="n">51</subfield>'
        '  </datafield>'
        '  <datafield tag="411" ind1=" " ind2=" ">'
        '    <subfield code="a">Moriond EW</subfield>'
        '  </datafield>'
        '</record>'
    )  # record/1404073

    converted = clean_record(conferences.do(create_record(marc_xml)))

    assert converted['series_number'] == 51
    assert converted['series'] == ['Rencontres de Moriond', 'Moriond EW']
def test_address_from_270__b():
    """Expect 'British Columbia' in 270 ``b`` to map to country code CA."""
    marc_xml = ('<record>'
                '  <datafield tag="270" ind1=" " ind2=" ">'
                '    <subfield code="b">British Columbia</subfield>'
                '  </datafield>'
                '</record>')
    wanted = [{'country_code': 'CA', 'original_address': 'British Columbia'}]

    converted = clean_record(conferences.do(create_record(marc_xml)))

    assert wanted == converted['address']
# Example no. 48
# 0
def test_series_name_and_number_from_411__n_and_411__a_n():
    """Expect a single series entry when one 411 has only ``n`` and one has both.

    The 411 field carrying just ``n`` is expected to add no entry of its own.
    """
    marc_xml = (
        '<record>'
        '  <datafield tag="411" ind1=" " ind2=" ">'
        '    <subfield code="n">3</subfield>'
        '  </datafield>'
        '  <datafield tag="411" ind1=" " ind2=" ">'
        '    <subfield code="a">WIN</subfield>'
        '    <subfield code="n">3</subfield>'
        '  </datafield>'
        '</record>'
    )  # record/963914
    wanted = [{'name': 'WIN', 'number': 3}]

    converted = conferences.do(create_record(marc_xml))

    assert wanted == converted['series']
def test_note_from__double_500_a():
    """Expect two 500 fields to give two ``note`` strings in document order."""
    marc_xml = ('<record>'
                '  <datafield tag="500" ind1=" " ind2=" ">'
                '    <subfield code="a">Marion White, PhD (Argonne) Conference Chair Vladimir Shiltsev, PhD (FNAL) Scientific Program Chair Maria Power (Argonne) Conference Editor/Scientific Secretariat</subfield>'
                '  </datafield>'
                '  <datafield tag="500" ind1=" " ind2=" ">'
                '    <subfield code="a">Will be published in: JACoW</subfield>'
                '  </datafield>'
                '</record>')  # record/1445071
    wanted = [
        'Marion White, PhD (Argonne) Conference Chair Vladimir Shiltsev, PhD (FNAL) Scientific Program Chair Maria Power (Argonne) Conference Editor/Scientific Secretariat',
        'Will be published in: JACoW',
    ]

    converted = clean_record(conferences.do(create_record(marc_xml)))

    assert wanted == converted['note']
def test_alternative_titles_marcxml_711_with_b():
    """Expect 711 ``a`` as ``title`` and 711 ``b`` as ``searchable_title``."""
    marc_xml = ('<record>'
                '  <datafield tag="711" ind1=" " ind2=" ">'
                '    <subfield code="a">XX Riunione Nazionale di Elettromagnetismo</subfield>'
                '    <subfield code="b">Padova</subfield>'
                '  </datafield>'
                '</record>')
    wanted = [{
        'title': 'XX Riunione Nazionale di Elettromagnetismo',
        'searchable_title': 'Padova',
    }]

    converted = clean_record(conferences.do(create_record(marc_xml)))

    assert wanted == converted['alternative_titles']
def test_contact_details_from_marcxml_270_single_p_double_m():
    """One person having two e-mail addresses. We do not support it.

    Only the name is expected in the contact entry. The two ``m`` subfields
    hold distinct reserved ``example.org`` placeholders so that the fixture
    really contains two different addresses; before, both held the same
    redacted stand-in.
    """
    snippet = (
        '<record> '
        '  <datafield tag="270" ind1=" " ind2=" ">'
        '    <subfield code="m">lindner@example.org</subfield>'
        '    <subfield code="m">lindner@example.com</subfield>'
        '    <subfield code="p">Manfred Lindner</subfield>'
        '  </datafield>'
        '</record>'
    )

    expected = [
        {
            'name': 'Manfred Lindner',
        },
    ]
    result = clean_record(conferences.do(create_record(snippet)))

    assert expected == result['contact_details']
def test_note_from_500__multiple_a():
    """Expect every ``a`` subfield of one 500 field as its own ``note`` entry."""
    marc_xml = ('<datafield tag="500" ind1=" " ind2=" ">'
                '  <subfield code="a">(BSS2011) Trends in Modern Physics: 19 - 21 August, 2011</subfield>'
                '  <subfield code="a">(BS2011) Cosmology and Particle Physics Beyond the Standard Models: 21-17 August, 2011</subfield>'
                '  <subfield code="a">(JW2011) Scientific and Human Legacy of Julius Wess: 27-28 August, 2011</subfield>'
                '  <subfield code="a">(BW2011) Particle Physcs from TeV to Plank Scale: 28 August - 1 September, 2011</subfield>'
                '</datafield>')
    wanted = [
        '(BSS2011) Trends in Modern Physics: 19 - 21 August, 2011',
        '(BS2011) Cosmology and Particle Physics Beyond the Standard Models: 21-17 August, 2011',
        '(JW2011) Scientific and Human Legacy of Julius Wess: 27-28 August, 2011',
        '(BW2011) Particle Physcs from TeV to Plank Scale: 28 August - 1 September, 2011',
    ]

    converted = clean_record(conferences.do(create_record(marc_xml)))

    assert wanted == converted['note']
def test_contact_details_from_marcxml_270_double_p_single_m():
    """Two people having same e-mail address. We do not support it.

    Only the e-mail is expected in the contact entry. The input and the
    expected value share one reserved ``example.org`` placeholder; before,
    they carried two different redacted stand-ins that could never compare
    equal.
    """
    snippet = (
        '<record> '
        '  <datafield tag="270" ind1=" " ind2=" ">'
        '    <subfield code="m">lindner@example.org</subfield>'
        '    <subfield code="p">Manfred Lindner</subfield>'
        '    <subfield code="p">Boogeyman</subfield>'
        '  </datafield>'
        '</record>'
    )

    expected = [
        {
            'email': 'lindner@example.org',
        },
    ]
    result = clean_record(conferences.do(create_record(snippet)))

    assert expected == result['contact_details']
def test_address_from_111__a_c_d_g_x_y():
    """Expect 111 ``c`` of 'UNITED STATES' to give an address with country US."""
    marc_xml = ('<datafield tag="111" ind1=" " ind2=" ">'
                '  <subfield code="a">Twentieth Power Modulator Symposium, 1992</subfield>'
                '  <subfield code="d">23-25 Jun 1992</subfield>'
                '  <subfield code="x">1992-06-23</subfield>'
                '  <subfield code="c">UNITED STATES</subfield>'
                '  <subfield code="g">C92-06-23.1</subfield>'
                '  <subfield code="y">1992-06-25</subfield>'
                '</datafield>')
    wanted = [{'country_code': 'US', 'original_address': 'UNITED STATES'}]

    converted = clean_record(conferences.do(create_record(marc_xml)))

    assert wanted == converted['address']
def test_acronym_from_111__e_e():
    """Expect two ``e`` subfields of one 111 field to give two acronyms."""
    marc_xml = ('<datafield tag="111" ind1=" " ind2=" ">'
                '  <subfield code="a">11th international vacuum congress and 7th international conference on solid surfaces</subfield>'
                '  <subfield code="c">Cologne, Germany</subfield>'
                '  <subfield code="d">25 – 29 Sep 1989</subfield>'
                '  <subfield code="e">IVC-11</subfield>'
                '  <subfield code="e">ICSS-7</subfield>'
                '  <subfield code="g">C89-09-25.3</subfield>'
                '  <subfield code="x">1989-09-25</subfield>'
                '  <subfield code="y">1989-09-29</subfield>'
                '</datafield>')  # record/1308774
    wanted = ['IVC-11', 'ICSS-7']

    converted = clean_record(conferences.do(create_record(marc_xml)))

    assert wanted == converted['acronym']
def test_titles_from_marcxml_111():
    """Expect the ``a`` subfield of a 111 field to be the only ``titles`` entry."""
    marc_xml = ('<record>'
                '  <datafield tag="111" ind1=" " ind2=" ">'
                '    <subfield code="a">NASA Laboratory Astrophysics Workshop</subfield>'
                '    <subfield code="d">14-16 Feb 2006</subfield>'
                '    <subfield code="x">2006-02-14</subfield>'
                '    <subfield code="c">Las Vegas, Nevada</subfield>'
                '    <subfield code="g">C06-02-14</subfield>'
                '    <subfield code="y">2006-02-16</subfield>'
                '  </datafield>'
                '</record>')
    wanted = [{'title': 'NASA Laboratory Astrophysics Workshop'}]

    converted = clean_record(conferences.do(create_record(marc_xml)))

    assert wanted == converted['titles']
# Example no. 57
# 0
def test_double_series_name_from_double_411__a():
    """Expect two 411 fields with only ``a`` to give two name-only entries."""
    marc_xml = (
        '<record>'
        '  <datafield tag="411" ind1=" " ind2=" ">'
        '    <subfield code="a">SNPS</subfield>'
        '  </datafield>'
        '  <datafield tag="411" ind1=" " ind2=" ">'
        '    <subfield code="a">NSS</subfield>'
        '  </datafield>'
        '</record>'
    )  # record/964177
    wanted = [{'name': 'SNPS'}, {'name': 'NSS'}]

    converted = conferences.do(create_record(marc_xml))

    assert wanted == converted['series']
# Example no. 58
# 0
def test_double_series_name_and_number_from_double_411__a_n():
    """Expect two 411 fields with ``a`` and ``n`` to give two full entries."""
    marc_xml = (
        '<record>'
        '  <datafield tag="411" ind1=" " ind2=" ">'
        '    <subfield code="a">ICHEP</subfield>'
        '    <subfield code="n">5</subfield>'
        '  </datafield>'
        '  <datafield tag="411" ind1=" " ind2=" ">'
        '    <subfield code="a">Rochester</subfield>'
        '    <subfield code="n">5</subfield>'
        '  </datafield>'
        '</record>'
    )  # record/974856
    wanted = [
        {'name': 'ICHEP', 'number': 5},
        {'name': 'Rochester', 'number': 5},
    ]

    converted = conferences.do(create_record(marc_xml))

    assert wanted == converted['series']
def test_titles_from_marcxml_111_with_two_a():
    """Expect one title per ``a`` subfield, each sharing the ``b`` subtitle."""
    marc_xml = ('<record>'
                '  <datafield tag="111" ind1=" " ind2=" ">'
                '    <subfield code="a">Conférence IAP 2013</subfield>'
                '    <subfield code="a">75 Anniversary Conference</subfield>'
                '    <subfield code="b">The origin of the Hubble sequence</subfield>'
                '  </datafield>'
                '</record>')
    wanted = [
        {'title': u'Conférence IAP 2013',
         'subtitle': 'The origin of the Hubble sequence'},
        {'title': '75 Anniversary Conference',
         'subtitle': 'The origin of the Hubble sequence'},
    ]

    converted = clean_record(conferences.do(create_record(marc_xml)))

    assert wanted == converted['titles']
# Example no. 60
# 0
def test_series_name_and_number_and_series_number_from_411__a_n_and_411__a():
    """Expect a numbered entry and a name-only entry from two 411 fields."""
    marc_xml = (
        '<record>'
        '  <datafield tag="411" ind1=" " ind2=" ">'
        '    <subfield code="a">EDS</subfield>'
        '    <subfield code="n">13</subfield>'
        '  </datafield>'
        '  <datafield tag="411" ind1=" " ind2=" ">'
        '     <subfield code="a">BLOIS</subfield>'
        '  </datafield>'
        '</record>'
    )  # record/980229
    wanted = [{'name': 'EDS', 'number': 13}, {'name': 'BLOIS'}]

    converted = conferences.do(create_record(marc_xml))

    assert wanted == converted['series']