def test_contact_details_from_multiple_marcxml_270():
    """Two 270 datafields should produce two ``contact_details`` entries."""
    # NOTE(review): the e-mail literals in this test ('[email protected]' in
    # the input, '*****@*****.**' in the expectation) look like they were
    # masked by an obfuscation pass -- confirm against the upstream fixture.
    marcxml = (
        '<record> '
        '  <datafield tag="270" ind1=" " ind2=" ">'
        '    <subfield code="m">[email protected]</subfield>'
        '    <subfield code="p">Manfred Lindner</subfield>'
        '  </datafield>'
        '  <datafield tag="270" ind1=" " ind2=" ">'
        '    <subfield code="p">Wynton Marsalis</subfield>'
        '  </datafield>'
        '</record>'
    )
    wanted = [
        {'name': 'Manfred Lindner', 'email': '*****@*****.**'},
        {'name': 'Wynton Marsalis'},
    ]

    converted = experiments.do(create_record(marcxml))
    cleaned = clean_record(converted)

    assert wanted == cleaned['contact_details']
Example #2
0
def convert_marcxml(source):
    """Convert MARC XML to JSON.

    Generator.  Reads ``source`` in full and, for every chunk produced by
    ``split_blob``, yields the record converted by the dojson model whose
    collection matches, with empty values stripped.  Records matching none
    of the listed collections are converted with the ``hep`` model.
    """
    from dojson.contrib.marc21.utils import create_record, split_blob

    from inspirehep.dojson.utils import strip_empty_values
    from inspirehep.dojson.hep import hep
    from inspirehep.dojson.institutions import institutions
    from inspirehep.dojson.journals import journals
    from inspirehep.dojson.experiments import experiments
    from inspirehep.dojson.hepnames import hepnames
    from inspirehep.dojson.jobs import jobs
    from inspirehep.dojson.conferences import conferences

    # Checked in order; the first entry with a matching collection wins.
    models_by_collection = (
        (('institution',), institutions),
        (('experiment',), experiments),
        (('journals',), journals),
        (('hepnames',), hepnames),
        (('job', 'jobhidden'), jobs),
        (('conferences',), conferences),
    )

    for blob in split_blob(source.read()):
        record = create_record(blob)
        model = hep
        for collections, candidate in models_by_collection:
            if any(_collection_in_record(record, name)
                   for name in collections):
                model = candidate
                break
        yield strip_empty_values(model.do(record))
Example #3
0
def test_description_from_multiple_520__a():
    """Four 520__a fields should give four ``description`` entries, in order."""
    # record/1110568
    marcxml = (
        '<record>'
        '  <datafield tag="520" ind1=" " ind2=" ">'
        '    <subfield code="a">DAMA is an observatory for rare processes which develops and uses several low-background set-ups at the Gran Sasso National Laboratory of the I.N.F.N. (LNGS). The main experimental set-ups are: i) DAMA/NaI (about 100 kg of highly radiopure NaI(Tl)), which completed its data taking on July 2002</subfield>'
        '  </datafield>'
        '  <datafield tag="520" ind1=" " ind2=" ">'
        '    <subfield code="a">ii) DAMA/LXe (about 6.5 kg liquid Kr-free Xenon enriched either in 129Xe or in 136Xe)</subfield>'
        '  </datafield>'
        '  <datafield tag="520" ind1=" " ind2=" ">'
        '    <subfield code="a">iii) DAMA/R&amp;D, devoted to tests on prototypes and to small scale experiments, mainly on the investigations of double beta decay modes in various isotopes. iv) the second generation DAMA/LIBRA set-up (about 250 kg highly radiopure NaI(Tl)) in operation since March 2003</subfield>'
        '  </datafield>'
        '  <datafield tag="520" ind1=" " ind2=" ">'
        '    <subfield code="a">v) the low background DAMA/Ge detector mainly devoted to sample measurements: in some measurements on rare processes the low-background Germanium detectors of the LNGS facility are also used. Moreover, a third generation R&amp;D is in progress towards a possible 1 ton set-up, DAMA proposed in 1996. In particular, the DAMA/NaI and the DAMA/LIBRA set-ups have investigated the presence of Dark Matter particles in the galactic halo by exploiting the Dark Matter annual modulation signature.</subfield>'
        '  </datafield>'
        '</record>'
    )

    # The XML entity ``&amp;`` in the input is expected back as a plain ``&``.
    wanted = [
        'DAMA is an observatory for rare processes which develops and uses several low-background set-ups at the Gran Sasso National Laboratory of the I.N.F.N. (LNGS). The main experimental set-ups are: i) DAMA/NaI (about 100 kg of highly radiopure NaI(Tl)), which completed its data taking on July 2002',
        'ii) DAMA/LXe (about 6.5 kg liquid Kr-free Xenon enriched either in 129Xe or in 136Xe)',
        'iii) DAMA/R&D, devoted to tests on prototypes and to small scale experiments, mainly on the investigations of double beta decay modes in various isotopes. iv) the second generation DAMA/LIBRA set-up (about 250 kg highly radiopure NaI(Tl)) in operation since March 2003',
        'v) the low background DAMA/Ge detector mainly devoted to sample measurements: in some measurements on rare processes the low-background Germanium detectors of the LNGS facility are also used. Moreover, a third generation R&D is in progress towards a possible 1 ton set-up, DAMA proposed in 1996. In particular, the DAMA/NaI and the DAMA/LIBRA set-ups have investigated the presence of Dark Matter particles in the galactic halo by exploiting the Dark Matter annual modulation signature.',
    ]

    record = create_record(marcxml)
    converted = experiments.do(record)

    assert wanted == converted['description']
def test_spokespersons_from_702__a_i_z():
    """A 702 field with $a, $i and $z should give one valid spokesperson."""
    subschema = load_schema('experiments')['properties']['spokespersons']

    # record/1108189
    marcxml = (
        '<datafield tag="702" ind1=" " ind2=" ">'
        '  <subfield code="a">Hogan, Craig J.</subfield>'
        '  <subfield code="i">INSPIRE-00090662</subfield>'
        '  <subfield code="z">Current</subfield>'
        '</datafield>'
    )

    inspire_id = {'schema': 'INSPIRE ID', 'value': 'INSPIRE-00090662'}
    wanted = [
        {
            'ids': [inspire_id],
            'name': 'Hogan, Craig J.',
            'current': True,
            'curated_relation': False,
        },
    ]

    record = create_record(marcxml)
    converted = experiments.do(record)

    assert validate(converted['spokespersons'], subschema) is None
    assert wanted == converted['spokespersons']
Example #5
0
def test_related_experiments_from_double_510__a_w_0():
    """Two 510 fields with $0, $a, $w should give two related experiments."""
    # record/1386519
    marcxml = (
        '<record>'
        '  <datafield tag="510" ind1=" " ind2=" ">'
        '    <subfield code="0">1108293</subfield>'
        '    <subfield code="a">XENON</subfield>'
        '    <subfield code="w">a</subfield>'
        '  </datafield>'
        '  <datafield tag="510" ind1=" " ind2=" ">'
        '    <subfield code="0">1386527</subfield>'
        '    <subfield code="a">XENON100</subfield>'
        '    <subfield code="w">a</subfield>'
        '  </datafield>'
        '</record>'
    )

    xenon = {
        'name': 'XENON',
        'record': {'$ref': 'http://localhost:5000/api/experiments/1108293'},
        'relation': 'predecessor',
        'curated_relation': True,
    }
    xenon100 = {
        'name': 'XENON100',
        'record': {'$ref': 'http://localhost:5000/api/experiments/1386527'},
        'relation': 'predecessor',
        'curated_relation': True,
    }
    wanted = [xenon, xenon100]

    converted = experiments.do(create_record(marcxml))

    assert wanted == converted['related_experiments']
def test_contact_details_from_multiple_marcxml_270():
    """Two 270 datafields should produce two valid ``contact_details``."""
    subschema = load_schema('experiments')['properties']['contact_details']

    # NOTE(review): the e-mail literals in this test ('[email protected]' in
    # the input, '*****@*****.**' in the expectation) look like they were
    # masked by an obfuscation pass -- confirm against the upstream fixture.
    marcxml = (
        '<record> '
        '  <datafield tag="270" ind1=" " ind2=" ">'
        '    <subfield code="m">[email protected]</subfield>'
        '    <subfield code="p">Manfred Lindner</subfield>'
        '  </datafield>'
        '  <datafield tag="270" ind1=" " ind2=" ">'
        '    <subfield code="p">Wynton Marsalis</subfield>'
        '  </datafield>'
        '</record>'
    )
    wanted = [
        {'name': 'Manfred Lindner', 'email': '*****@*****.**'},
        {'name': 'Wynton Marsalis'},
    ]

    record = create_record(marcxml)
    converted = experiments.do(record)

    assert validate(converted['contact_details'], subschema) is None
    assert wanted == converted['contact_details']
def test_date_started_and_date_completed_from_046():
    """Check ``date_started``/``date_completed`` built from three 046 fields.

    The record carries 046__s (1996), 046__t (2002) and 046__x (yes) in
    separate datafields.  Each resulting date is validated against its own
    subschema and then compared with the expected value.
    """
    schema = load_schema('experiments')
    subschema_date_started = schema['properties']['date_started']
    # Validate the completion date against its own subschema rather than
    # reusing the start-date one.
    subschema_date_completed = schema['properties']['date_completed']

    snippet = (
        '<record>'
        '  <datafield tag="046" ind1=" " ind2=" ">'
        '    <subfield code="s">1996</subfield>'
        '  </datafield>'
        '  <datafield tag="046" ind1=" " ind2=" ">'
        '    <subfield code="t">2002</subfield>'
        '  </datafield>'
        '  <datafield tag="046" ind1=" " ind2=" ">'
        '    <subfield code="x">yes</subfield>'
        '  </datafield>'
        '</record>'
    )  # record/1108324

    result = experiments.do(create_record(snippet))

    assert validate(result['date_started'], subschema_date_started) is None
    assert validate(result['date_completed'], subschema_date_completed) is None
    assert result['date_started'] == '1996'
    assert result['date_completed'] == '2002'
Example #8
0
def convert_marcxml(source):
    """Convert MARC XML to JSON.

    Generator.  ``source.read()`` is handed to ``split_blob``; each chunk is
    turned into a record, converted by the first dojson model whose
    collection is found in the record (``hep`` when none is), stripped of
    empty values and yielded.
    """
    from dojson.contrib.marc21.utils import create_record, split_blob

    from inspirehep.dojson.utils import strip_empty_values
    from inspirehep.dojson.hep import hep
    from inspirehep.dojson.institutions import institutions
    from inspirehep.dojson.journals import journals
    from inspirehep.dojson.experiments import experiments
    from inspirehep.dojson.hepnames import hepnames
    from inspirehep.dojson.jobs import jobs
    from inspirehep.dojson.conferences import conferences

    # Order matters: collections are probed top to bottom, first hit wins.
    dispatch = (
        (('institution',), institutions),
        (('experiment',), experiments),
        (('journals',), journals),
        (('hepnames',), hepnames),
        (('job', 'jobhidden'), jobs),
        (('conferences',), conferences),
    )

    for chunk in split_blob(source.read()):
        record = create_record(chunk)
        chosen = hep
        for names, model in dispatch:
            if any(_collection_in_record(record, name) for name in names):
                chosen = model
                break
        yield strip_empty_values(chosen.do(record))
def test_description_from_multiple_520__a():
    """Four 520__a fields should give four ``description`` entries, in order."""
    # record/1110568
    marcxml = (
        '<record>'
        '  <datafield tag="520" ind1=" " ind2=" ">'
        '    <subfield code="a">DAMA is an observatory for rare processes which develops and uses several low-background set-ups at the Gran Sasso National Laboratory of the I.N.F.N. (LNGS). The main experimental set-ups are: i) DAMA/NaI (about 100 kg of highly radiopure NaI(Tl)), which completed its data taking on July 2002</subfield>'
        '  </datafield>'
        '  <datafield tag="520" ind1=" " ind2=" ">'
        '    <subfield code="a">ii) DAMA/LXe (about 6.5 kg liquid Kr-free Xenon enriched either in 129Xe or in 136Xe)</subfield>'
        '  </datafield>'
        '  <datafield tag="520" ind1=" " ind2=" ">'
        '    <subfield code="a">iii) DAMA/R&amp;D, devoted to tests on prototypes and to small scale experiments, mainly on the investigations of double beta decay modes in various isotopes. iv) the second generation DAMA/LIBRA set-up (about 250 kg highly radiopure NaI(Tl)) in operation since March 2003</subfield>'
        '  </datafield>'
        '  <datafield tag="520" ind1=" " ind2=" ">'
        '    <subfield code="a">v) the low background DAMA/Ge detector mainly devoted to sample measurements: in some measurements on rare processes the low-background Germanium detectors of the LNGS facility are also used. Moreover, a third generation R&amp;D is in progress towards a possible 1 ton set-up, DAMA proposed in 1996. In particular, the DAMA/NaI and the DAMA/LIBRA set-ups have investigated the presence of Dark Matter particles in the galactic halo by exploiting the Dark Matter annual modulation signature.</subfield>'
        '  </datafield>'
        '</record>'
    )

    # The XML entity ``&amp;`` in the input is expected back as a plain ``&``.
    wanted = [
        'DAMA is an observatory for rare processes which develops and uses several low-background set-ups at the Gran Sasso National Laboratory of the I.N.F.N. (LNGS). The main experimental set-ups are: i) DAMA/NaI (about 100 kg of highly radiopure NaI(Tl)), which completed its data taking on July 2002',
        'ii) DAMA/LXe (about 6.5 kg liquid Kr-free Xenon enriched either in 129Xe or in 136Xe)',
        'iii) DAMA/R&D, devoted to tests on prototypes and to small scale experiments, mainly on the investigations of double beta decay modes in various isotopes. iv) the second generation DAMA/LIBRA set-up (about 250 kg highly radiopure NaI(Tl)) in operation since March 2003',
        'v) the low background DAMA/Ge detector mainly devoted to sample measurements: in some measurements on rare processes the low-background Germanium detectors of the LNGS facility are also used. Moreover, a third generation R&D is in progress towards a possible 1 ton set-up, DAMA proposed in 1996. In particular, the DAMA/NaI and the DAMA/LIBRA set-ups have investigated the presence of Dark Matter particles in the galactic halo by exploiting the Dark Matter annual modulation signature.',
    ]

    converted = experiments.do(create_record(marcxml))
    cleaned = clean_record(converted)

    assert wanted == cleaned['description']
def test_spokespersons_from_702__a_i_z():
    """A 702 field with $a, $i and $z should give one spokesperson entry."""
    # record/1108189
    marcxml = (
        '<datafield tag="702" ind1=" " ind2=" ">'
        '  <subfield code="a">Hogan, Craig J.</subfield>'
        '  <subfield code="i">INSPIRE-00090662</subfield>'
        '  <subfield code="z">Current</subfield>'
        '</datafield>'
    )

    inspire_id = {'type': 'INSPIRE ID', 'value': 'INSPIRE-00090662'}
    wanted = [
        {
            'ids': [inspire_id],
            'name': 'Hogan, Craig J.',
            'current': True,
            'curated_relation': False,
        },
    ]

    converted = experiments.do(create_record(marcxml))
    cleaned = clean_record(converted)

    assert wanted == cleaned['spokespersons']
def test_experiment_names_and_affiliations_from_marcxml_119():
    """One 119 field with $a, $u, $z should fill names and affiliations."""
    subschema = load_schema('experiments')['properties']['experiment_names']

    # record/1108206
    marcxml = (
        '<record>'
        '  <datafield tag="119" ind1=" " ind2=" ">'
        '    <subfield code="a">CERN-ALPHA</subfield>'
        '    <subfield code="u">CERN</subfield>'
        '    <subfield code="z">902725</subfield>'
        '  </datafield>'
        '</record>'
    )

    wanted_names = [{'title': 'CERN-ALPHA'}]
    wanted_affiliations = [
        {
            'curated_relation': True,
            'name': 'CERN',
            'record': {
                '$ref': 'http://localhost:5000/api/institutions/902725',
            },
        },
    ]

    converted = experiments.do(create_record(marcxml))

    assert validate(converted['experiment_names'], subschema) is None
    assert converted['experiment_names'] == wanted_names
    assert converted['affiliations'] == wanted_affiliations
def test_related_experiments_from_double_510__a_w_0():
    """Two 510 fields with $0, $a, $w should give two related experiments."""
    # record/1386519
    marcxml = (
        '<record>'
        '  <datafield tag="510" ind1=" " ind2=" ">'
        '    <subfield code="0">1108293</subfield>'
        '    <subfield code="a">XENON</subfield>'
        '    <subfield code="w">a</subfield>'
        '  </datafield>'
        '  <datafield tag="510" ind1=" " ind2=" ">'
        '    <subfield code="0">1386527</subfield>'
        '    <subfield code="a">XENON100</subfield>'
        '    <subfield code="w">a</subfield>'
        '  </datafield>'
        '</record>'
    )

    xenon = {
        'name': 'XENON',
        'record': {
            '$ref': 'http://localhost:5000/api/experiments/1108293',
        },
        'relation': 'predecessor',
        'curated_relation': True,
    }
    xenon100 = {
        'name': 'XENON100',
        'record': {
            '$ref': 'http://localhost:5000/api/experiments/1386527',
        },
        'relation': 'predecessor',
        'curated_relation': True,
    }
    wanted = [xenon, xenon100]

    converted = experiments.do(create_record(marcxml))
    cleaned = clean_record(converted)

    assert wanted == cleaned['related_experiments']
Example #13
0
def test_experiment_names_and_affiliations_from_marcxml_multiple_119():
    """Four 119 fields: one name plus three affiliations, one with a $z."""
    # record/1228417
    marcxml = (
        '<record>'
        '  <datafield tag="119" ind1=" " ind2=" ">'
        '    <subfield code="a">LATTICE-UKQCD</subfield>'
        '  </datafield>'
        '  <datafield tag="119" ind1=" " ind2=" ">'
        '    <subfield code="u">Cambridge U.</subfield>'
        '  </datafield>'
        '  <datafield tag="119" ind1=" " ind2=" ">'
        '    <subfield code="u">Edinburgh U.</subfield>'
        '    <subfield code="z">902787</subfield>'
        '  </datafield>'
        '  <datafield tag="119" ind1=" " ind2=" ">'
        '    <subfield code="u">Swansea U.</subfield>'
        '  </datafield>'
        '</record>'
    )

    wanted_names = [{'title': 'LATTICE-UKQCD'}]
    # Only the affiliation that carries a $z is expected to be curated and
    # to link to an institution record.
    wanted_affiliations = [
        {
            'curated_relation': False,
            'name': 'Cambridge U.',
        },
        {
            'curated_relation': True,
            'name': 'Edinburgh U.',
            'record': {
                '$ref': 'http://localhost:5000/api/institutions/902787',
            },
        },
        {
            'curated_relation': False,
            'name': 'Swansea U.',
        },
    ]

    converted = experiments.do(create_record(marcxml))

    assert converted['experiment_names'] == wanted_names
    assert converted['affiliations'] == wanted_affiliations
Example #14
0
def test_collaboration_from_710__g():
    """A single 710__g sets ``collaboration`` and no alternative names."""
    # record/1108199
    marcxml = (
        '<datafield tag="710" ind1=" " ind2=" ">'
        '  <subfield code="g">DarkSide</subfield>'
        '</datafield>'
    )

    record = create_record(marcxml)
    converted = experiments.do(record)

    assert converted['collaboration'] == 'DarkSide'
    assert 'collaboration_alternative_names' not in converted
Example #15
0
def test_accelerator_from_693__a():
    """The 693__a value should come back as ``accelerator``."""
    # record/1108206
    marcxml = (
        '<datafield tag="693" ind1=" " ind2=" ">'
        '  <subfield code="a">AD</subfield>'
        '</datafield>'
    )
    wanted = 'AD'

    record = create_record(marcxml)
    converted = experiments.do(record)

    assert wanted == converted['accelerator']
Example #16
0
def create_record(data, force=False, dry_run=False):
    """Convert a MARCXML record to JSON and optionally store it.

    :param data: MARCXML handed to ``marc_create_record``.
    :param force: when true and the converted JSON carries a
        ``control_number`` or ``recid``, create the record or overwrite the
        existing one.
    :param dry_run: when true, only convert; nothing is stored and no
        ``InspireProdRecords`` entry is prepared.
    :returns: ``(recid, json)`` on a dry run, ``(control_number, dict(record))``
        after storing, ``None`` when neither applies.
    :raises Exception: any error raised during conversion or storage is
        re-raised after the failure has been recorded.
    """
    record = marc_create_record(data)
    recid = None
    if '001' in record:
        recid = int(record['001'][0])

    # Stays ``None`` on dry runs and for records without a 001 field, so the
    # error handler below never touches an unbound name.
    prod_record = None
    if not dry_run and recid:
        prod_record = InspireProdRecords(recid=recid)
        prod_record.marcxml = data

    try:
        # The first matching collection decides the dojson model; ``hep`` is
        # the fallback.
        if _collection_in_record(record, 'institution'):
            json = strip_empty_values(institutions.do(record))
        elif _collection_in_record(record, 'experiment'):
            json = strip_empty_values(experiments.do(record))
        elif _collection_in_record(record, 'journals'):
            json = strip_empty_values(journals.do(record))
        elif _collection_in_record(record, 'hepnames'):
            json = strip_empty_values(hepnames.do(record))
        elif _collection_in_record(record, 'job') or \
                _collection_in_record(record, 'jobhidden'):
            json = strip_empty_values(jobs.do(record))
        elif _collection_in_record(record, 'conferences'):
            json = strip_empty_values(conferences.do(record))
        else:
            json = strip_empty_values(hep.do(record))
        if dry_run:
            return recid, json

        if force and any(key in json for key in ('control_number', 'recid')):
            try:
                control_number = json['control_number']
            except KeyError:
                control_number = json['recid']
            control_number = int(control_number)
            # Searches if record already exists.
            record = Record.get_record(control_number)
            if record is None:
                # Adds the record to the db session.
                rec = RecordModel(id=control_number)
                db.session.merge(rec)
                record = Record.create(json)
            else:
                record = Record(json, model=record.model)
                record.commit()
            if prod_record is not None:
                prod_record.successful = True
                db.session.merge(prod_record)
            logger.info("Elaborated record {}".format(control_number))
            return control_number, dict(record)
    except Exception:
        # Flag the failure only when a prod record was actually prepared; on
        # a dry run there is none, and touching it would mask the original
        # exception with a NameError.
        if prod_record is not None:
            prod_record.successful = False
            db.session.merge(prod_record)
        if recid:
            logger.exception("Error in elaborating record ID {}".format(recid))
        raise
def test_collaboration_from_710__g():
    """A single 710__g sets ``collaboration`` and no alternative names."""
    # record/1108199
    marcxml = (
        '<datafield tag="710" ind1=" " ind2=" ">'
        '  <subfield code="g">DarkSide</subfield>'
        '</datafield>'
    )

    converted = experiments.do(create_record(marcxml))
    cleaned = clean_record(converted)

    assert cleaned['collaboration'] == 'DarkSide'
    assert 'collaboration_alternative_names' not in cleaned
def test_accelerator_from_693__a():
    """The 693__a value should come back as ``accelerator``."""
    # record/1108206
    marcxml = (
        '<datafield tag="693" ind1=" " ind2=" ">'
        '  <subfield code="a">AD</subfield>'
        '</datafield>'
    )
    wanted = 'AD'

    converted = experiments.do(create_record(marcxml))
    cleaned = clean_record(converted)

    assert wanted == cleaned['accelerator']
Example #19
0
def test_description_from_520__a():
    """A single 520__a should give a one-element ``description`` list."""
    # record/1108188
    marcxml = (
        '<datafield tag="520" ind1=" " ind2=" ">'
        '  <subfield code="a">The Muon Accelerator Program (MAP) was created in 2010 to unify the DOE supported R&amp;D in the U.S. aimed at developing the concepts and technologies required for Muon Colliders and Neutrino Factories. These muon based facilities have the potential to discover and explore new exciting fundamental physics, but will require the development of demanding technologies and innovative concepts. The MAP aspires to prove the feasibility of a Muon Collider within a few years, and to make significant contributions to the international effort devoted to developing Neutrino Factories. MAP was formally approved on March 18, 2011.</subfield>'
        '</datafield>'
    )

    # The XML entity ``&amp;`` in the input is expected back as a plain ``&``.
    wanted = [
        'The Muon Accelerator Program (MAP) was created in 2010 to unify the DOE supported R&D in the U.S. aimed at developing the concepts and technologies required for Muon Colliders and Neutrino Factories. These muon based facilities have the potential to discover and explore new exciting fundamental physics, but will require the development of demanding technologies and innovative concepts. The MAP aspires to prove the feasibility of a Muon Collider within a few years, and to make significant contributions to the international effort devoted to developing Neutrino Factories. MAP was formally approved on March 18, 2011.',
    ]

    record = create_record(marcxml)
    converted = experiments.do(record)

    assert wanted == converted['description']
def test_description_from_520__a():
    """A single 520__a should give a one-element ``description`` list."""
    # record/1108188
    marcxml = (
        '<datafield tag="520" ind1=" " ind2=" ">'
        '  <subfield code="a">The Muon Accelerator Program (MAP) was created in 2010 to unify the DOE supported R&amp;D in the U.S. aimed at developing the concepts and technologies required for Muon Colliders and Neutrino Factories. These muon based facilities have the potential to discover and explore new exciting fundamental physics, but will require the development of demanding technologies and innovative concepts. The MAP aspires to prove the feasibility of a Muon Collider within a few years, and to make significant contributions to the international effort devoted to developing Neutrino Factories. MAP was formally approved on March 18, 2011.</subfield>'
        '</datafield>'
    )

    # The XML entity ``&amp;`` in the input is expected back as a plain ``&``.
    wanted = [
        'The Muon Accelerator Program (MAP) was created in 2010 to unify the DOE supported R&D in the U.S. aimed at developing the concepts and technologies required for Muon Colliders and Neutrino Factories. These muon based facilities have the potential to discover and explore new exciting fundamental physics, but will require the development of demanding technologies and innovative concepts. The MAP aspires to prove the feasibility of a Muon Collider within a few years, and to make significant contributions to the international effort devoted to developing Neutrino Factories. MAP was formally approved on March 18, 2011.',
    ]

    converted = experiments.do(create_record(marcxml))
    cleaned = clean_record(converted)

    assert wanted == cleaned['description']
def test_experiment_names_and_affiliation_from_marcxml_119():
    """119 $a and $u should fill the first name title and affiliation."""
    marcxml = (
        '<record>'
        '  <datafield tag="119" ind1=" " ind2=" ">'
        '    <subfield code="a">CERN-ALPHA</subfield>'
        '    <subfield code="u">CERN</subfield>'
        '  </datafield>'
        '</record>'
    )

    converted = experiments.do(create_record(marcxml))
    cleaned = clean_record(converted)

    first_affiliation = cleaned['affiliation'][0]
    assert first_affiliation == 'CERN'
    first_name = cleaned['experiment_names'][0]
    assert first_name['title'] == 'CERN-ALPHA'
Example #22
0
def test_collaboration_from_double_710__g():
    """Two 710__g fields: 'BooNE' is the collaboration, 'MiniBooNE' the alias."""
    # record/1110641
    marcxml = (
        '<record>'
        '  <datafield tag="710" ind1=" " ind2=" ">'
        '    <subfield code="g">MiniBooNE</subfield>'
        '  </datafield>'
        '  <datafield tag="710" ind1=" " ind2=" ">'
        '    <subfield code="g">BooNE</subfield>'
        '  </datafield>'
        '</record>'
    )

    record = create_record(marcxml)
    converted = experiments.do(record)

    assert converted['collaboration'] == 'BooNE'
    assert converted['collaboration_alternative_names'] == ['MiniBooNE']
Example #23
0
def create_record(recid, record, force=False, dry_run=False, validation=False):
    """Create record from marc21 model.

    The record is converted with the dojson model of the first matching
    collection (``hep`` when none matches) and stripped of empty values.

    :param recid: identifier used in the validation message and returned on
        a dry run.
    :param record: marc21 record to convert.
    :param force: when true and the JSON has a ``control_number`` or
        ``recid``, create the record or overwrite the existing one.
    :param dry_run: when true, return right after conversion/validation.
    :param validation: when true, validate the JSON; a ``ValidationError`` is
        logged as a warning and reported in ``errors`` instead of raised.
    :returns: ``(errors, recid, json)`` on a dry run,
        ``(errors, control_number, dict(record))`` after storing, ``None``
        otherwise.
    """
    # Checked in order; the first entry with a matching collection wins.
    converters = (
        (('institution',), institutions),
        (('experiment',), experiments),
        (('journals',), journals),
        (('hepnames',), hepnames),
        (('job', 'jobhidden'), jobs),
        (('conferences',), conferences),
    )
    model = hep
    for collections, candidate in converters:
        if any(_collection_in_record(record, name) for name in collections):
            model = candidate
            break
    json_record = strip_empty_values(model.do(record))

    errors = ""
    if validation:
        try:
            validate(json_record)
        except ValidationError as err:
            errors = "ValidationError: Record {0}: {1}".format(recid, err)
            current_app.logger.warning(errors)

    if dry_run:
        return errors, recid, json_record

    identifier_keys = ('control_number', 'recid')
    if not force or not any(key in json_record for key in identifier_keys):
        return None

    try:
        raw_control_number = json_record['control_number']
    except KeyError:
        raw_control_number = json_record['recid']
    control_number = int(raw_control_number)

    # Searches if record already exists.
    with db.session.begin_nested():
        existing = Record.get_record(control_number)
        if existing is None:
            # Adds the record to the db session.
            db.session.merge(RecordModel(id=control_number))
            stored = Record.create(json_record)
        else:
            stored = Record(json_record, model=existing.model)
            stored.commit()
    logger.info("Elaborated record {}".format(control_number))
    return errors, control_number, dict(stored)
def test_accelerator_from_693__a():
    """The 693__a value should come back as a valid ``accelerator``."""
    subschema = load_schema('experiments')['properties']['accelerator']

    # record/1108206
    marcxml = (
        '<datafield tag="693" ind1=" " ind2=" ">'
        '  <subfield code="a">AD</subfield>'
        '</datafield>'
    )
    wanted = 'AD'

    record = create_record(marcxml)
    converted = experiments.do(record)

    assert validate(converted['accelerator'], subschema) is None
    assert wanted == converted['accelerator']
def test_experiment_names_and_affiliation_from_marcxml_119_two_u():
    """Two $u subfields in one 119 field should give two affiliations."""
    marcxml = (
        '<record>'
        '  <datafield tag="119" ind1=" " ind2=" ">'
        '    <subfield code="a">LATTICE-UKQCD</subfield>'
        '    <subfield code="u">Cambridge U.</subfield>'
        '    <subfield code="u">Edinburgh U.</subfield>'
        '  </datafield>'
        '</record>'
    )

    converted = experiments.do(create_record(marcxml))
    cleaned = clean_record(converted)

    assert cleaned['affiliation'] == ['Cambridge U.', 'Edinburgh U.']
    first_name = cleaned['experiment_names'][0]
    assert first_name['title'] == 'LATTICE-UKQCD'
Example #26
0
def test_date_started_from_046__q_s_and_046__r():
    """With 046 $q, $s and $r present, ``date_started`` equals the $s value."""
    # record/1318099
    marcxml = (
        '<record>'
        '  <datafield tag="046" ind1=" " ind2=" ">'
        '    <subfield code="q">2009-08-19</subfield>'
        '    <subfield code="s">2009-11-30</subfield>'
        '  </datafield>'
        '  <datafield tag="046" ind1=" " ind2=" ">'
        '    <subfield code="r">2009-10-08</subfield>'
        '  </datafield>'
        '</record>'
    )
    wanted = '2009-11-30'

    record = create_record(marcxml)
    converted = experiments.do(record)

    assert wanted == converted['date_started']
Example #27
0
def test_titles_from_marcxml_245():
    """A 245__a should give a single entry in ``titles``."""
    marcxml = (
        '<record>'
        '  <datafield tag="245" ind1=" " ind2=" ">'
        '    <subfield code="a">The ALPHA experiment</subfield>'
        '  </datafield>'
        '</record>'
    )
    wanted = [{'title': 'The ALPHA experiment'}]

    record = create_record(marcxml)
    converted = experiments.do(record)

    assert wanted == converted['titles']
Example #28
0
def test_title_variants_from_marcxml_419():
    """A 419__a should give a single entry in ``title_variants``."""
    marcxml = (
        '<record>'
        '  <datafield tag="419" ind1=" " ind2=" ">'
        '    <subfield code="a">ALPHA</subfield>'
        '  </datafield>'
        '</record>'
    )
    wanted = [{'title': 'ALPHA'}]

    record = create_record(marcxml)
    converted = experiments.do(record)

    assert wanted == converted['title_variants']
def test_collaboration_from_710__g():
    """A single 710__g sets a valid ``collaboration`` and no alternatives."""
    subschema = load_schema('experiments')['properties']['collaboration']

    # record/1108199
    marcxml = (
        '<datafield tag="710" ind1=" " ind2=" ">'
        '  <subfield code="g">DarkSide</subfield>'
        '</datafield>'
    )

    record = create_record(marcxml)
    converted = experiments.do(record)

    assert validate(converted['collaboration'], subschema) is None
    assert converted['collaboration'] == 'DarkSide'
    assert 'collaboration_alternative_names' not in converted
Example #30
0
def test_experiment_names_from_marcxml_119():
    """A 119__a should give a single entry in ``experiment_names``."""
    marcxml = (
        '<record>'
        '  <datafield tag="119" ind1=" " ind2=" ">'
        '    <subfield code="a">CERN-ALPHA</subfield>'
        '  </datafield>'
        '</record>'
    )
    wanted = [{'title': 'CERN-ALPHA'}]

    record = create_record(marcxml)
    converted = experiments.do(record)

    assert wanted == converted['experiment_names']
def test_collaboration_from_double_710__g():
    """Two 710__g fields: 'BooNE' is the collaboration, 'MiniBooNE' the alias."""
    # record/1110641
    marcxml = (
        '<record>'
        '  <datafield tag="710" ind1=" " ind2=" ">'
        '    <subfield code="g">MiniBooNE</subfield>'
        '  </datafield>'
        '  <datafield tag="710" ind1=" " ind2=" ">'
        '    <subfield code="g">BooNE</subfield>'
        '  </datafield>'
        '</record>'
    )

    converted = experiments.do(create_record(marcxml))
    cleaned = clean_record(converted)

    assert cleaned['collaboration'] == 'BooNE'
    assert cleaned['collaboration_alternative_names'] == ['MiniBooNE']
Example #32
0
def test_date_started_from_046__q_and_046__r_and_046__x():
    """No ``date_started`` is produced from 046 ``q``, ``r`` and ``x`` alone."""
    marcxml = (
        '<record>'
        '  <datafield tag="046" ind1=" " ind2=" ">'
        '    <subfield code="q">2010</subfield>'
        '  </datafield>'
        '  <datafield tag="046" ind1=" " ind2=" ">'
        '    <subfield code="r">2011-03-18</subfield>'
        '  </datafield>'
        '  <datafield tag="046" ind1=" " ind2=" ">'
        '    <subfield code="x">yes</subfield>'
        '  </datafield>'
        '</record>'
    )  # record/1108188

    converted = experiments.do(create_record(marcxml))

    assert 'date_started' not in converted
def test_title_variants_from_marcxml_419():
    """After cleaning, 419 ``a`` is exposed as a ``title_variants`` entry."""
    xml = ('<record>'
           '  <datafield tag="419" ind1=" " ind2=" ">'
           '    <subfield code="a">ALPHA</subfield>'
           '  </datafield>'
           '</record>')
    cleaned = clean_record(experiments.do(create_record(xml)))

    wanted = [{'title': 'ALPHA'}]

    assert wanted == cleaned['title_variants']
def test_titles_from_marcxml_245():
    """After cleaning, 245 ``a`` is exposed as a ``titles`` entry."""
    xml = ('<record>'
           '  <datafield tag="245" ind1=" " ind2=" ">'
           '    <subfield code="a">The ALPHA experiment</subfield>'
           '  </datafield>'
           '</record>')
    cleaned = clean_record(experiments.do(create_record(xml)))

    wanted = [{'title': 'The ALPHA experiment'}]

    assert wanted == cleaned['titles']
def test_experiment_names_from_marcxml_119():
    """After cleaning, 119 ``a`` is exposed as an ``experiment_names`` entry."""
    xml = ('<record>'
           '  <datafield tag="119" ind1=" " ind2=" ">'
           '    <subfield code="a">CERN-ALPHA</subfield>'
           '  </datafield>'
           '</record>')
    cleaned = clean_record(experiments.do(create_record(xml)))

    wanted = [{'title': 'CERN-ALPHA'}]

    assert wanted == cleaned['experiment_names']
def test_date_started_from_046__q_s_and_046__r():
    """``date_started`` equals the 046 ``s`` value here.

    The record also carries ``q`` and ``r`` dates; neither of them is the
    expected result.
    """
    xml = ('<record>'
           '  <datafield tag="046" ind1=" " ind2=" ">'
           '    <subfield code="q">2009-08-19</subfield>'
           '    <subfield code="s">2009-11-30</subfield>'
           '  </datafield>'
           '  <datafield tag="046" ind1=" " ind2=" ">'
           '    <subfield code="r">2009-10-08</subfield>'
           '  </datafield>'
           '</record>')  # record/1318099

    cleaned = clean_record(experiments.do(create_record(xml)))
    wanted = '2009-11-30'

    assert wanted == cleaned['date_started']
Example #37
0
def test_contact_details_from_marcxml_270_single_p_single_m():
    """One 270 field with ``p`` and ``m`` gives one name + e-mail contact."""
    # NOTE(review): the e-mail literals below look masked in this copy of the
    # test -- confirm against the upstream fixture before relying on them.
    marcxml = (
        '<record> '
        '  <datafield tag="270" ind1=" " ind2=" ">'
        '    <subfield code="m">[email protected]</subfield>'
        '    <subfield code="p">Manfred Lindner</subfield>'
        '  </datafield>'
        '</record>'
    )
    converted = experiments.do(create_record(marcxml))

    wanted = [{'name': 'Manfred Lindner', 'email': '*****@*****.**'}]

    assert wanted == converted['contact_details']
def test_titles_from_419__a():
    """419 ``a`` yields a schema-valid ``titles`` list with one entry."""
    subschema = load_schema('experiments')['properties']['titles']

    xml = ('<datafield tag="419" ind1=" " ind2=" ">'
           '  <subfield code="a">ALPHA</subfield>'
           '</datafield>')  # record/1108206

    wanted = [{'title': 'ALPHA'}]
    titles = experiments.do(create_record(xml))['titles']

    # ``validate`` is expected to return None (and not raise) on valid data.
    assert validate(titles, subschema) is None
    assert wanted == titles
Example #39
0
def test_date_started_and_date_completed_from_046():
    """046 ``s`` maps to ``date_started`` and 046 ``t`` to ``date_completed``."""
    marcxml = (
        '<record>'
        '  <datafield tag="046" ind1=" " ind2=" ">'
        '    <subfield code="s">1996</subfield>'
        '  </datafield>'
        '  <datafield tag="046" ind1=" " ind2=" ">'
        '    <subfield code="t">2002</subfield>'
        '  </datafield>'
        '  <datafield tag="046" ind1=" " ind2=" ">'
        '    <subfield code="x">yes</subfield>'
        '  </datafield>'
        '</record>'
    )  # record/1108324

    converted = experiments.do(create_record(marcxml))

    assert converted['date_started'] == '1996'
    assert converted['date_completed'] == '2002'
Example #40
0
def overdo_marc_dict(record):
    """Convert a MARC record to JSON with the model matching its collection.

    The collection checks run in a fixed order (institution, experiment,
    journals, hepnames, job/jobhidden, conferences); the first one that
    passes selects the model, and ``hep`` is used when none does.

    :param record: MARC Groupable Ordered Dict to convert.
    :return: the selected model's output passed through ``clean_record``.
    """
    if _collection_in_record(record, 'institution'):
        model = institutions
    elif _collection_in_record(record, 'experiment'):
        model = experiments
    elif _collection_in_record(record, 'journals'):
        model = journals
    elif _collection_in_record(record, 'hepnames'):
        model = hepnames
    elif (_collection_in_record(record, 'job') or
            _collection_in_record(record, 'jobhidden')):
        model = jobs
    elif _collection_in_record(record, 'conferences'):
        model = conferences
    else:
        # No dedicated collection matched: treat it as a literature record.
        model = hep

    return clean_record(model.do(record))
Example #41
0
def overdo_marc_dict(record):
    """Convert a MARC record to JSON with the model matching its collection.

    The collection checks run in a fixed order (institution, experiment,
    journals, hepnames, job/jobhidden, conferences); the first one that
    passes selects the model, and ``hep`` is used when none does.

    :param record: MARC Groupable Ordered Dict to convert.
    :return: the raw output of the selected model's ``do``.
    """
    if _collection_in_record(record, 'institution'):
        model = institutions
    elif _collection_in_record(record, 'experiment'):
        model = experiments
    elif _collection_in_record(record, 'journals'):
        model = journals
    elif _collection_in_record(record, 'hepnames'):
        model = hepnames
    elif (_collection_in_record(record, 'job') or
            _collection_in_record(record, 'jobhidden')):
        model = jobs
    elif _collection_in_record(record, 'conferences'):
        model = conferences
    else:
        # No dedicated collection matched: treat it as a literature record.
        model = hep

    return model.do(record)
def test_spokespersons_from_double_702__a_i():
    """Two 702 fields give two schema-valid ``spokespersons`` entries.

    The field that carries an ``x`` subfield is expected to get a ``record``
    reference and ``curated_relation`` True; the other one has no reference
    and ``curated_relation`` False.
    """
    subschema = load_schema('experiments')['properties']['spokespersons']

    xml = ('<record>'
           '  <datafield tag="702" ind1=" " ind2=" ">'
           '    <subfield code="a">Feldman, Gary</subfield>'
           '    <subfield code="i">INSPIRE-00080677</subfield>'
           '    <subfield code="x">1010209</subfield>'
           '  </datafield>'
           '  <datafield tag="702" ind1=" " ind2=" ">'
           '    <subfield code="a">Messier, Mark</subfield>'
           '    <subfield code="i">INSPIRE-00107105</subfield>'
           '  </datafield>'
           '</record>')  # record/1402897

    feldman = {
        'ids': [{'schema': 'INSPIRE ID', 'value': 'INSPIRE-00080677'}],
        'name': 'Feldman, Gary',
        'record': {'$ref': 'http://localhost:5000/api/authors/1010209'},
        'curated_relation': True,
    }
    messier = {
        'ids': [{'schema': 'INSPIRE ID', 'value': 'INSPIRE-00107105'}],
        'name': 'Messier, Mark',
        'curated_relation': False,
    }
    wanted = [feldman, messier]

    spokespersons = experiments.do(create_record(xml))['spokespersons']

    # ``validate`` is expected to return None (and not raise) on valid data.
    assert validate(spokespersons, subschema) is None
    assert wanted == spokespersons
Example #43
0
def test_contact_details_from_marcxml_270_double_p_single_m():
    """Two names sharing one e-mail address is unsupported.

    Only the e-mail is expected in the resulting contact; both names are
    dropped.
    """
    # NOTE(review): the e-mail literals below look masked in this copy of the
    # test -- confirm against the upstream fixture before relying on them.
    marcxml = (
        '<record> '
        '  <datafield tag="270" ind1=" " ind2=" ">'
        '    <subfield code="m">[email protected]</subfield>'
        '    <subfield code="p">Manfred Lindner</subfield>'
        '    <subfield code="p">Boogeyman</subfield>'
        '  </datafield>'
        '</record>'
    )
    converted = experiments.do(create_record(marcxml))

    wanted = [{'email': '*****@*****.**'}]

    assert wanted == converted['contact_details']
Example #44
0
def test_contact_details_from_marcxml_270_single_p_double_m():
    """One name with two e-mail addresses is unsupported.

    Only the name is expected in the resulting contact; both e-mails are
    dropped.
    """
    # NOTE(review): the e-mail literals below look masked in this copy of the
    # test -- confirm against the upstream fixture before relying on them.
    marcxml = (
        '<record> '
        '  <datafield tag="270" ind1=" " ind2=" ">'
        '    <subfield code="m">[email protected]</subfield>'
        '    <subfield code="m">[email protected]</subfield>'
        '    <subfield code="p">Manfred Lindner</subfield>'
        '  </datafield>'
        '</record>'
    )
    converted = experiments.do(create_record(marcxml))

    wanted = [{'name': 'Manfred Lindner'}]

    assert wanted == converted['contact_details']
def test_date_started_from_046__q_and_046__r_and_046__x():
    """The cleaned record has no ``date_started`` for 046 ``q``/``r``/``x``."""
    xml = ('<record>'
           '  <datafield tag="046" ind1=" " ind2=" ">'
           '    <subfield code="q">2010</subfield>'
           '  </datafield>'
           '  <datafield tag="046" ind1=" " ind2=" ">'
           '    <subfield code="r">2011-03-18</subfield>'
           '  </datafield>'
           '  <datafield tag="046" ind1=" " ind2=" ">'
           '    <subfield code="x">yes</subfield>'
           '  </datafield>'
           '</record>')  # record/1108188

    cleaned = clean_record(experiments.do(create_record(xml)))

    assert 'date_started' not in cleaned
def test_experiment_names_and_affiliations_from_marcxml_multiple_119():
    """Several 119 fields split into one name and three affiliations.

    The ``a`` subfield feeds ``experiment_names``; each ``u`` subfield feeds
    ``affiliations``, and only the one accompanied by ``z`` is expected to
    carry a ``record`` reference with ``curated_relation`` True.
    """
    subschema = load_schema('experiments')['properties']['experiment_names']

    xml = ('<record>'
           '  <datafield tag="119" ind1=" " ind2=" ">'
           '    <subfield code="a">LATTICE-UKQCD</subfield>'
           '  </datafield>'
           '  <datafield tag="119" ind1=" " ind2=" ">'
           '    <subfield code="u">Cambridge U.</subfield>'
           '  </datafield>'
           '  <datafield tag="119" ind1=" " ind2=" ">'
           '    <subfield code="u">Edinburgh U.</subfield>'
           '    <subfield code="z">902787</subfield>'
           '  </datafield>'
           '  <datafield tag="119" ind1=" " ind2=" ">'
           '    <subfield code="u">Swansea U.</subfield>'
           '  </datafield>'
           '</record>')  # record/1228417

    wanted_affiliations = [
        {'curated_relation': False, 'name': 'Cambridge U.'},
        {
            'curated_relation': True,
            'name': 'Edinburgh U.',
            'record': {
                '$ref': 'http://localhost:5000/api/institutions/902787',
            },
        },
        {'curated_relation': False, 'name': 'Swansea U.'},
    ]

    converted = experiments.do(create_record(xml))

    # ``validate`` is expected to return None (and not raise) on valid data.
    assert validate(converted['experiment_names'], subschema) is None
    assert converted['experiment_names'] == [{'title': 'LATTICE-UKQCD'}]
    assert converted['affiliations'] == wanted_affiliations
def test_contact_details_from_marcxml_270_single_p_single_m():
    """One 270 field with ``p`` and ``m`` gives one name + e-mail contact."""
    # NOTE(review): the e-mail literals below look masked in this copy of the
    # test -- confirm against the upstream fixture before relying on them.
    xml = ('<record> '
           '  <datafield tag="270" ind1=" " ind2=" ">'
           '    <subfield code="m">[email protected]</subfield>'
           '    <subfield code="p">Manfred Lindner</subfield>'
           '  </datafield>'
           '</record>')
    stripped = strip_empty_values(experiments.do(create_record(xml)))

    wanted = [{'name': 'Manfred Lindner', 'email': '*****@*****.**'}]

    assert wanted == stripped['contact_details']
def test_date_started_and_date_completed_from_046():
    """046 ``s`` maps to ``date_started`` and 046 ``t`` to ``date_completed``."""
    xml = ('<record>'
           '  <datafield tag="046" ind1=" " ind2=" ">'
           '    <subfield code="s">1996</subfield>'
           '  </datafield>'
           '  <datafield tag="046" ind1=" " ind2=" ">'
           '    <subfield code="t">2002</subfield>'
           '  </datafield>'
           '  <datafield tag="046" ind1=" " ind2=" ">'
           '    <subfield code="x">yes</subfield>'
           '  </datafield>'
           '</record>')  # record/1108324

    cleaned = clean_record(experiments.do(create_record(xml)))

    assert cleaned['date_started'] == '1996'
    assert cleaned['date_completed'] == '2002'
Example #49
0
def test_related_experiments_from_510__a_w_0():
    """510 with ``0``, ``a`` and ``w`` gives one curated related experiment.

    For ``w`` equal to ``b`` the expected ``relation`` is ``successor``.
    """
    marcxml = (
        '<datafield tag="510" ind1=" " ind2=" ">'
        '  <subfield code="0">1262631</subfield>'
        '  <subfield code="a">LZ</subfield>'
        '  <subfield code="w">b</subfield>'
        '</datafield>'
    )  # record/1108192

    successor = {
        'name': 'LZ',
        'record': {'$ref': 'http://localhost:5000/api/experiments/1262631'},
        'relation': 'successor',
        'curated_relation': True,
    }
    converted = experiments.do(create_record(marcxml))

    assert [successor] == converted['related_experiments']
Example #50
0
def test_spokespersons_from_double_702__a_i():
    """Two 702 fields give two ``spokespersons`` entries.

    The field that carries an ``x`` subfield is expected to get a ``record``
    reference and ``curated_relation`` True; the other one has no reference
    and ``curated_relation`` False.
    """
    marcxml = (
        '<record>'
        '  <datafield tag="702" ind1=" " ind2=" ">'
        '    <subfield code="a">Feldman, Gary</subfield>'
        '    <subfield code="i">INSPIRE-00080677</subfield>'
        '    <subfield code="x">1010209</subfield>'
        '  </datafield>'
        '  <datafield tag="702" ind1=" " ind2=" ">'
        '    <subfield code="a">Messier, Mark</subfield>'
        '    <subfield code="i">INSPIRE-00107105</subfield>'
        '  </datafield>'
        '</record>'
    )  # record/1402897

    feldman = {
        'ids': [{'type': 'INSPIRE ID', 'value': 'INSPIRE-00080677'}],
        'name': 'Feldman, Gary',
        'record': {'$ref': 'http://localhost:5000/api/authors/1010209'},
        'curated_relation': True,
    }
    messier = {
        'ids': [{'type': 'INSPIRE ID', 'value': 'INSPIRE-00107105'}],
        'name': 'Messier, Mark',
        'curated_relation': False,
    }
    converted = experiments.do(create_record(marcxml))

    assert [feldman, messier] == converted['spokespersons']
Example #51
0
def test_experiment_names_and_affiliations_from_marcxml_119():
    """One 119 field with ``a``, ``u`` and ``z`` feeds two output keys.

    ``a`` is expected under ``experiment_names``; ``u`` together with ``z``
    is expected as a curated affiliation with a ``record`` reference.
    """
    marcxml = (
        '<record>'
        '  <datafield tag="119" ind1=" " ind2=" ">'
        '    <subfield code="a">CERN-ALPHA</subfield>'
        '    <subfield code="u">CERN</subfield>'
        '    <subfield code="z">902725</subfield>'
        '  </datafield>'
        '</record>'
    )  # record/1108206

    cern = {
        'curated_relation': True,
        'name': 'CERN',
        'record': {'$ref': 'http://localhost:5000/api/institutions/902725'},
    }
    converted = experiments.do(create_record(marcxml))

    assert converted['experiment_names'] == [{'title': 'CERN-ALPHA'}]
    assert converted['affiliations'] == [cern]
Example #52
0
def test_multiple_title_variants_from_marcxml_419():
    """Two 419 fields give two ``title_variants`` entries in field order."""
    marcxml = (
        '<record>'
        '  <datafield tag="419" ind1=" " ind2=" ">'
        '    <subfield code="a">P-326</subfield>'
        '  </datafield>'
        '  <datafield tag="419" ind1=" " ind2=" ">'
        '    <subfield code="a">CERN-NA-048-3</subfield>'
        '  </datafield>'
        '</record>'
    )
    converted = experiments.do(create_record(marcxml))

    wanted = [{'title': 'P-326'}, {'title': 'CERN-NA-048-3'}]

    assert wanted == converted['title_variants']
Example #53
0
def test_spokespersons_from_702__a_i_z():
    """702 with ``a``, ``i`` and ``z`` gives one current spokesperson.

    With ``z`` set to ``Current`` the entry is expected to have ``current``
    True; without a record id it is not a curated relation.
    """
    marcxml = (
        '<datafield tag="702" ind1=" " ind2=" ">'
        '  <subfield code="a">Hogan, Craig J.</subfield>'
        '  <subfield code="i">INSPIRE-00090662</subfield>'
        '  <subfield code="z">Current</subfield>'
        '</datafield>'
    )  # record/1108189

    hogan = {
        'ids': [{'type': 'INSPIRE ID', 'value': 'INSPIRE-00090662'}],
        'name': 'Hogan, Craig J.',
        'current': True,
        'curated_relation': False,
    }
    converted = experiments.do(create_record(marcxml))

    assert [hogan] == converted['spokespersons']
Example #54
0
def create_record(record, force=True, dry_run=False):
    """Build the JSON record for a marc21 ``record``.

    The collection checks run in a fixed order (institution, experiment,
    journals, hepnames, job/jobhidden, conferences); the first one that
    passes selects the dojson model, and ``hep`` is used when none does.
    The model output is passed through ``strip_empty_values``.

    :param record: marc21 record to convert.
    :param force: accepted for interface compatibility; not used here.
    :param dry_run: when true, return an ``(errors, json)`` tuple instead of
        the bare JSON.
    :return: the converted JSON, or ``("", json)`` when ``dry_run`` is set.
    """
    if _collection_in_record(record, 'institution'):
        model = institutions
    elif _collection_in_record(record, 'experiment'):
        model = experiments
    elif _collection_in_record(record, 'journals'):
        model = journals
    elif _collection_in_record(record, 'hepnames'):
        model = hepnames
    elif (_collection_in_record(record, 'job') or
            _collection_in_record(record, 'jobhidden')):
        model = jobs
    elif _collection_in_record(record, 'conferences'):
        model = conferences
    else:
        # No dedicated collection matched: treat it as a literature record.
        model = hep

    converted = strip_empty_values(model.do(record))

    if not dry_run:
        return converted

    # Nothing in this function collects errors, so the report is always "".
    return "", converted