def test_address_from_marcxml_371__a_b_c_d_double_e_g():
    """Two ``e`` subfields in a 371 field end up in one postal code string."""
    marcxml = (
        '<datafield tag="371" ind1=" " ind2=" ">'
        '  <subfield code="a">Philosophenweg 16</subfield>'
        '  <subfield code="b">Heidelberg</subfield>'
        '  <subfield code="c">Baden-Wuerttemberg</subfield>'
        '  <subfield code="d">Germany</subfield>'
        '  <subfield code="e">69120</subfield>'
        '  <subfield code="e">DE-119</subfield>'
        '  <subfield code="g">DE</subfield>'
        '</datafield>'
    )
    converted = strip_empty_values(institutions.do(create_record(marcxml)))

    address = {
        'original_address': ['Philosophenweg 16'],
        'city': 'Heidelberg',
        'state': 'Baden-Wuerttemberg',
        'country': 'Germany',
        'country_code': 'DE',
        # Both ``e`` values are expected, joined by a comma.
        'postal_code': '69120, DE-119',
    }

    assert [address] == converted['address']
def test_contact_details_from_multiple_marcxml_270():
    """Every 270 field is expected as its own ``contact_details`` entry."""
    # NOTE(review): the e-mail literals below look like redaction residue and
    # the input value differs from the expected one -- confirm against the
    # upstream fixture before relying on this test.
    marcxml = (
        '<record> '
        '  <datafield tag="270" ind1=" " ind2=" ">'
        '    <subfield code="m">[email protected]</subfield>'
        '    <subfield code="p">Manfred Lindner</subfield>'
        '  </datafield>'
        '  <datafield tag="270" ind1=" " ind2=" ">'
        '    <subfield code="p">Wynton Marsalis</subfield>'
        '  </datafield>'
        '</record>'
    )
    converted = strip_empty_values(conferences.do(create_record(marcxml)))

    contact_with_email = {'name': 'Manfred Lindner', 'email': '*****@*****.**'}
    contact_without_email = {'name': 'Wynton Marsalis'}

    assert [contact_with_email, contact_without_email] == \
        converted['contact_details']
def test_address_from_marcxml_371__a_b_c_d_e_double_g():
    """A repeated ``g`` subfield in a 371 field gives one country code."""
    marcxml = (
        '<datafield tag="371" ind1=" " ind2=" ">'
        '  <subfield code="a">Philosophenweg 16</subfield>'
        '  <subfield code="b">Heidelberg</subfield>'
        '  <subfield code="c">Baden-Wuerttemberg</subfield>'
        '  <subfield code="d">Germany</subfield>'
        '  <subfield code="e">69120</subfield>'
        '  <subfield code="g">DE</subfield>'
        '  <subfield code="g">DE</subfield>'
        '</datafield>'
    )
    converted = strip_empty_values(institutions.do(create_record(marcxml)))

    address = {
        'original_address': ['Philosophenweg 16'],
        'city': 'Heidelberg',
        'state': 'Baden-Wuerttemberg',
        'country': 'Germany',
        # The two identical ``g`` values are expected to collapse into one.
        'country_code': 'DE',
        'postal_code': '69120',
    }

    assert [address] == converted['address']
def test_address_from_multiple_marcxml__111_c():
    """Each 111 ``c`` subfield is expected as a separate address entry."""
    marcxml = (
        '<record>'
        '  <datafield tag="111" ind1=" " ind2=" ">'
        '    <subfield code="c">Austin, Tex.</subfield>'
        '  </datafield>'
        '  <datafield tag="111" ind1=" " ind2=" ">'
        '    <subfield code="c">Den Haag, Nederlands</subfield>'
        '  </datafield>'
        '</record>'
    )
    converted = strip_empty_values(conferences.do(create_record(marcxml)))

    austin = {
        'original_address': 'Austin, Tex.',
        'country_code': 'US',
        'state': 'US-TX',
    }
    den_haag = {
        'original_address': 'Den Haag, Nederlands',
        'country_code': 'NL',
    }

    assert [austin, den_haag] == converted['address']
예제 #5
0
파일: base.py 프로젝트: nikpap/inspire-next
def collections(self, key, value):
    """Collection this record belongs to.

    Builds one ``{'primary', 'secondary', 'deleted'}`` entry per incoming
    field (from subfields ``a``, ``b`` and ``c``), appends them to the
    collections already stored on ``self`` and returns the resulting list.

    :param key: key of the field that triggered this rule (not used here).
    :param value: one subfield mapping, or a sequence of them.
    :returns: the accumulated entries, stripped of empty values when any
        entry holds a list value, deduplicated otherwise.
    """
    def get_value(field):
        primary = field.get('a')
        if isinstance(primary, list):
            # Repeated ``a`` subfield: only the first occurrence is kept.
            primary = primary[0]
        return {
            'primary': primary,
            'secondary': field.get('b'),
            'deleted': field.get('c'),
        }

    collections = self.get('collections', [])
    collections.extend(get_value(field) for field in utils.force_list(value))

    # Inspect the entry *values*; stops at the first list that is found.
    contains_list = any(
        isinstance(item, list)
        for element in collections
        for item in element.values()
    )
    if contains_list:
        # NOTE(review): presumably the deduplication helper cannot cope with
        # list values, hence the different code path -- confirm.
        return strip_empty_values(collections)
    return inspire_dojson_utils.remove_duplicates_from_list_of_dicts(
        collections)
예제 #6
0
def collections(self, key, value):
    """Collection this record belongs to.

    Every incoming field contributes a dict with the keys ``primary``
    (subfield ``a``), ``secondary`` (subfield ``b``) and ``deleted``
    (subfield ``c``). The new dicts are added to whatever ``self`` already
    holds under ``'collections'``.

    :param key: key of the field that triggered this rule (not used here).
    :param value: one subfield mapping, or a sequence of them.
    :returns: the combined list; passed through ``strip_empty_values`` if
        any entry contains a list value, otherwise through
        ``remove_duplicates_from_list_of_dicts``.
    """
    def get_value(subfields):
        primary = subfields.get('a')
        if isinstance(primary, list):
            # Several ``a`` subfields: keep the first one only.
            primary = primary[0]
        return {
            'primary': primary,
            'secondary': subfields.get('b'),
            'deleted': subfields.get('c'),
        }

    collections = self.get('collections', [])
    for subfields in utils.force_list(value):
        collections.append(get_value(subfields))

    def has_list_value(entry):
        # Look at the values of the entry, not at its keys or positions.
        return any(isinstance(item, list) for item in entry.values())

    if any(has_list_value(entry) for entry in collections):
        return strip_empty_values(collections)
    return inspire_dojson_utils.remove_duplicates_from_list_of_dicts(
        collections)
예제 #7
0
def update():
    """View for INSPIRE author update form.

    Reads ``recid`` from the request. Without a ``recid`` the client is
    redirected to the "new author" form. Otherwise the MARCXML export of
    the record is fetched from ``AUTHORS_UPDATE_BASE_URL``, converted with
    the ``hepnames`` model and used to pre-fill the update form.

    When the HTTP request fails, or the response holds no ``<record>``
    element, the form is rendered with only ``recid`` filled in.
    """
    from dojson.contrib.marc21.utils import create_record
    from inspirehep.dojson.hepnames import hepnames

    recid = request.values.get('recid', 0, type=int)
    if not recid:
        return redirect(url_for("inspirehep_authors_holdingpen.new"))

    data = {}
    try:
        url = os.path.join(
            current_app.config["AUTHORS_UPDATE_BASE_URL"],
            "record", str(recid), "export", "xm")
        xml = requests.get(url)
        record_regex = re.compile(
            r"\<record\>.*\<\/record\>", re.MULTILINE | re.DOTALL)
        match = record_regex.search(xml.content)
        # ``search`` returns None when the payload has no <record> element
        # (e.g. an error page); fall back to the empty form in that case
        # instead of failing on ``None.group()``.
        if match is not None:
            data = strip_empty_values(
                hepnames.do(create_record(match.group())))
            convert_for_form(data)
    except requests.exceptions.RequestException:
        # Best effort: an unreachable backend still yields an empty form.
        pass
    data["recid"] = recid

    form = AuthorUpdateForm(data=data, is_update=True)
    ctx = {
        "action": url_for('.submitupdate'),
        "name": "authorUpdateForm",
        "id": "authorUpdateForm",
    }

    # FIXME create template in authors module
    return render_template('authors/forms/update_form.html', form=form, **ctx)
예제 #8
0
def update():
    """View for INSPIRE author update form.

    A request without ``recid`` is redirected to the "new author" form.
    With a ``recid``, the record's MARCXML export is downloaded from
    ``AUTHORS_UPDATE_BASE_URL``, run through the ``hepnames`` model and
    handed to ``AuthorUpdateForm`` as initial data.

    If the download fails, or no ``<record>`` element can be found in the
    response, the form only receives the ``recid``.
    """
    from dojson.contrib.marc21.utils import create_record
    from inspirehep.dojson.hepnames import hepnames

    recid = request.values.get('recid', 0, type=int)
    if not recid:
        return redirect(url_for("inspirehep_authors.new"))

    data = {}
    try:
        url = os.path.join(current_app.config["AUTHORS_UPDATE_BASE_URL"],
                           "record", str(recid), "export", "xm")
        xml = requests.get(url)
        record_regex = re.compile(r"\<record\>.*\<\/record\>",
                                  re.MULTILINE | re.DOTALL)
        found = record_regex.search(xml.content)
        # No <record> in the response means there is nothing to convert;
        # guard against calling ``.group()`` on None.
        if found is not None:
            xml_content = found.group()
            data = strip_empty_values(hepnames.do(
                create_record(xml_content)))
            convert_for_form(data)
    except requests.exceptions.RequestException:
        # Best effort: keep going with an empty form on network errors.
        pass
    data["recid"] = recid

    form = AuthorUpdateForm(data=data)
    ctx = {
        "action": url_for('.submitupdate'),
        "name": "authorUpdateForm",
        "id": "authorUpdateForm",
    }

    # FIXME create template in authors module
    return render_template('authors/forms/update_form.html', form=form, **ctx)
예제 #9
0
def test_positions_from_371__a_m_r_z():
    """A 371 field with ``a``, ``m``, ``r`` and ``z`` gives one position."""
    # NOTE(review): the e-mail literals look like redaction residue and the
    # input differs from the expected value -- confirm against upstream.
    marcxml = (
        '<datafield tag="371" ind1=" " ind2=" ">'
        '  <subfield code="a">Antwerp U.</subfield>'
        '  <subfield code="m">[email protected]</subfield>'
        '  <subfield code="r">SENIOR</subfield>'
        '  <subfield code="z">Current</subfield>'
        '</datafield>'
    )  # record/997958
    converted = strip_empty_values(hepnames.do(create_record(marcxml)))

    position = {
        'institution': {'name': 'Antwerp U.'},
        'email': '*****@*****.**',
        'rank': 'SENIOR',
        '_rank': 'SENIOR',
        'status': 'Current',
        'curated_relation': False,
    }

    assert [position] == converted['positions']
def test_extra_words_from_410__decuple_g():
    """Ten ``g`` subfields of a 410 field are all expected, in order."""
    marcxml = (
        '<datafield tag="410" ind1=" " ind2=" ">'
        '  <subfield code="g">Institut Theoretische Physik,</subfield>'
        '  <subfield code="g">RWTH, Inst.</subfield>'
        '  <subfield code="g">institute A</subfield>'
        '  <subfield code="g">III. Physikalisches Institut, Technische Hochschule Aachen, Aachen, West</subfield>'
        '  <subfield code="g">physics</subfield>'
        '  <subfield code="g">52056</subfield>'
        '  <subfield code="g">D-52056</subfield>'
        '  <subfield code="g">DE-52056</subfield>'
        '  <subfield code="g">phys</subfield>'
        '  <subfield code="g">I. Physikalisches Institut</subfield>'
        '</datafield>'
    )  # record/902624
    converted = strip_empty_values(institutions.do(create_record(marcxml)))

    extra_words = [
        'Institut Theoretische Physik,',
        'RWTH, Inst.',
        'institute A',
        'III. Physikalisches Institut, Technische Hochschule Aachen, Aachen, West',
        'physics',
        '52056',
        'D-52056',
        'DE-52056',
        'phys',
        'I. Physikalisches Institut',
    ]

    assert extra_words == converted['extra_words']
예제 #11
0
def test_multiple_issn_from_marcxml_022():
    """Test multiple ISSNs."""
    marcxml = (
        '<record>'
        '  <datafield tag="022" ind1=" " ind2=" ">'
        '    <subfield code="a">2349-2716</subfield>'
        '    <subfield code="b">Online</subfield>'
        '  </datafield>'
        '  <datafield tag="022" ind1=" " ind2=" ">'
        '    <subfield code="a">2349-6088</subfield>'
        '    <subfield code="b">Print</subfield>'
        '  </datafield>'
        '</record>'
    )
    converted = strip_empty_values(journals.do(create_record(marcxml)))

    # The medium is expected in lower case although the input is capitalised.
    online_issn = {'value': '2349-2716', 'medium': 'online'}
    print_issn = {'value': '2349-6088', 'medium': 'print'}

    assert [online_issn, print_issn] == converted['issn']
예제 #12
0
def test_hidden_notes_from_595__a_9_and_595__double_a_9():
    """Each ``a`` subfield of a 595 field is expected as its own note."""
    marcxml = (
        '<record>'
        '  <datafield tag="595" ind1=" " ind2=" ">'
        '    <subfield code="9">SPIRES-HIDDEN</subfield>'
        '    <subfield code="a">Title changed from ALLCAPS</subfield>'
        '  </datafield>'
        '  <datafield tag="595" ind1=" " ind2=" ">'
        '    <subfield code="9">SPIRES-HIDDEN</subfield>'
        '    <subfield code="a">TeXtitle from script</subfield>'
        '    <subfield code="a">no affiliation (not clear pn the fulltext)</subfield>'
        '  </datafield>'
        '</record>'
    )  # record/109310
    converted = strip_empty_values(hep.do(create_record(marcxml)))

    # All three notes are expected to carry the source of their own field.
    note_texts = (
        'Title changed from ALLCAPS',
        'TeXtitle from script',
        'no affiliation (not clear pn the fulltext)',
    )
    hidden_notes = [
        {'source': 'SPIRES-HIDDEN', 'value': text} for text in note_texts
    ]

    assert hidden_notes == converted['hidden_notes']
예제 #13
0
def convert_marcxml(source):
    """Convert MARC XML to JSON.

    Generator: reads ``source`` completely, splits it into single records
    and yields each one converted by the model of the first collection it
    matches, with empty values stripped. Records matching none of the
    listed collections are converted with the ``hep`` model.

    :param source: object with a ``read()`` method returning the MARCXML.
    """
    from dojson.contrib.marc21.utils import create_record, split_blob

    from inspirehep.dojson.utils import strip_empty_values
    from inspirehep.dojson.hep import hep
    from inspirehep.dojson.institutions import institutions
    from inspirehep.dojson.journals import journals
    from inspirehep.dojson.experiments import experiments
    from inspirehep.dojson.hepnames import hepnames
    from inspirehep.dojson.jobs import jobs
    from inspirehep.dojson.conferences import conferences

    # Checked top to bottom; the first matching row decides the model.
    models_by_collection = (
        (('institution',), institutions),
        (('experiment',), experiments),
        (('journals',), journals),
        (('hepnames',), hepnames),
        (('job', 'jobhidden'), jobs),
        (('conferences',), conferences),
    )

    for blob in split_blob(source.read()):
        record = create_record(blob)
        model = hep
        for names, candidate in models_by_collection:
            if any(_collection_in_record(record, name) for name in names):
                model = candidate
                break
        yield strip_empty_values(model.do(record))
예제 #14
0
def convert_marcxml(source):
    """Convert MARC XML to JSON.

    Yields one converted record (empty values stripped) for every MARCXML
    record found in the content of ``source.read()``. The conversion model
    is chosen from the collection of the record; ``hep`` is the fallback.
    """
    from dojson.contrib.marc21.utils import create_record, split_blob

    from inspirehep.dojson.utils import strip_empty_values
    from inspirehep.dojson.hep import hep
    from inspirehep.dojson.institutions import institutions
    from inspirehep.dojson.journals import journals
    from inspirehep.dojson.experiments import experiments
    from inspirehep.dojson.hepnames import hepnames
    from inspirehep.dojson.jobs import jobs
    from inspirehep.dojson.conferences import conferences

    def select_model(record):
        """Return the model of the first collection ``record`` matches."""
        def belongs_to(collection):
            return _collection_in_record(record, collection)

        if belongs_to('institution'):
            return institutions
        if belongs_to('experiment'):
            return experiments
        if belongs_to('journals'):
            return journals
        if belongs_to('hepnames'):
            return hepnames
        if belongs_to('job') or belongs_to('jobhidden'):
            return jobs
        if belongs_to('conferences'):
            return conferences
        return hep

    for blob in split_blob(source.read()):
        record = create_record(blob)
        yield strip_empty_values(select_model(record).do(record))
예제 #15
0
def test_single_doi():
    """A single 024 DOI field is expected as one entry under ``dois``."""
    marcxml = (
        '<record><datafield tag="024" ind1="7" ind2=" ">'
        '<subfield code="2">DOI</subfield>'
        '<subfield code="a">10.1088/0264-9381/31/24/245004</subfield>'
        '</datafield></record>'
    )
    converted = strip_empty_values(hep.do(create_record(marcxml)))

    expected_dois = [{'value': '10.1088/0264-9381/31/24/245004'}]

    assert converted['dois'] == expected_dois
def test_core_from_690c_a_noncore():
    """A 690C field whose ``a`` subfield is NONCORE gives a falsy ``core``."""
    # The ``code`` attribute must be quoted on both sides; with the opening
    # quote missing the snippet is not well-formed XML.
    snippet = (
        '<datafield tag="690" ind1="C" ind2=" ">'
        '  <subfield code="a">NONCORE</subfield>'
        '</datafield>'
    )  # record/916025

    result = strip_empty_values(institutions.do(create_record(snippet)))

    assert not result['core']
def test_non_public_notes_from_667__a():
    """The ``a`` subfield of a 667 field is expected as a non-public note."""
    marcxml = (
        '<datafield tag="667" ind1=" " ind2=" ">'
        '  <subfield code="a">Former ICN = Negev U.</subfield>'
        '</datafield>'
    )  # record/902663
    record = create_record(marcxml)
    converted = strip_empty_values(institutions.do(record))

    assert ['Former ICN = Negev U.'] == converted['non_public_notes']
def test_field_activity_from_372__a():
    """The ``a`` subfield of a 372 field is expected as ``field_activity``."""
    marcxml = (
        '<datafield tag="372" ind1=" " ind2=" ">'
        '  <subfield code="a">Research center</subfield>'
        '</datafield>'
    )
    record = create_record(marcxml)
    converted = strip_empty_values(institutions.do(record))

    assert ['Research center'] == converted['field_activity']
def test_name_from_110__a():
    """A 110 field with only ``a`` is expected as a one-element name group."""
    marcxml = (
        '<datafield tag="110" ind1=" " ind2=" ">'
        '  <subfield code="a">Mid-America Christian U.</subfield>'
        '</datafield>'
    )  # record/1439728
    record = create_record(marcxml)
    converted = strip_empty_values(institutions.do(record))

    # ``name`` is a list of lists: one inner list per 110 field.
    name_groups = [['Mid-America Christian U.']]

    assert name_groups == converted['name']
def test_hidden_notes_from_595__a():
    """The ``a`` subfield of a 595 field is expected as a hidden note."""
    marcxml = (
        '<datafield tag="595" ind1=" " ind2=" ">'
        '  <subfield code="a">The Division is located inside the Department of Physics and Astronomy of the University of Catania Scientific Campus ("Città Universitaria" or "Cittadella"). Via Santa Sofia 64 95123 CATANIA</subfield>'
        '</datafield>'
    )  # record/902879
    converted = strip_empty_values(institutions.do(create_record(marcxml)))

    # The expected note is a unicode literal because it holds non-ASCII text.
    hidden_notes = [
        u'The Division is located inside the Department of Physics and Astronomy of the University of Catania Scientific Campus ("Città Universitaria" or "Cittadella"). Via Santa Sofia 64 95123 CATANIA',
    ]

    assert hidden_notes == converted['hidden_notes']
def test_timezone_from_043__t():
    """The ``t`` subfield of a 043 field is expected under ``timezone``."""
    marcxml = (
        '<datafield tag="043" ind1=" " ind2=" ">'
        '  <subfield code="t">+05</subfield>'
        '</datafield>'
    )  # record/902635
    record = create_record(marcxml)
    converted = strip_empty_values(institutions.do(record))

    assert ['+05'] == converted['timezone']
def test_no_location_from_invalid_034__d_f():
    """Non-numeric 034 ``d``/``f`` values must not produce a ``location``."""
    marcxml = (
        '<datafield tag="034" ind1=" " ind2=" ">'
        '  <subfield code="d">foo</subfield>'
        '  <subfield code="f">bar</subfield>'
        '</datafield>'
    )  # synthetic data
    record = create_record(marcxml)
    converted = strip_empty_values(institutions.do(record))

    assert 'location' not in converted
예제 #23
0
def test_single_doi():
    """One 024 field flagged as DOI is expected as a single ``dois`` item."""
    marcxml = (
        '<record><datafield tag="024" ind1="7" ind2=" ">'
        '<subfield code="2">DOI</subfield>'
        '<subfield code="a">10.1088/0264-9381/31/24/245004</subfield>'
        '</datafield></record>'
    )
    record = create_record(marcxml)
    dois = strip_empty_values(hep.do(record))['dois']

    assert dois == [{'value': '10.1088/0264-9381/31/24/245004'}]
def test_name_from_110__a_b_u():
    """For a 110 field with ``a``, ``b`` and ``u``, ``a`` and ``u`` are kept."""
    marcxml = (
        '<datafield tag="110" ind1=" " ind2=" ">'
        '  <subfield code="a">Fukushima University</subfield>'
        '  <subfield code="b">Department of Physics</subfield>'
        '  <subfield code="u">Fukushima U.</subfield>'
        '</datafield>'
    )  # record/902812
    converted = strip_empty_values(institutions.do(create_record(marcxml)))

    # The ``b`` value (department) is not expected among the names.
    name_groups = [['Fukushima University', 'Fukushima U.']]

    assert name_groups == converted['name']
def test_location_from_034__f():
    """The ``f`` subfield of a 034 field is expected as a float latitude."""
    marcxml = (
        '<datafield tag="034" ind1=" " ind2=" ">'
        '  <subfield code="f">50.7736</subfield>'
        '</datafield>'
    )  # synthetic data
    record = create_record(marcxml)
    converted = strip_empty_values(institutions.do(record))

    assert {'latitude': 50.7736} == converted['location']
def test_historical_data_from_6781_a():
    """The ``a`` subfield of a 6781 field is expected as historical data."""
    marcxml = (
        '<datafield tag="678" ind1="1" ind2=" ">'
        '  <subfield code="a">Became IFH (Inst for Hochenergiephysik)in 1968. Since 1992 the official name of the Inst. is simply DESY Zeuthen. Changed 1/26/99 AMR</subfield>'
        '</datafield>'
    )  # record/902666
    converted = strip_empty_values(institutions.do(create_record(marcxml)))

    historical_data = [
        'Became IFH (Inst for Hochenergiephysik)in 1968. Since 1992 the official name of the Inst. is simply DESY Zeuthen. Changed 1/26/99 AMR',
    ]

    assert historical_data == converted['historical_data']
예제 #27
0
def create_record(data, force=False, dry_run=False):
    """Convert one MARCXML record and optionally store it.

    The record is converted with the model of the first collection it
    matches (``hep`` as fallback) and stripped of empty values.

    :param data: MARCXML of a single record.
    :param force: when true and the converted record carries a
        ``control_number`` (or ``recid``), the record is created or updated.
    :param dry_run: when true, nothing is stored and no production record
        is tracked; only the conversion is performed.
    :returns: ``(recid, json)`` for a dry run, ``(control_number, record)``
        after storing, ``None`` when nothing was stored.
    :raises Exception: any conversion or storage error is re-raised after
        the production record, if tracked, has been marked as failed.
    """
    record = marc_create_record(data)
    recid = None
    if '001' in record:
        recid = int(record['001'][0])

    # The production record is only tracked for real runs with a known id.
    # Binding the name unconditionally keeps the error handler below safe
    # during dry runs, where it used to hit an unbound local and mask the
    # original exception.
    prod_record = None
    if not dry_run and recid:
        prod_record = InspireProdRecords(recid=recid)
        prod_record.marcxml = data
    try:
        if _collection_in_record(record, 'institution'):
            json = strip_empty_values(institutions.do(record))
        elif _collection_in_record(record, 'experiment'):
            json = strip_empty_values(experiments.do(record))
        elif _collection_in_record(record, 'journals'):
            json = strip_empty_values(journals.do(record))
        elif _collection_in_record(record, 'hepnames'):
            json = strip_empty_values(hepnames.do(record))
        elif _collection_in_record(record, 'job') or \
                _collection_in_record(record, 'jobhidden'):
            json = strip_empty_values(jobs.do(record))
        elif _collection_in_record(record, 'conferences'):
            json = strip_empty_values(conferences.do(record))
        else:
            json = strip_empty_values(hep.do(record))
        if dry_run:
            return recid, json

        if force and any(key in json for key in ('control_number', 'recid')):
            try:
                control_number = json['control_number']
            except KeyError:
                control_number = json['recid']
            control_number = int(control_number)
            # Searches if record already exists.
            record = Record.get_record(control_number)
            if record is None:
                # Adds the record to the db session.
                rec = RecordModel(id=control_number)
                db.session.merge(rec)
                record = Record.create(json)
            else:
                record = Record(json, model=record.model)
                record.commit()
            if prod_record is not None:
                prod_record.successful = True
                db.session.merge(prod_record)
            logger.info("Elaborated record {}".format(control_number))
            return control_number, dict(record)
    except Exception:
        if recid:
            if prod_record is not None:
                prod_record.successful = False
                db.session.merge(prod_record)
            logger.exception("Error in elaborating record ID {}".format(recid))
        raise
def test_name_from_110__b_t_u():
    """For a 110 field with ``b``, ``t`` and ``u``, ``u`` is expected first."""
    marcxml = (
        '<datafield tag="110" ind1=" " ind2=" ">'
        '   <subfield code="b">Institute of Physics</subfield>'
        '   <subfield code="t">Inst. Phys., Belgrade</subfield>'
        '   <subfield code="u">Belgrade, Inst. Phys.</subfield>'
        '</datafield>'
    )   # record/903416
    converted = strip_empty_values(institutions.do(create_record(marcxml)))

    # Expected order is ``u`` then ``t``; the ``b`` value is not expected.
    name_groups = [['Belgrade, Inst. Phys.', 'Inst. Phys., Belgrade']]

    assert name_groups == converted['name']
예제 #29
0
def test_publisher_from_643__b():
    """The ``b`` subfield of a 643 field is expected under ``publisher``."""
    marcxml = (
        '<datafield tag="643" ind1=" " ind2=" ">'
        '  <subfield code="b">ANITA PUBLICATIONS, INDIA</subfield>'
        '</datafield>'
    )  # record/1211888
    record = create_record(marcxml)
    converted = strip_empty_values(journals.do(record))

    assert ['ANITA PUBLICATIONS, INDIA'] == converted['publisher']
def test_public_notes_from_680__a():
    """A 680 field note is expected under ``public_notes``."""
    # NOTE(review): the test name says ``a`` but the fixture uses subfield
    # ``i`` -- confirm which one the rule is meant to read.
    marcxml = (
        '<datafield tag="680" ind1=" " ind2=" ">'
        '  <subfield code="i">2nd address: Organisation Européenne pour la Recherche Nucléaire (CERN), F-01631 Prévessin Cedex, France</subfield>'
        '</datafield>'
    )  # record/902725
    converted = strip_empty_values(institutions.do(create_record(marcxml)))

    public_notes = [
        u'2nd address: Organisation Européenne pour la Recherche Nucléaire (CERN), F-01631 Prévessin Cedex, France',
    ]

    assert public_notes == converted['public_notes']
예제 #31
0
def test_issn_from_marcxml_022_with_b_no_a():
    """Test ISSN in wrong subfield."""
    # Only ``b`` is present, so no ``issn`` key is expected at all.
    marcxml = (
        '<record>'
        '  <datafield tag="022" ind1=" " ind2=" ">'
        '    <subfield code="b">9780486632827</subfield>'
        '  </datafield> '
        '</record>'
    )
    record = create_record(marcxml)
    converted = strip_empty_values(journals.do(record))

    assert 'issn' not in converted
예제 #32
0
def test_coden_from_030__a_2():
    """The ``a`` subfield of a CODEN 030 field is expected under ``coden``."""
    marcxml = (
        '<datafield tag="030" ind1=" " ind2=" ">'
        '  <subfield code="2">CODEN</subfield>'
        '  <subfield code="a">HERAS</subfield>'
        '</datafield>'
    )  # record/1211568
    record = create_record(marcxml)
    converted = strip_empty_values(journals.do(record))

    assert ['HERAS'] == converted['coden']
예제 #33
0
def references(self, key, value):
    """Produce list of references.

    Builds one reference dict per incoming field, appends them to the
    references already stored on ``self`` and returns the list with empty
    values stripped and duplicates removed.

    :param key: key of the field that triggered this rule (not used here).
    :param value: one subfield mapping, or a sequence of them.
    """
    def to_int(raw, default):
        """Return ``raw`` as an int, or ``default`` if it cannot be one."""
        try:
            return int(raw)
        except (TypeError, ValueError):
            # Missing (None), repeated (sequence) or non-numeric subfield.
            return default

    def get_value(field):
        recid = to_int(field.get('0'), None)
        return {
            'record': inspire_dojson_utils.get_record_ref(recid, 'literature'),
            'texkey': field.get('1'),
            'doi': field.get('a'),
            'collaboration': utils.force_list(field.get('c')),
            'editors': field.get('e'),
            'authors': utils.force_list(field.get('h')),
            'misc': utils.force_list(field.get('m')),
            'number': to_int(field.get('o'), ''),
            'isbn': field.get('i'),
            'publisher': utils.force_list(field.get('p')),
            'maintitle': field.get('q'),
            'report_number': utils.force_list(field.get('r')),
            'title': utils.force_list(field.get('t')),
            'url': utils.force_list(field.get('u')),
            'journal_pubnote': utils.force_list(field.get('s')),
            'raw_reference': utils.force_list(field.get('x')),
            'year': to_int(field.get('y'), ''),
        }

    references = self.get('references', [])
    references.extend(get_value(field) for field in utils.force_list(value))

    return inspire_dojson_utils.remove_duplicates_from_list(
        strip_empty_values(references))
예제 #34
0
def test_experiment_names_and_affiliation_from_marcxml_119():
    """A 119 field provides the experiment title and its affiliation."""
    marcxml = (
        '<record>'
        '  <datafield tag="119" ind1=" " ind2=" ">'
        '    <subfield code="a">CERN-ALPHA</subfield>'
        '    <subfield code="u">CERN</subfield>'
        '  </datafield>'
        '</record>'
    )
    record = create_record(marcxml)
    converted = strip_empty_values(experiments.do(record))

    first_affiliation = converted['affiliation'][0]
    assert first_affiliation == 'CERN'
    first_name = converted['experiment_names'][0]
    assert first_name['title'] == 'CERN-ALPHA'
예제 #35
0
def references(self, key, value):
    """Produce list of references.

    Every incoming field is turned into a reference dict which is appended
    to the references already stored on ``self``. The combined list is
    returned with empty values stripped and duplicates removed.

    :param key: key of the field that triggered this rule (not used here).
    :param value: one subfield mapping, or a sequence of them.
    """
    def int_or_empty(raw):
        """Return ``raw`` as an int, or ``''`` if it cannot be converted."""
        try:
            return int(raw)
        except (TypeError, ValueError):
            # Missing (None), repeated (sequence) or non-numeric subfield.
            return ''

    def get_value(subfields):
        return {
            'recid': int_or_empty(subfields.get('0')),
            'texkey': subfields.get('1'),
            'doi': subfields.get('a'),
            'collaboration': utils.force_list(subfields.get('c')),
            'editors': subfields.get('e'),
            'authors': utils.force_list(subfields.get('h')),
            'misc': utils.force_list(subfields.get('m')),
            'number': int_or_empty(subfields.get('o')),
            'isbn': subfields.get('i'),
            'publisher': utils.force_list(subfields.get('p')),
            'maintitle': subfields.get('q'),
            'report_number': utils.force_list(subfields.get('r')),
            'title': utils.force_list(subfields.get('t')),
            'url': utils.force_list(subfields.get('u')),
            'journal_pubnote': utils.force_list(subfields.get('s')),
            'raw_reference': utils.force_list(subfields.get('x')),
            'year': int_or_empty(subfields.get('y')),
        }

    references = self.get('references', [])
    for subfields in utils.force_list(value):
        references.append(get_value(subfields))

    return inspire_dojson_utils.remove_duplicates_from_list(
        strip_empty_values(references))
예제 #36
0
def test_duplicate_doi():
    """The same DOI with and without a source is expected as two entries."""
    marcxml = (
        '<record><datafield tag="024" ind1="7" ind2=" ">'
        '<subfield code="2">DOI</subfield>'
        '<subfield code="9">bibmatch</subfield>'
        '<subfield code="a">10.1088/1475-7516/2015/03/044</subfield>'
        '</datafield>'
        '<datafield tag="024" ind1="7" ind2=" ">'
        '<subfield code="2">DOI</subfield>'
        '<subfield code="a">10.1088/1475-7516/2015/03/044</subfield>'
        '</datafield></record>'
    )
    dois = strip_empty_values(hep.do(create_record(marcxml)))['dois']

    with_source = {
        'source': 'bibmatch',
        'value': '10.1088/1475-7516/2015/03/044',
    }
    without_source = {'value': '10.1088/1475-7516/2015/03/044'}

    assert dois == [with_source, without_source]
예제 #37
0
def test_multiple_dois():
    """Two different DOIs are expected as two ``dois`` entries, in order."""
    marcxml = (
        '<record><datafield tag="024" ind1="7" ind2=" ">'
        '<subfield code="2">DOI</subfield>'
        '<subfield code="a">10.1103/PhysRevD.89.072002</subfield>'
        '</datafield>'
        '<datafield tag="024" ind1="7" ind2=" ">'
        '<subfield code="2">DOI</subfield>'
        '<subfield code="9">bibmatch</subfield>'
        '<subfield code="a">10.1103/PhysRevD.91.019903</subfield>'
        '</datafield></record>'
    )
    dois = strip_empty_values(hep.do(create_record(marcxml)))['dois']

    plain_doi = {'value': '10.1103/PhysRevD.89.072002'}
    matched_doi = {
        'source': 'bibmatch',
        'value': '10.1103/PhysRevD.91.019903',
    }

    assert dois == [plain_doi, matched_doi]
예제 #38
0
def test_strip_empty_values():
    """Empty containers are dropped; ``False`` and ``0`` are kept."""
    kept = {
        'foo': (1, 2, 3),
        'bar': [1, 2, 3],
        'baz': {1, 2, 3},
        'qux': True,
        # Falsy but meaningful values must survive.
        'quux': False,
        'plugh': 0,
    }
    dropped = {
        '_foo': (),
        '_bar': [],
        '_baz': set(),
    }

    obj = dict(kept)
    obj.update(dropped)

    assert kept == strip_empty_values(obj)
예제 #39
0
def create_record(record, force=True, dry_run=False):
    """Create record from marc21 model.

    Converts ``record`` with the model of the first collection it matches
    (``hep`` when none matches) and strips empty values from the result.

    :param record: parsed MARC record to convert.
    :param force: not used by this function body.
    :param dry_run: when true, an ``(errors, json)`` tuple is returned in
        which ``errors`` is always the empty string.
    :returns: the converted record, or ``(errors, json)`` for a dry run.
    """
    errors = ""

    # Checked top to bottom; the first matching row decides the model.
    models_by_collection = (
        (('institution',), institutions),
        (('experiment',), experiments),
        (('journals',), journals),
        (('hepnames',), hepnames),
        (('job', 'jobhidden'), jobs),
        (('conferences',), conferences),
    )

    model = hep
    for names, candidate in models_by_collection:
        if any(_collection_in_record(record, name) for name in names):
            model = candidate
            break
    json = strip_empty_values(model.do(record))

    if not dry_run:
        return json
    return errors, json
예제 #40
0
def test_strip_empty_values_returns_none_on_none():
    """``None`` is passed through unchanged."""
    outcome = strip_empty_values(None)

    assert outcome is None