def test_date_closed_from_046__i_and_046__l_an_email():
    """046 $i becomes ``deadline_date``; an e-mail in 046 $l becomes ``reference_email``.

    Both converted values are also validated against their ``jobs`` subschemas.
    """
    properties = load_schema('jobs')['properties']
    subschema_deadline_date = properties['deadline_date']
    subschema_reference_email = properties['reference_email']

    # Sample taken from record/1089529.
    marcxml = ''.join([
        '<record>',
        ' <datafield tag="046" ind1=" " ind2=" ">',
        ' <subfield code="l">[email protected]</subfield>',
        ' </datafield>',
        ' <datafield tag="046" ind1=" " ind2=" ">',
        ' <subfield code="i">8888</subfield>',
        ' </datafield>',
        '</record>',
    ])

    wanted_deadline = '8888'
    wanted_emails = ['*****@*****.**']

    converted = jobs.do(create_record(marcxml))

    assert validate(converted['deadline_date'], subschema_deadline_date) is None
    assert wanted_deadline == converted['deadline_date']

    assert validate(converted['reference_email'], subschema_reference_email) is None
    assert wanted_emails == converted['reference_email']
def test_date_closed_from_046__i_and_046__l_an_url():
    """046 $i becomes ``deadline_date``; a URL in 046 $l becomes an entry of ``urls``.

    Both converted values are also validated against their ``jobs`` subschemas.
    """
    properties = load_schema('jobs')['properties']
    subschema_deadline_date = properties['deadline_date']
    subschema_urls = properties['urls']

    # Sample taken from record/963314.
    marcxml = ''.join([
        '<record>',
        ' <datafield tag="046" ind1=" " ind2=" ">',
        ' <subfield code="i">2012-06-01</subfield>',
        ' </datafield>',
        ' <datafield tag="046" ind1=" " ind2=" ">',
        ' <subfield code="l">http://www.pma.caltech.edu/physics-search</subfield>',
        ' </datafield>',
        '</record>',
    ])

    wanted_deadline = '2012-06-01'
    wanted_urls = [{'value': 'http://www.pma.caltech.edu/physics-search'}]

    converted = jobs.do(create_record(marcxml))

    assert validate(converted['deadline_date'], subschema_deadline_date) is None
    assert wanted_deadline == converted['deadline_date']

    assert validate(converted['urls'], subschema_urls) is None
    assert wanted_urls == converted['urls']
def convert_marcxml(source):
    """Convert MARC XML to JSON.

    Reads ``source`` completely, splits the content into record blobs and
    lazily yields one converted record per blob, with empty values stripped.
    The converter is picked from the first collection found in the record;
    records matching none of the known collections are converted with ``hep``.

    :param source: object exposing ``read()`` that returns the MARC XML blob.
    """
    from dojson.contrib.marc21.utils import create_record, split_blob

    from inspirehep.dojson.utils import strip_empty_values
    from inspirehep.dojson.hep import hep
    from inspirehep.dojson.institutions import institutions
    from inspirehep.dojson.journals import journals
    from inspirehep.dojson.experiments import experiments
    from inspirehep.dojson.hepnames import hepnames
    from inspirehep.dojson.jobs import jobs
    from inspirehep.dojson.conferences import conferences

    # Checked top to bottom; the first entry with a matching collection wins.
    converters = (
        (('institution',), institutions),
        (('experiment',), experiments),
        (('journals',), journals),
        (('hepnames',), hepnames),
        (('job', 'jobhidden'), jobs),
        (('conferences',), conferences),
    )

    for blob in split_blob(source.read()):
        marc = create_record(blob)
        model = hep
        for collections, candidate in converters:
            if any(_collection_in_record(marc, name) for name in collections):
                model = candidate
                break
        yield strip_empty_values(model.do(marc))
def test_institutions_from_110__double_a_z():
    """Two $a/$z pairs in one 110 field yield two curated institutions."""
    # Sample taken from record/1328021/export/xme.
    marcxml = ''.join([
        '<datafield tag="110" ind1=" " ind2=" ">',
        ' <subfield code="a">Indiana U.</subfield>',
        ' <subfield code="a">NIST, Wash., D.C.</subfield>',
        ' <subfield code="z">902874</subfield>',
        ' <subfield code="z">903056</subfield>',
        '</datafield>',
    ])

    indiana = {
        'curated_relation': True,
        'name': 'Indiana U.',
        'record': {'$ref': 'http://localhost:5000/api/institutions/902874'},
    }
    nist = {
        'curated_relation': True,
        'name': 'NIST, Wash., D.C.',
        'record': {'$ref': 'http://localhost:5000/api/institutions/903056'},
    }
    wanted = [indiana, nist]

    converted = jobs.do(create_record(marcxml))

    assert wanted == converted['institutions']
def test_institutions_from_double_110__a():
    """Two 110 fields carrying only $a yield two non-curated institutions."""
    # Sample taken from record/1427342.
    marcxml = ''.join([
        '<record>',
        ' <datafield tag="110" ind1=" " ind2=" ">',
        ' <subfield code="a">Coll. William and Mary</subfield>',
        ' </datafield>',
        ' <datafield tag="110" ind1=" " ind2=" ">',
        ' <subfield code="a">Jefferson Lab</subfield>',
        ' </datafield>',
        '</record>',
    ])

    wanted = [
        {'curated_relation': False, 'name': 'Coll. William and Mary'},
        {'curated_relation': False, 'name': 'Jefferson Lab'},
    ]

    converted = jobs.do(create_record(marcxml))

    assert wanted == converted['institutions']
def test_institutions_from_110__double_a_z():
    """Two $a/$z pairs in one 110 field yield two curated institutions after cleaning."""
    # Sample taken from record/1328021/export/xme.
    marcxml = ''.join([
        '<datafield tag="110" ind1=" " ind2=" ">',
        ' <subfield code="a">Indiana U.</subfield>',
        ' <subfield code="a">NIST, Wash., D.C.</subfield>',
        ' <subfield code="z">902874</subfield>',
        ' <subfield code="z">903056</subfield>',
        '</datafield>',
    ])

    indiana = {
        'curated_relation': True,
        'name': 'Indiana U.',
        'record': {'$ref': 'http://localhost:5000/api/institutions/902874'},
    }
    nist = {
        'curated_relation': True,
        'name': 'NIST, Wash., D.C.',
        'record': {'$ref': 'http://localhost:5000/api/institutions/903056'},
    }
    wanted = [indiana, nist]

    cleaned = clean_record(jobs.do(create_record(marcxml)))

    assert wanted == cleaned['institutions']
def test_experiments_from_693__e__0_and_e():
    """A 693 with $e and $0 is curated; a 693 with only $e is not."""
    # Sample taken from record/1393583 /export/xme.
    marcxml = ''.join([
        '<record>',
        ' <datafield tag="693" ind1=" " ind2=" ">',
        ' <subfield code="e">CERN-LHC-ATLAS</subfield>',
        ' <subfield code="0">1108541</subfield>',
        ' </datafield>',
        ' <datafield tag="693" ind1=" " ind2=" ">',
        ' <subfield code="e">IHEP-CEPC</subfield>',
        ' </datafield>',
        '</record>',
    ])

    atlas = {
        'curated_relation': True,
        'name': 'CERN-LHC-ATLAS',
        'record': {'$ref': 'http://localhost:5000/api/experiments/1108541'},
    }
    cepc = {'curated_relation': False, 'name': 'IHEP-CEPC'}
    wanted = [atlas, cepc]

    converted = jobs.do(create_record(marcxml))

    assert wanted == converted['experiments']
def test_contact_details_from_multiple_marcxml_270():
    """Each 270 field yields its own contact: one with name and e-mail, one with name only."""
    marcxml = ''.join([
        '<record> ',
        ' <datafield tag="270" ind1=" " ind2=" ">',
        ' <subfield code="m">[email protected]</subfield>',
        ' <subfield code="p">Manfred Lindner</subfield>',
        ' </datafield>',
        ' <datafield tag="270" ind1=" " ind2=" ">',
        ' <subfield code="p">Wynton Marsalis</subfield>',
        ' </datafield>',
        '</record>',
    ])

    wanted = [
        {'name': 'Manfred Lindner', 'email': '*****@*****.**'},
        {'name': 'Wynton Marsalis'},
    ]

    converted = jobs.do(create_record(marcxml))

    assert wanted == converted['contact_details']
def test_regions_from_043__a_corrects_misspellings():
    """A 043 $a of 'United States' ends up as the region 'North America'."""
    marcxml = ''.join([
        '<datafield tag="043" ind1=" " ind2=" ">',
        ' <subfield code="a">United States</subfield>',
        '</datafield>',
    ])
    wanted = ['North America']

    cleaned = clean_record(jobs.do(create_record(marcxml)))

    assert wanted == cleaned['regions']
def test_institutions_from_110__double_a_z():
    """Two $a/$z pairs in one 110 field yield two curated institutions after cleaning."""
    # Sample taken from record/1328021/export/xme.
    xml_parts = [
        '<datafield tag="110" ind1=" " ind2=" ">',
        ' <subfield code="a">Indiana U.</subfield>',
        ' <subfield code="a">NIST, Wash., D.C.</subfield>',
        ' <subfield code="z">902874</subfield>',
        ' <subfield code="z">903056</subfield>',
        '</datafield>',
    ]
    marcxml = ''.join(xml_parts)

    wanted = [
        {
            'curated_relation': True,
            'name': 'Indiana U.',
            'record': {'$ref': 'http://localhost:5000/api/institutions/902874'},
        },
        {
            'curated_relation': True,
            'name': 'NIST, Wash., D.C.',
            'record': {'$ref': 'http://localhost:5000/api/institutions/903056'},
        },
    ]

    cleaned = clean_record(jobs.do(create_record(marcxml)))

    assert wanted == cleaned['institutions']
def test_regions_from_043__a():
    """A single 043 $a is converted into a one-element ``regions`` list."""
    marcxml = ''.join([
        '<datafield tag="043" ind1=" " ind2=" ">',
        ' <subfield code="a">Asia</subfield>',
        '</datafield>',
    ])
    wanted = ['Asia']

    cleaned = clean_record(jobs.do(create_record(marcxml)))

    assert wanted == cleaned['regions']
def test_regions_from_043__a():
    """A single 043 $a is converted into a one-element ``regions`` list."""
    marcxml = ''.join([
        '<datafield tag="043" ind1=" " ind2=" ">',
        ' <subfield code="a">Asia</subfield>',
        '</datafield>',
    ])
    wanted = ['Asia']

    converted = jobs.do(create_record(marcxml))

    assert wanted == converted['regions']
def test_position_from_245__a():
    """245 $a is converted into ``position``."""
    # Sample taken from record/1467312.
    marcxml = ''.join([
        '<datafield tag="245" ind1=" " ind2=" ">',
        ' <subfield code="a">Neutrino Physics</subfield>',
        '</datafield>',
    ])
    wanted = 'Neutrino Physics'

    cleaned = clean_record(jobs.do(create_record(marcxml)))

    assert wanted == cleaned['position']
def test_date_closed_from_046__l():
    """A date in 046 $l is converted into ``date_closed``."""
    # Sample taken from record/934304.
    marcxml = ''.join([
        '<datafield tag="046" ind1=" " ind2=" ">',
        ' <subfield code="l">2008-02-11</subfield>',
        '</datafield>',
    ])
    wanted = '2008-02-11'

    cleaned = clean_record(jobs.do(create_record(marcxml)))

    assert wanted == cleaned['date_closed']
def test_position_from_245__a():
    """245 $a is converted into ``position``."""
    # Sample taken from record/1467312.
    xml_parts = (
        '<datafield tag="245" ind1=" " ind2=" ">',
        ' <subfield code="a">Neutrino Physics</subfield>',
        '</datafield>',
    )
    marcxml = ''.join(xml_parts)
    wanted = 'Neutrino Physics'

    cleaned = clean_record(jobs.do(create_record(marcxml)))

    assert wanted == cleaned['position']
def test_date_closed_from_046__i():
    """A date in 046 $i is converted into ``deadline_date``."""
    # Sample taken from record/1310294.
    marcxml = ''.join([
        '<datafield tag="046" ind1=" " ind2=" ">',
        ' <subfield code="i">2015-12-15</subfield>',
        '</datafield>',
    ])
    wanted = '2015-12-15'

    cleaned = clean_record(jobs.do(create_record(marcxml)))

    assert wanted == cleaned['deadline_date']
def test_regions_from_043__a_splits_on_commas():
    """A comma-separated 043 $a is split into separate ``regions`` entries."""
    marcxml = ''.join([
        '<datafield tag="043" ind1=" " ind2=" ">',
        ' <subfield code="a">Asia, North America</subfield>',
        '</datafield>',
    ])
    wanted = ['Asia', 'North America']

    cleaned = clean_record(jobs.do(create_record(marcxml)))

    assert wanted == cleaned['regions']
def test_date_closed_from_046__l():
    """A date in 046 $l is converted into ``date_closed``."""
    # Sample taken from record/934304.
    xml_parts = (
        '<datafield tag="046" ind1=" " ind2=" ">',
        ' <subfield code="l">2008-02-11</subfield>',
        '</datafield>',
    )
    marcxml = ''.join(xml_parts)
    wanted = '2008-02-11'

    cleaned = clean_record(jobs.do(create_record(marcxml)))

    assert wanted == cleaned['date_closed']
def test_regions_from_043__a_splits_on_commas():
    """A comma-separated 043 $a is split into separate ``regions`` entries."""
    xml_parts = (
        '<datafield tag="043" ind1=" " ind2=" ">',
        ' <subfield code="a">Asia, North America</subfield>',
        '</datafield>',
    )
    marcxml = ''.join(xml_parts)
    wanted = ['Asia', 'North America']

    cleaned = clean_record(jobs.do(create_record(marcxml)))

    assert wanted == cleaned['regions']
def test_regions_from_043__a_corrects_misspellings():
    """A 043 $a of 'United States' ends up as the region 'North America'."""
    xml_parts = (
        '<datafield tag="043" ind1=" " ind2=" ">',
        ' <subfield code="a">United States</subfield>',
        '</datafield>',
    )
    marcxml = ''.join(xml_parts)
    wanted = ['North America']

    cleaned = clean_record(jobs.do(create_record(marcxml)))

    assert wanted == cleaned['regions']
def test_date_closed_from_046__i():
    """A date in 046 $i is converted into ``deadline_date``."""
    # Sample taken from record/1310294.
    xml_parts = (
        '<datafield tag="046" ind1=" " ind2=" ">',
        ' <subfield code="i">2015-12-15</subfield>',
        '</datafield>',
    )
    marcxml = ''.join(xml_parts)
    wanted = '2015-12-15'

    cleaned = clean_record(jobs.do(create_record(marcxml)))

    assert wanted == cleaned['deadline_date']
def test_description_from_520__a():
    """520 $a is converted into ``description``; inline ``<br />`` markup is kept."""
    # Sample taken from record/1239755.
    marcxml = ''.join([
        '<datafield tag="520" ind1=" " ind2=" ">',
        ' <subfield code="a">(1) Conduct independent research in string theory related theoretical sciences;<br /> <br /> (2) Advising graduate students in their research;<br /> <br /> (3) A very small amount of teaching of undergraduate courses.&nbsp;</subfield>',
        '</datafield>',
    ])
    wanted = '(1) Conduct independent research in string theory related theoretical sciences;<br /> <br /> (2) Advising graduate students in their research;<br /> <br /> (3) A very small amount of teaching of undergraduate courses. '

    cleaned = clean_record(jobs.do(create_record(marcxml)))

    assert wanted == cleaned['description']
def test_institutions_from_110__a():
    """A 110 field with only $a yields one non-curated institution."""
    # Sample taken from record/1427342.
    marcxml = ''.join([
        '<datafield tag="110" ind1=" " ind2=" ">',
        ' <subfield code="a">Coll. William and Mary</subfield>',
        '</datafield>',
    ])
    wanted = [
        {'curated_relation': False, 'name': 'Coll. William and Mary'},
    ]

    cleaned = clean_record(jobs.do(create_record(marcxml)))

    assert wanted == cleaned['institutions']
def test_description_from_520__a():
    """520 $a is converted into ``description``; inline ``<br />`` markup is kept."""
    # Sample taken from record/1239755.
    marcxml = ''.join([
        '<datafield tag="520" ind1=" " ind2=" ">',
        ' <subfield code="a">(1) Conduct independent research in string theory related theoretical sciences;<br /> <br /> (2) Advising graduate students in their research;<br /> <br /> (3) A very small amount of teaching of undergraduate courses.&nbsp;</subfield>',
        '</datafield>',
    ])
    wanted = '(1) Conduct independent research in string theory related theoretical sciences;<br /> <br /> (2) Advising graduate students in their research;<br /> <br /> (3) A very small amount of teaching of undergraduate courses. '

    converted = jobs.do(create_record(marcxml))

    assert wanted == converted['description']
def test_date_closed_from_046__l():
    """A date in 046 $l is converted into ``date_closed``."""
    # Sample taken from record/934304.
    marcxml = ''.join([
        '<datafield tag="046" ind1=" " ind2=" ">',
        ' <subfield code="l">2008-02-11</subfield>',
        '</datafield>',
    ])
    wanted = '2008-02-11'

    converted = jobs.do(create_record(marcxml))

    assert wanted == converted['date_closed']
def test_position_from_245__a():
    """245 $a is converted into ``position``."""
    # Sample taken from record/1467312.
    marcxml = ''.join([
        '<datafield tag="245" ind1=" " ind2=" ">',
        ' <subfield code="a">Neutrino Physics</subfield>',
        '</datafield>',
    ])
    wanted = 'Neutrino Physics'

    converted = jobs.do(create_record(marcxml))

    assert wanted == converted['position']
def test_regions_from_043__a_splits_on_commas():
    """A comma-separated 043 $a is split into separate ``regions`` entries."""
    marcxml = ''.join([
        '<datafield tag="043" ind1=" " ind2=" ">',
        ' <subfield code="a">Asia, North America</subfield>',
        '</datafield>',
    ])
    wanted = ['Asia', 'North America']

    converted = jobs.do(create_record(marcxml))

    assert wanted == converted['regions']
def test_regions_from_043__a():
    """A single 043 $a is converted into a one-element ``regions`` list."""
    xml_parts = (
        '<datafield tag="043" ind1=" " ind2=" ">',
        ' <subfield code="a">Asia</subfield>',
        '</datafield>',
    )
    marcxml = ''.join(xml_parts)
    wanted = ['Asia']

    converted = jobs.do(create_record(marcxml))

    assert wanted == converted['regions']
def create_record(data, force=False, dry_run=False):
    """Convert one MARCXML blob to JSON and, when ``force`` is set, store it.

    The converter is chosen from the first collection found in the parsed
    record; records matching none of the known collections use ``hep``.

    :param data: MARCXML handed to ``marc_create_record``; also stored on the
        ``InspireProdRecords`` tracking row.
    :param force: when truthy and the converted JSON has a ``control_number``
        or ``recid`` key, the record is created or updated.
    :param dry_run: when truthy, nothing is stored and ``(recid, json)`` is
        returned right after conversion.
    :returns: ``(recid, json)`` on dry run; ``(control_number, dict(record))``
        once stored; ``None`` (implicitly) when neither path is taken.
    :raises Exception: any error raised while converting or storing is logged
        and re-raised unchanged.
    """
    # NOTE(review): the block nesting was reconstructed from a flattened
    # source; confirm that ``record.commit()`` belongs to the ``else`` branch
    # and that the error is meant to be logged for every failure.
    record = marc_create_record(data)
    recid = None
    if '001' in record:
        recid = int(record['001'][0])

    # Always bound, so the error handler below can never hit an
    # UnboundLocalError (previously possible with dry_run=True and a recid),
    # which would have masked the real conversion error.
    prod_record = None
    if not dry_run and recid:
        prod_record = InspireProdRecords(recid=recid)
        prod_record.marcxml = data

    try:
        if _collection_in_record(record, 'institution'):
            json = strip_empty_values(institutions.do(record))
        elif _collection_in_record(record, 'experiment'):
            json = strip_empty_values(experiments.do(record))
        elif _collection_in_record(record, 'journals'):
            json = strip_empty_values(journals.do(record))
        elif _collection_in_record(record, 'hepnames'):
            json = strip_empty_values(hepnames.do(record))
        elif _collection_in_record(record, 'job') or \
                _collection_in_record(record, 'jobhidden'):
            json = strip_empty_values(jobs.do(record))
        elif _collection_in_record(record, 'conferences'):
            json = strip_empty_values(conferences.do(record))
        else:
            json = strip_empty_values(hep.do(record))

        if dry_run:
            return recid, json

        if force and any(key in json for key in ('control_number', 'recid')):
            try:
                control_number = json['control_number']
            except KeyError:
                control_number = json['recid']
            control_number = int(control_number)
            # Searches if record already exists.
            record = Record.get_record(control_number)
            if record is None:
                # Adds the record to the db session.
                rec = RecordModel(id=control_number)
                db.session.merge(rec)
                record = Record.create(json)
            else:
                record = Record(json, model=record.model)
                record.commit()
            if prod_record is not None:
                prod_record.successful = True
                db.session.merge(prod_record)
            logger.info("Elaborated record {}".format(control_number))
            return control_number, dict(record)
    except Exception:
        if prod_record is not None:
            prod_record.successful = False
            db.session.merge(prod_record)
        logger.exception("Error in elaborating record ID {}".format(recid))
        raise
def test_regions_from_043__a_corrects_misspellings():
    """A 043 $a of 'United States' ends up as the region 'North America'."""
    marcxml = ''.join([
        '<datafield tag="043" ind1=" " ind2=" ">',
        ' <subfield code="a">United States</subfield>',
        '</datafield>',
    ])
    wanted = ['North America']

    converted = jobs.do(create_record(marcxml))

    assert wanted == converted['regions']
def test_continent_from_043__a():
    """043 $a is converted into the scalar ``continent`` field."""
    marcxml = ''.join([
        '<datafield tag="043" ind1=" " ind2=" ">',
        ' <subfield code="a">Asia</subfield>',
        '</datafield>',
    ])
    wanted = 'Asia'

    cleaned = clean_record(jobs.do(create_record(marcxml)))

    assert wanted == cleaned['continent']
def test_ranks_from_marcxml_656_with_single_a():
    """A single 656 $a yields one raw rank and its upper-cased counterpart."""
    xml_parts = (
        '<record>',
        ' <datafield tag="656" ind1=" " ind2=" ">',
        ' <subfield code="a">Senior</subfield>',
        ' </datafield>',
        '</record>',
    )
    marcxml = ''.join(xml_parts)

    cleaned = clean_record(jobs.do(create_record(marcxml)))

    assert cleaned['_ranks'] == ['Senior']
    assert cleaned['ranks'] == ['SENIOR']
def test_experiments_from_693__e():
    """693 $e is converted into a list of experiment names."""
    # Sample taken from record/1471772.
    marcxml = ''.join([
        '<datafield tag="693" ind1=" " ind2=" ">',
        ' <subfield code="e">CERN-LHC-ATLAS</subfield>',
        '</datafield>',
    ])
    wanted = ['CERN-LHC-ATLAS']

    cleaned = clean_record(jobs.do(create_record(marcxml)))

    assert wanted == cleaned['experiments']
def test_experiments_from_693__e():
    """A 693 field with only $e yields one non-curated experiment."""
    # Sample taken from record/1375852.
    marcxml = ''.join([
        '<record>',
        ' <datafield tag="693" ind1=" " ind2=" ">',
        ' <subfield code="e">ALIGO</subfield>',
        ' </datafield>',
        '</record>',
    ])
    wanted = [
        {'curated_relation': False, 'name': 'ALIGO'},
    ]

    cleaned = clean_record(jobs.do(create_record(marcxml)))

    assert wanted == cleaned['experiments']
def test_ranks_from_marcxml_656_with_single_a():
    """A single 656 $a yields one raw rank and its upper-cased counterpart."""
    marcxml = ''.join([
        '<record>',
        ' <datafield tag="656" ind1=" " ind2=" ">',
        ' <subfield code="a">Senior</subfield>',
        ' </datafield>',
        '</record>',
    ])

    cleaned = clean_record(jobs.do(create_record(marcxml)))

    assert cleaned['_ranks'] == ['Senior']
    assert cleaned['ranks'] == ['SENIOR']
def test_contact_details_from_marcxml_270_single_p_single_m():
    """One 270 with a single $p and a single $m yields one contact with name and e-mail."""
    marcxml = ''.join([
        '<record> ',
        ' <datafield tag="270" ind1=" " ind2=" ">',
        ' <subfield code="m">[email protected]</subfield>',
        ' <subfield code="p">Manfred Lindner</subfield>',
        ' </datafield>',
        '</record>',
    ])
    wanted = [
        {'name': 'Manfred Lindner', 'email': '*****@*****.**'},
    ]

    cleaned = clean_record(jobs.do(create_record(marcxml)))

    assert wanted == cleaned['contact_details']
def test_ranks_from_marcxml_656_with_single_a():
    """A single 656 $a yields one raw rank and its upper-cased counterpart."""
    marcxml = ''.join([
        '<record>',
        ' <datafield tag="656" ind1=" " ind2=" ">',
        ' <subfield code="a">Senior</subfield>',
        ' </datafield>',
        '</record>',
    ])

    converted = jobs.do(create_record(marcxml))

    assert converted['_ranks'] == ['Senior']
    assert converted['ranks'] == ['SENIOR']
def test_institutions_from_110__a():
    """A 110 field with only $a yields one non-curated institution."""
    # Sample taken from record/1427342.
    xml_parts = (
        '<datafield tag="110" ind1=" " ind2=" ">',
        ' <subfield code="a">Coll. William and Mary</subfield>',
        '</datafield>',
    )
    marcxml = ''.join(xml_parts)
    wanted = [
        {'curated_relation': False, 'name': 'Coll. William and Mary'},
    ]

    cleaned = clean_record(jobs.do(create_record(marcxml)))

    assert wanted == cleaned['institutions']
def test_ranks_from_marcxml_656_with_single_a():
    """A single 656 $a yields one upper-cased rank that validates against the schema."""
    ranks_subschema = load_schema('jobs')['properties']['ranks']

    marcxml = ''.join([
        '<datafield tag="656" ind1=" " ind2=" ">',
        ' <subfield code="a">Senior</subfield>',
        '</datafield>',
    ])

    converted = jobs.do(create_record(marcxml))

    assert validate(converted['ranks'], ranks_subschema) is None
    assert converted['ranks'] == ['SENIOR']
def create_record(recid, record, force=False, dry_run=False, validation=False):
    """Create record from marc21 model.

    Converts ``record`` with the converter matching the first collection found
    in it (``hep`` when none matches), strips empty values, optionally
    validates the result, and optionally stores it.

    :param recid: identifier used in the validation message and returned
        unchanged on dry run.
    :param record: input handed to ``_collection_in_record`` and to the chosen
        converter's ``do``; the name is rebound to the stored ``Record`` later.
    :param force: when truthy and the converted JSON has a ``control_number``
        or ``recid`` key, the record is created or updated.
    :param dry_run: when truthy, return ``(errors, recid, json)`` without
        storing anything.
    :param validation: when truthy, call ``validate`` on the converted JSON;
        a ``ValidationError`` is logged as a warning and reported through
        ``errors`` instead of being raised.
    :returns: ``(errors, recid, json)`` on dry run; ``(errors, control_number,
        dict(record))`` once stored; otherwise the function falls through and
        returns ``None`` implicitly.
    """
    # NOTE(review): the block nesting below was reconstructed from a flattened
    # source — confirm it against the original layout, in particular that
    # ``record.commit()`` belongs to the ``else`` branch.
    errors = ""

    # First matching collection wins; ``hep`` is the fallback converter.
    if _collection_in_record(record, 'institution'):
        json = strip_empty_values(institutions.do(record))
    elif _collection_in_record(record, 'experiment'):
        json = strip_empty_values(experiments.do(record))
    elif _collection_in_record(record, 'journals'):
        json = strip_empty_values(journals.do(record))
    elif _collection_in_record(record, 'hepnames'):
        json = strip_empty_values(hepnames.do(record))
    elif _collection_in_record(record, 'job') or \
            _collection_in_record(record, 'jobhidden'):
        json = strip_empty_values(jobs.do(record))
    elif _collection_in_record(record, 'conferences'):
        json = strip_empty_values(conferences.do(record))
    else:
        json = strip_empty_values(hep.do(record))

    if validation:
        try:
            validate(json)
        except ValidationError as err:
            # Validation problems are reported to the caller, not raised.
            errors = "ValidationError: Record {0}: {1}".format(recid, err)
            current_app.logger.warning(errors)

    if dry_run:
        return errors, recid, json

    if force and any(key in json for key in ('control_number', 'recid')):
        # Prefer ``control_number``; fall back to ``recid`` when it is absent.
        try:
            control_number = json['control_number']
        except KeyError:
            control_number = json['recid']
        control_number = int(control_number)
        # Searches if record already exists.
        with db.session.begin_nested():
            record = Record.get_record(control_number)
            if record is None:
                # Adds the record to the db session.
                rec = RecordModel(id=control_number)
                db.session.merge(rec)
                record = Record.create(json)
            else:
                record = Record(json, model=record.model)
                record.commit()
        logger.info("Elaborated record {}".format(control_number))
        return errors, control_number, dict(record)
def test_ranks_from_marcxml_double_656():
    """Two 656 fields in one record yield two ranks, in document order."""
    marcxml = ''.join([
        '<record>',
        ' <datafield tag="656" ind1=" " ind2=" ">',
        ' <subfield code="a">Senior</subfield>',
        ' </datafield>',
        ' <datafield tag="656" ind1=" " ind2=" ">',
        ' <subfield code="a">Junior</subfield>',
        ' </datafield>',
        '</record>',
    ])

    converted = jobs.do(create_record(marcxml))

    assert converted['_ranks'] == ['Senior', 'Junior']
    assert converted['ranks'] == ['SENIOR', 'JUNIOR']
def test_date_closed_from_046_i_l_an_email():
    """046 $i becomes ``deadline_date``; an e-mail in 046 $l becomes ``reference_email``."""
    # Sample taken from record/1089529.
    marcxml = ''.join([
        '<record>',
        ' <datafield tag="046" ind1=" " ind2=" ">',
        ' <subfield code="l">[email protected]</subfield>',
        ' </datafield>',
        ' <datafield tag="046" ind1=" " ind2=" ">',
        ' <subfield code="i">8888</subfield>',
        ' </datafield>',
        '</record>',
    ])

    cleaned = clean_record(jobs.do(create_record(marcxml)))

    assert cleaned['deadline_date'] == '8888'
    assert cleaned['reference_email'] == ['*****@*****.**']
def test_experiments_from_693__e():
    """A 693 field with only $e yields one non-curated experiment."""
    # Sample taken from record/1375852.
    xml_parts = (
        '<record>',
        ' <datafield tag="693" ind1=" " ind2=" ">',
        ' <subfield code="e">ALIGO</subfield>',
        ' </datafield>',
        '</record>',
    )
    marcxml = ''.join(xml_parts)
    wanted = [
        {'curated_relation': False, 'name': 'ALIGO'},
    ]

    cleaned = clean_record(jobs.do(create_record(marcxml)))

    assert wanted == cleaned['experiments']
def test_contact_details_from_marcxml_270_single_p_single_m():
    """One 270 with a single $p and a single $m yields one contact with name and e-mail."""
    xml_parts = (
        '<record> ',
        ' <datafield tag="270" ind1=" " ind2=" ">',
        ' <subfield code="m">[email protected]</subfield>',
        ' <subfield code="p">Manfred Lindner</subfield>',
        ' </datafield>',
        '</record>',
    )
    marcxml = ''.join(xml_parts)
    wanted = [
        {'name': 'Manfred Lindner', 'email': '*****@*****.**'},
    ]

    cleaned = clean_record(jobs.do(create_record(marcxml)))

    assert wanted == cleaned['contact_details']
def overdo_marc_dict(record):
    """Convert MARC Groupable Ordered Dict into JSON.

    The converter is picked from the first collection found in ``record``;
    when none of the known collections matches, ``hep`` is used.

    :param record: input handed to ``_collection_in_record`` and to the chosen
        converter's ``do``.
    :returns: whatever the chosen converter's ``do`` returns.
    """
    # Checked top to bottom; the first entry with a matching collection wins.
    converters = (
        (('institution',), institutions),
        (('experiment',), experiments),
        (('journals',), journals),
        (('hepnames',), hepnames),
        (('job', 'jobhidden'), jobs),
        (('conferences',), conferences),
    )

    for collections, model in converters:
        if any(_collection_in_record(record, name) for name in collections):
            return model.do(record)

    return hep.do(record)
def test_contact_details_from_marcxml_270_double_p_single_m():
    """Two names sharing one e-mail in a single 270 are unsupported: only the e-mail is kept."""
    marcxml = ''.join([
        '<record> ',
        ' <datafield tag="270" ind1=" " ind2=" ">',
        ' <subfield code="m">[email protected]</subfield>',
        ' <subfield code="p">Manfred Lindner</subfield>',
        ' <subfield code="p">Boogeyman</subfield>',
        ' </datafield>',
        '</record>',
    ])
    wanted = [
        {'email': '*****@*****.**'},
    ]

    cleaned = clean_record(jobs.do(create_record(marcxml)))

    assert wanted == cleaned['contact_details']
def test_contact_details_from_marcxml_270_single_p_double_m():
    """One name with two e-mails in a single 270 is unsupported: only the name is kept."""
    marcxml = ''.join([
        '<record> ',
        ' <datafield tag="270" ind1=" " ind2=" ">',
        ' <subfield code="m">[email protected]</subfield>',
        ' <subfield code="m">[email protected]</subfield>',
        ' <subfield code="p">Manfred Lindner</subfield>',
        ' </datafield>',
        '</record>',
    ])
    wanted = [
        {'name': 'Manfred Lindner'},
    ]

    cleaned = clean_record(jobs.do(create_record(marcxml)))

    assert wanted == cleaned['contact_details']
def test_date_closed_from_046__i_l_an_url():
    """046 $i becomes ``deadline_date``; a URL in 046 $l becomes an entry of ``urls``."""
    # Sample taken from record/963314.
    marcxml = ''.join([
        '<record>',
        ' <datafield tag="046" ind1=" " ind2=" ">',
        ' <subfield code="i">2012-06-01</subfield>',
        ' </datafield>',
        ' <datafield tag="046" ind1=" " ind2=" ">',
        ' <subfield code="l">http://www.pma.caltech.edu/physics-search</subfield>',
        ' </datafield>',
        '</record>',
    ])

    cleaned = clean_record(jobs.do(create_record(marcxml)))

    assert cleaned['deadline_date'] == '2012-06-01'
    assert cleaned['urls'] == [
        {'value': 'http://www.pma.caltech.edu/physics-search'},
    ]
def test_experiments_from_693__e__0():
    """A 693 field with $e and $0 yields one curated experiment with a record reference."""
    # Sample taken from record/1332138.
    marcxml = ''.join([
        '<record>',
        ' <datafield tag="693" ind1=" " ind2=" ">',
        ' <subfield code="e">CERN-LHC-ATLAS</subfield>',
        ' <subfield code="0">1108541</subfield>',
        ' </datafield>',
        '</record>',
    ])
    wanted = [
        {
            'curated_relation': True,
            'name': 'CERN-LHC-ATLAS',
            'record': {'$ref': 'http://localhost:5000/api/experiments/1108541'},
        },
    ]

    cleaned = clean_record(jobs.do(create_record(marcxml)))

    assert wanted == cleaned['experiments']
def test_experiments_from_triple_693__e__0():
    """Three 693 fields with $e and $0 yield three curated experiments, in document order."""
    # Sample taken from record/1469159.
    marcxml = ''.join([
        '<record>',
        ' <datafield tag="693" ind1=" " ind2=" ">',
        ' <subfield code="e">CERN-NA-049</subfield>',
        ' <subfield code="0">1110308</subfield>',
        ' </datafield>',
        ' <datafield tag="693" ind1=" " ind2=" ">',
        ' <subfield code="e">CERN-NA-061</subfield>',
        ' <subfield code="0">1108234</subfield>',
        ' </datafield>',
        ' <datafield tag="693" ind1=" " ind2=" ">',
        ' <subfield code="e">CERN-LHC-ALICE</subfield>',
        ' <subfield code="0">1110642</subfield>',
        ' </datafield>',
        '</record>',
    ])

    na49 = {
        'curated_relation': True,
        'name': 'CERN-NA-049',
        'record': {'$ref': 'http://localhost:5000/api/experiments/1110308'},
    }
    na61 = {
        'curated_relation': True,
        'name': 'CERN-NA-061',
        'record': {'$ref': 'http://localhost:5000/api/experiments/1108234'},
    }
    alice = {
        'curated_relation': True,
        'name': 'CERN-LHC-ALICE',
        'record': {'$ref': 'http://localhost:5000/api/experiments/1110642'},
    }
    wanted = [na49, na61, alice]

    cleaned = clean_record(jobs.do(create_record(marcxml)))

    assert wanted == cleaned['experiments']
def test_experiments_from_693__e__0_and_e():
    """A 693 with $e and $0 is curated; a 693 with only $e is not."""
    # Sample taken from record/1393583 /export/xme.
    xml_parts = (
        '<record>',
        ' <datafield tag="693" ind1=" " ind2=" ">',
        ' <subfield code="e">CERN-LHC-ATLAS</subfield>',
        ' <subfield code="0">1108541</subfield>',
        ' </datafield>',
        ' <datafield tag="693" ind1=" " ind2=" ">',
        ' <subfield code="e">IHEP-CEPC</subfield>',
        ' </datafield>',
        '</record>',
    )
    marcxml = ''.join(xml_parts)

    wanted = [
        {
            'curated_relation': True,
            'name': 'CERN-LHC-ATLAS',
            'record': {'$ref': 'http://localhost:5000/api/experiments/1108541'},
        },
        {'curated_relation': False, 'name': 'IHEP-CEPC'},
    ]

    converted = jobs.do(create_record(marcxml))

    assert wanted == converted['experiments']
def create_record(record, force=True, dry_run=False):
    """Create record from marc21 model.

    Converts ``record`` with the converter matching the first collection found
    in it (``hep`` when none matches) and strips empty values from the result.

    :param record: input handed to ``_collection_in_record`` and to the chosen
        converter's ``do``.
    :param force: accepted but not used by this function.
    :param dry_run: when truthy, return ``(errors, json)``, where ``errors`` is
        always the empty string; otherwise return the converted JSON alone.
    """
    # Checked top to bottom; the first entry with a matching collection wins.
    converters = (
        (('institution',), institutions),
        (('experiment',), experiments),
        (('journals',), journals),
        (('hepnames',), hepnames),
        (('job', 'jobhidden'), jobs),
        (('conferences',), conferences),
    )

    model = hep
    for collections, candidate in converters:
        if any(_collection_in_record(record, name) for name in collections):
            model = candidate
            break

    converted = strip_empty_values(model.do(record))

    if not dry_run:
        return converted
    return "", converted