def _parse_structures(record):
    """Collect structure data for the affiliations linked from *record*.

    Walks ``record['authors'][*]['affiliations'][*]``.  For every affiliation
    carrying a ``record`` reference, the recid is resolved with
    ``get_recid_from_ref``, written back onto the affiliation under the
    ``recid`` key (the input is mutated in place) and queued for lookup.
    The queued recids are then fetched with ``get_es_records('ins', ...)``
    and each hit is converted with ``_structure_data``.

    :param record: dict-like record; ``authors`` / ``affiliations`` may be
        missing.
    :return: the result of ``dedupe_list`` on the converted structures.
    """
    recids = []
    for author in record.get('authors', []):
        for affiliation in author.get('affiliations', []):
            try:
                # Resolve the reference once and reuse the result for both
                # the lookup list and the annotation on the affiliation.
                recid = get_recid_from_ref(affiliation['record'])
            except KeyError:
                # E.g. the affiliation has no 'record' key: nothing to do.
                continue
            recids.append(str(recid))
            affiliation['recid'] = recid

    try:
        institutions = get_es_records('ins', recids)
    except RequestError:
        # A failed lookup produces no structures instead of an error.
        institutions = []

    # A dedicated loop name keeps the ``record`` argument from being rebound.
    structures = [
        _structure_data(institution) for institution in institutions
    ]
    return dedupe_list(structures)
def record_upsert(json):
    """Insert or update a record.

    The control number is read from ``json['control_number']``, falling back
    to ``json['recid']``.  If a persistent identifier exists for it, the
    stored record is updated with ``json`` and committed.  If the lookup
    raises ``PIDDoesNotExistError``, a new record is created and
    ``inspire_recid_minter`` is called for it.

    When ``json`` has a truthy ``deleted`` value, either
    ``merge_pidstores_of_two_merged_records`` is called with the record
    referenced by ``new_record``, or, when no recid can be extracted from
    ``new_record``, ``soft_delete_pidstore_for_record`` is called.

    :param json: dict-like record data; ``$schema`` is read with ``[]`` and
        therefore must be present.
    :return: the updated or newly created record.
    """
    control_number = json.get('control_number', json.get('recid'))
    # NOTE(review): everything below is taken to be guarded by this check,
    # which means the function implicitly returns None when no control
    # number is present -- confirm against the canonical layout.
    if control_number:
        control_number = int(control_number)
        pid_type = InspireRecordIdProvider.schema_to_pid_type(json['$schema'])
        try:
            # Update path: the PID is already known.
            pid = PersistentIdentifier.get(pid_type, control_number)
            record = Record.get_record(pid.object_uuid)
            record.update(json)
            record.commit()
        except PIDDoesNotExistError:
            # Insert path: no PID yet for this control number.
            record = Record.create(json, id_=None)
            # Create persistent identifier.
            inspire_recid_minter(str(record.id), json)

        if json.get('deleted'):
            # get_recid_from_ref may yield a falsy value (e.g. when
            # 'new_record' is absent); that selects the soft-delete branch.
            new_recid = get_recid_from_ref(json.get('new_record'))
            if new_recid:
                merged_record = get_db_record(pid_type, new_recid)
                merge_pidstores_of_two_merged_records(merged_record.id, record.id)
            else:
                soft_delete_pidstore_for_record(record.id)

        return record
def test_publication_info(marcxml_to_json, json_to_marc):
    """Check each publication_info key against its 773 subfield.

    Only the first ``publication_info`` entry and the first ``773`` entry
    are compared.
    """
    pub_info = marcxml_to_json['publication_info'][0]
    field_773 = json_to_marc['773'][0]

    assert pub_info['page_artid'] == field_773['c']
    assert pub_info['journal_issue'] == field_773['n']
    assert pub_info['journal_title'] == field_773['p']
    assert pub_info['journal_volume'] == field_773['v']
    # The parent record is stored as a reference; compare its recid.
    parent_recid = get_recid_from_ref(pub_info['parent_record'])
    assert parent_recid == field_773['0']
    assert pub_info['year'] == field_773['y']
    assert pub_info['conf_acronym'] == field_773['o']
    assert pub_info['reportnumber'] == field_773['r']
    assert pub_info['confpaper_info'] == field_773['t']
    assert pub_info['cnum'] == field_773['w']
    assert pub_info['pubinfo_freetext'] == field_773['x']
    assert pub_info['isbn'] == field_773['z']
    assert pub_info['note'] == field_773['m']
def succeeding_entry2marc(self, key, value):
    """Build the ``r``/``w``/``z`` subfields of a succeeding entry.

    ``self`` and ``key`` are not used by this function.

    :param value: dict-like object read with ``.get`` for
        ``relationship_code``, ``record`` and ``isbn``.
    :return: dict with keys ``r``, ``w`` (recid extracted from the
        ``record`` reference) and ``z``.
    """
    subfields = {}
    subfields['r'] = value.get('relationship_code')
    subfields['w'] = inspire_dojson_utils.get_recid_from_ref(
        value.get('record')
    )
    subfields['z'] = value.get('isbn')
    return subfields
def test_publication_info(marcxml_to_json, json_to_marc):
    """Check the first publication_info entry against the first 773 field.

    The comparison is driven by a table of
    ``(JSON key, 773 subfield code, converter)`` rows, evaluated in order.
    """
    def _unchanged(item):
        return item

    expectations = (
        ('page_artid', 'c', _unchanged),
        ('journal_issue', 'n', _unchanged),
        ('journal_title', 'p', _unchanged),
        ('journal_volume', 'v', _unchanged),
        # Stored as a reference on the JSON side, as a recid in 773.
        ('parent_record', '0', get_recid_from_ref),
        ('year', 'y', _unchanged),
        ('conf_acronym', 'o', _unchanged),
        ('reportnumber', 'r', _unchanged),
        ('confpaper_info', 't', _unchanged),
        ('cnum', 'w', _unchanged),
        ('pubinfo_freetext', 'x', _unchanged),
        ('isbn', 'z', _unchanged),
        ('note', 'm', _unchanged),
    )

    for json_key, subfield, convert in expectations:
        json_side = convert(marcxml_to_json['publication_info'][0][json_key])
        assert json_side == json_to_marc['773'][0][subfield]
def test_succeeding_entry(marcxml_to_json, json_to_marc):
    """Check succeeding_entry against the 785 subfields r, w and z."""
    entry = marcxml_to_json['succeeding_entry']
    field_785 = json_to_marc['785']

    assert entry['relationship_code'] == field_785['r']
    # 'record' is a reference; 785 'w' is compared with its recid.
    assert get_recid_from_ref(entry['record']) == field_785['w']
    assert entry['isbn'] == field_785['z']
def test_succeeding_entry(marcxml_to_json, json_to_marc):
    """Verify that succeeding_entry matches field 785.

    Compares ``relationship_code`` with ``r``, the recid of ``record``
    with ``w`` and ``isbn`` with ``z``.
    """
    json_entry = marcxml_to_json['succeeding_entry']
    marc_entry = json_to_marc['785']

    expected_code = marc_entry['r']
    assert json_entry['relationship_code'] == expected_code

    linked_recid = get_recid_from_ref(json_entry['record'])
    assert linked_recid == marc_entry['w']

    expected_isbn = marc_entry['z']
    assert json_entry['isbn'] == expected_isbn
def _structure_data(struct):
    """Extract type, name, address, country and recid from *struct*.

    :param struct: record queried with ``get_value`` paths; its ``self``
        key must hold a reference accepted by ``get_recid_from_ref``.
    :return: dict with keys ``type``, ``name``, ``address``, ``country``
        and ``recid``.
    """
    structure_type = get_value(struct, "collections[1].primary", "").lower()
    # ^^ FIXME: This may not be one of the HAL accepted values:
    # institution, department, laboratory or researchteam
    name = get_value(struct, "institution[0]", "")
    address = get_value(struct, "address[0].original_address", [])
    country = get_value(struct, "address[0].country_code", "")
    recid = get_recid_from_ref(struct['self'])

    return {
        'type': structure_type,
        'name': name,
        'address': address,
        'country': country,
        'recid': recid,
    }
def test_references(marcxml_to_json, json_to_marc):
    """Check every reference against the 999C5 field at the same index.

    Only keys actually present on a reference are compared.  The ``record``
    key is a reference and is compared through its recid.
    """
    # (JSON key, 999C5 subfield code), in the order the checks are made.
    plain_fields = (
        ('texkey', '1'),
        ('doi', 'a'),
        ('collaboration', 'c'),
        ('editors', 'e'),
        ('authors', 'h'),
        ('misc', 'm'),
        ('number', 'o'),
        ('isbn', 'i'),
        ('publisher', 'p'),
        ('maintitle', 'q'),
        ('report_number', 'r'),
        ('title', 't'),
        ('url', 'u'),
        ('journal_pubnote', 's'),
        ('raw_reference', 'x'),
        ('year', 'y'),
    )

    for index, val in enumerate(marcxml_to_json['references']):
        if 'record' in val:
            recid = get_recid_from_ref(val['record'])
            assert recid == json_to_marc['999C5'][index]['0']
        for json_key, subfield in plain_fields:
            if json_key in val:
                expected = json_to_marc['999C5'][index][subfield]
                assert val[json_key] == expected
def _parse_structures(record):
    """Return de-duplicated structure data for the record's affiliations.

    Each affiliation under ``record['authors'][*]['affiliations']`` that has
    a ``record`` reference gets a ``recid`` key added in place, holding the
    value returned by ``get_recid_from_ref``.  The stringified recids are
    passed to ``get_es_records('ins', ...)`` and every returned item is
    mapped through ``_structure_data``.

    :param record: dict-like record; missing ``authors`` or
        ``affiliations`` are treated as empty.
    :return: ``dedupe_list(structures)``.
    """
    structures = []
    recids = []

    for author in record.get('authors', []):
        for affiliation in author.get('affiliations', []):
            try:
                # One call only: the same value feeds both the annotation
                # and the lookup list.
                affiliation_recid = get_recid_from_ref(affiliation['record'])
            except KeyError:
                # E.g. no 'record' key on this affiliation; skip it.
                continue
            recids.append(str(affiliation_recid))
            affiliation['recid'] = affiliation_recid

    try:
        found = get_es_records('ins', recids)
    except RequestError:
        # Treat a failed request as "no structures found".
        found = []

    # Use a name other than ``record`` so the argument is not shadowed.
    for structure_record in found:
        structures.append(_structure_data(structure_record))

    return dedupe_list(structures)
def test_get_recid_from_ref():
    """``get_recid_from_ref`` returns None for inputs without a usable ref.

    Identity comparison (``is None``) is used instead of ``== None`` so the
    check cannot be satisfied by an object with a permissive ``__eq__``.
    """
    assert utils.get_recid_from_ref(None) is None
    assert utils.get_recid_from_ref('a_string') is None
    assert utils.get_recid_from_ref({}) is None
    assert utils.get_recid_from_ref({'bad_key': 'some_val'}) is None
    assert utils.get_recid_from_ref({'$ref': 'a_string'}) is None
    assert utils.get_recid_from_ref({'$ref': 'http://bad_url'}) is None
def spires_sysnos2marc(self, key, value):
    """970 SPIRES number and new recid.

    Extends the list found under ``self['970']`` (or a fresh list when the
    key is absent) and returns it:

    * ``key == 'spires_sysnos'``: one ``{'a': val}`` per truthy value;
    * ``key == 'new_record'``: one ``{'d': recid}`` per truthy recid
      extracted from the given references.

    Any other key leaves the list untouched.
    """
    values = utils.force_list(value)
    field_970 = self.get('970', [])

    if key == 'spires_sysnos':
        sysno_subfields = []
        for sysno in values:
            if sysno:
                sysno_subfields.append({'a': sysno})
        field_970.extend(sysno_subfields)
    elif key == 'new_record':
        # Resolve every reference first, then keep only the usable recids.
        recids = []
        for ref in values:
            recids.append(inspire_dojson_utils.get_recid_from_ref(ref))
        recid_subfields = []
        for recid in recids:
            if recid:
                recid_subfields.append({'d': recid})
        field_970.extend(recid_subfields)

    return field_970
def get_value(value):
    """Build the author subfield dict from an author object.

    :param value: dict-like author read with ``.get``.
    :return: dict with subfields ``a``, ``e``, ``q``, ``i``, ``j``, ``m``,
        ``u`` (list of each affiliation's ``value``), ``x`` (recid extracted
        from the ``record`` reference) and ``y``.
    """
    affiliation_values = []
    for affiliation in value.get('affiliations', []):
        affiliation_values.append(affiliation.get('value'))

    author_recid = inspire_dojson_utils.get_recid_from_ref(
        value.get('record')
    )

    subfields = {
        'a': value.get('full_name'),
        'e': value.get('role'),
        'q': value.get('alternative_name'),
        'i': value.get('inspire_id'),
        'j': value.get('orcid'),
        'm': value.get('email'),
        'u': affiliation_values,
        'x': author_recid,
        'y': value.get('curated_relation'),
    }
    return subfields
def _recusive_find_refs(json_root):
    """Recursively add recid keys next to references found in *json_root*.

    The structure is modified in place; nothing is returned.

    * For a dict entry whose value is a dict containing ``'$ref'``, a new
      key is added holding ``get_recid_from_ref(value)``.  The new key is
      the original key with ``record`` removed and ``_recid`` appended
      (``record`` -> ``recid``, ``parent_record`` -> ``parent_recid``).
    * For a dict entry whose value is a list and whose key is in
      ``list_ref_fields_translations``, the translated key is set to the
      list of recids extracted from every item.
    * Anything else is descended into; values that are neither lists nor
      dicts end the recursion.

    :param json_root: list, dict or leaf value to process.
    """
    if isinstance(json_root, list):
        items = enumerate(json_root)
    elif isinstance(json_root, dict):
        # Snapshot the items before the loop: keys are added to json_root
        # while iterating, and on Python 3 ``dict.items()`` is a live view
        # that raises RuntimeError when the dict changes size.
        items = list(json_root.items())
    else:
        items = []

    for key, value in items:
        if (isinstance(json_root, dict) and isinstance(value, dict) and
                '$ref' in value):
            # Append '_recid' and remove 'record' from the key name.
            key_basename = key.replace('record', '').rstrip('_')
            new_key = '{}_recid'.format(key_basename).lstrip('_')
            json_root[new_key] = get_recid_from_ref(value)
        elif (isinstance(json_root, dict) and isinstance(value, list) and
                key in list_ref_fields_translations):
            new_list = [get_recid_from_ref(v) for v in value]
            new_key = list_ref_fields_translations[key]
            json_root[new_key] = new_list
        else:
            _recusive_find_refs(value)
def spires_sysnos2marc(self, key, value):
    """970 SPIRES number and new recid.

    Returns the list stored under ``self['970']`` (a new list if missing)
    after extending it with ``{'a': ...}`` entries for ``spires_sysnos`` or
    ``{'d': ...}`` entries for ``new_record``.  Falsy values and falsy
    recids are skipped.
    """
    incoming = utils.force_list(value)
    accumulated = self.get('970', [])

    if key == 'spires_sysnos':
        truthy_sysnos = [item for item in incoming if item]
        accumulated.extend([{'a': item} for item in truthy_sysnos])
        return accumulated

    if key == 'new_record':
        resolve = inspire_dojson_utils.get_recid_from_ref
        # All references are resolved before the list is extended.
        resolved = [resolve(item) for item in incoming]
        accumulated.extend([{'d': recid} for recid in resolved if recid])

    return accumulated
def test_authors(marcxml_to_json, json_to_marc):
    """Check the first author against the subfields of field 100."""
    author = marcxml_to_json['authors'][0]
    field_100 = json_to_marc['100']

    assert author['full_name'] == field_100['a']
    assert author['role'] == field_100['e']
    assert author['alternative_name'] == field_100['q']
    assert author['inspire_id'] == field_100['i']
    assert author['orcid'] == field_100['j']
    assert author['email'] == field_100['m']
    # Only the first affiliation is compared.
    assert author['affiliations'][0]['value'] == field_100['u'][0]
    # 'record' is a reference; 100 'x' is compared with its recid.
    assert get_recid_from_ref(author['record']) == field_100['x']
    assert author['curated_relation'] == field_100['y']
def check_if_record_is_going_to_be_deleted(sender, *args, **kwargs):
    """Checks if 'deleted' field is set as True before updating.

    If 'deleted' field exists and its value is True, before update,
    then delete all the record's pidstores.

    When a recid can be extracted from ``sender['new_record']``,
    ``merge_pidstores_of_two_merged_records`` is called with that record
    and the current one; otherwise ``soft_delete_pidstore_for_record`` is
    called for the current record.

    :param sender: dict-like record data; ``control_number`` must be
        convertible with ``int``.
    :param args: ignored.
    :param kwargs: ignored.
    """
    control_number = int(sender.get('control_number'))
    collection = InspireRecordIdProvider.schema_to_pid_type(sender.get('$schema'))
    # Fetched unconditionally, as before, so any lookup error still surfaces
    # for records that are not being deleted.
    record = get_db_record(collection, control_number)

    if not sender.get('deleted'):
        return

    new_recid = get_recid_from_ref(sender.get('new_record'))
    if new_recid:
        merged_record = get_db_record(collection, new_recid)
        merge_pidstores_of_two_merged_records(merged_record.id, record.id)
    else:
        # Reuse the record fetched above; a second get_db_record call with
        # the same arguments would only repeat the lookup.
        soft_delete_pidstore_for_record(record.id)
def publication_info2marc(self, key, value):
    """Build the subfield dict for one publication info object.

    ``self`` and ``key`` are not used.  Subfield ``0`` holds the recid
    extracted from the ``parent_record`` reference; every other subfield is
    copied from the matching key of *value* via ``.get``.
    """
    # (subfield code, key of ``value``), in output order after '0'.
    copied_subfields = (
        ('c', 'page_artid'),
        ('n', 'journal_issue'),
        ('o', 'conf_acronym'),
        ('p', 'journal_title'),
        ('r', 'reportnumber'),
        ('t', 'confpaper_info'),
        ('v', 'journal_volume'),
        ('w', 'cnum'),
        ('x', 'pubinfo_freetext'),
        ('y', 'year'),
        ('z', 'isbn'),
        ('m', 'note'),
    )

    parent_recid = inspire_dojson_utils.get_recid_from_ref(
        value.get('parent_record')
    )
    marc = {'0': parent_recid}
    for code, json_key in copied_subfields:
        marc[code] = value.get(json_key)
    return marc
def test_authors(marcxml_to_json, json_to_marc):
    """Verify that the first author matches field 100.

    Plain keys are compared through a small table; the first affiliation
    and the author record reference are checked separately, in the same
    position as in the original sequence of checks.
    """
    first_author = marcxml_to_json['authors'][0]
    marc_author = json_to_marc['100']

    leading_fields = (
        ('full_name', 'a'),
        ('role', 'e'),
        ('alternative_name', 'q'),
        ('inspire_id', 'i'),
        ('orcid', 'j'),
        ('email', 'm'),
    )
    for json_key, subfield in leading_fields:
        assert first_author[json_key] == marc_author[subfield]

    first_affiliation = first_author['affiliations'][0]['value']
    assert first_affiliation == marc_author['u'][0]

    author_recid = get_recid_from_ref(first_author['record'])
    assert author_recid == marc_author['x']

    assert first_author['curated_relation'] == marc_author['y']
def references2marc(self, key, value):
    """Build the subfield dict for one reference.

    ``self`` and ``key`` are not used.  Subfield ``0`` is the recid
    extracted from the ``record`` reference; the remaining subfields are
    read from *value* with ``.get``.
    """
    # (subfield code, key of ``value``), in output order after '0'.
    copied_subfields = (
        ('1', 'texkey'),
        ('a', 'doi'),
        ('c', 'collaboration'),
        ('e', 'editors'),
        ('h', 'authors'),
        ('m', 'misc'),
        ('o', 'number'),
        ('i', 'isbn'),
        ('p', 'publisher'),
        ('q', 'maintitle'),
        ('r', 'report_number'),
        ('t', 'title'),
        ('u', 'url'),
        ('s', 'journal_pubnote'),
        ('x', 'raw_reference'),
        ('y', 'year'),
    )

    marc = {
        '0': inspire_dojson_utils.get_recid_from_ref(value.get('record')),
    }
    marc.update(
        (code, value.get(json_key)) for code, json_key in copied_subfields
    )
    return marc
def test_references(marcxml_to_json, json_to_marc):
    """Verify that each reference matches the 999C5 field at its index.

    A key is only compared when the reference actually has it; the 999C5
    entry is looked up only at that point.
    """
    def _check(reference, position, json_key, code, convert=None):
        # Keys missing from the reference are not checked at all.
        if json_key not in reference:
            return
        actual = reference[json_key]
        if convert is not None:
            actual = convert(actual)
        assert actual == json_to_marc['999C5'][position][code]

    for index, val in enumerate(marcxml_to_json['references']):
        # 'record' is a reference; it is compared through its recid.
        _check(val, index, 'record', '0', get_recid_from_ref)
        _check(val, index, 'texkey', '1')
        _check(val, index, 'doi', 'a')
        _check(val, index, 'collaboration', 'c')
        _check(val, index, 'editors', 'e')
        _check(val, index, 'authors', 'h')
        _check(val, index, 'misc', 'm')
        _check(val, index, 'number', 'o')
        _check(val, index, 'isbn', 'i')
        _check(val, index, 'publisher', 'p')
        _check(val, index, 'maintitle', 'q')
        _check(val, index, 'report_number', 'r')
        _check(val, index, 'title', 't')
        _check(val, index, 'url', 'u')
        _check(val, index, 'journal_pubnote', 's')
        _check(val, index, 'raw_reference', 'x')
        _check(val, index, 'year', 'y')
def test_references(marcxml_to_json, json_to_marc, marcxml_record):
    """Test if references are created correctly.

    Compares three views of the references index by index:
    ``marcxml_record['999C5']`` (``marc_init``),
    ``marcxml_to_json['references']`` (``json_val``) and
    ``json_to_marc['999C5']`` (``marc_val``).  All three must have the same
    length.  Every check is driven by the subfields present in
    ``marc_init``.
    """
    def _force_set(obj):
        # Turn ``obj`` into a set so comparisons ignore order; a falsy
        # result from force_list becomes an empty set.
        # Presumably force_list wraps scalars in a list -- TODO confirm.
        return set(utils.force_list(obj) or [])

    assert len(marcxml_record['999C5']) == len(marcxml_to_json['references'])
    assert len(json_to_marc['999C5']) == len(marcxml_to_json['references'])

    for index, json_val in enumerate(marcxml_to_json['references']):
        marc_val = json_to_marc['999C5'][index]
        marc_init = marcxml_record['999C5'][index]
        # ISBN, report numbers and year are read from this nested object.
        json_val_pub = json_val.get('publication_info', {})

        if '0' in marc_init:
            assert 'record' in json_val and '0' in marc_val
            assert get_recid_from_ref(json_val['record']) == marc_val['0']

        if '1' in marc_init:
            assert 'texkey' in json_val and '1' in marc_val
            assert json_val['texkey'] == marc_val['1']

        if 'a' in marc_init:
            assert 'dois' in json_val and 'a' in marc_val
            # Only the first DOI is compared.
            assert json_val['dois'][0] == marc_val['a']

        if 'c' in marc_init:
            assert 'collaboration' in json_val and 'c' in marc_val
            assert json_val['collaboration'] == marc_val['c']

        if 'e' in marc_init:
            assert _force_set(marc_val['e']) == _force_set(marc_init['e'])

        if 'h' in marc_init:
            assert 'authors' in json_val and 'h' in marc_val
            json_names = _force_set([a['full_name']
                                     for a in json_val['authors']])
            roundtrip_names = _force_set(marc_val['h'])
            roundtrip_editors = _force_set(marc_val.get('e', []))
            assert json_names and roundtrip_names
            # JSON author names minus the round-tripped 'e' values must
            # equal the round-tripped 'h' values.
            assert json_names.difference(roundtrip_editors) == roundtrip_names

        if 'm' in marc_init:
            assert 'misc' in json_val and 'm' in marc_val
            assert _force_set(json_val['misc']) == _force_set(marc_val['m'])
            assert _force_set(json_val['misc']) == _force_set(marc_init['m'])

        if 'o' in marc_init:
            assert 'number' in json_val and 'o' in marc_val
            assert json_val['number'] == marc_val['o']

        if 'i' in marc_init:
            assert 'isbn' in json_val_pub and 'i' in marc_val
            assert json_val_pub['isbn'] == marc_val['i']

        if 'p' in marc_init:
            assert 'publisher' in json_val.get('imprint', {})
            assert 'p' in marc_val
            assert marc_init['p'] == marc_val['p']
            assert json_val['imprint']['publisher'] == marc_val['p']

        if 'r' in marc_init:
            initial_repnos = _force_set(marc_init['r'])
            json_repnos = _force_set(json_val_pub.get('reportnumber', []))
            # NOTE(review): set.union returns a new set and the result is
            # discarded here, so 'arxiv_eprints' never reaches json_repnos.
            # Confirm whether an in-place update was intended before
            # changing it; the assertions below depend on the outcome.
            json_repnos.union(_force_set(json_val.get('arxiv_eprints', [])))
            roundtrip_repnos = _force_set(marc_val['r'])
            assert roundtrip_repnos == json_repnos
            # We should have at least one in the end.
            assert roundtrip_repnos
            # But we can not be sure that we ported all of them.
            assert json_repnos.issubset(initial_repnos)

        if 't' in marc_init:
            initial_titles = _force_set(marc_init['t'])
            json_titles = _force_set([t['title']
                                      for t in json_val['titles']])
            roundtrip_titles = _force_set(marc_val['t'])
            assert initial_titles == json_titles
            assert initial_titles == roundtrip_titles

        if 'u' in marc_init:
            initial_urls = _force_set(marc_init['u'])
            json_urls = _force_set([u['value'] for u in json_val['urls']])
            roundtrip_urls = _force_set(marc_val['u'])
            assert initial_urls == json_urls
            assert initial_urls == roundtrip_urls

        if 's' in marc_init:
            # Compared MARC to MARC only; the JSON side is not checked.
            assert marc_init['s'] == marc_val['s']

        if 'x' in marc_init:
            initial_raw = _force_set(marc_init['x'])
            json_raw = _force_set([r['value']
                                   for r in json_val['raw_reference']])
            roundtrip_raw = _force_set(marc_val['x'])
            assert initial_raw == json_raw
            assert initial_raw == roundtrip_raw

        if 'y' in marc_init:
            assert 'year' in json_val_pub and 'y' in marc_val
            # Compared as strings, so int and str years are treated alike.
            assert str(json_val_pub['year']) == str(marc_init['y'])
            assert str(marc_val['y']) == str(marc_init['y'])
def test_book_link(marcxml_to_json_book):
    """Check that the book record reference resolves to recid 1409249."""
    book_ref = marcxml_to_json_book['book']['record']
    book_recid = get_recid_from_ref(book_ref)
    assert book_recid == 1409249
def test_new_record(marcxml_to_json, json_to_marc):
    """Check that the new_record recid appears as a ``d`` subfield of 970."""
    new_recid = get_recid_from_ref(marcxml_to_json['new_record'])
    # Only 970 entries that carry a 'd' subfield are considered.
    recids_in_970 = [
        subfields.get('d')
        for subfields in json_to_marc['970']
        if 'd' in subfields
    ]
    assert new_recid in recids_in_970
def test_deleted_records(marcxml_to_json, json_to_marc):
    """Check that the first deleted record appears as an ``a`` subfield of 981."""
    first_deleted_recid = get_recid_from_ref(
        marcxml_to_json['deleted_records'][0]
    )
    # Only 981 entries that carry an 'a' subfield are considered.
    recids_in_981 = [
        subfields.get('a')
        for subfields in json_to_marc['981']
        if 'a' in subfields
    ]
    assert first_deleted_recid in recids_in_981
def test_get_recid_from_ref_returns_none_on_ref_malformed():
    """A ``$ref`` of ``'http://bad_url'`` must resolve to None."""
    malformed_ref = {'$ref': 'http://bad_url'}
    recid = get_recid_from_ref(malformed_ref)
    assert recid is None
def test_get_recid_from_ref_returns_none_on_empty_object():
    """An empty dict must resolve to None."""
    empty_object = {}
    recid = get_recid_from_ref(empty_object)
    assert recid is None
def test_get_recid_from_ref_returns_none_on_object_with_wrong_key():
    """A dict without a ``$ref`` key must resolve to None."""
    wrong_key_object = {'bad_key': 'some_val'}
    recid = get_recid_from_ref(wrong_key_object)
    assert recid is None
def test_get_recid_from_ref_returns_none_on_none():
    """Passing None must give None back."""
    recid = get_recid_from_ref(None)
    assert recid is None
def test_get_recid_from_ref_returns_none_on_simple_strings():
    """A bare string in place of a reference object must resolve to None."""
    not_a_ref = 'a_string'
    recid = get_recid_from_ref(not_a_ref)
    assert recid is None
def deleted_records2marc(self, key, value):
    """Return ``{'a': recid}`` for a deleted-record reference.

    ``self`` and ``key`` are not used; the recid is whatever
    ``get_recid_from_ref`` extracts from *value*.
    """
    deleted_recid = inspire_dojson_utils.get_recid_from_ref(value)
    return {'a': deleted_recid}
def deleted_records2marc(self, key, value):
    """Map a deleted-record reference onto subfield ``a``.

    :param value: reference passed to ``get_recid_from_ref``.
    :return: single-key dict ``{'a': <extracted recid>}``.
    """
    subfields = {}
    subfields['a'] = inspire_dojson_utils.get_recid_from_ref(value)
    return subfields
def test_get_recid_from_ref_returns_none_on_ref_a_simple_string():
    """A ``$ref`` holding the plain string ``'a_string'`` must resolve to None."""
    string_ref = {'$ref': 'a_string'}
    recid = get_recid_from_ref(string_ref)
    assert recid is None
def test_book_link(marcxml_to_json_book):
    """Verify the recid extracted from ``book.record`` is 1409249."""
    expected_recid = 1409249
    book = marcxml_to_json_book['book']
    assert get_recid_from_ref(book['record']) == expected_recid