def create_xml(author, email, affiliation, experiment, inspire_id, orcid, native_name):
    """Create the xml file to upload.

    Assembles a HEPNAMES record and returns it serialised by ``print_rec``.

    Args:
        author: author name; a "Last, First" form is also stored flipped
            to "First Last" in subfield ``q`` of field 100.
        email: e-mail stored in subfield ``m`` of every 371 field.
        affiliation: one affiliation, a list of affiliations, or a false
            value for none.
        experiment: experiment stored in field 693, or a false value.
        inspire_id: identifier stored in an 035 field with ``9=INSPIRE``.
        orcid: identifier stored in a second 035 field with ``9=ORCID``,
            or a false value.
        native_name: name stored in field 880, or a false value.

    Returns:
        The result of ``print_rec`` for the assembled record.
    """
    # Fields are collected as (tag, subfields) pairs rather than in a dict
    # keyed by tag: a dict can hold only one field per tag, which silently
    # dropped all but the last affiliation and replaced the INSPIRE 035
    # field with the ORCID one.
    fields = []
    flipped_name = re.sub(r'(.*)\, (.*)', r'\2 \1', author)
    fields.append(('980__', [('a', 'HEPNAMES')]))
    fields.append(('100__', [('a', author), ('q', flipped_name),
                             ('g', 'ACTIVE')]))

    if affiliation:
        if isinstance(affiliation, list):
            affiliations = affiliation
        else:
            affiliations = [affiliation]
        for aff in affiliations:
            fields.append(('371__', [('m', email), ('a', aff),
                                     ('z', 'current')]))
    else:
        fields.append(('371__', [('m', email), ('z', 'current')]))

    if experiment:
        fields.append(('693__', [('e', experiment), ('z', 'current')]))
    fields.append(('035__', [('9', 'INSPIRE'), ('a', inspire_id)]))
    if orcid:
        fields.append(('035__', [('9', 'ORCID'), ('a', orcid)]))
    # SOURCE is a module-level value defined outside this function.
    if SOURCE:
        fields.append(('670__', [('a', SOURCE)]))
    if native_name:
        fields.append(('880__', [('a', native_name)]))

    record = {}
    for tag, subfields in fields:
        # tag is "TTTij": three tag characters followed by two indicators.
        record_add_field(record, tag[0:3], tag[3], tag[4],
                         subfields=subfields)
    return print_rec(record)
def check_records(records, doi_field="0247_a", extra_subfields=(("2", "DOI"),)):
    """
    Find the DOI for the records using crossref and add it to the specified
    field.

    This plugin won't ask for the DOI if it's already set.

    records: iterable of records exposing ``record_id``, ``iterfield``,
        ``warn`` and ``set_amended``.
    doi_field: six-character spec (tag, two indicators, subfield code) of
        the field that receives the DOI.
    extra_subfields: additional (code, value) pairs appended after the DOI
        subfield.
    """
    records_to_check = {}
    for record in records:
        has_doi = False
        for _, value in record.iterfield("0247_2"):
            if value.lower() == "doi":
                has_doi = True
                break
        if not has_doi:
            records_to_check[record.record_id] = record

    dois = get_doi_for_records(records_to_check.values())
    for record_id, doi in dois.iteritems():
        # Look the record up *before* anything is reported on it; otherwise
        # the duplicate-DOI warning is attached to whichever record the
        # name `record` happened to be bound to from a previous iteration.
        record = records_to_check[record_id]
        dup_doi_recid = find_record_from_doi(doi)
        if dup_doi_recid:
            record.warn("DOI %s to be added to record %s already exists in record/s %s" % (doi, record_id, dup_doi_recid))
            continue
        subfields = [(doi_field[5], doi.encode("utf-8"))]
        subfields.extend(tuple(extra) for extra in extra_subfields)
        record_add_field(record, tag=doi_field[:3], ind1=doi_field[3],
                         ind2=doi_field[4], subfields=subfields)
        record.set_amended("Added DOI in field %s" % doi_field)
def check_record(record, texkey_field="035__a", extra_subfields=()):
    """
    Give the record a TeX key unless it already carries one.

    texkey_field is a six-character spec: tag, two indicators and the
    subfield code that receives the key. extra_subfields are additional
    (code, value) pairs stored in the same new field.
    """
    tag = texkey_field[:3]
    ind1, ind2, subfield = texkey_field[3:]
    field_prefix = texkey_field[:5]

    provenances = list(record.iterfield(field_prefix + "9"))
    if provenances and provenances[0][1] in ("SPIRESTeX", "INSPIRETeX"):
        # A non-empty $z under a TeX provenance counts as an existing key.
        if any(val for _, val in record.iterfield(field_prefix + "z")):
            return

    if list(record.iterfield(texkey_field)):
        # The target subfield is already populated; nothing to add.
        return

    try:
        texkey = TexkeySeq().next_value(bibrecord=record)
    except TexkeyNoAuthorError:
        record.warn("No first author or collaboration")
        return

    new_subfields = [(subfield, texkey)] + map(tuple, extra_subfields)
    record_add_field(record, tag=tag, ind1=ind1, ind2=ind2,
                     subfields=new_subfields)
    record.set_amended("Added Tex key '%s' to field %s" % (texkey, texkey_field))
def create_xml(recid, input_dict):
    '''Create a marcxml record from a dict of "TTTiis" -> value entries.

    input_dict must contain 'eprint' and 'primarch'. The keys '035__a'
    and '037__a' are written into input_dict by this function before the
    fields are generated. Returns the print_rec output of the record.
    '''
    record = {}
    record_add_field(record, '001', controlfield_value=str(recid))

    eprint = input_dict['eprint']
    input_dict['035__a'] = 'oai:arXiv.org:' + eprint
    if ARXIV_REGEX_NEW.match(eprint):
        input_dict['037__a'] = 'arXiv:' + eprint
    else:
        input_dict['037__a'] = eprint

    not_fields = ('eprint', 'primarch', '0247_a')
    for tag in input_dict:
        if tag in not_fields:
            continue
        # The 'arXiv' marker goes in code 2 for 65017a keys, else code 9.
        marker_code = 2 if tag.startswith('65017a') else 9
        subfields = []
        if tag != '269__c':
            subfields.append((marker_code, 'arXiv'))
        subfields.append((tag[5], input_dict[tag]))
        if tag == '037__a':
            subfields.append(('c', input_dict['primarch']))
        record_add_field(record, tag[0:3], tag[3], tag[4],
                         subfields=subfields)
    return print_rec(record)
def create_xml(recid, tags):
    """Create xml file to replace to 100, 700 block.

    For each field instance of record ``recid`` that matches one of
    ``tags`` (five-character specs: tag plus two indicators), the
    subfields are copied as they are. Every ``v`` subfield is preceded by
    one ``u`` subfield per value that ``get_aff`` produced for it.
    ``get_aff`` results are cached in the module-level
    ``AFFILIATIONS_DONE`` mapping.

    Returns the ``print_rec`` output of the rebuilt record if ``flag`` was
    set, otherwise ``None`` (implicitly).
    """
    record = get_record(recid)
    correct_record = {}
    record_add_field(correct_record, '001', controlfield_value=str(recid))
    flag = None
    for tag in tags:
        field_instances = record_get_field_instances(record, tag[0:3], \
                                                     tag[3], tag[4])
        correct_subfields = []
        for field_instance in field_instances:
            # Start a fresh subfield list for every field instance.
            correct_subfields = []
            for code, value in field_instance[0]:
                if code == 'v':
                    try:
                        if VERBOSE:
                            print len(AFFILIATIONS_DONE)
                        # Cache key: runs of non-word characters collapsed
                        # to one space, then upper-cased.
                        affiliation_key = re.sub(r'\W+', ' ', value).upper()
                        if not affiliation_key in AFFILIATIONS_DONE:
                            new_values = get_aff(value)
                            AFFILIATIONS_DONE[affiliation_key] = new_values
                        for new_value in AFFILIATIONS_DONE[affiliation_key]:
                            correct_subfields.append(('u', \
                                                     new_value.lstrip(' ')))
                            flag = True
                    except TypeError:
                        # presumably raised when the cached value is not
                        # iterable (e.g. get_aff returned None) -- TODO
                        # confirm; the 'v' subfield is then kept as is.
                        pass
                # The original subfield is always kept.
                correct_subfields.append((code, value))
            record_add_field(correct_record, tag[0:3], tag[3], tag[4], \
                             subfields=correct_subfields)
    if flag:
        return print_rec(correct_record)
def create_xml(recid, correction_dict): """Fix the citations of Fermilab reports.""" tags = [REF] record = get_record(recid) correct_record = {} record_add_field(correct_record, '001', controlfield_value=str(recid)) flag = False for (tag, field_instance) in \ [(tag, field_instance) for tag in tags \ for field_instance in record_get_field_instances(record, \ tag[0:3], tag[3], tag[4])]: correct_subfields = [] for code, value in field_instance[0]: if code == 'r' and value.upper() in correction_dict: print 'Was:', value value = correction_dict[value.upper()] print 'Now:', value flag = True correct_subfields.append((code, value)) record_add_field(correct_record, tag[0:3], tag[3], tag[4], \ subfields=correct_subfields) if flag: return print_rec(correct_record) else: return None
def bst_openaire_altmetric():
    """
    Attach Altmetric ids to records that have a DOI.

    Every record found by searching field 0247_a is looked up through
    ``Altmetric().doi`` using its first 0247_a value. When a result comes
    back, an 035 field (``a`` = altmetric id, ``9`` = 'Altmetric') is
    uploaded in 'correct' mode. Records whose 035__9 values already
    include 'Altmetric' are skipped. Altmetric HTTP errors are registered
    without alerting the admin and processing moves on to the next record.
    """
    recids = search_pattern(p="0->Z", f="0247_a")
    client = Altmetric()

    for recid in recids:
        try:
            # Check if we already have an Altmetric id. The fetched values
            # are compared as plain strings: testing for the list
            # ['Altmetric'] never matched, so ids were re-added every run.
            if 'Altmetric' in get_fieldvalues(recid, "035__9"):
                continue

            doi_values = get_fieldvalues(recid, "0247_a")
            if not doi_values:
                # Nothing to query Altmetric with.
                continue
            json_res = client.doi(doi_values[0])

            if json_res:
                rec = {}
                record_add_field(rec, "001", controlfield_value=str(recid))
                record_add_field(
                    rec, '035',
                    subfields=[('a', str(json_res['altmetric_id'])),
                               ('9', 'Altmetric')])
                bibupload(rec, opt_mode='correct')
        except AltmetricHTTPException as e:
            register_exception(
                prefix='Altmetric error (status code %s): %s' % (e.status_code, str(e)),
                alert_admin=False)
def generate_columns_longer(ds):
    """Build a record with one 910 field per column of ds.

    Each column gets subfield n (its index as a string), t (its title,
    when non-empty) and d (its header, when non-empty). Headers and titles
    are spread over the columns according to their "colspan".
    """
    from invenio.bibrecord import record_add_field

    # One mutable row per column: [number, header, title]
    columns = [[index, "", ""] for index in xrange(ds.num_columns)]

    def _spread(cells, slot):
        # Write each cell's stripped content into `slot` of as many
        # consecutive columns as its colspan says.
        target = 0
        for cell in cells:
            for _ in xrange(cell["colspan"]):
                columns[target][slot] = cell["content"].strip()
                target += 1

    _spread(ds.column_headers, 1)
    _spread(ds.column_titles, 2)

    rec = {}
    for number, header, title in columns:
        subfields = [("n", str(number))]
        if title != "":
            subfields.append(("t", title))
        if header != "":
            subfields.append(("d", header))
        record_add_field(rec, "910", subfields=subfields)
    return rec
def merge_record_with_template(rec, template_name):
    """Fill rec in from the named template and return rec.

    Tags missing from rec are copied over from the template. For tags rec
    already has, every field instance receives the template subfields
    whose code it lacks (first template value of that code). Returns None
    when the template cannot be found.
    """
    template = get_record_template(template_name)
    if not template:
        return

    template_bibrec = create_record(template)[0]
    for field_tag in template_bibrec:
        template_fields = template_bibrec[field_tag]

        if not record_has_field(rec, field_tag):
            # Tag absent from rec: copy every template instance as is.
            for tpl_field in template_fields:
                record_add_field(rec, field_tag, tpl_field[1], tpl_field[2],
                                 subfields=tpl_field[0])
            continue

        for tpl_field in template_fields:
            tpl_codes = field_get_subfield_codes(tpl_field)
            for rec_field in rec[field_tag]:
                # Codes are read once per instance, before anything is added.
                present_codes = field_get_subfield_codes(rec_field)
                for code in tpl_codes:
                    if code in present_codes:
                        continue
                    first_value = field_get_subfield_values(tpl_field, code)[0]
                    field_add_subfield(rec_field, code, first_value)
    return rec
def merge_record_with_template(rec, template_name, is_hp_record=False):
    """Fill rec in from the named template and return rec.

    When is_hp_record is true, all template subfields are first made
    volatile. Tags missing from rec are copied from the template; for tags
    rec already has, each field instance receives the template subfields
    whose code it lacks. Returns None when the template cannot be found.
    """
    template = get_record_template(template_name)
    if not template:
        return

    template_bibrec = create_record(template)[0]
    if is_hp_record:
        # Holding pen records get a fully volatile template.
        record_make_all_subfields_volatile(template_bibrec)

    for field_tag, template_fields in template_bibrec.items():
        if record_has_field(rec, field_tag):
            for template_field in template_fields:
                wanted_codes = field_get_subfield_codes(template_field)
                for existing_field in rec[field_tag]:
                    existing_codes = field_get_subfield_codes(existing_field)
                    for code in wanted_codes:
                        if code not in existing_codes:
                            field_add_subfield(
                                existing_field, code,
                                field_get_subfield_values(template_field,
                                                          code)[0])
        else:
            for template_field in template_fields:
                record_add_field(rec, field_tag,
                                 template_field[1], template_field[2],
                                 subfields=template_field[0])
    return rec
def main(): # from_base = 'http://openaire.cern.ch/' to_base = 'http://localhost:4000/' # All records recids = search_pattern(p="0->Z", f="8564_u") print "<collection>" for recid in recids: # Get record information touched = False file_links = get_fieldvalues(recid, "8564_u") def replace_link(x): if x.startswith(from_base): return x.replace(from_base, to_base) else: return x new_file_links = map(replace_link, file_links) # Print correcting to record rec = {} record_add_field(rec, "001", controlfield_value=str(recid)) for old_link,new_link in zip(file_links, new_file_links): if old_link != new_link: touched = True record_add_field(rec, '856', ind1='4', subfields=[('u', new_link)]) if touched: print record_xml_output(rec) print "</collection>"
def create_xml(recid, tags, experiment):
    """Add identifier subfields next to the 'a' subfields of the given tags.

    For each 'a' value, convert_search_to_inspire_id is called with
    'find a <value> and exp <experiment>'. A truthy first result is added
    as subfield 'i' and a truthy second result as subfield 'j' (prefixed
    with 'ORCID:'), both ahead of the 'a' subfield. Returns the print_rec
    output when anything was added, otherwise None (implicitly).
    """
    source_record = get_record(recid)
    correct_record = {}
    record_add_field(correct_record, '001', controlfield_value=str(recid))

    something_added = None
    for tag in tags:
        instances = record_get_field_instances(source_record, tag[0:3],
                                               tag[3], tag[4])
        for instance in instances:
            rebuilt = []
            for code, value in instance[0]:
                if code == 'a':
                    search = 'find a ' + value + ' and exp ' + experiment
                    ids = convert_search_to_inspire_id(search)
                    if ids[0]:
                        rebuilt.append(('i', ids[0]))
                        something_added = True
                    if ids[1]:
                        rebuilt.append(('j', 'ORCID:' + ids[1]))
                        something_added = True
                rebuilt.append((code, value))
            record_add_field(correct_record, tag[0:3], tag[3], tag[4],
                             subfields=rebuilt)

    if something_added:
        return print_rec(correct_record)
def check_records(records, doi_field="0247_a", extra_subfields=(("2", "DOI"), ("9", "bibcheck"))):
    """
    Look up missing DOIs through crossref and store them in doi_field.

    Records that already have a 0247_2 value equal to "doi" (ignoring
    case) are not queried. A DOI that find_record_from_doi already knows
    is reported as a warning on the record instead of being added.
    """
    def _lacks_doi(candidate):
        for _, value in candidate.iterfield("0247_2"):
            if value.lower() == "doi":
                return False
        return True

    records_to_check = {}
    for candidate in records:
        if _lacks_doi(candidate):
            records_to_check[candidate.record_id] = candidate

    found = get_doi_for_records(records_to_check.values())
    tag, ind1, ind2, code = doi_field[:3], doi_field[3], doi_field[4], doi_field[5]
    for record_id, doi in found.iteritems():
        record = records_to_check[record_id]
        duplicate = find_record_from_doi(doi)
        if duplicate:
            record.warn("DOI %s to be added to record %s already exists in record/s %s" % (doi, record_id, duplicate))
            continue
        subfields = [(code, doi.encode("utf-8"))] + map(tuple, extra_subfields)
        record_add_field(record, tag=tag, ind1=ind1, ind2=ind2,
                         subfields=subfields)
        record.set_amended("Added DOI in field %s" % doi_field)
def check_record(record, overwrite=True):
    """ Calculates wikipedia link based on viaf id

    Looks for 035 subfield values containing "|(VIAF)", asks
    get_wikipedia_link for a link for the id that follows that marker,
    and stores the link in CFG_VIAF_WIKIPEDIA_LINK_BFO_FIELD.

    With overwrite false, a record for which get_wiki_link_from_record
    returns something truthy only receives a warning.
    """
    # Largest value found in the last element of the last field instance
    # of any tag (presumably the global field position -- TODO confirm).
    maxi = 0
    for k in record.iterkeys():
        if record[k][-1][-1] > maxi:
            maxi = record[k][-1][-1]
    if not overwrite and get_wiki_link_from_record(record):
        record.warn("Author already had a link to wikipedia")
    else:
        control_nos = []
        if record.get('035',None):
            # Every truthy subfield value of every 035 field instance.
            control_nos = [t[1] for d in record.get('035',()) if d and d[0] for t in d[0] if t and t[1]]
        for control_no in control_nos:
            if (control_no.find("|(VIAF)") != -1):
                # The VIAF id is whatever follows the "|(VIAF)" marker.
                viaf_id = control_no.split("|(VIAF)")[1]
                link = get_wikipedia_link(viaf_id)
                if link:
                    # NOTE(review): linkfield is built but not used again
                    # in this function.
                    linkfield = ([(CFG_VIAF_LINK_NAME_LABEL_SUBFIELD,CFG_VIAF_WIKIPEDIA_NAME_VALUE_SUBFIELD),(CFG_VIAF_WIKIPEDIA_LINK_SUBFIELD,link)] , '', '', ' ', maxi)
                    if get_wiki_link_from_record(record):
                        # A link is already there: try to overwrite it in
                        # place.
                        # NOTE(review): `field` is iterated element by
                        # element and only elements of type list are
                        # inspected -- confirm this matches how field
                        # instances are stored.
                        for field in record[CFG_VIAF_WIKIPEDIA_LINK_BFO_FIELD]:
                            for subfield in field:
                                if type(subfield) is list and subfield[0] == CFG_VIAF_LINK_NAME_LABEL_SUBFIELD and subfield[1] == CFG_VIAF_WIKIPEDIA_NAME_VALUE_SUBFIELD:
                                    for sub in field:
                                        if type(sub) is list and sub[0] == CFG_VIAF_WIKIPEDIA_LINK_SUBFIELD:
                                            sub[1] = link
                    else:
                        # No link yet: add a new field with the label and
                        # link subfields.
                        record_add_field(record,CFG_VIAF_WIKIPEDIA_LINK_BFO_FIELD, \
                                         subfields=[(CFG_VIAF_LINK_NAME_LABEL_SUBFIELD,CFG_VIAF_WIKIPEDIA_NAME_VALUE_SUBFIELD),(CFG_VIAF_WIKIPEDIA_LINK_SUBFIELD,link)])
                    record.set_amended("Added wiki link to author")
def create_xml(recid=None, osti_id=None, doi=None): osti_exists = False doi_exists = False osti_mismatch = False mismatches = [] osti_subfields = [('9', 'OSTI'), ('a', osti_id)] record = get_record(recid) record_link = '<a href="http://inspirehep.net/record/%s">%s</a>' % (str(recid),str(recid)) append_record = {} additions = False errors = None for item in BibFormatObject(recid).fields('035__'): if item.has_key('9') and item.has_key('a'): if item['9'] == 'OSTI' and item['a'] == osti_id: osti_exists = True elif item['9'] == 'OSTI' and item['a'] != osti_id: osti_mismatch = True mismatches.append(item['a']) for item in BibFormatObject(recid).fields('0247_'): if item.has_key('2') and item.has_key('a'): if item['2'] == 'DOI' and item['a'] == doi: doi_exists = True if osti_exists is False and osti_mismatch is True: print str(recid), "already has a different OSTI ID" errors = "doi %s in record %s should match OSTI ID %s, but the record already contains OSTI ID(s) %s<br />" % (doi, record_link, osti_id, ','.join(mismatches)) return errors if doi_exists is False and osti_exists is True: print str(recid), "contains an OSTI ID but no doi" no_doi = "%s contains OSTI ID %s but not doi %s<br />" % (record_link, osti_id, doi) return no_doi if osti_exists is False and osti_mismatch is False: record_add_field(append_record, '001', controlfield_value=str(recid)) record_add_field(append_record, '035', '', '', subfields=osti_subfields) print "%s: added 035__a:%s" % (str(recid), osti_id) return print_rec(append_record)
def ccreate_xml(recid, rawstring): found = False record = {} record_add_field(record, '001', controlfield_value=str(recid)) rawstring = rawstring.lower().replace('proc. of the', '').replace( 'proc. of', '').replace('.', ' ').replace('(', '').replace(')', '').replace(' -', '') for k, v in term_dict.items(): if k in rawstring: rawstring = rawstring.replace(k, v) matchobj = re.search('(.*?\d{4})', rawstring) if matchobj: search = perform_request_search(p=matchobj.group(), cc='Conferences') if len(search) == 1: for s in search: cnums = get_fieldvalues(s, '111__g') cnum = cnums[0] existing_cnum = get_fieldvalues(recid, '773__w') if cnum not in existing_cnum: print recid, cnum found = True if found: record_add_field(record, '773', '', '', subfields=[('w', cnum)]) return print_rec(record)
def create_xml(author, email, affiliation, experiment, inspire_id, orcid):
    """Create the xml file to upload.

    Assembles a HEPNAMES record and returns it serialised by ``print_rec``.

    Args:
        author: author name; a "Last, First" form is also stored flipped
            to "First Last" in subfield ``q`` of field 100.
        email: e-mail stored in subfield ``m`` of every 371 field.
        affiliation: one affiliation, a list of affiliations, or a false
            value for none.
        experiment: experiment stored in field 693, or a false value.
        inspire_id: identifier stored in an 035 field with ``9=INSPIRE``.
        orcid: identifier stored in a second 035 field with ``9=ORCID``,
            or a false value.

    Returns:
        The result of ``print_rec`` for the assembled record.
    """
    # Fields are kept as an ordered list of (tag, subfields) pairs. A dict
    # keyed by tag can hold only one field per tag, which lost every
    # affiliation but the last and let the ORCID 035 field replace the
    # INSPIRE one.
    pending_fields = [
        ('980__', [('a', 'HEPNAMES')]),
        ('100__', [('a', author),
                   ('q', re.sub(r'(.*)\, (.*)', r'\2 \1', author)),
                   ('g', 'ACTIVE')]),
    ]

    if not affiliation:
        pending_fields.append(('371__', [('m', email), ('z', 'current')]))
    else:
        if isinstance(affiliation, list):
            affiliation_list = affiliation
        else:
            affiliation_list = [affiliation]
        for aff in affiliation_list:
            pending_fields.append(
                ('371__', [('m', email), ('a', aff), ('z', 'current')]))

    if experiment:
        pending_fields.append(
            ('693__', [('e', experiment), ('z', 'current')]))
    pending_fields.append(('035__', [('9', 'INSPIRE'), ('a', inspire_id)]))
    if orcid:
        pending_fields.append(('035__', [('9', 'ORCID'), ('a', orcid)]))
    # SOURCE is a module-level value defined outside this function.
    if SOURCE:
        pending_fields.append(('670__', [('a', SOURCE)]))

    common_fields = {}
    for tag, subfields in pending_fields:
        # tag is "TTTij": three tag characters followed by two indicators.
        record_add_field(common_fields, tag[0:3], tag[3], tag[4],
                         subfields=subfields)
    return print_rec(common_fields)
def main(): from_base = 'http://openaire.cern.ch' to_base = config.CFG_SITE_URL # All records recids = search_pattern(p="0->Z", f="8564_u") print "<collection>" for recid in recids: # Get record information touched = False file_links = get_fieldvalues(recid, "8564_u") new_file_links = map(replace_link_func(from_base, to_base), file_links) # Print correcting to record rec = {} record_add_field(rec, "001", controlfield_value=str(recid)) for old_link, new_link in zip(file_links, new_file_links): if old_link != new_link: touched = True record_add_field(rec, '856', ind1='4', subfields=[('u', new_link)]) if touched: print record_xml_output(rec) print "</collection>"
def create_xml(recid): correct_record = {} tag = '8564_' record = get_record(recid) flag = None record_add_field(record, '001', controlfield_value=str(recid)) field_instances = record_get_field_instances(record, tag[0:3], tag[3], tag[4]) correct_subfields = [] for field_instance in field_instances: correct_subfields = [] # print field_instance for c,v in field_instance[0]: # print c,v matchObj = re.search(r'inspirehep\.net/record/\d+/files/fermilab-thesis-.*?\.pdf', v, flags=re.IGNORECASE) if matchObj: print 'yes' flag = True correct_subfields.append(('y', 'Fulltext')) correct_subfields.append((c,v)) record_add_field(correct_record, tag[0:3], tag[3], tag[4], \ subfields=correct_subfields) if flag: return print_rec(correct_record) else: return None
def check_record(record, source_field, new_field, subfield_filter):
    """Move matching field instances from source_field to new_field.

    source_field and new_field are five-character specs (tag plus two
    indicators, "_" meaning blank). subfield_filter is a (code, value)
    pair: a code of None matches every instance; otherwise an instance
    matches when one of its subfields has the given value and the filter
    code is '%' or equals that subfield's code.
    """
    from invenio.bibrecord import (record_add_field,
                                   record_delete_field,
                                   record_get_field_instances)

    assert len(source_field) == 5
    assert len(new_field) == 5
    src = source_field.replace("_", " ")
    dst = new_field.replace("_", " ")

    assert len(subfield_filter) == 2
    wanted_code, wanted_value = subfield_filter

    def filter_passes(subfield_code, result):
        if wanted_code is None:
            return True
        return wanted_code in ('%', subfield_code) and wanted_value == result

    # First pass: collect the matching instances and delete them.
    moved = []
    for instance in record_get_field_instances(record, src[:3], src[3], src[4]):
        subfields, ind1, ind2, _, pos = instance
        if not any(filter_passes(*s) for s in subfields):
            continue
        moved.append(subfields)
        record_delete_field(record, src[:3], ind1, ind2, pos)

    # Second pass: re-create them under the new tag and indicators.
    for subfields in moved:
        record_add_field(record, dst[:3], dst[3], dst[4], subfields=subfields)
        record.set_amended('move from %s to %s: %s' %
                           (src.replace(" ", "_"), dst.replace(" ", "_"),
                            subfields))
def create_xml(recid):
    """
    Collapse two 773__ fields with the same journal (p) into one.

    Acts only when the record has exactly two equal 773__p values.
    Subfields of all 773__ instances are merged without duplicates, the
    value 'To appear in the proceedings of' is dropped, and of the 'c'
    values the longest is kept and placed last. Returns the print_rec
    output of the correction record, or None when the record does not
    qualify.
    """
    tag = '773__'
    journal = get_fieldvalues(recid, tag + 'p')
    if len(journal) != 2 or journal[0] != journal[1]:
        return None

    record = get_record(recid)
    correct_record = {}
    record_add_field(correct_record, '001',
                     controlfield_value=str(recid))

    merged = []
    best_c = False
    for instance in record_get_field_instances(record, tag[0:3],
                                               tag[3], tag[4]):
        for code, value in instance[0]:
            if value == 'To appear in the proceedings of':
                continue
            if (code, value) in merged:
                continue
            if code != 'c':
                merged.append((code, value))
            elif not best_c or len(value) > len(best_c):
                best_c = value

    if best_c:
        merged.append(('c', best_c))
    record_add_field(correct_record, tag[0:3], tag[3], tag[4],
                     subfields=merged)
    return print_rec(correct_record)
def create_xml(recid, IDs, tags): """ Replaces specific inspire-ids in records with nothing """ if VERBOSE: print "Working on %s" % recid record = get_record(int(recid)) correct_record = {} record_add_field(correct_record, '001', controlfield_value=recid) for tag in tags: field_instances = record_get_field_instances(record, \ tag[0:3], tag[3], tag[4]) for field_instance in field_instances: correct_subfields = [] for code, value in field_instance[0]: if code == 'i': if value in IDs: if VERBOSE: print "Getting rid of %s from %s!" % (value, recid) pass else: correct_subfields.append((code, value)) else: correct_subfields.append((code, value)) record_add_field(correct_record, tag[0:3], tag[3], tag[4], \ subfields=correct_subfields) return print_rec(correct_record)
def check_record(record, source_field, new_field, subfield_filter):
    """Re-tag the field instances of source_field that pass the filter.

    Both field arguments are five characters long: a tag and two
    indicators, with "_" standing for a blank indicator. subfield_filter
    is a (code, value) pair; a code of None lets every instance through,
    and a code of '%' matches the value under any subfield code.
    """
    from collections import namedtuple
    from invenio.bibrecord import (record_add_field,
                                   record_delete_field,
                                   record_get_field_instances)

    assert len(source_field) == 5
    assert len(new_field) == 5
    source_field = source_field.replace("_", " ")
    new_field = new_field.replace("_", " ")
    old_tag, old_ind1, old_ind2 = source_field[:3], source_field[3], source_field[4]
    new_tag, new_ind1, new_ind2 = new_field[:3], new_field[3], new_field[4]

    assert len(subfield_filter) == 2
    SubfieldFilter = namedtuple('SubfieldFilter', ['code', 'value'])
    subfield_filter = SubfieldFilter(*subfield_filter)

    def filter_passes(subfield_code, result):
        if subfield_filter.code is None:
            return True
        if subfield_filter.code not in ('%', subfield_code):
            return False
        return subfield_filter.value == result

    to_move = []
    for subfields, ind1, ind2, _, pos in record_get_field_instances(
            record, old_tag, old_ind1, old_ind2):
        if any(filter_passes(*s) for s in subfields):
            to_move.append(subfields)
            record_delete_field(record, old_tag, ind1, ind2, pos)

    for subfields in to_move:
        record_add_field(record, new_tag, new_ind1, new_ind2,
                         subfields=subfields)
        record.set_amended('move from %s to %s: %s' % (
            source_field.replace(" ", "_"),
            new_field.replace(" ", "_"),
            subfields))
def generate_final_patch(self, patch_dict, recid):
    """
    Merge the 'MOD', 'ADD' and 'DEL' patches of patch_dict into one record.

    Each present section (in that order) contributes all of its field
    instances; finally the 001 control field is set to recid.

    Returns the merged patch record.
    """
    final_patch = {}
    for section in ('MOD', 'ADD', 'DEL'):
        if section not in patch_dict:
            continue
        section_patch = patch_dict[section]
        for tag in section_patch:
            for data_tuple in section_patch[tag]:
                # data_tuple: (subfields, ind1, ind2, ...)
                record_add_field(final_patch, tag,
                                 data_tuple[1], data_tuple[2], '',
                                 subfields=data_tuple[0])
    record_add_field(final_patch, '001', ' ', ' ', recid)
    return final_patch
def create_xmlrefs(recid): subrefs = [ '%s,%i,' % (old_journal, x) for x in range(vol_change, vol_curr) ] record = get_record(recid) correct_record = {} record_add_field(correct_record, '001', controlfield_value=str(recid)) field_instances = record_get_field_instances(record, '999', 'C', '5') correct_subfields = [] for field_instance in field_instances: correct_subfields = [] for code, value in field_instance[0]: if code == 's' and any(x for x in subrefs if x in value): newval = re.sub(old_journal, repl_journal, value) if VERBOSE: print "%s: Replacing %s with %s" % (recid, value, newval) correct_subfields.append(('s', newval)) else: correct_subfields.append((code, value)) record_add_field(correct_record, '999', 'C', '5', subfields=correct_subfields) return print_rec(correct_record)
def check_record(record):
    """
    Clean every 710__g value with cleancoll.

    When cleancoll returns a new collaboration name, the subfield is
    rewritten. When it returns an author, the author is added as field
    100 (or 700 if the record already has a 100) and the record is marked
    for the holding pen. The record is flagged as amended only when a
    collaboration name was rewritten.
    """
    from invenio.bibrecord import record_modify_subfield
    from invenio.bibrecord import record_add_field

    log = ""
    collaboration_changed = False
    author_added = False

    for position, coll in record.iterfield("710__g"):
        cleaned, author = cleancoll(coll)
        if cleaned:
            log = "%s changed %s -> %s\n" % (log, coll, cleaned)
            record_modify_subfield(record, "710", "g", cleaned,
                                   position[2],
                                   field_position_local=position[1])
            collaboration_changed = True
        if not author:
            continue
        log = "%s found author: %s in %s\n" % (log, author, coll)
        # The first author goes to 100, any further one to 700.
        akey = "700" if record.has_key("100") else "100"
        record_add_field(record, akey, ' ', ' ', '', [('a', author)])
        author_added = True

    if collaboration_changed:
        record.set_amended(log)
    if author_added:
        record.holdingpen = True
def create_new_pdg_fields(recids, pdg_data):
    """Build, for every recid, a record with one 084 field per PDG code.

    pdg_data maps a recid to its codes. Each 084 field gets the subfields
    2=PDG, 9=PDG and a=<code>, in that order. Returns a dict mapping recid
    to the new record.
    """
    _print_out("Creating new PDG fields for " + str(len(recids)) + " records...")
    records = {}
    for recid in recids:
        new_record = {}
        records[recid] = new_record
        record_add_field(new_record, '001', controlfield_value=str(recid))
        for pdg_code in pdg_data[recid]:
            # Create the empty field first, then fill in its subfields.
            position = record_add_field(new_record, '084', ' ', ' ')
            for code, value in (('2', 'PDG'), ('9', 'PDG'), ('a', pdg_code)):
                record_add_subfield_into(new_record, '084', code, value,
                                         field_position_global=position)
    return records
def create_xml(recid):
    """Rebuild the fields of the module-level `tag` with a new journal name.

    Every 'p' subfield becomes repl_journal. When volume_letter is set,
    each 'v' subfield is replaced by the record's 773__v values, with
    volume_letter prefixed to those that do not start with a letter. All
    other subfields are copied. Returns the print_rec output.
    """
    source_record = get_record(recid)
    correct_record = {}
    record_add_field(correct_record, '001', controlfield_value=str(recid))

    for instance in record_get_field_instances(source_record, tag[0:3],
                                               tag[3], tag[4]):
        rebuilt = []
        for code, value in instance[0]:
            if code == 'p':
                rebuilt.append(('p', repl_journal))
            elif code == 'v' and volume_letter:
                for volume in get_fieldvalues(recid, '773__v'):
                    if not volume[0].isalpha():
                        volume = volume_letter + volume
                    rebuilt.append(('v', volume))
            else:
                rebuilt.append((code, value))
        record_add_field(correct_record, tag[0:3], tag[3], tag[4],
                         subfields=rebuilt)
    return print_rec(correct_record)
def merge_record_with_template(rec, template_name, is_hp_record=False):
    """Fill rec in from the named template, order its subfields, return it.

    When is_hp_record is true, all template subfields are first made
    volatile. Tags missing from rec are copied from the template; for tags
    rec already has, each field instance receives the template subfields
    whose code it lacks. Returns None when the template cannot be found.
    """
    template = get_record_template(template_name)
    if not template:
        return

    template_bibrec = create_record(template)[0]
    if is_hp_record:
        # Holding pen records get a fully volatile template.
        record_make_all_subfields_volatile(template_bibrec)

    def _copy_fields(field_tag):
        for instance in template_bibrec[field_tag]:
            record_add_field(rec, field_tag, instance[1], instance[2],
                             subfields=instance[0])

    def _fill_missing_subfields(field_tag):
        for template_instance in template_bibrec[field_tag]:
            template_codes = field_get_subfield_codes(template_instance)
            for instance in rec[field_tag]:
                own_codes = field_get_subfield_codes(instance)
                for code in template_codes:
                    if code not in own_codes:
                        field_add_subfield(
                            instance, code,
                            field_get_subfield_values(template_instance,
                                                      code)[0])

    for field_tag in template_bibrec:
        if record_has_field(rec, field_tag):
            _fill_missing_subfields(field_tag)
        else:
            _copy_fields(field_tag)

    record_order_subfields(rec)
    return rec
def check_record(record):
    """
    Split 693__e fields that hold several values into one field each.

    ``fields_to_split`` tells which fields to split: it is used here as a
    mapping from a field's global position to a
    ``(parts, rest_before, rest_after)`` triple. Each such field is
    deleted and replaced by one field per part, built as
    ``rest_before + [part] + rest_after``. The record is flagged as
    amended when at least one field was split.
    """
    from invenio.bibrecord import record_delete_field
    from invenio.bibrecord import record_add_field

    marc = '693__e'
    tag = marc[:3]
    # `continue` is only legal inside a loop; in this function body it made
    # the whole module fail to compile. Nothing to do means: return.
    if tag not in record:
        return
    ind1 = marc[3].replace('_', ' ')
    ind2 = marc[4].replace('_', ' ')
    sfcode = marc[5]

    to_split = fields_to_split(record, tag, ind1, ind2, sfcode)
    if not to_split:
        return

    message = ""
    # work from the back to try to preserve order
    for global_pos in sorted(to_split, reverse=True):
        parts, rest_before, rest_after = to_split[global_pos]
        message += " - split %s %s" % (tag, parts)
        record_delete_field(record, tag, ind1, ind2,
                            field_position_global=global_pos)
        # Each new field is inserted at the same position, so the parts
        # are added last-to-first to end up in their original order.
        for subfield in reversed(parts):
            field = rest_before + [subfield] + rest_after
            record_add_field(record, tag, ind1, ind2, '', field,
                             field_position_global=global_pos)
    if message:
        record.set_amended(message)
def check_record(record):
    """ move 8564_u/y to 035__a/9

    Iterates the 8564_u values selected by subfield_filter
    ('y', provenance). A value matched by ostiidre yields an OSTI id
    (group 1) that is added as an 035 field (9=OSTI, a=<id>), after which
    the 8564 field is deleted. Values that do not match are reported as a
    warning on the record.
    """
    # Number of fields deleted so far; subtracted from the positions
    # reported by iterfield (presumably because they refer to the record
    # as it was before any deletion -- TODO confirm).
    delcount = 0
    # OSTI ids already handled for this record.
    ostiids = set()
    for pos, val in record.iterfield('8564_u', subfield_filter=('y', provenance)):
        if val:
            ostiidmatch = ostiidre.match(val)
            if ostiidmatch:
                ostiid = ostiidmatch.group(1)
                if ostiid in ostiids:
                    # Same id seen before: skipped; this link field is
                    # neither moved nor deleted.
                    continue
                else:
                    ostiids.add(ostiid)
                subfields_to_add = (('9', 'OSTI'), ('a', ostiid))
                record_add_field(record, tag='035', ind1='_', ind2='_', subfields=subfields_to_add)
                # Delete the link field: (tag, shifted position, None).
                record.delete_field((pos[0][0:3], pos[1] - delcount, None))
                delcount += 1
                record.set_amended("moved link for %s:%s" % (provenance, ostiid))
            else:
                record.warn('no match for [%s]' % val)
def update_record(recid, hal_id, bibupload):
    """Queue a patch that adds the HAL id to record recid.

    The patch holds the 001 control field and an 035 field (a=hal_id,
    9=HAL); its XML is handed to bibupload.add after a message is written.
    """
    hal_subfields = [('a', hal_id), ('9', 'HAL')]
    patch = {}
    record_add_field(patch, '001', controlfield_value=str(recid))
    record_add_field(patch, '035', subfields=hal_subfields)
    write_message("Record %s matched HAL record %s" % (recid, hal_id))
    patch_xml = record_xml_output(patch)
    bibupload.add(patch_xml)
def create_xml(recid, arxiv_ids):
    """Return a record holding the 856 (ind1=4) fields accepted by
    check_arxiv_url for the given arxiv_ids, serialised with print_rec."""
    source_record = get_record(recid)
    candidates = record_get_field_instances(source_record, tag='856',
                                            ind1='4')
    accepted = []
    for candidate in candidates:
        if check_arxiv_url(candidate, arxiv_ids):
            accepted.append(candidate)

    record = {}
    record_add_field(record, '001', controlfield_value=str(recid))
    record_add_fields(record, '856', accepted)
    return print_rec(record)
def check_records(records, doi_field="0247_a", extra_subfields=(("2", "DOI"), ("9", "bibcheck")), create_ticket=False):
    """
    Find the DOI for the records using crossref and add it to the specified
    field.

    This plugin won't ask for the DOI if it's already set.

    When the DOI found for a record is already known to
    find_record_from_doi, the DOI is not added: the record gets a warning
    and, if create_ticket is true, a ticket is submitted to the "Bibcheck"
    queue and commented with links to both records and to the merge page.
    """
    records_to_check = {}
    for record in records:
        has_doi = False
        for position, value in record.iterfield("0247_2"):
            if value.lower() == "doi":
                has_doi = True
                break
        if not has_doi:
            records_to_check[record.record_id] = record

    dois = get_doi_for_records(records_to_check.values())
    for record_id, doi in dois.iteritems():
        record = records_to_check[record_id]
        dup_doi_recid = find_record_from_doi(doi)
        if dup_doi_recid:
            record.warn("DOI %s to be added to record %s already exists in record/s %s" % (doi, record_id, dup_doi_recid))
            if create_ticket:
                subject = "DOI conflict record #%s" % str(record_id)
                res = BIBCATALOG_SYSTEM.ticket_submit(
                    subject=subject,
                    recordid=record_id,
                    text=subject,
                    queue="Bibcheck"
                )
                # The comment is only attached when ticket_submit returned
                # a positive value (presumably the ticket id -- TODO
                # confirm).
                if res > 0:
                    msg = """ DOI %s to be added to record %s already exists in record/s %s Record with conflict: %s Record with original DOI: %s Merge both records: %s """
                    # Converted to int so min()/max() below compare
                    # numbers.
                    dup_doi_recid = int(dup_doi_recid)
                    record_id = int(record_id)
                    msg = msg % (
                        doi,
                        record_id,
                        dup_doi_recid,
                        "%s/record/%s" % (CFG_SITE_URL, record_id),
                        "%s/record/%s" % (CFG_SITE_URL, dup_doi_recid),
                        "%s/record/merge/?#recid1=%s&recid2=%s" % (CFG_SITE_URL, min(dup_doi_recid, record_id), max(dup_doi_recid, record_id))
                    )
                    if isinstance(msg, unicode):
                        msg = msg.encode("utf-8")
                    BIBCATALOG_SYSTEM.ticket_comment(None, res, msg)
            # Never add a DOI that already exists elsewhere.
            continue
        subfields = [(doi_field[5], doi.encode("utf-8"))] + map(tuple, extra_subfields)
        record_add_field(record, tag=doi_field[:3], ind1=doi_field[3], ind2=doi_field[4], subfields=subfields)
        record.set_amended("Added DOI in field %s" % doi_field)
def create_xml(recid, tags_024):
    """Return a record with one 0247_ DOI field per distinct value in
    tags_024 (2=DOI, a=<doi>), serialised with print_rec."""
    record = {}
    record_add_field(record, '001', controlfield_value=str(recid))
    distinct_dois = set(tags_024)
    for doi in distinct_dois:
        record_add_field(record, '024', '7',
                         subfields=[('2', 'DOI'), ('a', doi)])
    return print_rec(record)
def create_our_record(recid):
    """Return a record with the deduplicated 980 fields of recid.

    Instances whose subfields are exactly [('a', 'unknown')] are dropped;
    the rest are deduplicated by putting OurInstance wrappers in a set.
    The result is serialised with print_rec.
    """
    old_record = get_record(recid)
    unknown_only = [('a', 'unknown')]

    wrapped = set()
    for instance in record_get_field_instances(old_record, '980'):
        if field_get_subfield_instances(instance) != unknown_only:
            wrapped.add(OurInstance(instance))
    new_instances = [wrapper.field for wrapper in wrapped]

    record = {}
    record_add_field(record, '001', controlfield_value=str(recid))
    record_add_fields(record, '980', new_instances)
    return print_rec(record)
def _add_to_record(record, patch):
    """Add every field instance of patch (tag -> instances) to record and
    return record."""
    for tag in patch:
        for instance in patch[tag]:
            subfields = instance[0]
            ind1 = instance[1]
            ind2 = instance[2]
            record_add_field(record, tag, ind1, ind2, '',
                             subfields=subfields)
    return record
def inject_recid(data):
    """Parse each item of data into a record, giving it a 001 if missing.

    The missing record id is taken from the first match of
    re_matched_recid in the item (second element of that match). Returns
    the list of parsed records.
    """
    updated_records = []
    for raw_match in data:
        bibrec = create_records(raw_match)[0][0]
        has_recid = record_has_field(bibrec, '001')
        if not has_recid:
            first_hit = re_matched_recid.findall(raw_match)[0]
            record_add_field(bibrec, tag='001',
                             controlfield_value=first_hit[1])
        updated_records.append(bibrec)
    return updated_records
def generate_marc_to_append(local, remote):
    """
    Build the MarcXML that appends an 035 remote ID to record ``local``.

    The output record has a 001 controlfield with ``str(local)`` and one 035
    field whose subfields are 9=REMOTE_INSTANCE and a=``str(remote)``.
    """
    appended = {}
    record_add_field(appended, '001', controlfield_value=str(local))

    # Create the empty 035 first, then fill it through its global position.
    position = record_add_field(appended, '035')
    record_add_subfield_into(appended, '035', '9', REMOTE_INSTANCE,
                             field_position_global=position)
    record_add_subfield_into(appended, '035', 'a', str(remote),
                             field_position_global=position)

    return record_xml_output(appended)
def create_xml(recid, osti_id, new_id, search):
    """Return print_rec() output that adds an OSTI 035 field to ``recid``.

    The record has a 001 controlfield with ``str(recid)`` and one 035 field
    (both indicator arguments empty) with subfields a=``osti_id`` and
    9=OSTI.

    @param recid: record id written to the 001 controlfield
    @param osti_id: identifier stored in 035 $a
    @param new_id: accepted for backward compatibility; its value is not used
    @param search: text included in the diagnostic printed on failure
    @return: the print_rec() result, or False if print_rec() raised
    """
    record = {}
    record_add_field(record, '001', controlfield_value=str(recid))
    osti_subfields = [('a', osti_id), ('9', 'OSTI')]
    record_add_field(record, '035', '', '', subfields=osti_subfields)
    try:
        return print_rec(record)
    except Exception:
        # Exception, not a bare except: Ctrl-C and SystemExit must still
        # propagate.  %-formatting keeps the handler itself from raising
        # when ``search`` is not a string.
        print("Something wrong: %s" % (search,))
        return False
# handle_tags(recid, tags, d): build (and print) candidate records from the
# author and e-mail subfields of record `recid`.
#
# What the code below visibly does:
#   * loads the record with get_record(recid) and, for every tag in `tags`,
#     fetches the field instances for tag[0:3] / tag[3] / tag[4];
#   * walks the (code, value) subfields of each instance (field_instance[0]);
#     codes 'm' and 'u' are redirected to tag '371__' ('u' is recoded to 'a'),
#     otherwise the original tag is restored and '700__' is mapped to '100__';
#   * an 'm' value not yet in list_of_emails is appended to it and looked up
#     with get_hepnames_recid_from_email();
#   * a 100__ $a value not yet in list_of_authors is appended to it, gets a
#     'q' subfield with "Last, First" rewritten as "First Last", and is
#     searched in the 'HepNames' collection with "find a X or ea X";
#     need_author is set when nothing is found, and cleared again when the
#     value contains an apostrophe or when the subfield code is 'i';
#   * subfields are written to correct_record with record_add_field(); when
#     `d` is truthy it is merged in with correct_record.update(d);
#   * when need_author or need_email is set, print_rec(correct_record) is
#     printed and both flags are reset;
#   * returns correct_record.
#
# list_of_emails, list_of_authors and verbose are not defined in this
# function (presumably module-level globals -- confirm).
# NOTE(review): inHepnames_author is only assigned on the "new 100__ $a"
# path but is read under `verbose` whenever need_author or need_email is
# set; that looks like a possible unbound-name error -- confirm.
# NOTE(review): this definition is flattened onto one line, so the nesting
# of the individual `if` statements cannot be read from here; verify
# against the formatted original before changing any logic.
def handle_tags(recid, tags, d): record = get_record(recid) correct_record = {} need_email = False need_author = False for tag in tags: original_tag = tag field_instances = \ record_get_field_instances(record, tag[0:3], tag[3], tag[4]) correct_subfields = [] #correct_subfields_aff = [] for field_instance in field_instances: correct_record = {} correct_subfields = [] for code, value in field_instance[0]: if code == 'm' or code == 'u': tag = '371__' if code == 'u': code = 'a' if code == 'm' and not value in list_of_emails: list_of_emails.append(value) inHepnames_email = get_hepnames_recid_from_email(value) if verbose: print 'inHepnames_email=', inHepnames_email #if not inHepnames_email: need_email = value else: tag = original_tag if tag == '700__' : tag = '100__' if code != 'v': correct_subfields = [(code, value)] if tag == '371__': correct_subfields.append(('z', 'current')) if code == 'a' and tag == '100__' and not value in list_of_authors: list_of_authors.append(value) nicename = re.sub(r'(.*)\, (.*)',r'\2 \1',value) correct_subfields.append(('q', nicename)) search = "find a " + value search = search + " or ea " + value inHepnames_author = \ perform_request_search(p=search, cc='HepNames') if verbose: print 'inHepnames_author=', inHepnames_author if not inHepnames_author: need_author = True if re.search(r"'",value): need_author = False if code == 'i' : need_author = False record_add_field(correct_record, tag[0:3], tag[3], tag[4], \ subfields=correct_subfields) if d: correct_record.update(d) if need_author or need_email: if verbose and inHepnames_author: print "Margaret: This author is already in", \ inHepnames_author, need_email print print_rec(correct_record) need_email = False need_author = False return correct_record
# _modify_record: compute a replacement set of 980 fields for record `recid`.
#
# What the code below visibly does:
#   * loads the record with get_record(recid) and reads rec['980']; a
#     KeyError (no 980 field) makes the method return False;
#   * when `replace_colls` is non-empty, every entry c becomes a new field
#     [('a', c)] and the result is marked dirty;
#   * otherwise each existing 980 field is inspected through its first
#     subfield (code, val): test_func(code, val) decides whether that
#     subfield is overwritten with replace_func(code, val), and
#     include_func(code, val) decides whether the field is kept; dropping or
#     rewriting a field marks the result dirty.  Fields without a first
#     subfield (IndexError) are silently skipped;
#   * every entry of `append_colls` is added as [('a', c)] and marks the
#     result dirty;
#   * returns False when nothing changed; otherwise returns a new record
#     with a 001 controlfield holding str(recid) plus the collected 980s.
#
# The list defaults for append_colls / replace_colls are shared between
# calls; this body only iterates over them.
# NOTE(review): this definition is flattened onto one line, so it cannot be
# read from here whether the append_colls loop also runs when replace_colls
# is given -- verify against the formatted original.
def _modify_record(self, recid, test_func, replace_func, include_func, append_colls=[], replace_colls=[]): """ Generate record a MARCXML file @param test_func: Function to test if a collection id should be changed @param replace_func: Function to replace the collection id. @param include_func: Function to test if collection should be included """ rec = get_record(recid) newcolls = [] dirty = False try: colls = rec['980'] if replace_colls: for c in replace_colls: newcolls.append([('a', c)]) dirty = True else: for c in colls: try: # We are only interested in subfield 'a' code, val = c[0][0] if test_func(code, val): c[0][0] = replace_func(code, val) dirty = True if include_func(code, val): newcolls.append(c[0]) else: dirty = True except IndexError: pass for c in append_colls: newcolls.append([('a', c)]) dirty = True except KeyError: return False if not dirty: return False rec = {} record_add_field(rec, '001', controlfield_value=str(recid)) for subfields in newcolls: record_add_field(rec, '980', subfields=subfields) return rec
def create_xml(recid, experiment_id):
    """Create xml to append to INSPIRE record.

    The record holds a 001 controlfield with ``str(recid)`` and one 035
    field, added with '_' in both indicator positions, whose subfields are
    9=EXPERIMENT and a=``experiment_id``.  Returns the print_rec() output.
    """
    appended = {}
    record_add_field(appended, '001', controlfield_value=str(recid))
    id_subfields = [('9', EXPERIMENT), ('a', experiment_id)]
    record_add_field(appended, '035', '_', '_', subfields=id_subfields)
    return print_rec(appended)
def apply_hepnames_updates(hepname_updates):
    """Queue one append-mode upload per record in ``hepname_updates``.

    ``hepname_updates`` maps a record id to a dict of identifier values
    keyed by identifier name.  Only ORCID, ORIGINAL_BAI, INSPIRE and KAKEN
    entries are used; ORIGINAL_BAI is stored under the name BAI.  Each one
    becomes a 035 field with subfields a=<value>, 9=<name>.  The resulting
    XML is logged with write_message() and handed to ChunkedBibUpload.add().
    """
    uploader = ChunkedBibUpload(mode='a', user='******')
    for recid, identifiers in hepname_updates.iteritems():
        update = {}
        record_add_field(update, '001', controlfield_value=str(recid))
        for id_name, id_value in identifiers.iteritems():
            if id_name not in ('ORCID', 'ORIGINAL_BAI', 'INSPIRE', 'KAKEN'):
                continue
            source = 'BAI' if id_name == 'ORIGINAL_BAI' else id_name
            record_add_field(update, '035',
                             subfields=[('a', id_value), ('9', source)])
        write_message(record_xml_output(update))
        uploader.add(record_xml_output(update))
def create_xml(recid, msnet):
    '''Creates xml record to append MSNET ID

    The record holds a 001 controlfield with ``str(recid)`` and one 035
    field, added with '_' in both indicator positions, whose subfields are
    9=MSNET and a=``msnet``.  Returns the print_rec() output.
    '''
    appended = {}
    record_add_field(appended, '001', controlfield_value=str(recid))
    msnet_subfields = [('9', 'MSNET'), ('a', msnet)]
    record_add_field(appended, '035', '_', '_', subfields=msnet_subfields)
    return print_rec(appended)
def migrate_record(recid, substitutions=[], additions=[]):
    """Rewrite the 980 fields of ``recid`` and upload the result in correct mode.

    @param recid: id of the record to migrate
    @param substitutions: (old, new) pairs; the first 980 subfield of a
        field whose value equals ``old`` is replaced by ``new``
    @param additions: subfield lists, each added as an extra 980 field

    A record with a '536__c' value additionally gets a=user-ecfunded, and
    record 941 gets a=publication, b=workingpaper.  All fields other than
    001 and 980 are dropped before uploading.  If substitutions are
    requested but the record has no 980 field, a warning is issued and
    nothing is uploaded.
    """
    from invenio.search_engine import get_record
    from invenio.bibrecord import record_add_field
    from invenio.bibupload import bibupload
    from invenio.search_engine import get_fieldvalues

    rec = get_record(recid)

    # Rebind rather than extend in place: ``additions`` may be the shared
    # default list.
    if get_fieldvalues(recid, '536__c'):
        additions = additions + [[('a', 'user-ecfunded')]]
    if recid == 941:
        additions = additions + [[('a', 'publication'), ('b', 'workingpaper')]]

    unwanted_tags = [tag for tag in rec.keys() if tag not in ['001', '980']]
    for tag in unwanted_tags:
        del rec[tag]

    if substitutions:
        try:
            rewritten = []
            existing = rec['980']
            for field in existing:
                try:
                    # We are only interested in subfield 'a'
                    code, val = field[0][0]
                    for old, new in substitutions:
                        if val == old:
                            field[0][0] = (code, new)
                            break
                except IndexError:
                    pass
                rewritten.append(field)
            del rec['980']
            for field in rewritten:
                record_add_field(rec, '980', subfields=field[0])
        except KeyError:
            warnings.warn("Could not migrate record %s" % recid)
            return

    if additions:
        for extra_subfields in additions:
            record_add_field(rec, '980', subfields=extra_subfields)

    print(rec)
    bibupload(rec, opt_mode="correct")
def create_xml(experiment,spks,title):
    """Print the print_rec() output of a new experiment record.

    Fields written: 980 a=EXPERIMENT; 702 a=``spks``, z=current;
    245 a=``title``; 119 a=``experiment``, u=J-PARC.  Every field is added
    with '_' in both indicator positions.  Nothing is returned.
    """
    fields_by_tag = {
        '980__': [('a', 'EXPERIMENT')],
        '702__': [('a', spks), ('z', 'current')],
        '245__': [('a', title)],
        '119__': [('a', experiment), ('u', 'J-PARC')],
    }
    record = {}
    for tag, subfields in fields_by_tag.items():
        record_add_field(record, tag[0:3], tag[3], tag[4],
                         subfields=subfields)
    print(print_rec(record))