def handle_tags(recid, tags, d): record = get_record(recid) correct_record = {} need_email = False need_author = False for tag in tags: original_tag = tag field_instances = \ record_get_field_instances(record, tag[0:3], tag[3], tag[4]) correct_subfields = [] #correct_subfields_aff = [] for field_instance in field_instances: correct_record = {} correct_subfields = [] for code, value in field_instance[0]: if code == 'm' or code == 'u': tag = '371__' if code == 'u': code = 'a' if code == 'm' and not value in list_of_emails: list_of_emails.append(value) inHepnames_email = get_hepnames_recid_from_email(value) if verbose: print 'inHepnames_email=', inHepnames_email #if not inHepnames_email: need_email = value else: tag = original_tag if tag == '700__' : tag = '100__' if code != 'v': correct_subfields = [(code, value)] if tag == '371__': correct_subfields.append(('z', 'current')) if code == 'a' and tag == '100__' and not value in list_of_authors: list_of_authors.append(value) nicename = re.sub(r'(.*)\, (.*)',r'\2 \1',value) correct_subfields.append(('q', nicename)) search = "find a " + value search = search + " or ea " + value inHepnames_author = \ perform_request_search(p=search, cc='HepNames') if verbose: print 'inHepnames_author=', inHepnames_author if not inHepnames_author: need_author = True if re.search(r"'",value): need_author = False if code == 'i' : need_author = False record_add_field(correct_record, tag[0:3], tag[3], tag[4], \ subfields=correct_subfields) if d: correct_record.update(d) if need_author or need_email: if verbose and inHepnames_author: print "Margaret: This author is already in", \ inHepnames_author, need_email print print_rec(correct_record) need_email = False need_author = False return correct_record
def create_xml(experiment,spks,title): common_fields = {} common_tags = {} common_tags['980__'] = [('a', 'EXPERIMENT')] common_tags['702__'] = [('a', spks), ('z', 'current')] common_tags['245__'] = [('a', title)] common_tags['119__'] = [('a', experiment),('u', 'J-PARC')] for key in common_tags: tag = key record_add_field(common_fields, tag[0:3], tag[3], tag[4], \ subfields=common_tags[key]) #return common_fields print print_rec(common_fields)
def create_xml(experiment, spks, title): common_fields = {} common_tags = {} common_tags['980__'] = [('a', 'EXPERIMENT')] common_tags['702__'] = [('a', spks), ('z', 'current')] common_tags['245__'] = [('a', title)] common_tags['119__'] = [('a', experiment), ('u', 'J-PARC')] for key in common_tags: tag = key record_add_field(common_fields, tag[0:3], tag[3], tag[4], \ subfields=common_tags[key]) #return common_fields print print_rec(common_fields)
def create_xml(author, email, affiliation, experiment, inspire_id, orcid,
               native_name):
    """
    Create the xml of a new HEPNAMES record to upload.

    The fields are collected in a list of (tag, subfields) pairs rather
    than in a dict keyed by tag: a dict can hold only one field per tag,
    which silently dropped every affiliation but the last and replaced
    the INSPIRE 035 field by the ORCID one whenever an ORCID was given.

    Relies on the module-level SOURCE value for the 670 field.

    @param author: author name in "Last, First" form
    @param email: e-mail stored in every 371 field
    @param affiliation: one affiliation, a list of affiliations, or a
        falsy value for none
    @param experiment: experiment stored in 693__e; skipped when falsy
    @param inspire_id: identifier stored in 035 with source 'INSPIRE'
    @param orcid: identifier stored in a second 035 field; skipped when
        falsy
    @param native_name: stored in 880__a; skipped when falsy
    @return: whatever print_rec returns for the assembled record
    """
    # "Last, First" -> "First Last"
    author2 = re.sub(r'(.*)\, (.*)', r'\2 \1', author)

    fields = [
        ('980__', [('a', 'HEPNAMES')]),
        ('100__', [('a', author), ('q', author2), ('g', 'ACTIVE')]),
    ]
    if affiliation:
        if isinstance(affiliation, (list, tuple)):
            affiliations = affiliation
        else:
            affiliations = [affiliation]
        # One 371 field per affiliation.
        for aff in affiliations:
            fields.append(('371__',
                           [('m', email), ('a', aff), ('z', 'current')]))
    else:
        fields.append(('371__', [('m', email), ('z', 'current')]))
    if experiment:
        fields.append(('693__', [('e', experiment), ('z', 'current')]))
    fields.append(('035__', [('9', 'INSPIRE'), ('a', inspire_id)]))
    if orcid:
        fields.append(('035__', [('9', 'ORCID'), ('a', orcid)]))
    if SOURCE:
        fields.append(('670__', [('a', SOURCE)]))
    if native_name:
        fields.append(('880__', [('a', native_name)]))

    common_fields = {}
    for tag, subfields in fields:
        record_add_field(common_fields, tag[0:3], tag[3], tag[4],
                         subfields=subfields)
    return print_rec(common_fields)
def create_xml(recid): correct_record = {} tag = '8564_' record = get_record(recid) flag = None record_add_field(record, '001', controlfield_value=str(recid)) field_instances = record_get_field_instances(record, tag[0:3], tag[3], tag[4]) correct_subfields = [] for field_instance in field_instances: correct_subfields = [] # print field_instance for c,v in field_instance[0]: # print c,v matchObj = re.search(r'inspirehep\.net/record/\d+/files/fermilab-thesis-.*?\.pdf', v, flags=re.IGNORECASE) if matchObj: print 'yes' flag = True correct_subfields.append(('y', 'Fulltext')) correct_subfields.append((c,v)) record_add_field(correct_record, tag[0:3], tag[3], tag[4], \ subfields=correct_subfields) if flag: return print_rec(correct_record) else: return None
def ccreate_xml(recid, rawstring): found = False record = {} record_add_field(record, '001', controlfield_value=str(recid)) rawstring = rawstring.lower().replace('proc. of the', '').replace( 'proc. of', '').replace('.', ' ').replace('(', '').replace(')', '').replace(' -', '') for k, v in term_dict.items(): if k in rawstring: rawstring = rawstring.replace(k, v) matchobj = re.search('(.*?\d{4})', rawstring) if matchobj: search = perform_request_search(p=matchobj.group(), cc='Conferences') if len(search) == 1: for s in search: cnums = get_fieldvalues(s, '111__g') cnum = cnums[0] existing_cnum = get_fieldvalues(recid, '773__w') if cnum not in existing_cnum: print recid, cnum found = True if found: record_add_field(record, '773', '', '', subfields=[('w', cnum)]) return print_rec(record)
def create_xml(recid, tags): """Create xml file to replace to 100, 700 block.""" record = get_record(recid) correct_record = {} record_add_field(correct_record, '001', controlfield_value=str(recid)) flag = None for tag in tags: field_instances = record_get_field_instances(record, tag[0:3], \ tag[3], tag[4]) correct_subfields = [] for field_instance in field_instances: correct_subfields = [] for code, value in field_instance[0]: if code == 'v': try: if VERBOSE: print len(AFFILIATIONS_DONE) affiliation_key = re.sub(r'\W+', ' ', value).upper() if not affiliation_key in AFFILIATIONS_DONE: new_values = get_aff(value) AFFILIATIONS_DONE[affiliation_key] = new_values for new_value in AFFILIATIONS_DONE[affiliation_key]: correct_subfields.append(('u', \ new_value.lstrip(' '))) flag = True except TypeError: pass correct_subfields.append((code, value)) record_add_field(correct_record, tag[0:3], tag[3], tag[4], \ subfields=correct_subfields) if flag: return print_rec(correct_record)
def test_get_record(self):
    """
    Compare the xml of every cached record with the reference
    implementation's rendering of the same record.
    """
    all_recids = perform_request_search(p="")
    for recid in all_recids:
        # Record object under test
        cached = self.records_cache[recid]
        # Reference implementation
        reference = get_record_original(recid)
        self.assertXmlEqual(cached.to_xml(), print_rec(reference))
def create_xml(recid):
    """
    Rewrite the journal name (and optionally the volume) of a record.

    Works on the field named by the module-level tag: every 'p' subfield
    becomes repl_journal and, when the module-level volume_letter is
    set, every 'v' subfield that does not already start with a letter
    gets that letter prepended.

    The volume is now taken from the subfield being processed.  The old
    code re-queried all 773__v values of the record and appended every
    one of them to each field instance, which duplicates volumes as soon
    as a record has more than one such field; it also raised IndexError
    on an empty volume.

    @param recid: id of the record to rewrite
    @return: the output of print_rec for the rebuilt fields
    """
    record = get_record(recid)
    correct_record = {}
    record_add_field(correct_record, '001', controlfield_value=str(recid))
    field_instances = record_get_field_instances(record, tag[0:3],
                                                 tag[3], tag[4])
    for field_instance in field_instances:
        correct_subfields = []
        for code, value in field_instance[0]:
            if code == 'p':
                correct_subfields.append(('p', repl_journal))
            elif code == 'v' and volume_letter:
                # value[:1] is '' for an empty volume, so isalpha() is
                # simply False instead of an IndexError.
                if value[:1].isalpha():
                    correct_subfields.append(('v', value))
                else:
                    correct_subfields.append(('v', volume_letter + value))
            else:
                correct_subfields.append((code, value))
        record_add_field(correct_record, tag[0:3], tag[3], tag[4],
                         subfields=correct_subfields)
    return print_rec(correct_record)
def create_xml(author, email, affiliation, experiment, inspire_id, orcid):
    """
    Create the xml of a new HEPNAMES record to upload.

    Fields are gathered as an ordered list of (tag, subfields) pairs.
    The previous dict keyed by tag could hold a single field per tag, so
    only the last affiliation survived and the ORCID 035 field replaced
    the INSPIRE one.

    Relies on the module-level SOURCE value for the 670 field.

    @param author: author name in "Last, First" form
    @param email: e-mail stored in every 371 field
    @param affiliation: one affiliation, a list of affiliations, or a
        falsy value for none
    @param experiment: experiment stored in 693__e; skipped when falsy
    @param inspire_id: identifier stored in 035 with source 'INSPIRE'
    @param orcid: identifier stored in a second 035 field; skipped when
        falsy
    @return: whatever print_rec returns for the assembled record
    """
    # "Last, First" -> "First Last"
    author2 = re.sub(r'(.*)\, (.*)', r'\2 \1', author)

    fields = [
        ('980__', [('a', 'HEPNAMES')]),
        ('100__', [('a', author), ('q', author2), ('g', 'ACTIVE')]),
    ]
    if not affiliation:
        fields.append(('371__', [('m', email), ('z', 'current')]))
    else:
        if isinstance(affiliation, (list, tuple)):
            affiliations = affiliation
        else:
            affiliations = [affiliation]
        # One 371 field per affiliation.
        for aff in affiliations:
            fields.append(('371__',
                           [('m', email), ('a', aff), ('z', 'current')]))
    if experiment:
        fields.append(('693__', [('e', experiment), ('z', 'current')]))
    fields.append(('035__', [('9', 'INSPIRE'), ('a', inspire_id)]))
    if orcid:
        fields.append(('035__', [('9', 'ORCID'), ('a', orcid)]))
    if SOURCE:
        fields.append(('670__', [('a', SOURCE)]))

    common_fields = {}
    for tag, subfields in fields:
        record_add_field(common_fields, tag[0:3], tag[3], tag[4],
                         subfields=subfields)
    return print_rec(common_fields)
def create_xml(recid):
    """
    Collapse two 773 fields that name the same journal into one.

    Acts only when the record has exactly two 773__p values and they are
    equal.  The subfields of all 773 fields are then merged without
    duplicates, the placeholder value 'To appear in the proceedings of'
    is dropped, and of the 'c' subfields only the longest value is kept
    (appended last).

    @param recid: id of the record to clean up
    @return: the output of print_rec for the merged field, or None when
        the record does not match the pattern
    """
    tag = '773__'
    journals = get_fieldvalues(recid, tag + 'p')
    if len(journals) != 2 or journals[0] != journals[1]:
        return None

    record = get_record(recid)
    correct_record = {}
    record_add_field(correct_record, '001',
                     controlfield_value=str(recid))
    merged = []
    best_c = False
    for instance in record_get_field_instances(record, tag[0:3],
                                               tag[3], tag[4]):
        for code, value in instance[0]:
            if value == 'To appear in the proceedings of':
                continue
            if (code, value) in merged:
                continue
            if code != 'c':
                merged.append((code, value))
            elif not best_c or len(value) > len(best_c):
                best_c = value
    if best_c:
        merged.append(('c', best_c))
    record_add_field(correct_record, tag[0:3], tag[3], tag[4],
                     subfields=merged)
    return print_rec(correct_record)
def create_xml(recid, tags, experiment):
    """
    Rebuild author fields with the identifiers found for each author.

    For every 'a' subfield the search "find a <name> and exp
    <experiment>" is passed to convert_search_to_inspire_id; a truthy
    first element of its result is inserted as an 'i' subfield and a
    truthy second element, prefixed with 'ORCID:', as a 'j' subfield,
    both in front of the name.

    @param recid: id of the record to rebuild
    @param tags: five-character MARC tags (tag + two indicators)
    @param experiment: experiment name used in the author search
    @return: the output of print_rec when at least one identifier was
        added, otherwise None
    """
    record = get_record(recid)
    correct_record = {}
    record_add_field(correct_record, '001', controlfield_value=str(recid))
    found_any = None
    for tag in tags:
        marc, ind1, ind2 = tag[0:3], tag[3], tag[4]
        for instance in record_get_field_instances(record, marc, ind1, ind2):
            subfields = []
            for code, value in instance[0]:
                if code == 'a':
                    query = 'find a ' + value + ' and exp ' + experiment
                    ids = convert_search_to_inspire_id(query)
                    if ids[0]:
                        found_any = True
                        subfields.append(('i', ids[0]))
                    if ids[1]:
                        found_any = True
                        subfields.append(('j', 'ORCID:' + ids[1]))
                subfields.append((code, value))
            record_add_field(correct_record, marc, ind1, ind2,
                             subfields=subfields)
    if found_any:
        return print_rec(correct_record)
def create_xml(recid, IDs, tags): """ Replaces specific inspire-ids in records with nothing """ if VERBOSE: print "Working on %s" % recid record = get_record(int(recid)) correct_record = {} record_add_field(correct_record, '001', controlfield_value=recid) for tag in tags: field_instances = record_get_field_instances(record, \ tag[0:3], tag[3], tag[4]) for field_instance in field_instances: correct_subfields = [] for code, value in field_instance[0]: if code == 'i': if value in IDs: if VERBOSE: print "Getting rid of %s from %s!" % (value, recid) pass else: correct_subfields.append((code, value)) else: correct_subfields.append((code, value)) record_add_field(correct_record, tag[0:3], tag[3], tag[4], \ subfields=correct_subfields) return print_rec(correct_record)
def create_xml(author,nicname,vname,email,af,rank,experiment,start): common_fields = {} common_tags = {} common_tags['980__'] = [('a', 'HEPNAMES')] common_tags['100__'] = [('a', author), ('q', nicename), ('g', 'ACTIVE')] common_tags['371__'] = [('m', email),('a', af),('r', rank), ('z', 'current')] common_tags['400__'] = [('a', vname)] common_tags['693__'] = [('a', experiment),('s', start), ('z', 'current')] common_tags['670__'] = [('a', 'ihep')] for key in common_tags: tag = key record_add_field(common_fields, tag[0:3], tag[3], tag[4], \ subfields=common_tags[key]) #return common_fields print print_rec(common_fields)
def create_xml(recid, correction_dict): """Fix the citations of Fermilab reports.""" tags = [REF] record = get_record(recid) correct_record = {} record_add_field(correct_record, '001', controlfield_value=str(recid)) flag = False for (tag, field_instance) in \ [(tag, field_instance) for tag in tags \ for field_instance in record_get_field_instances(record, \ tag[0:3], tag[3], tag[4])]: correct_subfields = [] for code, value in field_instance[0]: if code == 'r' and value.upper() in correction_dict: print 'Was:', value value = correction_dict[value.upper()] print 'Now:', value flag = True correct_subfields.append((code, value)) record_add_field(correct_record, tag[0:3], tag[3], tag[4], \ subfields=correct_subfields) if flag: return print_rec(correct_record) else: return None
def create_xmlrefs(recid): subrefs = [ '%s,%i,' % (old_journal, x) for x in range(vol_change, vol_curr) ] record = get_record(recid) correct_record = {} record_add_field(correct_record, '001', controlfield_value=str(recid)) field_instances = record_get_field_instances(record, '999', 'C', '5') correct_subfields = [] for field_instance in field_instances: correct_subfields = [] for code, value in field_instance[0]: if code == 's' and any(x for x in subrefs if x in value): newval = re.sub(old_journal, repl_journal, value) if VERBOSE: print "%s: Replacing %s with %s" % (recid, value, newval) correct_subfields.append(('s', newval)) else: correct_subfields.append((code, value)) record_add_field(correct_record, '999', 'C', '5', subfields=correct_subfields) return print_rec(correct_record)
def create_xml(recid=None, osti_id=None, doi=None): osti_exists = False doi_exists = False osti_mismatch = False mismatches = [] osti_subfields = [('9', 'OSTI'), ('a', osti_id)] record = get_record(recid) record_link = '<a href="http://inspirehep.net/record/%s">%s</a>' % (str(recid),str(recid)) append_record = {} additions = False errors = None for item in BibFormatObject(recid).fields('035__'): if item.has_key('9') and item.has_key('a'): if item['9'] == 'OSTI' and item['a'] == osti_id: osti_exists = True elif item['9'] == 'OSTI' and item['a'] != osti_id: osti_mismatch = True mismatches.append(item['a']) for item in BibFormatObject(recid).fields('0247_'): if item.has_key('2') and item.has_key('a'): if item['2'] == 'DOI' and item['a'] == doi: doi_exists = True if osti_exists is False and osti_mismatch is True: print str(recid), "already has a different OSTI ID" errors = "doi %s in record %s should match OSTI ID %s, but the record already contains OSTI ID(s) %s<br />" % (doi, record_link, osti_id, ','.join(mismatches)) return errors if doi_exists is False and osti_exists is True: print str(recid), "contains an OSTI ID but no doi" no_doi = "%s contains OSTI ID %s but not doi %s<br />" % (record_link, osti_id, doi) return no_doi if osti_exists is False and osti_mismatch is False: record_add_field(append_record, '001', controlfield_value=str(recid)) record_add_field(append_record, '035', '', '', subfields=osti_subfields) print "%s: added 035__a:%s" % (str(recid), osti_id) return print_rec(append_record)
def create_xml(recid, input_dict):
    """
    Create the marcxml carrying arXiv metadata for a record.

    Each key of input_dict other than 'eprint', 'primarch' and '0247_a'
    is read as tag + indicators + subfield code and becomes one field.
    Every field except 269__c is prefixed with a source subfield holding
    'arXiv' (code 2 for keys starting with '65017a', code 9 otherwise);
    037__a additionally receives the primary archive as subfield 'c'.

    Note that input_dict is modified: the keys '035__a' and '037__a' are
    (re)written from the eprint number.

    @param recid: id of the record to update
    @param input_dict: metadata keyed as described above; must contain
        'eprint' and 'primarch'
    @return: the output of print_rec for the generated fields
    """
    record = {}
    record_add_field(record, '001', controlfield_value=str(recid))

    eprint = input_dict['eprint']
    input_dict['035__a'] = 'oai:arXiv.org:' + eprint
    if ARXIV_REGEX_NEW.match(eprint):
        input_dict['037__a'] = 'arXiv:' + eprint
    else:
        input_dict['037__a'] = eprint

    not_fields = ('eprint', 'primarch', '0247_a')
    for key in input_dict:
        if key in not_fields:
            continue
        source_code = 2 if key.startswith('65017a') else 9
        subfields = [(key[5], input_dict[key])]
        if key != '269__c':
            subfields.insert(0, (source_code, 'arXiv'))
        if key == '037__a':
            subfields.append(('c', input_dict['primarch']))
        record_add_field(record, key[0:3], key[3], key[4],
                         subfields=subfields)
    return print_rec(record)
def create_xml(recid, fname=None, oaff=None):
    """
    Reduce a record to its first author and attach affiliations to it.

    All 700 fields are deleted and each affiliation in oaff is added as
    a 'u' subfield to the first 100 field.

    Calling without affiliations (oaff=None, the declared default) is now
    treated as "no affiliations"; it used to raise TypeError.

    @param recid: id of the record to modify; it must have a 100 field
    @param fname: unused
    @param oaff: iterable of affiliation strings, or None
    @return: the output of print_rec for the whole modified record
    """
    affs = list(oaff) if oaff is not None else []
    record = get_record(recid)
    # Element 4 of a field instance is used as its global position.
    auth_location = record_get_field_instances(record, '100', '', '')[0][4]
    record_delete_field(record, '700', '', '')
    for aff in affs:
        record_add_subfield_into(record, '100', 'u', aff,
                                 field_position_global=auth_location)
    return print_rec(record)
def create_xml(recid, arxiv_ids):
    """
    Build a record holding only the 8564 fields accepted by
    check_arxiv_url.

    @param recid: id of the record whose attached files are filtered
    @param arxiv_ids: passed through to check_arxiv_url for every field
    @return: the output of print_rec for the 001 field plus the kept 856
        fields
    """
    old_record = get_record(recid)
    fields_to_add = []
    for attached in record_get_field_instances(old_record, tag='856',
                                               ind1='4'):
        if check_arxiv_url(attached, arxiv_ids):
            fields_to_add.append(attached)

    new_record = {}
    record_add_field(new_record, '001', controlfield_value=str(recid))
    record_add_fields(new_record, '856', fields_to_add)
    return print_rec(new_record)
def create_xml(recid, tags_024):
    """
    Build a record with one 0247 DOI field per distinct DOI.

    @param recid: id of the record the DOIs belong to
    @param tags_024: iterable of DOI strings; duplicates are collapsed
    @return: the output of print_rec for the generated fields
    """
    new_record = {}
    record_add_field(new_record, '001', controlfield_value=str(recid))
    unique_dois = set(tags_024)
    for doi in unique_dois:
        record_add_field(new_record, '024', '7',
                         subfields=[('2', 'DOI'), ('a', doi)])
    return print_rec(new_record)
def create_our_record(recid):
    """
    Build a record with the de-duplicated 980 fields of a record.

    Fields whose subfields are exactly [('a', 'unknown')] are dropped;
    the rest are de-duplicated by wrapping them in OurInstance and
    collecting the wrappers in a set.

    @param recid: id of the record to clean
    @return: the output of print_rec for the 001 field plus the kept 980
        fields
    """
    old_record = get_record(recid)
    unique_wrappers = set()
    for instance in record_get_field_instances(old_record, '980'):
        if field_get_subfield_instances(instance) != [('a', 'unknown')]:
            unique_wrappers.add(OurInstance(instance))
    new_instances = [wrapper.field for wrapper in unique_wrappers]

    new_record = {}
    record_add_field(new_record, '001', controlfield_value=str(recid))
    record_add_fields(new_record, '980', new_instances)
    return print_rec(new_record)
def create_xml(recid, osti_id, new_id, search): record = {} record_add_field(record, '001', controlfield_value=str(recid)) new_id = [('a', osti_id), ('9', 'OSTI')] record_add_field(record, '035', '', '', subfields=new_id) try: return print_rec(record) except: print "Something wrong: " + search return False
def create_xml(recid, experiment_id):
    """
    Create xml to append to INSPIRE record.

    @param recid: id of the record to update
    @param experiment_id: identifier stored in 035__a; its source
        (035__9) is the module-level EXPERIMENT value
    @return: the output of print_rec for the new 035 field
    """
    tag = '035__'
    new_record = {}
    record_add_field(new_record, '001', controlfield_value=str(recid))
    id_subfields = [('9', EXPERIMENT), ('a', experiment_id)]
    record_add_field(new_record, tag[0:3], tag[3], tag[4],
                     subfields=id_subfields)
    return print_rec(new_record)
def move_drafts_articles_to_ready(journal_name, issue):
    """
    Move draft articles to their final "collection".

    To do so we rely on the convention that an admin-chosen keyword must
    be removed from the metadata.

    For every article of the issue the updated record is written to
    CFG_TMPDIR/webjournal_publish_<recid>.xml and bibupload/bibindex
    tasks are submitted; an existing file means the article was already
    handled and it is skipped.  Finally a webcoll task is submitted for
    every collection touched.

    The xml file is now closed in a finally clause, so the handle is not
    leaked when the write fails.

    @param journal_name: name of the journal
    @param issue: issue whose draft articles are released
    """
    protected_datafields = ['100', '245', '246', '520', '590', '700']
    keyword_to_remove = get_journal_draft_keyword_to_remove(journal_name)
    collections_to_refresh = {}

    categories = get_journal_categories(journal_name, issue)
    for category in categories:
        articles = get_journal_articles(journal_name, issue, category)
        for recids in articles.itervalues():
            for recid in recids:
                record_xml = format_record(recid, of='xm')
                if not record_xml:
                    continue
                new_record_xml_path = os.path.join(CFG_TMPDIR,
                                                   'webjournal_publish_' + \
                                                   str(recid) + '.xml')
                if os.path.exists(new_record_xml_path):
                    # Do not modify twice
                    continue
                record_struc = create_record(record_xml)
                record = record_struc[0]
                new_record = update_draft_record_metadata(
                    record, protected_datafields, keyword_to_remove)
                # replace() is a no-op when the keyword is absent, so no
                # prior find() is needed.
                new_record_xml = print_rec(new_record).replace(
                    keyword_to_remove, '')

                # Write to file
                new_record_xml_file = open(new_record_xml_path, 'w')
                try:
                    new_record_xml_file.write(new_record_xml)
                finally:
                    new_record_xml_file.close()

                # Submit
                task_low_level_submission('bibupload',
                                          'WebJournal',
                                          '-c', new_record_xml_path)
                task_low_level_submission('bibindex',
                                          'WebJournal',
                                          '-i', str(recid))
                for collection in get_all_collections_of_a_record(recid):
                    collections_to_refresh[collection] = ''

    # Refresh collections
    collections_to_refresh.update([
        (c, '') for c in
        get_journal_collection_to_refresh_on_release(journal_name)
    ])
    for collection in collections_to_refresh.keys():
        task_low_level_submission('webcoll',
                                  'WebJournal',
                                  '-f', '-p', '2', '-c', collection)
def create_xml(recid, msnet):
    """
    Create the xml record that appends an MSNET ID.

    @param recid: id of the record to update
    @param msnet: identifier stored in 035__a (with 035__9 'MSNET')
    @return: the output of print_rec for the new 035 field
    """
    new_record = {}
    record_add_field(new_record, '001', controlfield_value=str(recid))
    fields_by_tag = {'035__': [('9', 'MSNET'), ('a', msnet)]}
    for full_tag, subfields in fields_by_tag.items():
        record_add_field(new_record, full_tag[0:3], full_tag[3],
                         full_tag[4], subfields=subfields)
    return print_rec(new_record)
def move_drafts_articles_to_ready(journal_name, issue):
    """
    Move draft articles to their final "collection".

    To do so we rely on the convention that an admin-chosen keyword must
    be removed from the metadata.

    Each article of the issue gets its updated xml written to
    CFG_TMPDIR/webjournal_publish_<recid>.xml, followed by bibupload and
    bibindex submissions; articles whose file already exists are
    skipped.  A webcoll task is then submitted per affected collection.

    The output file is now closed even when writing fails (try/finally);
    previously the handle leaked on error.

    @param journal_name: name of the journal
    @param issue: issue whose draft articles are released
    """
    protected_datafields = ['100', '245', '246', '520', '590', '700']
    keyword_to_remove = get_journal_draft_keyword_to_remove(journal_name)
    collections_to_refresh = {}

    for category in get_journal_categories(journal_name, issue):
        articles = get_journal_articles(journal_name, issue, category)
        for recids in articles.itervalues():
            for recid in recids:
                record_xml = format_record(recid, of='xm')
                if not record_xml:
                    continue
                new_record_xml_path = os.path.join(
                    CFG_TMPDIR,
                    'webjournal_publish_' + str(recid) + '.xml')
                if os.path.exists(new_record_xml_path):
                    # Do not modify twice
                    continue
                record = create_record(record_xml)[0]
                new_record = update_draft_record_metadata(
                    record, protected_datafields, keyword_to_remove)
                # str.replace leaves the text untouched when the keyword
                # does not occur, so the former find() test was redundant.
                new_record_xml = print_rec(new_record).replace(
                    keyword_to_remove, '')

                # Write to file
                new_record_xml_file = open(new_record_xml_path, 'w')
                try:
                    new_record_xml_file.write(new_record_xml)
                finally:
                    new_record_xml_file.close()

                # Submit
                task_low_level_submission('bibupload', 'WebJournal',
                                          '-c', new_record_xml_path)
                task_low_level_submission('bibindex', 'WebJournal',
                                          '-i', str(recid))
                for collection in get_all_collections_of_a_record(recid):
                    collections_to_refresh[collection] = ''

    # Refresh collections
    collections_to_refresh.update(
        [(c, '') for c in
         get_journal_collection_to_refresh_on_release(journal_name)])
    for collection in collections_to_refresh.keys():
        task_low_level_submission('webcoll', 'WebJournal',
                                  '-f', '-p', '2','-c', collection)
def filter_marcrec(marcrec, main_field=bconfig.CFG_MAIN_FIELD,
                   others=bconfig.CFG_OTHER_FIELDS):
    """
    Return the xml of a record restricted to the wanted fields.

    Only the 001 field and the tags parsed from main_field and others
    are passed to print_rec.

    @param marcrec: record structure to serialise
    @param main_field: MARC code or list of MARC codes to keep
    @param others: further MARC code or list of MARC codes to keep
    @return: the output of bibrecord.print_rec limited to those tags
    """
    # A single code may be given as a bare string.
    if isinstance(main_field, basestring):
        main_field = [main_field]
    if isinstance(others, basestring):
        others = [others]

    wanted_tags = ["001"]
    for marc_code in main_field + others:
        parsed_tag, _ind1, _ind2 = \
            bibclassify_engine._parse_marc_code(marc_code)
        wanted_tags.append(parsed_tag)
    return bibrecord.print_rec(marcrec, 1, tags=wanted_tags)
def main(recids): out = open(DEST_FILE, 'w') for done, recid in enumerate(recids): if done % 50 == 0: print 'done %s of %s' % (done + 1, len(recids)) xml = print_rec(get_record(recid)) out.write(xml) out.close() print 'done'
def create_xml(recid):
    """
    Rebuilds the 999C5 fields of a record with JACoW DOIs added or fixed.

    NOTE(review): the previous docstring ("Replaces an email with an
    INSPIRE ID and an ORCID where possible") did not describe this code.
    The block nesting was reconstructed from a copy whose indentation
    had been lost -- confirm it against the canonical file.

    For every 999C5 field instance:
      * an 'a' subfield starting with 'doi:10.18429/JAC' that is not in
        JACOW_DOIS is replaced by the result of fix_jacow_doi, when that
        result is truthy;
      * for 'm', 'u', 'x' and 'r' subfields, a truthy result of
        extract_jacow_doi is added as a new 'a' subfield unless the
        instance already has it;
      * a subfield that is already in the rebuilt list is not repeated.

    Returns the output of print_rec (001, 005 and the rebuilt 999C5
    fields) when at least one instance ended up flagged as changed,
    otherwise None.
    """
    tags = ['999C5']
    record = get_record(recid)
    correct_record = {}
    record_add_field(correct_record, '001', controlfield_value=str(recid))
    # The 005 control field of the source record is copied over unchanged.
    time_stamp = record_get_field_value(record, '005')
    record_add_field(correct_record, '005', controlfield_value=time_stamp)
    #We don't want to update records that already have the DOI.
    flag_instances = []
    for (tag, field_instance) in \
            [(tag, field_instance) for tag in tags \
             for field_instance in record_get_field_instances(record, \
             tag[0:3], tag[3], tag[4])]:
        # Snapshot of the instance as stored, used to detect DOIs that
        # are already present.
        original_subfields = []
        for code, value in field_instance[0]:
            original_subfields.append((code, value))
        correct_subfields = []
        flag_instance = False
        for code, value in field_instance[0]:
            if code == 'a' and value.startswith('doi:10.18429/JAC') \
                    and value not in JACOW_DOIS:
                doi = fix_jacow_doi(value)
                if doi:
                    print 'DOI missing from INSPIRE', doi
                    value = doi
                    flag_instance = True
            if code in ('m', 'u', 'x', 'r'):
                doi = extract_jacow_doi(value)
                if doi:
                    if ('a', doi) in original_subfields:
                        flag_instance = False
                    elif ('a', doi) not in correct_subfields:
                        correct_subfields.append(('a', doi))
                        flag_instance = True
            # NOTE(review): a repeated subfield clears the flag even if
            # an earlier subfield of this instance had set it -- confirm
            # this is intended.
            if (code, value) in correct_subfields:
                flag_instance = False
            else:
                correct_subfields.append((code, value))
                #flag_instance = True
        flag_instances.append(flag_instance)
        record_add_field(correct_record, tag[0:3], tag[3], tag[4], \
                         subfields=correct_subfields)
    if any(flag_instances):
        return print_rec(correct_record)
    return None
def filter_marcrec(marcrec, main_field=bconfig.CFG_MAIN_FIELD,
                   others=bconfig.CFG_OTHER_FIELDS):
    """
    Remove the unwanted fields and return the xml.

    The record is serialised with only the 001 field and the tags parsed
    from main_field and others.

    @param marcrec: record structure to serialise
    @param main_field: MARC code or list of MARC codes to keep
    @param others: further MARC code or list of MARC codes to keep
    @return: the output of bibrecord.print_rec limited to those tags
    """
    # Accept a bare string wherever a list of codes is expected.
    if isinstance(main_field, basestring):
        main_field = [main_field]
    if isinstance(others, basestring):
        others = [others]

    kept_tags = ['001']
    for code in main_field + others:
        tag_part, _first_ind, _second_ind = \
            bibclassify_engine._parse_marc_code(code)
        kept_tags.append(tag_part)
    return bibrecord.print_rec(marcrec, 1, tags=kept_tags)
def create_xml(input_dict):
    """
    Build the xml carrying publication note, DOI and bibcode.

    The input is first run through extract_metadata.  From its result:
      * 'recid' becomes the 001 field (a missing key is only logged);
      * 'journal', 'volume', 'page' and 'year' become a 773 field, where
        journal 'ICRC' is written as subfield 'q' with the year appended
        and no 'y' subfield; the field is skipped when a key is missing;
      * 'doi' becomes a 0247 field when non-empty;
      * 'bibcode' becomes a 035 field when present.

    @param input_dict: metadata dictionary handed to extract_metadata
    @return: the output of print_rec, or None when extract_metadata
        returns nothing or the 'doi' key is missing
    """
    metadata = extract_metadata(input_dict)
    if not metadata:
        return None

    record = {}
    try:
        record_add_field(record, '001',
                         controlfield_value=str(metadata['recid']))
    except KeyError:
        logging.info('Problem with finding recid: ' + str(input_dict))

    try:
        journal = metadata['journal']
        volume = metadata['volume']
        page = metadata['page']
        year = metadata['year']
        if journal == 'ICRC':
            pubnote = [('q', journal + ' ' + year),
                       ('v', volume), ('c', page)]
        else:
            pubnote = [('p', journal), ('v', volume), ('c', page),
                       ('y', year)]
        record_add_field(record, '773', '', '', subfields=pubnote)
    except KeyError:
        pass

    try:
        doi_value = metadata['doi']
    except KeyError:
        logging.info('Problem with extracting doi: ' + str(input_dict))
        return None
    #Sometimes doi can be just '' in ADS xml dump.
    if doi_value:
        record_add_field(record, '024', '7', '',
                         subfields=[('a', doi_value), ('2', 'DOI'),
                                    ('9', 'ADS')])

    try:
        bibcode = [('a', metadata['bibcode']), ('9', 'ADS')]
        record_add_field(record, '035', '', '', subfields=bibcode)
    except KeyError:
        pass
    return print_rec(record)
def create_xml(recid, tags, forceFlag): record = get_record(recid) correct_record = {} record_add_field(correct_record, '001', controlfield_value=str(recid)) flag = None for tag in tags: field_instances = record_get_field_instances(record, tag[0:3], tag[3], tag[4]) correct_subfields = [] for field_instance in field_instances: correct_subfields = [] for code, value in field_instance[0]: if code == 'u' or code == 'v': if re.search(r'UNDEFINED', value) or forceFlag: new_value = get_aff(value) if new_value: correct_subfields.append(('v', value)) value = new_value code = 'u' flag = True correct_subfields.append((code, value)) record_add_field(correct_record, tag[0:3], tag[3], tag[4], subfields=correct_subfields) #return print_rec(correct_record) if flag: print print_rec(correct_record)
def create_xml(recid, texkey):
    """
    Create the marcxml snippet holding a newly generated texkey.

    @param recid: recid of the record to be updated
    @type recid: int
    @param texkey: texkey that has been generated
    @type texkey: str
    @return: marcxml with the 001 field and the 035 field to be added
    @rtype: str
    """
    snippet = {}
    record_add_field(snippet, '001', controlfield_value=str(recid))
    record_add_field(snippet, tag='035',
                     subfields=[('a', texkey), ('9', 'INSPIRETeX')])
    return print_rec(snippet)
def osti_add_id():
    """
    Build the xml adding OSTI IDs to FERMILAB records that lack them.

    Looks at up to 1000 HEP records matching
    "037__a:FERMILAB* 8564_y:OSTI -035__9:OSTI" and, for every 8564_u
    value containing 'osti.gov', stores the first run of digits of that
    URL as a 035 field with source 'OSTI'.

    Changes from the previous version:
      * a record is emitted once, and only when a 035 field was actually
        added to it;
      * the host test is a plain substring test (the old regex
        'osti.gov' let the dot match any character);
      * the output is assembled with join instead of repeated string
        concatenation.

    NOTE(review): the nesting of the original was ambiguous (indentation
    lost); confirm that one xml chunk per record is what callers expect.

    @return: concatenation of the print_rec output of every record that
        gained an OSTI ID; '' when there is none
    """
    search = "037__a:FERMILAB* 8564_y:OSTI -035__9:OSTI"
    recids = perform_request_search(p=search, cc='HEP')[:1000]
    chunks = []
    for recid in recids:
        record = {}
        record_add_field(record, '001', controlfield_value=str(recid))
        added = False
        for url in get_fieldvalues(recid, '8564_u'):
            if 'osti.gov' not in url:
                continue
            matchObj = re.search(r'(\d+)', url)
            if matchObj:
                new_id = [('a', matchObj.group(1)), ('9', 'OSTI')]
                record_add_field(record, '035', '', '', subfields=new_id)
                added = True
        if added:
            chunks.append(print_rec(record))
    return ''.join(chunks)
def osti_add_id():
    """
    Build the xml adding OSTI IDs to FERMILAB records that lack them.

    Up to 1000 HEP records matching
    "037__a:FERMILAB* 8564_y:OSTI -035__9:OSTI" are inspected.  For each
    8564_u value containing 'osti.gov', the first run of digits in that
    URL becomes a 035 field with source 'OSTI'.

    Changes from the previous version:
      * each record is emitted once, and only if a 035 field was added;
      * the host check is a literal substring test rather than the regex
        'osti.gov', whose dot matched any character;
      * the result is built with join rather than by growing a string.

    NOTE(review): the nesting of the original was ambiguous (indentation
    lost); confirm that one xml chunk per record is what callers expect.

    @return: concatenation of the print_rec output of every record that
        gained an OSTI ID; '' when there is none
    """
    search = "037__a:FERMILAB* 8564_y:OSTI -035__9:OSTI"
    recids = perform_request_search(p=search, cc='HEP')[:1000]
    xml_parts = []
    for recid in recids:
        record = {}
        record_add_field(record, '001', controlfield_value=str(recid))
        has_osti_id = False
        for url in get_fieldvalues(recid, '8564_u'):
            if 'osti.gov' in url:
                matchObj = re.search(r'(\d+)', url)
                if matchObj:
                    new_id = [('a', matchObj.group(1)), ('9', 'OSTI')]
                    record_add_field(record, '035', '', '',
                                     subfields=new_id)
                    has_osti_id = True
        if has_osti_id:
            xml_parts.append(print_rec(record))
    return ''.join(xml_parts)
def create_xml(recid, tags, experiment, author_dict): """Create the new author list with IDs.""" record = get_record(recid) correct_record = {} record_add_field(correct_record, '001', controlfield_value=str(recid)) time_stamp = record_get_field_value(record, '005') record_add_field(correct_record, '005', controlfield_value=time_stamp) flag = None for tag in tags: field_instances = record_get_field_instances(record, tag[0:3], tag[3], tag[4]) correct_subfields = [] for field_instance in field_instances: correct_subfields = [] for code, value in field_instance[0]: if code == 'a': if value not in author_dict: search = 'find a ' + value + ' and exp ' + experiment if VERBOSE: print search author_dict[value] = \ convert_search_to_inspire_id(search) if VERBOSE: print author_dict[value] if author_dict[value][0]: flag = True correct_subfields.append(('i', author_dict[value][0])) if author_dict[value][1]: flag = True orcid_value = 'ORCID:' + author_dict[value][1] correct_subfields.append(('k', orcid_value)) correct_subfields.append((code, value)) record_add_field(correct_record, tag[0:3], tag[3], tag[4], subfields=correct_subfields) #return print_rec(correct_record) if flag: #print print_rec(correct_record) return [print_rec(correct_record), author_dict] else: return [None, author_dict]
def move_drafts_articles_to_ready(journal_name, issue):
    """
    Move draft articles to their final "collection".

    To do so we rely on the convention that an admin-chosen keyword must
    be removed from the metadata.

    The updated xml of every article of the issue is written to
    CFG_TMPDIR/webjournal_publish_<recid>.xml and bibupload/bibindex
    tasks are submitted for it; an article whose file already exists is
    left alone.  Afterwards a webcoll task is submitted for each
    collection involved.

    The xml file is now closed in a finally clause; the handle used to
    leak when writing failed.

    @param journal_name: name of the journal
    @param issue: issue whose draft articles are released
    """
    protected_datafields = ["100", "245", "246", "520", "590", "700"]
    keyword_to_remove = get_journal_draft_keyword_to_remove(journal_name)
    collections_to_refresh = {}

    for category in get_journal_categories(journal_name, issue):
        articles = get_journal_articles(journal_name, issue, category)
        for recids in articles.itervalues():
            for recid in recids:
                record_xml = format_record(recid, of="xm")
                if not record_xml:
                    continue
                new_record_xml_path = os.path.join(
                    CFG_TMPDIR, "webjournal_publish_" + str(recid) + ".xml")
                if os.path.exists(new_record_xml_path):
                    # Do not modify twice
                    continue
                record = create_record(record_xml)[0]
                new_record = update_draft_record_metadata(
                    record, protected_datafields, keyword_to_remove)
                # replace() does nothing when the keyword is absent, so
                # the former find() check was unnecessary.
                new_record_xml = print_rec(new_record).replace(
                    keyword_to_remove, "")

                # Write to file
                new_record_xml_file = open(new_record_xml_path, "w")
                try:
                    new_record_xml_file.write(new_record_xml)
                finally:
                    new_record_xml_file.close()

                # Submit
                task_low_level_submission("bibupload", "WebJournal",
                                          "-c", new_record_xml_path)
                task_low_level_submission("bibindex", "WebJournal",
                                          "-i", str(recid))
                for collection in get_all_collections_of_a_record(recid):
                    collections_to_refresh[collection] = ""

    # Refresh collections
    collections_to_refresh.update(
        [(c, "") for c in
         get_journal_collection_to_refresh_on_release(journal_name)])
    for collection in collections_to_refresh.keys():
        task_low_level_submission("webcoll", "WebJournal",
                                  "-f", "-p", "2", "-c", collection)
def create_xml(osti_id, inspire_id): """ The function checks if the OSTI ID should be added to INSPIRE. If so, it builds up that information. """ osti_id = str(osti_id) recid = str(inspire_id) recid = recid.replace('oai:inspirehep.net:', '') search = "001:" + recid result = perform_request_search(p=search, cc='Fermilab') if len(result) != 1: print 'No such INSPIRE record', recid return None create_osti_id_pdf(recid, osti_id) search = "001:" + recid + " 035__a:" + osti_id result = perform_request_search(p=search, cc='Fermilab') if len(result) == 1: return None search = "035__9:osti 035__a:" + str(osti_id) result = perform_request_search(p=search, cc='Fermilab') if len(result) == 1: for item in BibFormatObject(int(recid)).fields('035__'): if item.has_key('9') and item.has_key('a'): if item['9'] == 'OSTI' and item['a'] == osti_id: print 'OSTI ID', osti_id, 'already on', result[0] return None search = "001:" + recid + " -035__9:OSTI" if TEST: print search result = perform_request_search(p=search, cc='Fermilab') if not len(result) == 1: print search, result print 'Problem with', recid, osti_id return False if TEST: print result record = {} record_add_field(record, '001', controlfield_value=str(recid)) new_id = [('a', osti_id), ('9', 'OSTI')] record_add_field(record, '035', '', '', subfields=new_id) #create_osti_id_pdf(recid, osti_id) return print_rec(record)
def create_xml(osti_id, inspire_id): """ The function checks if the OSTI ID should be added to INSPIRE. If so, it builds up that information. """ osti_id = str(osti_id) recid = str(inspire_id) search = "001:" + recid result = perform_request_search(p=search, cc='HEP') if len(result) != 1: print 'No such INSPIRE record', recid return None create_osti_id_pdf(recid, osti_id) search = "001:" + recid + " 035__a:" + osti_id result = perform_request_search(p=search, cc='HEP') if len(result) == 1: return None search = "035__9:osti 035__a:" + str(osti_id) result = perform_request_search(p=search, cc='HEP') if len(result) == 1: for item in BibFormatObject(int(recid)).fields('035__'): if item.has_key('9') and item.has_key('a'): if item['9'] == 'OSTI' and item['a'] == osti_id: print 'OSTI ID', osti_id, 'already on', result[0] return None search = "001:" + recid + " -035__9:OSTI" if TEST: print search result = perform_request_search(p=search, cc='HEP') if not len(result) == 1: print search, result print 'Problem with', recid, osti_id return False if TEST: print result record = {} record_add_field(record, '001', controlfield_value=str(recid)) new_id = [('a', osti_id), ('9', 'OSTI')] record_add_field(record, '035', '', '', subfields=new_id) #create_osti_id_pdf(recid, osti_id) return print_rec(record)
def create_xml(recid=None, osti_id=None, doi=None): osti_exists = False doi_exists = False osti_mismatch = False mismatches = [] osti_subfields = [('9', 'OSTI'), ('a', osti_id)] record = get_record(recid) record_link = '<a href="http://inspirehep.net/record/%s">%s</a>' % ( str(recid), str(recid)) append_record = {} additions = False errors = None for item in BibFormatObject(recid).fields('035__'): if item.has_key('9') and item.has_key('a'): if item['9'] == 'OSTI' and item['a'] == osti_id: osti_exists = True elif item['9'] == 'OSTI' and item['a'] != osti_id: osti_mismatch = True mismatches.append(item['a']) for item in BibFormatObject(recid).fields('0247_'): if item.has_key('2') and item.has_key('a'): if item['2'] == 'DOI' and item['a'] == doi: doi_exists = True if osti_exists is False and osti_mismatch is True: print str(recid), "already has a different OSTI ID" errors = "doi %s in record %s should match OSTI ID %s, but the record already contains OSTI ID(s) %s<br />" % ( doi, record_link, osti_id, ','.join(mismatches)) return errors if doi_exists is False and osti_exists is True: print str(recid), "contains an OSTI ID but no doi" no_doi = "%s contains OSTI ID %s but not doi %s<br />" % (record_link, osti_id, doi) return no_doi if osti_exists is False and osti_mismatch is False: record_add_field(append_record, '001', controlfield_value=str(recid)) record_add_field(append_record, '035', '', '', subfields=osti_subfields) print "%s: added 035__a:%s" % (str(recid), osti_id) return print_rec(append_record)
def create_xml(author, email, af, experiment, inspire_id):
    """
    Build the MARCXML of a HEPNAMES record for one author.

    @param author: author name, stored in 100__a; the 100__q value is
        derived from it by swapping the parts around the first ", "
        match of the regular expression below
    @param email: stored in 371__m and passed to aff_from_email()
    @param af: not used by this function
    @param experiment: stored in 693__e
    @param inspire_id: stored in 035__a together with 035__9 INSPIRE
    @return: the print_rec() output for the assembled record
    """
    display_name = re.sub(r'(.*)\, (.*)',r'\2 \1', author)
    affiliation = aff_from_email(email)

    # 371 carries the affiliation only when one could be derived.
    contact = [('m', email)]
    if affiliation:
        contact.append(('a', affiliation))
    contact.append(('z', 'current'))

    # Keys are inserted one by one in this exact sequence; the dict is
    # iterated below, so the construction is deliberately left explicit.
    fields_by_tag = {}
    fields_by_tag['980__'] = [('a', 'HEPNAMES')]
    fields_by_tag['100__'] = [('a', author), ('q', display_name),
                              ('g', 'ACTIVE')]
    fields_by_tag['371__'] = contact
    fields_by_tag['693__'] = [('e', experiment), ('z', 'current')]
    fields_by_tag['035__'] = [('9', 'INSPIRE'), ('a', inspire_id)]

    hepnames_record = {}
    for tag, subfields in fields_by_tag.items():
        # tag is "TTTij": three tag characters followed by the indicators
        record_add_field(hepnames_record, tag[0:3], tag[3], tag[4],
                         subfields=subfields)
    return print_rec(hepnames_record)
def add_record_cnum(recid, uid):
    """
    Give the cached record a cnum (111__g) unless it already has one.

    @param recid: recid of the record under check. Used to retrieve
        cache file
    @type recid: int
    @param uid: id of the user. Used to retrieve cache file
    @type uid: int
    @return: None if a cnum is already present or if the sequence
        raises ConferenceNoStartDateError, the new cnum otherwise
    @rtype: None or string
    """
    # Import placed here to avoid circular dependency
    from invenio.sequtils_cnum import CnumSeq, ConferenceNoStartDateError

    # The first element of the cache contents is not needed here.
    cache_contents = get_cache_file_contents(recid, uid)
    (record_revision, record, pending_changes,
     deactivated_hp_changes, undo_list, redo_list) = cache_contents[1:]
    record_strip_empty_volatile_subfields(record)

    if record_get_field_value(record, "111", " ", " ", "g"):
        # Record already has a cnum
        return None

    sequence = CnumSeq()
    try:
        new_cnum = sequence.next_value(
            xml_record=wash_for_xml(print_rec(record)))
    except ConferenceNoStartDateError:
        return None

    # Store the cnum in the first 111 field and persist the cache.
    field_add_subfield(record['111'][0], 'g', new_cnum)
    update_cache_file_contents(recid, uid, record_revision, record,
                               pending_changes, deactivated_hp_changes,
                               undo_list, redo_list)
    return new_cnum
def create_xml(input_dict):
    """
    Build the MARCXML of a record from an article dictionary.

    The keys read from input_dict are 'title', 'doi', 'date', 'note',
    'collection', 'type', 'subject' (iterable of keywords),
    'firstauthor' (indexable, positions 0-2 are used) and
    'otherauthors' (iterable of such author entries).

    @param input_dict: article dictionary
    @return: the print_rec() output for the assembled record
    """
    record = {}

    # (tag, ind1, subfield code, input_dict key, constant extra subfields)
    single_value_fields = (
        ('245', '', 'a', 'title', [('9', 'OSTI')]),
        ('024', '7', 'a', 'doi', [('2', 'DOI')]),
        ('269', '', 'c', 'date', []),
        ('520', '', 'h', 'note', [('9', 'authors')]),
        ('980', '', 'a', 'collection', []),
        ('336', '', 't', 'type', []),
    )
    for tag, ind1, code, key, extra in single_value_fields:
        # The key is looked up right before its field is added.
        subfields = [(code, input_dict[key])] + extra
        record_add_field(record, tag, ind1, '', subfields=subfields)

    for keyword in input_dict['subject']:
        record_add_field(record, '653', '1', '',
                         subfields=[('a', keyword), ('9', 'OSTI')])

    def author_subfields(author):
        # Positions 0, 1 and 2 go to subfields a, u and v respectively.
        return [('a', author[0]), ('u', author[1]), ('v', author[2])]

    record_add_field(record, '100', '', '',
                     subfields=author_subfields(input_dict['firstauthor']))
    for author in input_dict['otherauthors']:
        record_add_field(record, '700', '', '',
                         subfields=author_subfields(author))

    return print_rec(record)
def add_record_cnum(recid, uid):
    """
    Generate a cnum for the cached record when it does not have one yet.

    @param recid: recid of the record under check. Used to retrieve
        cache file
    @type recid: int
    @param uid: id of the user. Used to retrieve cache file
    @type uid: int
    @return: None if a cnum is already present or if the sequence
        raises ConferenceNoStartDateError, the new cnum otherwise
    @rtype: None or string
    """
    # Import placed here to avoid circular dependency
    from invenio.sequtils_cnum import CnumSeq, ConferenceNoStartDateError

    # Everything but the first element of the cache is needed to write
    # the cache back later on.
    cached = get_cache_contents(recid, uid)[1:]
    (record_revision, record, pending_changes,
     deactivated_hp_changes, undo_list, redo_list) = cached
    record_strip_empty_volatile_subfields(record)

    existing_cnum = record_get_field_value(record, "111", " ", " ", "g")
    if existing_cnum:
        return None

    generator = CnumSeq()
    try:
        marcxml = wash_for_xml(print_rec(record))
        new_cnum = generator.next_value(xml_record=marcxml)
    except ConferenceNoStartDateError:
        return None

    # The cnum goes into the first 111 field; then the cache is updated.
    field_add_subfield(record['111'][0], 'g', new_cnum)
    update_cache_contents(recid, uid, record_revision, record,
                          pending_changes, deactivated_hp_changes,
                          undo_list, redo_list)
    return new_cnum
def create_xml773(recid): record = get_record(recid) correct_record = {} record_add_field(correct_record, '001', controlfield_value=str(recid)) field_instances = record_get_field_instances(record, '773', '', '') correct_subfields = [] for field_instance in field_instances: correct_subfields = [] # print field_instance[0] for code, value in field_instance[0]: if code == 'p' and value == old_journal: correct_subfields.append(('p', repl_journal)) if VERBOSE: print "%s: Replacing 773__p %s with %s" % (recid, value, repl_journal) else: correct_subfields.append((code, value)) record_add_field(correct_record, '773', '', '', subfields=correct_subfields) return print_rec(correct_record)
def ccreate_xml(recid, rawstring): found = False record = {} record_add_field(record, '001', controlfield_value=str(recid)) rawstring = rawstring.lower().replace('proc. of the', '').replace('proc. of', '').replace('.', ' ').replace('(', '').replace(')', '').replace(' -', '') for k,v in term_dict.items(): if k in rawstring: rawstring = rawstring.replace(k,v) matchobj = re.search('(.*?\d{4})', rawstring) if matchobj: search = perform_request_search(p=matchobj.group(), cc='Conferences') if len(search) == 1: for s in search: cnums = get_fieldvalues(s, '111__g') cnum = cnums[0] existing_cnum = get_fieldvalues(recid, '773__w') if cnum not in existing_cnum: print recid, cnum found = True if found: record_add_field(record, '773', '', '', subfields=[('w', cnum)]) return print_rec(record)
def create_xml(recid, experiment):
    """
    Build the MARCXML that adds an experiment (693__e) to a record.

    @param recid: id of the record to update; written to the 001 field
    @param experiment: experiment name, stored in subfield e of 693
    @return: the print_rec() output of a record holding the 693 field
        and the 001 control field
    """
    correct_record = {}
    # The 693 field is added before the 001, indicators given as '_'.
    record_add_field(correct_record, '693', '_', '_',
                     subfields=[('e', experiment)])
    record_add_field(correct_record, '001', controlfield_value=str(recid))
    return print_rec(correct_record)
def create_xml(recid, old_aff=None, new_aff=None, skip_aff=None): record = get_record(recid) correct_record = {} tags = ('100__', '700__') record_add_field(correct_record, '001', controlfield_value=recid) for tag in tags: field_instances = record_get_field_instances(record, \ tag[0:3], tag[3], tag[4]) for field_instance in field_instances: correct_subfields = [] skip_aff_exists = False for aff in skip_aff: if any(val for code, val in field_instance[0] if aff in val): skip_aff_exists = True if VERBOSE: print "%s exists, deleting %s" % (aff, old_aff) if skip_aff_exists: for code, value in field_instance[0]: if code == 'u': if value != old_aff: correct_subfields.append((code, value)) else: correct_subfields.append((code, value)) else: for code, value in field_instance[0]: if code == 'u': if value == old_aff: correct_subfields.append((code, new_aff)) if VERBOSE: print "Changing %s to %s" % (old_aff, new_aff) else: correct_subfields.append((code, value)) else: correct_subfields.append((code, value)) record_add_field(correct_record, tag[0:3], tag[3], tag[4], \ subfields=correct_subfields) return print_rec(correct_record)