def bst_openaire_altmetric(): """ """ recids = search_pattern(p="0->Z", f="0247_a") a = Altmetric() for recid in recids: try: # Check if we already have an Altmetric id sysno_inst = get_fieldvalues(recid, "035__9") if ['Altmetric'] in sysno_inst: continue doi_val = get_fieldvalues(recid, "0247_a")[0] json_res = a.doi(doi_val) rec = {} record_add_field(rec, "001", controlfield_value=str(recid)) if json_res: record_add_field(rec, '035', subfields=[('a', str(json_res['altmetric_id'])), ('9', 'Altmetric')]) bibupload(rec, opt_mode='correct') except AltmetricHTTPException, e: register_exception(prefix='Altmetric error (status code %s): %s' % (e.status_code, str(e)), alert_admin=False)
def _record_in_files_p(recid, filenames):
    """Search XML files for given record.

    Returns True as soon as any parsed record in any of the given MARCXML
    files carries the same recid, OAI id or system number as `recid`;
    False otherwise.  Unreadable files are skipped.
    """
    # Get id tags of record in question
    rec_oaiid = rec_sysno = -1
    rec_oaiid_tag = get_fieldvalues(recid, OAIID_TAG)
    if rec_oaiid_tag:
        rec_oaiid = rec_oaiid_tag[0]
    rec_sysno_tag = get_fieldvalues(recid, SYSNO_TAG)
    if rec_sysno_tag:
        rec_sysno = rec_sysno_tag[0]
    # For each record in each file, compare ids and abort if match is found
    for filename in filenames:
        try:
            file_ = open(filename)
            try:
                # BUGFIX: the original returned from inside the loop
                # before file_.close() was reached, leaking the handle;
                # try/finally guarantees the file is closed on every path.
                records = create_records(file_.read(), 0, 0)
                for rec_tuple in records:
                    record, all_good = rec_tuple[:2]
                    if record and all_good:
                        if _record_has_id_p(record, recid, rec_oaiid, rec_sysno):
                            return True
            finally:
                file_.close()
        except IOError:
            continue
    return False
def get_recid_and_reportnumber(recid=None, reportnumber=None):
    """
    Given at least a recid or a reportnumber, this function will look into
    the system for the matching record and will return a normalized
    recid and the primary reportnumber.

    @raises ValueError: in case of no record matched.
    """
    if recid:
        ## Recid specified receives priority.
        recid = int(recid)
        values = get_fieldvalues(recid, CFG_PRIMARY_REPORTNUMBER)
        if values:
            ## Let's take whatever reportnumber is stored in the matching record
            reportnumber = values[0]
            return recid, reportnumber
        else:
            raise ValueError("The record %s does not have a primary report number" % recid)
    elif reportnumber:
        ## Ok reportnumber specified, let's better try 1st with primary and then
        ## with other reportnumber
        recids = search_pattern(p='%s:"%s"' % (CFG_PRIMARY_REPORTNUMBER, reportnumber))
        if not recids:
            ## Not found as primary
            recids = search_pattern(p='reportnumber:"%s"' % reportnumber)
        if len(recids) > 1:
            ## BUGFIX: search_pattern returns integer recids; joining them
            ## directly raised TypeError instead of the intended
            ## ValueError -- stringify first (same fix as the sibling
            ## keep_original_reportnumber variant of this function).
            raise ValueError(
                'More than one record matches the reportnumber "%s": %s' % (
                    reportnumber, ', '.join([str(i) for i in recids])))
        elif len(recids) == 1:
            recid = list(recids)[0]
            reportnumbers = get_fieldvalues(recid, CFG_PRIMARY_REPORTNUMBER)
            if not reportnumbers:
                raise ValueError("The matched record %s does not have a primary report number" % recid)
            return recid, reportnumbers[0]
        else:
            raise ValueError("No records are matched by the provided reportnumber: %s" % reportnumber)
    raise ValueError("At least the recid or the reportnumber must be specified")
def build_issns_from_local_site():
    """
    Retrieves the ISSNs from the local database.

    Store the "journal name -> issn" relation.

    Normalize journal names a little bit:
    - strip whithespace chars (left and right)
    - all lower case
    - remove "[Online]" suffix

    Print the result as Python dict structure.
    """
    rec_id_list = perform_request_search(cc='Periodicals', of='id')
    built_issns = {}
    #built_issns = issns # Uncomment this to extend existing issns dict
    # (e.g. in case of manual addition)
    for rec_id in rec_id_list:
        # 210__% covers all abbreviated-title subfields -- TODO confirm tag
        journal_name_list = get_fieldvalues(rec_id, '210__%')
        issn_list = get_fieldvalues(rec_id, '022__a')
        if issn_list:
            issn = issn_list[0] # There should be only one ISSN
            for journal_name in journal_name_list:
                # Depending on how journal names are entered into the database,
                # you might want to do some processing before saving:
                journal_name = journal_name.lower().strip()
                if journal_name.endswith("[online]"):
                    # drop the "[online]" suffix and any whitespace before it
                    journal_name = journal_name[:-8].rstrip()
                built_issns[journal_name] = issn
    prtyp = pprint.PrettyPrinter(indent=4)
    prtyp.pprint(built_issns)
def get_date(recid, product_type):
    """Get date in format mm/dd/yyyy, yyyy or yyyy Month.

    Tries 260__c, then 269__c, then 502__d, falling back to '1900'.
    Dates of the form YYYY-MM-DD become mm/dd/yyyy; YYYY-MM becomes
    'YYYY Month'.  For product types TR/TD/JA a full mm/dd/yyyy is always
    produced, padding a missing day/month with 01.
    """
    try:
        date = get_fieldvalues(recid, '260__c')[0]
    except IndexError:
        try:
            date = get_fieldvalues(recid, '269__c')[0]
        except IndexError:
            try:
                date = get_fieldvalues(recid, '502__d')[0]
            except IndexError:
                # no date field at all: default year
                date = '1900'
    try:
        date_object = datetime.datetime.strptime(date, '%Y-%m-%d')
        date = date_object.strftime('%m/%d/%Y')
    except ValueError:
        try:
            # year-month only
            date_object = datetime.datetime.strptime(date, '%Y-%m')
            date = date_object.strftime('%Y %B')
            if product_type in ['TR', 'TD', 'JA']:
                # these product types require a full numeric date
                date = date_object.strftime('%m/01/%Y')
        except ValueError:
            # bare year (or unparseable string)
            if product_type in ['TR', 'TD', 'JA']:
                date = '01/01/' + str(date)
    return date
def render_dataverse_dataset_html(recid, display_link = True):
    """
    Rendering a single Dataverse dataset, both for the tab and the record

    @param display_link Indicates if a link to the data record should be displayed
    @type display_link boolean
    """
    from invenio.search_engine import get_fieldvalues
    # rendering the HTML code
    c = [] #collecting parts of the output
    c.append("<div style=\"background-color: #ececec; padding:10px;\">")
    # NOTE(review): assumes 520__h (description) and 520__9 (publisher)
    # are populated -- IndexError on records missing them; confirm callers
    # only pass Dataverse dataset records.
    comments = get_fieldvalues(recid, '520__h')[0]
    publisher = get_fieldvalues(recid, '520__9')
    c.append("<br />")
    c.append("<b>Description: </b> " + comments + "<br />")
    c.append("<br />")
    link_txt = "Go to the record"
    if display_link:
        c.append("<a href=\"%s/record/%s\">%s</a>" % (CFG_SITE_URL,
                                                      str(recid), link_txt))
        c.append("<br /><br />")
    if publisher[0] == 'Dataverse' and display_link == False:
        # in-record view of a Dataverse dataset: no preview is possible,
        # show a placeholder table instead
        c.append("<div class=\"hepdataTablePlaceholder\">")
        c.append("<table cellpadding=\"0\" cellspacing=\"0\" class=\"hepdataTable\">")
        c.append("<tr><td style=\"text-align: center;\">Preview not available</td>")
        c.append("</tr>")
        c.append("</table>")
        c.append("</div>")
        c.append("<br /><br />")
    c.append("</div>")
    return "\n".join(c)
def main(experiment, collaboration):
    """Print an authors.xml-style listing for the HepNames members of an
    experiment: XML front matter, the processed affiliation list and one
    entry per author with name parts, INSPIRE id and affiliation index.
    """
    authors = {}
    affiliations = []
    affiliation_count = 1  # NOTE(review): never used below
    search = "693__e:" + experiment
    x = perform_request_search(p = search, cc = 'HepNames')
    for r in x:
        foaf_name = get_fieldvalues(r, '100__q')  # NOTE(review): unused
        cal_authorNameNative = get_fieldvalues(r, '400__a')  # NOTE(review): unused
        name = get_fieldvalues(r, '100__a')[0]
        # 100__a is "family, given": split around the comma
        foaf_givenName = re.sub(r'.*\, ', '', name)
        foaf_familyName = re.sub(r'\,.*', '', name)
        author_id = find_inspire_id_from_record(r)
        orcid = get_hepnames_anyid_from_recid(r, 'ORCID')  # NOTE(review): unused
        if VERBOSE:
            print r
        affiliation = get_hepnames_aff_from_recid(r, 'Current')
        if not affiliation:
            # flag records whose current affiliation is missing
            print 'No aff - find recid', r
        d = {}
        d['foaf_givenName'] = foaf_givenName
        d['foaf_familyName'] = foaf_familyName
        d['affiliation'] = affiliation
        d['author_id'] = author_id
        authors[name.lower()] = d
        affiliations.append(affiliation)
    affiliations = affiliations_process(affiliations)
    for key in authors:
        # 1-based index of the author's affiliation in the processed list
        affiliation = authors[key]['affiliation']
        affiliation_number = affiliations.index(affiliation) + 1
        authors[key]['affiliation_id'] = affiliation_number
    print xml_frontmatter(experiment, collaboration)
    print xml_affiliations(affiliations)
    print xml_authors(authors)
def main(search): """This module returns a Google-like result showing the most highly cited papers from a given result.""" all_refs = [] if not search: search = 'standard model' search = '"dark matter"' search = 'qcd sum rules' print 'Your search is', search result = perform_request_search(p=search, cc='HEP') print 'The result is', len(result) for recid in result: try: search = 'citedby:recid:' + str(recid) refs = perform_request_search(p=search, cc='HEP') all_refs += refs except: print 'problem with', recid all_refs.sort() counted_all_refs = Counter(all_refs) sorted_count = sorted(counted_all_refs.items(), key=operator.itemgetter(1), reverse=True) for recid_count, count in sorted_count[-10:]: url = 'http://inspirehep.net/record/' + str(recid_count) print count, url title = get_fieldvalues(recid_count, '245__a')[0] try: author = get_fieldvalues(recid_count, '710__g')[0] except: try: author = get_fieldvalues(recid_count, '100__a')[0] except: author = 'No Author' print ' ', author, ':', title
def main(search):
    """This module returns a Google-like result showing the most highly
    cited papers from a given result."""
    all_refs = []
    if not search:
        # fallback searches; only the last assignment takes effect
        search = 'standard model'
        search = '"dark matter"'
        search = 'qcd sum rules'
    print 'Your search is', search
    result = perform_request_search(p=search, cc='HEP')
    print 'The result is', len(result)
    for recid in result:
        try:
            # collect every record citing `recid`
            search = 'citedby:recid:' + str(recid)
            refs = perform_request_search(p=search, cc='HEP')
            all_refs += refs
        except:
            print 'problem with', recid
    all_refs.sort()
    counted_all_refs = Counter(all_refs)
    # ascending by citation count; the [-10:] slice keeps the top ten,
    # printed least-cited-of-the-ten first
    sorted_count = sorted(counted_all_refs.items(), key=operator.itemgetter(1))
    for recid_count, count in sorted_count[-10:]:
        url = 'http://inspirehep.net/record/' + str(recid_count)
        print count, url
        title = get_fieldvalues(recid_count, '245__a')[0]
        try:
            # prefer the collaboration name, fall back to first author
            author = get_fieldvalues(recid_count, '710__g')[0]
        except:
            try:
                author = get_fieldvalues(recid_count, '100__a')[0]
            except:
                author = 'No Author'
        print ' ', author, ':', title
def migrate_bibdoc_status(recid, is_public, access_right):
    """Set a firerole-based access status on all bibdocs of a record.

    @param is_public: whether the record itself is public
    @param access_right: one of "open", "embargoed", "closed", "restricted"
    """
    from invenio.search_engine import get_fieldvalues
    from invenio.bibdocfile import BibRecDocs
    # Generate firerole
    fft_status = []
    if is_public:
        # NOTE(review): assumes 8560_f (submitter email) is always present
        # on public records -- IndexError otherwise; confirm upstream.
        email = get_fieldvalues(recid, "8560_f")[0]
        if access_right == "open":
            # Access to everyone
            fft_status = ["allow any"]
        elif access_right == "embargoed":
            # Access to submitted, Deny everyone else until embargo date,
            # then allow all
            date = get_fieldvalues(recid, "942__a")[0]
            fft_status = ['allow email "%s"' % email,
                          'deny until "%s"' % date,
                          "allow any"]
        elif access_right in ("closed", "restricted"):
            # Access to submitter, deny everyone else
            fft_status = ['allow email "%s"' % email, "deny all"]
        else:
            # Access to submitter, deny everyone else
            fft_status = None
    if fft_status:
        fft_status = "firerole: %s" % "\n".join(fft_status)
        brd = BibRecDocs(recid)
        for d in brd.list_bibdocs():
            d.set_status(fft_status)
def ccreate_xml(recid, rawstring):
    """Build a MARCXML correction adding a 773__w cnum to `recid` when the
    normalized `rawstring` matches exactly one Conferences record.

    The raw string is lower-cased, stripped of "proc. of (the)",
    punctuation and trailing " -", has abbreviations expanded via the
    module-level term_dict, and the portion up to the first 4-digit year
    is searched against the Conferences collection.
    """
    found = False
    record = {}
    record_add_field(record, '001', controlfield_value=str(recid))
    rawstring = rawstring.lower().replace('proc. of the', '').replace(
        'proc. of', '').replace('.', ' ').replace('(', '').replace(')',
        '').replace(' -', '')
    for k, v in term_dict.items():
        if k in rawstring:
            rawstring = rawstring.replace(k, v)
    # everything up to and including the first 4-digit year
    matchobj = re.search('(.*?\d{4})', rawstring)
    if matchobj:
        search = perform_request_search(p=matchobj.group(), cc='Conferences')
        if len(search) == 1:
            # exactly one conference matched: take its cnum (111__g)
            for s in search:
                cnums = get_fieldvalues(s, '111__g')
                cnum = cnums[0]
            existing_cnum = get_fieldvalues(recid, '773__w')
            if cnum not in existing_cnum:
                print recid, cnum
                found = True
    if found:
        record_add_field(record, '773', '', '', subfields=[('w', cnum)])
    return print_rec(record)
def solr_add_range(lower_recid, upper_recid):
    """
    Adds the regarding field values of all records from the lower recid
    to the upper one to Solr. It preserves the fulltext information.
    """
    for recid in range(lower_recid, upper_recid + 1):
        if record_exists(recid):
            # Every field is fetched best-effort: any failure (missing
            # field, decode error) degrades to an empty string rather than
            # aborting the whole range.
            try:
                abstract = unicode(remove_control_characters(
                    get_fieldvalues(recid, CFG_MARC_ABSTRACT)[0]), 'utf-8')
            except:
                abstract = ""
            try:
                first_author = remove_control_characters(
                    get_fieldvalues(recid, CFG_MARC_AUTHOR_NAME)[0])
                # concatenate all additional author names, space-separated
                additional_authors = remove_control_characters(
                    reduce(lambda x, y: x + " " + y,
                           get_fieldvalues(recid,
                                           CFG_MARC_ADDITIONAL_AUTHOR_NAME),
                           ''))
                author = unicode(first_author + " " + additional_authors,
                                 'utf-8')
            except:
                author = ""
            try:
                bibrecdocs = BibRecDocs(recid)
                fulltext = unicode(remove_control_characters(
                    bibrecdocs.get_text()), 'utf-8')
            except:
                fulltext = ""
            try:
                keyword = unicode(remove_control_characters(
                    reduce(lambda x, y: x + " " + y,
                           get_fieldvalues(recid, CFG_MARC_KEYWORD),
                           '')), 'utf-8')
            except:
                keyword = ""
            try:
                title = unicode(remove_control_characters(
                    get_fieldvalues(recid, CFG_MARC_TITLE)[0]), 'utf-8')
            except:
                title = ""
            solr_add(recid, abstract, author, fulltext, keyword, title)
    # commit once for the whole range, then yield to the task scheduler
    SOLR_CONNECTION.commit()
    task_sleep_now_if_required(can_stop_too=True)
def main(experiment, collaboration):
    """Print an authors.xml-style listing for the HepNames members of an
    experiment: XML front matter, the processed affiliation list and one
    entry per author with name parts, INSPIRE id and affiliation index.
    """
    authors = {}
    affiliations = []
    affiliation_count = 1  # NOTE(review): never used below
    search = "693__e:" + experiment
    x = perform_request_search(p = search, cc = 'HepNames')
    for r in x:
        foaf_name = get_fieldvalues(r, '100__q')  # NOTE(review): unused
        cal_authorNameNative = get_fieldvalues(r, '400__a')  # NOTE(review): unused
        name = get_fieldvalues(r, '100__a')[0]
        # 100__a is "family, given": split around the comma
        foaf_givenName = re.sub(r'.*\, ', '', name)
        foaf_familyName = re.sub(r'\,.*', '', name)
        author_id = find_inspire_id_from_record(r)
        if VERBOSE:
            print r
        affiliation = get_hepnames_affiliation_from_recid(r, 'Current')
        if not affiliation:
            # flag records whose current affiliation is missing
            print 'find recid', r
        d = {}
        d['foaf_givenName'] = foaf_givenName
        d['foaf_familyName'] = foaf_familyName
        d['affiliation'] = affiliation
        d['author_id'] = author_id
        authors[name.lower()] = d
        affiliations.append(affiliation)
    affiliations = affiliations_process(affiliations)
    for key in authors:
        # 1-based index of the author's affiliation in the processed list
        affiliation = authors[key]['affiliation']
        affiliation_number = affiliations.index(affiliation) + 1
        authors[key]['affiliation_id'] = affiliation_number
    print xml_frontmatter(experiment, collaboration)
    print xml_affiliations(affiliations)
    print xml_authors(authors)
def exp4coll(collaboration):
    """Map a collaboration name to experiment name(s).

    @return (experiment, experiments, correct_coll): `experiment` is the
        unique 119__a match (or None), `experiments` lists the candidates
        when the match is ambiguous, and `correct_coll` carries the 710__g
        values when `collaboration` itself matched an experiment name.
    """
    from invenio.search_engine import perform_request_search
    from invenio.search_engine import get_fieldvalues
    experiment = None
    experiments = []
    correct_coll = None
    # exact (phrase) match on the collaboration name
    exp_candidates = perform_request_search(p='710__g:"%s"' % collaboration,
                                            cc='Experiments')
    if len(exp_candidates) == 1:
        experiment = get_fieldvalues(exp_candidates[0], '119__a')[0]
        return experiment, experiments, correct_coll
    if len(exp_candidates) > 1:
        # ambiguous: return all candidate experiment names
        experiments = [get_fieldvalues(exp_recid, '119__a')[0]
                       for exp_recid in exp_candidates]
        return experiment, experiments, correct_coll
    # search in EXP-name instead
    exp_candidates = perform_request_search(p='119__a:"%s"' % collaboration,
                                            cc='Experiments')
    if len(exp_candidates) == 1:
        experiment = collaboration
        m710g = get_fieldvalues(exp_candidates[0], '710__g')
        if m710g:
            correct_coll = m710g
        return experiment, experiments, correct_coll
    # fuzzy search
    exp_candidates = perform_request_search(p="710__g:'%s'" % collaboration,
                                            cc='Experiments')
    exp_candidates += perform_request_search(p="419__a:'%s'" % collaboration,
                                             cc='Experiments')
    # first word of the collaboration name (cut at space, /, _ or -)
    short_coll = re.sub(r'^(...[^ /_-]*)[ /_-].*', r'\1', collaboration)
    exp_candidates += perform_request_search(p="710__g:'%s'" % short_coll,
                                             cc='Experiments')
    exp_candidates = set(exp_candidates)
    experiments = [get_fieldvalues(exp_recid, '119__a')[0]
                   for exp_recid in exp_candidates]
    return experiment, experiments, correct_coll
def book_information_from_MARC(recid):
    """
    Retrieve book's information from MARC

    @param recid: identify the record. Primary key of bibrec.
    @type recid: int

    @return tuple with title, year, author, isbn and editor.
    """
    def _gather(separator, tags):
        # Concatenate the values of all given MARC tags with `separator`.
        values = []
        for tag in tags:
            values.extend(get_fieldvalues(recid, tag))
        return separator.join(values)

    book_title = _gather(' ', ("245__a", "245__b", "245__n", "245__p"))
    book_year = _gather(' ', ("260__c",))
    book_author = _gather(' ', ("100__a", "100__u"))
    book_isbn = _gather(' ', ("020__a",))
    book_editor = _gather(' , ', ("260__a", "260__b"))
    return (book_title, book_year, book_author, book_isbn, book_editor)
def get_author_number(recid):
    """Return the number of authors: first author (100__a) plus all
    additional authors (700__a)."""
    # The original wrapped len() in try/except IndexError, but len()
    # never raises IndexError -- the handler was dead code.
    return len(get_fieldvalues(recid, "100__a") +
               get_fieldvalues(recid, "700__a"))
def get_corporate_author(recid):
    """Return all corporate authors (110__a and 710__a) joined by '; ',
    or None on error."""
    try:
        names = get_fieldvalues(recid, "110__a") + \
            get_fieldvalues(recid, "710__a")
        decoded = [unicode(name, "utf-8") for name in names]
        return '; '.join(decoded)
    except IndexError:
        return None
def main(recids):
    """ Gets name and email from each HEPNames record.

    For every recid (or INSPIRE-XXXX id) without an ORCID, look up the
    contact email (371__m) and name (100__a) and send a jobs mail.
    """
    if VERBOSE:
        print recids
    icount = 1
    for recid in recids:
        # NOTE(review): `break` aborts the whole run at the first bad
        # recid -- `continue` (skip just this one) may have been intended.
        if recid in BAD_RECIDS:
            break
        recid_str = str(recid)
        recid_int = int(recid)
        # inputs may be INSPIRE-XXXX ids rather than recids: resolve via 035__a
        if re.search(r'INSPIRE-', recid_str):
            search = '035__a:' + recid_str
            result = perform_request_search(p=search, cc='HepNames')
            recid = result[0]
            recid_str = str(recid)
            recid_int = int(recid)
        if get_hepnames_anyid_from_recid(recid_int, 'ORCID'):
            # already has an ORCID: nothing to ask for
            print recid_str, 'already has an ORCID\n'
            icount += 1
            continue
        try:
            contact_email = get_fieldvalues(recid_int, '371__m')[0]
        except:
            contact_email = '*****@*****.**'
        try:
            contact_name = get_fieldvalues(recid_int, '100__a')[0]
            # convert "family, given" into "given family"
            if "," in contact_name:
                contact_name = " ".join(contact_name.split(", ")[::-1])
        except:
            contact_name = 'Sir or Madam'
        # debug overrides, kept for reference:
        #contact_email = '*****@*****.**'
        #contact_email = "*****@*****.**"
        #contact_email = "*****@*****.**"
        #contact_email = "*****@*****.**"
        #contact_email = "*****@*****.**"
        #contact_email = "*****@*****.**"
        #contact_email = "*****@*****.**"
        #contact_email = "*****@*****.**"
        print icount, '/', len(recids)
        print 'recid = ', recid_str
        print 'email = ', contact_email
        print 'name = ', contact_name
        print ' '
        try:
            send_jobs_mail(recid_str, contact_email, contact_name)
            # throttle outgoing mail
            time.sleep(1)
        except IOError as e:
            print "I/O error({0}): {1}".format(e.errno, e.strerror)
            print 'PROBLEM sending mail to:'
            print recid, contact_email, contact_name, '\n'
        icount += 1
def doi_to_pbn():
    """Print a MARCXML collection adding 773 journal/volume/page/year
    data derived from APS (PhysRev*/RevModPhys) DOIs, for HEP records
    whose 773 is missing those subfields.
    """
    print "<?xml version=\"1.0\" ?>"
    print "<collection>"
    # records with an APS DOI but missing journal name / page / year
    searches = ['0247_a:/PhysRev.*/ -773__p:/Phys.Rev./',
                '0247_a:/PhysRev.*/ -773__c:/[0-9]/',
                '0247_a:/PhysRev.*/ -773__y:/[0-9]{4}/',
                '0247_a:/RevModPhys.*/ -773__p:/Rev.Mod.Phys./',
                '0247_a:/RevModPhys.*/ -773__c:/[0-9]/',
                '0247_a:/RevModPhys.*/ -773__y:/[0-9]{4}/'
                ]
    for search in searches:
        x = perform_request_search(p=search, cc='HEP')
        x = x[:200]  # cap each batch at 200 records
        for r in x:
            if VERBOSE:
                print r
            doi = get_fieldvalues(r, '0247_a')[0]
            pbn = get_fieldvalues(r, '773__p')  # NOTE(review): unused
            # APS DOIs look like 10.1103/PhysRevD.13.974 -> journal.volume.page
            [publisher, jvp] = re.split('/', doi)
            try:
                [journal, volume, page] = re.split('\.', jvp)
            except ValueError:
                # NOTE(review): journal/volume/page may be unbound (or
                # stale from the previous iteration) here -- this print
                # itself can raise, and processing continues with stale
                # values; needs a `continue`.
                print "Error in:", r, journal, volume, page
            except UnboundLocalError:
                print "Error in:", r, "with the journal,volume,page"
            try:
                volumeNumber = int(volume)
            except ValueError:
                print "Error:", volume, " is not a number"
            if journal == 'PhysRevSTAB':
                journal = 'Phys.Rev.ST Accel.Beams'
            elif journal == 'PhysRevSTPER':
                journal = 'Phys.Rev.ST Phys.Educ.Res.'
            elif journal == 'RevModPhys':
                journal = 'Rev.Mod.Phys.'
            else:
                # a trailing series letter (e.g. PhysRevD) moves onto the volume
                matchObj = re.search("[A-EX]$", journal)
                if matchObj:
                    volume = matchObj.group() + volume
                    journal = re.sub(r'[A-EX]$', r'', journal)
                # dotted short form: PhysRevLett -> Phys.Rev.Lett.
                journal = journal + "."
                journal = re.sub(r'([a-z])([A-Z])', r'\1.\2', journal)
            year = str(yearCalc(journal, volume))
            print '<record>'
            print ' <controlfield tag="001">' + str(r) + '</controlfield>'
            print ' <datafield tag="773" ind1=" " ind2=" ">'
            print '  <subfield code="p">' + journal + '</subfield>'
            print '  <subfield code="v">' + volume + '</subfield>'
            print '  <subfield code="c">' + page + '</subfield>'
            print '  <subfield code="y">' + year + '</subfield>'
            print ' </datafield>'
            print '</record>'
    print "</collection>"
def get_fermilab_report(recid):
    """Return the record's Fermilab report number, or None.

    Scans both 037__a and 037__z; when several values start with
    'FERMILAB', the last one encountered wins (as before).
    """
    candidates = get_fieldvalues(recid, "037__a") + \
        get_fieldvalues(recid, "037__z")
    fermilab_reports = [number for number in candidates
                        if number.startswith('FERMILAB')]
    if fermilab_reports:
        return fermilab_reports[-1]
    return None
def migrate_980__ab(recid, rec):
    """Rewrite the 980 collection tags of `recid` into `rec`.

    Maps old collection (980__a) and subcollection (980__b) values to
    upload types via the module-level collection_mapping / newcolls /
    newsubcolls tables, tracks curation state and re-adds any "state"
    collections (DELETED, PENDING, ...) verbatim.

    @return (rec, is_public)
    """
    from invenio.bibrecord import record_add_field
    from invenio.search_engine import get_fieldvalues
    collections = get_fieldvalues(recid, "980__a")
    subcollections = get_fieldvalues(recid, "980__b")
    upload_type = []
    extras = []
    curated = True
    for val in collections:
        # state collections mean the record is not (yet) curated;
        # they are preserved verbatim as extras
        if val in [
            'DARK',
            'DELETED',
            'DUPLICATE',
            'PENDING',
            'REJECTED',
            'PROVISIONAL',
        ]:
            curated = False
            extras.append(val)
        if val in collection_mapping:
            upload_type.append(collection_mapping[val])
        elif val in newcolls:
            upload_type.append(val)
    for val in subcollections:
        if val in collection_mapping:
            upload_type.append(collection_mapping[val])
        elif val in newsubcolls:
            upload_type.append(val)
    # keep only the first mapped upload type
    if upload_type:
        upload_type = [upload_type[0]]
    is_public = False
    if curated:
        upload_type.append(('curated', ''))
        is_public = True
    # each entry unpacks as an (a, b) subfield pair -- TODO confirm
    # collection_mapping/newcolls values are 2-tuples
    for a, b in upload_type:
        if b:
            record_add_field(rec, '980', subfields=[('a', a), ('b', b)])
        else:
            record_add_field(rec, '980', subfields=[('a', a), ])
    if extras:
        for e in extras:
            record_add_field(rec, '980', subfields=[('a', e), ])
    return (rec, is_public)
def get_eprint(recid):
    """Get the eprintt number from a record.

    For FERMILAB records only: prints "report eprint" when an arXiv
    eprint exists, otherwise hunts for a fulltext PDF URL (skipping
    posters/slides) and prints "report url".  Always returns None.
    """
    report_fermilab = None
    eprint = None
    url = None
    reports = get_fieldvalues(recid, '037__a')
    reports = reports + get_fieldvalues(recid, '037__z')
    if VERBOSE:
        print reports
    for report in reports:
        if re.search("FERMILAB", report):
            report_fermilab = report
    if VERBOSE:
        print report_fermilab
    if not report_fermilab:
        return None
    bfo = BibFormatObject(recid)
    eprint = bfe_arxiv.get_arxiv(bfo, category="no")
    if VERBOSE:
        print eprint
    if eprint:
        eprint = eprint[0]
        print report_fermilab, eprint
        return None
    # no eprint: fall back to looking for a fulltext URL
    for url_i in get_fieldvalues(recid, '8564_u'):
        if re.match(r'https?://inspirehep.net.*pdf', url_i):
            url = url_i
    for item in BibFormatObject(int(recid)).fields('8564_'):
        # NOTE(review): `or` binds looser than `and`, so this reads as
        # has_key('y') or (has_key('z') and has_key('u')) -- possibly not
        # what was intended.
        if item.has_key('y') or item.has_key('z') and item.has_key('u'):
            try:
                # fermilab-hosted copies are handled elsewhere
                if re.search('fermilab', item['y'].lower()):
                    return None
            except KeyError:
                pass
            if item['u'].endswith('pdf'):
                url = item['u']
            try:
                if item['y'].lower() == 'fulltext':
                    url = item['u']
                if item['y'].lower() == 'poster':
                    url = None
                if item['y'].lower() == 'slides':
                    url = None
            except KeyError:
                pass
            try:
                if item['z'].lower() == 'openaccess':
                    url = item['u']
            except KeyError:
                pass
    if url:
        print report_fermilab, url
def main(recids):
    """Send a jobs-deadline reminder mail for every record in `recids`,
    reading title (245__a), contact email/name (270__m/p) and deadline
    (046__i) from each record.
    """
    icount = 1
    for recid in recids:
        recid = str(recid)
        title = get_fieldvalues(recid, '245__a')[0]
        title = title[:50]  # keep the title short for the mail
        try:
            contact_email = get_fieldvalues(recid, '270__m')[0]
        except IndexError:
            contact_email = '*****@*****.**'
        try:
            contact_name = get_fieldvalues(recid, '270__p')[0]
            # convert "family, given" into "given family"
            if "," in contact_name:
                contact_name = " ".join(contact_name.split(", ")[::-1])
            #contact_name = contact_name
        except IndexError:
            contact_name = 'Sir or Madam'
        # special-case known addresses
        if contact_email == '*****@*****.**':
            contact_email = '*****@*****.**'
            #contact_email = '*****@*****.**'
        elif contact_email == 'recruitment.service@cern':
            contact_email = 'Caroline.Dumont@cern'
            #contact_email = '*****@*****.**'
        # debug overrides, kept for reference:
        #contact_email = "*****@*****.**"
        #contact_email = "*****@*****.**"
        #contact_email = "*****@*****.**"
        #contact_email = "*****@*****.**"
        #contact_email = "*****@*****.**"
        #contact_email = "*****@*****.**"
        #contact_email = "*****@*****.**"
        try:
            deadline = get_fieldvalues(recid, '046__i')[0]
        except IndexError:
            # NOTE(review): execution continues with `deadline` unbound
            # (or stale from the previous iteration); the later use is
            # only saved by the bare except below.
            print 'PROBLEM: no deadline'
            print recid, contact_email, contact_name, title
            print ''
        try:
            print icount, '/', len(recids)
            print 'recid = ', recid
            print 'title = ', title
            print 'email = ', contact_email
            print 'name = ', contact_name
            print 'dline = ', deadline
            print ' '
            send_jobs_mail(recid, contact_email, contact_name, title, deadline)
        except:
            print 'PROBLEM'
            print recid, contact_email, contact_name, title, deadline
        icount += 1
def MBI_Mail_Blog_Modified_to_User(parameters, curdir, form, user_info=None):
    """
    This function sends an email to the user who modified any metadata
    of a blog record saying that the blog was successfully modified

    Parameters:

      * emailFile: Name of the file containing the email of the user
    """
    global rn, sysno
    FROMADDR = '%s Submission Engine <%s>' % (CFG_SITE_NAME,
                                              CFG_SITE_SUPPORT_EMAIL)
    sequence_id = bibtask_allocate_sequenceid(curdir)
    # blog title (245__a) and URL (520__u) from the submitted record
    blog_title = "".join(["%s" % title.strip() for title in \
                          get_fieldvalues(int(sysno), "245__a")])
    blog_url = "".join(["%s" % url.strip() for url in \
                        get_fieldvalues(int(sysno), "520__u")])
    # The submitters email address is read from the file specified by 'emailFile'
    try:
        fp = open("%s/%s" % (curdir, parameters['emailFile']), "r")
        m_recipient = fp.read().replace("\n", " ")
        fp.close()
    except:
        m_recipient = ""
    # create email body
    email_txt = "\nModifications done on the metadata of the blog record with URL [%s] and title '%s' have been correctly applied.\n\n" % (blog_url, blog_title)
    email_txt += "It will be soon accessible here: <%s/%s/%s>\n" % (CFG_SITE_URL, CFG_SITE_RECORD, sysno)
    # email_txt += get_nice_bibsched_related_message(curdir)
    email_txt = email_txt + "\nThank you for using %s Submission Interface.\n" % CFG_SITE_NAME
    # prefer the blog title in the subject, fall back to the URL
    email_subject = "Blog record modification done: [%(id)s]"
    if blog_title:
        email_subject = email_subject % {'id' : blog_title}
    else:
        email_subject = email_subject % {'id' : blog_url}
    ## send the mail, if there are any recipients or copy to admin
    if m_recipient or CFG_WEBSUBMIT_COPY_MAILS_TO_ADMIN:
        scheduled_send_email(FROMADDR, m_recipient.strip(), email_subject,
                             email_txt,
                             copy_to_admin=CFG_WEBSUBMIT_COPY_MAILS_TO_ADMIN,
                             other_bibtasklet_arguments=['-I',
                                                         str(sequence_id)])
    return ""
def get_recid_and_reportnumber(recid=None, reportnumber=None,
                               keep_original_reportnumber=True):
    """
    Given at least a recid or a reportnumber, this function will look into
    the system for the matching record and will return a normalized
    recid and the primary reportnumber.

    @param keep_original_reportnumber: when matching by reportnumber,
        return the caller-supplied reportnumber instead of the record's
        primary one.

    @raises ValueError: in case of no record matched.
    """
    if recid:
        ## Recid specified receives priority.
        recid = int(recid)
        values = get_fieldvalues(recid, CFG_PRIMARY_REPORTNUMBER)
        if values:
            ## Let's take whatever reportnumber is stored in the matching record
            reportnumber = values[0]
            return recid, reportnumber
        else:
            raise ValueError(
                "The record %s does not have a primary report number" % recid)
    elif reportnumber:
        ## Ok reportnumber specified, let's better try 1st with primary and then
        ## with other reportnumber
        recids = search_pattern(
            p='%s:"%s"' % (CFG_PRIMARY_REPORTNUMBER, reportnumber))
        if not recids:
            ## Not found as primary
            recids = search_pattern(p='reportnumber:"%s"' % reportnumber)
        if len(recids) > 1:
            raise ValueError(
                'More than one record matches the reportnumber "%s": %s' % (
                    reportnumber, ', '.join([str(i) for i in recids])))
        elif len(recids) == 1:
            recid = list(recids)[0]
            if keep_original_reportnumber:
                return recid, reportnumber
            else:
                reportnumbers = get_fieldvalues(recid,
                                                CFG_PRIMARY_REPORTNUMBER)
                if not reportnumbers:
                    raise ValueError(
                        "The matched record %s does not have a primary report number" % recid)
                return recid, reportnumbers[0]
        else:
            raise ValueError(
                "No records are matched by the provided reportnumber: %s" % reportnumber)
    raise ValueError(
        "At least the recid or the reportnumber must be specified")
def get_ref_metadata_inspire(cls, ref, dois):
    """Get the metadata for a particular reference from INSPIRE.

    @param ref: a 'doi:...' reference string
    @param dois: the set/collection of known DOI references
    @return a short description string, or None when `ref` is not in `dois`.
    """
    if ref not in dois:
        return None
    ref = ref.replace('doi:', '')
    # look the DOI up in both the HEP and Fermilab collections
    recid = perform_request_search(p='0247_a:' + ref, cc='HEP') + \
        perform_request_search(p='0247_a:' + ref, cc='Fermilab')
    try:
        recid = recid[0]
        title = get_fieldvalues(recid, '245__a')[0]
        author = get_fieldvalues(recid, '100__a')[0]
        return """This DOI is in INSPIRE {0} : {1}""".format(author, title)
    except IndexError:
        # no matching record (or missing title/author field)
        return 'DOI should be in HEP but is not: ' + ref
def get_eprint(recid):
    """Get the eprintt number from a record.

    For FERMILAB records only: prints "report eprint" when an arXiv
    eprint exists, otherwise hunts for a fulltext PDF URL and prints
    "report url".  Always returns None.
    """
    report_fermilab = None
    eprint = None
    url = None
    reports = get_fieldvalues(recid, '037__a')
    reports = reports + get_fieldvalues(recid, '037__z')
    if VERBOSE:
        print reports
    for report in reports:
        if re.search("FERMILAB", report):
            report_fermilab = report
    if VERBOSE:
        print report_fermilab
    if not report_fermilab:
        return None
    bfo = BibFormatObject(recid)
    eprint = bfe_arxiv.get_arxiv(bfo, category = "no")
    if VERBOSE:
        print eprint
    if eprint:
        eprint = eprint[0]
        print report_fermilab, eprint
        return None
    # no eprint: fall back to looking for a fulltext URL
    for url_i in get_fieldvalues(recid, '8564_u'):
        if re.match(r'https?://inspirehep.net.*pdf', url_i):
            url = url_i
    for item in BibFormatObject(int(recid)).fields('8564_'):
        # NOTE(review): `or` binds looser than `and`, so this reads as
        # has_key('y') or (has_key('z') and has_key('u')) -- possibly not
        # what was intended.
        if item.has_key('y') or item.has_key('z') and item.has_key('u'):
            try:
                # fermilab-hosted copies are handled elsewhere
                if re.search('fermilab', item['y'].lower()):
                    return None
            except KeyError:
                pass
            if item['u'].endswith('pdf'):
                url = item['u']
            try:
                if item['y'].lower() == 'fulltext':
                    url = item['u']
            except KeyError:
                pass
            try:
                if item['z'].lower() == 'openaccess':
                    url = item['u']
            except KeyError:
                pass
    if url:
        print report_fermilab, url
def main(): counter = 0 filename = 'ADS_eprints_missing_in_INSPIRE.csv' mismatch_filename = ''ADS_eprints_missing_in_INSPIRE_mismatch.csv' output = open(filename, 'w') mismatch_output = open(mismatch_filename, 'w') records = collections.defaultdict(dict) search = '0247_2:doi -037__9:arxiv' results = perform_request_search(p=search, cc='HEP') for r in results: doi = get_fieldvalues(r, '0247_a') if doi: records[r]['doi'] = doi eprints = [] eprint_search = perform_request_search(p='037__9:arxiv', cc='HEP') for e in eprint_search: eprint = get_eprint_id(e) if eprint: eprint = eprint.replace('arxiv:', '') eprints.append(eprint) tree = ET.parse(DOCUMENT) root = tree.getroot() for child in root: if counter < 10: if 'doi' and 'preprint_id' in child.attrib: found_eprint = check_doi(child.attrib, records, eprints) if found_eprint: if found_eprint[0] is True: counter+=1 output.write('%s,%s,%s\n' % (found_eprint[0], found_eprint[1], found_eprint[2])) else: mismatch_output.write('%s,%s,%s\n' % (found_eprint[0], found_eprint[1], found_eprint[2])) output.close() print counter
def openaire_register_doi(recid):
    """
    Register a DOI for new publication

    If it fails, it will retry every 10 minutes for 1 hour.
    """
    doi_val = get_fieldvalues(recid, "0247_a")[0]
    logger.debug("Found DOI %s in record %s" % (doi_val, recid))
    pid = PersistentIdentifier.get("doi", doi_val)
    if not pid:
        # DOI exists on the record but is not ours to register
        logger.debug("DOI not locally managed.")
        return
    else:
        logger.debug("DOI locally managed.")
    if not pid.has_object("rec", recid):
        raise Exception(
            "DOI %s is not assigned to record %s." % (doi_val, recid))
    if pid.is_new() or pid.is_reserved():
        logger.info("Registering DOI %s for record %s" % (doi_val, recid))
        url = "%s/record/%s" % (CFG_DATACITE_SITE_URL, recid)
        doc = format_record(recid, 'dcite')
        if not pid.register(url=url, doc=doc):
            m = "Failed to register DOI %s" % doi_val
            logger.error(m + "\n%s\n%s" % (url, doc))
            # retry only when running as an async task (eager = inline run)
            if not openaire_register_doi.request.is_eager:
                raise openaire_register_doi.retry(exc=Exception(m))
        else:
            logger.info("Successfully registered DOI %s." % doi_val)
def main():
    """Print a MARCXML <collection> correcting all 8564_u file links from
    the old openaire.cern.ch base URL to the configured site URL.
    Records whose links are all unchanged are skipped.
    """
    from_base = 'http://openaire.cern.ch'
    to_base = config.CFG_SITE_URL
    # All records
    recids = search_pattern(p="0->Z", f="8564_u")
    print "<collection>"
    for recid in recids:
        # Get record information
        touched = False
        file_links = get_fieldvalues(recid, "8564_u")
        new_file_links = map(replace_link_func(from_base, to_base), file_links)
        # Print correcting to record
        rec = {}
        record_add_field(rec, "001", controlfield_value=str(recid))
        for old_link, new_link in zip(file_links, new_file_links):
            if old_link != new_link:
                touched = True
            # re-add every link (changed or not) so a 'correct' upload
            # keeps the full set
            record_add_field(rec, '856', ind1='4',
                             subfields=[('u', new_link)])
        if touched:
            print record_xml_output(rec)
    print "</collection>"
def check_record_status(recid):
    """Checks to see if a PDF has already been sent or if we have an
    accepted manuscript.

    @return True when the record was already sent or has an accepted
        manuscript URL; False otherwise.
    """
    if check_already_sent(recid):
        return True
    try:
        # side effect: records the journal (773__p) in the global JOURNALS
        JOURNALS.append(get_fieldvalues(recid, '773__p')[0])
    except IndexError:
        print 'No journal on:\nhttp://inspirehep.net/record/' + \
            str(recid)
    if not PDF_CHECK:
        # PDF checking disabled: treat as not accepted
        return False
    print "Checking accepted status", recid
    accepted_status = get_url(recid)
    if True in accepted_status:
        return True
    elif None in accepted_status:
        if VERBOSE:
            print 'No url on:\nhttp://inspirehep.net/record/' + str(recid)
        return False
    else:
        if VERBOSE:
            print recid, accepted_status
        return False
def openaire_delete_doi(recid):
    """
    Delete DOI in DataCite

    If it fails, it will retry every 10 minutes for 1 hour.
    """
    doi_val = get_fieldvalues(recid, "0247_a")[0]
    logger.debug("Found DOI %s in record %s" % (doi_val, recid))
    pid = PersistentIdentifier.get("doi", doi_val)
    if not pid:
        # DOI exists on the record but is not ours to manage
        logger.debug("DOI not locally managed.")
        return
    else:
        logger.debug("DOI locally managed.")
    if not pid.has_object("rec", recid):
        raise Exception(
            "DOI %s is not assigned to record %s." % (doi_val, recid))
    if pid.is_registered():
        logger.info("Inactivating DOI %s for record %s" % (doi_val, recid))
        if not pid.delete():
            m = "Failed to inactive DOI %s" % doi_val
            logger.error(m)
            # retry only when running as an async task (eager = inline run)
            if not openaire_delete_doi.request.is_eager:
                raise openaire_delete_doi.retry(exc=Exception(m))
        else:
            logger.info("Successfully inactivated DOI %s." % doi_val)
def generate_list_to_send(search): ''' Generate a list to send to MSNET. ''' filename = 'tmp_' + __file__ filename = re.sub('.py', '_send.txt', filename) output = open(filename,'w') recids_nomatch = find_recids_nomatch() print search result_m = perform_request_search(p=search, cc='HEP') print search, len(result_m) search = "035__9:msnet" result_i = perform_request_search(p=search, cc='HEP') search = "0247_2:doi" result_d = perform_request_search(p=search, cc='HEP') result = intbitset(result_m) & intbitset(result_d) - intbitset(result_i) result = result - intbitset(recids_nomatch) for recid in result: try: doi = get_fieldvalues(recid, '0247_a')[0] except IndexError: print 'Problem with:', recid, doi break output.write(str(recid) + ',' + doi + '\n') output.close() print filename
def get_collaborations(recid):
    """Get the collaboration information"""
    try:
        values = get_fieldvalues(recid, "710__g")
        decoded = [unicode(val, "utf-8") for val in values]
        return '; '.join(decoded)
    except StandardError:
        # Best-effort: any failure (lookup or decoding) yields None.
        return None
def get_affiliations(recid, long_flag):
    """Get affiliations using OSTI institution names."""
    raw_affs = get_fieldvalues(recid, "100__u") \
        + get_fieldvalues(recid, "700__u")
    raw_affs.append("Fermilab")
    short_names = []
    long_names = []
    # De-duplicate and keep only affiliations with a known DOE mapping.
    for aff in set(raw_affs):
        if aff in INSPIRE_AFF_DICT:
            doe_key = INSPIRE_AFF_DICT[aff]
            short_names.append(doe_key)
            long_names.append(DOE_AFF_DICT[doe_key])
    if long_flag:
        return '; '.join(long_names)
    return '; '.join(short_names)
def get_author_number(recid):
    """Gets number of authors.

    Returns the count of additional-author entries (700__a).
    An empty field list simply yields 0, so the old
    try/except IndexError was dead code: len() never raises IndexError.
    """
    return len(get_fieldvalues(recid, "700__a"))
def create_xml(recid):
    # Build a MARCXML correction for journal info in the field addressed by
    # the module-level globals `tag` (5-char tag spec), `repl_journal`
    # (replacement journal short title) and `volume_letter`.
    # NOTE(review): depends on those globals being set by the caller -- TODO
    # confirm they are initialized before this runs.
    record = get_record(recid)
    correct_record = {}
    record_add_field(correct_record, '001', controlfield_value=str(recid))
    field_instances = record_get_field_instances(record, tag[0:3], tag[3], tag[4])
    correct_subfields = []
    for field_instance in field_instances:
        # Reset per field instance (the assignment above is shadowed here).
        correct_subfields = []
        for code, value in field_instance[0]:
            if volume_letter:
                if code == 'p':
                    # Replace the journal title subfield.
                    correct_subfields.append(('p', repl_journal))
                elif code == 'v':
                    volume = get_fieldvalues(recid, '773__v')
                    for v in volume:
                        if v[0].isalpha():
                            # Volume already carries a letter prefix: keep it.
                            correct_subfields.append(('v', v))
                        else:
                            # Prepend the requested volume letter.
                            new_volume = volume_letter + v
                            correct_subfields.append(('v', new_volume))
                else:
                    correct_subfields.append((code, value))
            else:
                if code == 'p':
                    correct_subfields.append(('p', repl_journal))
                else:
                    # Copy all other subfields unchanged.
                    correct_subfields.append((code, value))
        record_add_field(correct_record, tag[0:3], tag[3], tag[4],
                         subfields=correct_subfields)
    return print_rec(correct_record)
def unlinked(req):
    """
    Return an id-ordered list of citation log entries of at most 10000 rows.
    """
    from invenio.dbquery import run_sql
    from invenio.search_engine import get_fieldvalues, get_collection_reclist
    # Person ids worth matching: those with any external id or confirmed papers.
    with_extid = intbitset(run_sql("SELECT distinct personid FROM aidPERSONIDDATA WHERE tag LIKE 'extid:%'"))
    with_papers = intbitset(run_sql("SELECT distinct personid from aidPERSONIDPAPERS where flag=2"))
    already_linked = intbitset(run_sql("SELECT personid FROM aidPERSONIDDATA WHERE tag='extid:INSPIREID'"))
    names = dict(run_sql("SELECT personid, data FROM aidPERSONIDDATA WHERE tag='canonical_name'"))
    # Normalized HepNames external ids already matched.
    matched_names = [name.lower().strip()
                     for name in get_fieldvalues(get_collection_reclist('HepNames'), '035__a')]
    personid_to_match = (with_extid | with_papers) - already_linked
    body = ['<ol>']
    for personid in personid_to_match:
        name = names.get(personid, str(personid))
        if name.lower().strip() in matched_names:
            continue
        body.append('<li><a href="%(siteurl)s/author/profile/%(bai)s" target="_blank">%(bai)s</a></li>' % {
            'siteurl': escape(CFG_SITE_SECURE_URL, True),
            'bai': escape(name, True)})
    body.append('</ol>')
    return page(req=req, body='\n'.join(body), title="Unlinked useful BAIs")
def render_other_dataset_html(recid, display_link = True):
    """ Try to render the basic content of an unknown dataset, both for
    the tab and the record

    @param display_link Indicates if a link to the data record should be displayed
    @type display_link boolean
    """
    from invenio.search_engine import get_fieldvalues
    parts = []  # collected HTML fragments
    parts.append("<div style=\"background-color: #ececec; padding:10px;\">")
    descriptions = get_fieldvalues(recid, '520__h')
    if descriptions:
        # NOTE(review): field value is inserted into the HTML unescaped --
        # confirm 520__h content is trusted.
        parts.append("<br />")
        parts.append("<b>Description: </b> " + descriptions[0] + "<br />")
        parts.append("<br />")
    link_txt = "Go to the record"
    if display_link:
        parts.append("<a href=\"%s/record/%s\">%s</a>" %
                     (CFG_SITE_URL, str(recid), link_txt))
        parts.append("<br /><br />")
    parts.append("</div>")
    return "\n".join(parts)
def book_title_from_MARC(recid):
    """
    Retrieve book's title from MARC

    @param recid: identify the record. Primary key of bibrec.
    @type recid: int

    @return book's title
    """
    # Concatenate all title-related 245 subfields in order.
    pieces = []
    for subfield in ("245__a", "245__b", "245__n", "245__p"):
        pieces.extend(get_fieldvalues(recid, subfield))
    return ' '.join(pieces)
def check_record(record):
    """ replace old ids in 999C50 with superseding ids """
    for pos, val in record.iterfield('999C50'):
        if not val:
            continue
        try:
            old_id = int(val)
        except ValueError:
            record.warn("invalid non-digit id in %r" % (pos, ))
            continue
        if old_id not in superseeded_recids_cache():
            continue
        replacements = set(get_fieldvalues(old_id, '970__d'))
        if len(replacements) > 1:
            # Ambiguous: refuse to pick a successor automatically.
            record.warn("more than one 970__d for %r" % (pos, ))
        elif len(replacements) == 1:
            new_id = replacements.pop()
            try:
                int(new_id)
            except ValueError:
                record.warn("non digit value in 970__d for %r" % (pos, ))
                continue
            record.amend_field(pos, new_id,
                               "replaced %s with %s" % (old_id, new_id))
def task_run_core():
    """ run daemon """
    # Walk every expired loan and send the appropriate recall letter.
    for (borrower_id, loan_id, recid) in get_expired_loan():
        (number_of_letters, date_letters) = get_overdue_letters_info(loan_id)
        if number_of_letters == 0:
            template = CFG_BIBCIRCULATION_TEMPLATES['RECALL1']
        elif number_of_letters == 1 and send_second_recall(date_letters):
            template = CFG_BIBCIRCULATION_TEMPLATES['RECALL2']
        elif number_of_letters == 2 and send_third_recall(date_letters):
            template = CFG_BIBCIRCULATION_TEMPLATES['RECALL3']
        else:
            # Fallback: any later recall reuses the final template.
            template = CFG_BIBCIRCULATION_TEMPLATES['RECALL3']
        content = generate_email_body(template, loan_id)
        title = ''.join(get_fieldvalues(recid, "245__a"))
        subject = "LOAN RECALL: " + title
        update_expired_loan(loan_id)
        send_overdue_letter(borrower_id, subject, content)
    return 1
def task_run_core():
    """ run daemon

    For each expired loan, picks a recall template based on how many
    letters were already sent, emails the borrower, and records the
    new letter via update_expired_loan(). Always returns 1.
    """
    expired_loans = get_expired_loan()
    for (borrower_id, loan_id, recid) in expired_loans:
        (number_of_letters, date_letters) = get_overdue_letters_info(loan_id)
        if number_of_letters == 0:
            template = CFG_BIBCIRCULATION_TEMPLATES['RECALL1']
        elif number_of_letters == 1 and send_second_recall(date_letters):
            template = CFG_BIBCIRCULATION_TEMPLATES['RECALL2']
        else:
            # FIX: the old `elif number_of_letters == 2 and
            # send_third_recall(date_letters)` branch selected the same
            # RECALL3 template as the else fallback, so the condition was a
            # dead distinction (send_third_recall appears to be a pure date
            # predicate -- TODO confirm it has no side effects).
            template = CFG_BIBCIRCULATION_TEMPLATES['RECALL3']
        content = generate_email_body(template, loan_id)
        title = ''.join(get_fieldvalues(recid, "245__a"))
        subject = "LOAN RECALL: " + title
        update_expired_loan(loan_id)
        send_overdue_letter(borrower_id, subject, content)
    return 1
def generate_list_to_send(search): ''' Generate a list to send to MSNET. ''' filename = 'tmp_' + __file__ filename = re.sub('.py', '_send.txt', filename) output = open(filename, 'w') recids_nomatch = find_recids_nomatch() print search result_m = perform_request_search(p=search, cc='HEP') print search, len(result_m) search = "035__9:msnet" result_i = perform_request_search(p=search, cc='HEP') search = "0247_2:doi" result_d = perform_request_search(p=search, cc='HEP') result = intbitset(result_m) & intbitset(result_d) - intbitset(result_i) result = result - intbitset(recids_nomatch) for recid in result: try: doi = get_fieldvalues(recid, '0247_a')[0] except IndexError: print 'Problem with:', recid, doi break output.write(str(recid) + ',' + doi + '\n') output.close() print filename