def get_eprint(recid): """Get the eprintt number from a record.""" report_fermilab = None eprint = None url = None reports = get_fieldvalues(recid, '037__a') reports = reports + get_fieldvalues(recid, '037__z') if VERBOSE: print reports for report in reports: if re.search("FERMILAB", report): report_fermilab = report if VERBOSE: print report_fermilab if not report_fermilab: return None bfo = BibFormatObject(recid) eprint = bfe_arxiv.get_arxiv(bfo, category="no") if VERBOSE: print eprint if eprint: eprint = eprint[0] print report_fermilab, eprint return None for url_i in get_fieldvalues(recid, '8564_u'): if re.match(r'https?://inspirehep.net.*pdf', url_i): url = url_i for item in BibFormatObject(int(recid)).fields('8564_'): if item.has_key('y') or item.has_key('z') and item.has_key('u'): try: if re.search('fermilab', item['y'].lower()): return None except KeyError: pass if item['u'].endswith('pdf'): url = item['u'] try: if item['y'].lower() == 'fulltext': url = item['u'] if item['y'].lower() == 'poster': url = None if item['y'].lower() == 'slides': url = None except KeyError: pass try: if item['z'].lower() == 'openaccess': url = item['u'] except KeyError: pass if url: print report_fermilab, url
def eval_bibformat_4suite(ctx, recID, template_code): """ 4suite extension function: Bridge between BibFormat and XSL stylesheets. Returns the evaluation of the given piece of format template Can be used in that way in XSL stylesheet (provided xmlns:fn="http://cdsweb.cern.ch/bibformat/fn" has been declared): <xsl:value-of select="fn:eval_bibformat(marc:controlfield[@tag='001'],'<BFE_SERVER_INFO var="recurl">')" /> if recID is string, value is converted to int if recID is Node, first child node (text node) is taken as value template_code is evaluated as a format template piece of code. '<' and '"' need to be escaped with '<' and '"' """ from invenio.bibformat_engine import \ format_with_format_template, \ BibFormatObject try: if len(recID) > 0 and isinstance(recID[0], Node): recID_int = recID[0].firstChild.nodeValue if recID_int is None: return '' else: recID_int = int(recID_int) bfo = BibFormatObject(recID_int) return format_with_format_template( None, bfo, verbose=0, format_template_code=template_code)[0] except Exception, err: sys.stderr.write("Error during formatting function evaluation: " + \ str(err) + \ '\n') return ''
def format_element(bfo):
    """
    Render the 'featured' records of the current journal.

    The journal name is taken from the URI found in bfo.user_info. Each
    featured record becomes a link showing its image and title; when the
    language is 'fr' the 246_1a title is preferred and 245__a is used only
    if it is empty. Entries are joined with two line breaks.
    """
    url_args = parse_url_string(bfo.user_info['uri'])
    entries = []
    for recid, img_url in get_featured_records(url_args["journal_name"]):
        record = BibFormatObject(recid)
        title = ''
        if bfo.lang == 'fr':
            title = record.field('246_1a')
        if title == '':
            # Not French, or no French translation: use the English title
            title = record.field('245__a')
        entry = ''' <a href="%s/record/%s?ln=%s" style="display:block"> <img src="%s" alt="" width="100" class="phr" /> %s </a> ''' % (
            CFG_SITE_URL, recid, bfo.lang, img_url, title)
        entries.append(entry)
    return '<br/><br/>'.join(entries)
def answer(self, req, user_info, of, cc, colls_to_search, p, f, search_units, ln):
    """
    Answer question given by context.

    Return (relevance, html_string) where relevance is integer
    from 0 to 100 indicating how relevant to the question the
    answer is (see C{CFG_WEBSEARCH_SERVICE_MAX_SERVICE_ANSWER_RELEVANCE}
    for details), and html_string being a formatted answer.

    The weather widget is offered (relevance 100) only for a search with
    no field restriction whose field-less terms include the localized
    word for "weather", "météo" or "meteo"; every other case yields
    (0, '').
    """
    no_answer = (0, '')
    if f:
        return no_answer

    # Keep only the terms that are not bound to a specific field
    terms = []
    for unit in search_units:
        if unit[2] == "":
            terms.append(unit[1].lower())
    if not terms:
        return no_answer

    _ = gettext_set_language(ln)
    triggers = (_("weather").lower(), "météo", "meteo")
    if not any(trigger in terms for trigger in triggers):
        return no_answer

    bfo = BibFormatObject(0)
    if meteoblue_widget_available_p:
        widget = bfe_webjournal_widget_weather_meteoblue.format_element(bfo)
    else:
        widget = bfe_webjournal_widget_weather.format_element(
            bfo, display_weather_icon='true')

    if widget:
        return (100, widget)
    return no_answer
def get_id(record, id_type=None):
    """Return the 035__a value whose 035__9 source equals id_type.

    Every 035__ field of the record is examined; if several match, the
    last one wins. None is returned when no field matches.
    """
    found = None
    for field in BibFormatObject(record).fields('035__'):
        if '9' not in field or 'a' not in field:
            continue
        if field['9'] == id_type:
            found = field['a']
    return found
def get_eprint_id(recid):
    """Find the arXiv number of an INSPIRE record.

    Scans the 037__ fields for one whose $9 is "arxiv" (case-insensitive)
    and returns its $a; if several match, the last one wins.

    Returns None when the record has no arXiv number.  The original code
    initialised an unrelated 'osti_id' variable instead of 'arxiv_id', so
    that case raised UnboundLocalError.
    """
    arxiv_id = None
    for item in BibFormatObject(int(recid)).fields('037__'):
        if '9' in item and 'a' in item:
            if item['9'].lower() == 'arxiv':
                arxiv_id = item['a']
    return arxiv_id
def testAff(self):
    """testing Affs

    Formats record 7374 with the CERN authors element (limit of 5,
    affiliations printed) and checks the author link and the MIT
    affiliation link.
    """
    from bfe_CERN_authors import format_element
    self.bfo = BibFormatObject('7374')
    rendered = format_element(self.bfo, limit="5",
                              print_affiliations="yes")
    expected_patterns = (
        r'Farhi, E.</a>',
        r'</a> \(<a.*MIT',
    )
    for pattern in expected_patterns:
        self.assert_(re.search(pattern, rendered))
def testarXiv(self):
    """INSPIRE arXiv format

    The output for record 1 must contain the arXiv number fragment and
    the hep-th category, and must not mention CERN.
    """
    from bfe_INSPIRE_arxiv import format_element
    self.bfo = BibFormatObject('1')
    rendered = format_element(self.bfo)
    self.assert_(re.search(r'0299', rendered))
    unwanted = re.search(r'CERN', rendered)
    self.assert_(not unwanted)
    self.assert_(re.search(r'hep-th', rendered))
def testDate(self): """testing date""" from bfe_INSPIRE_date import format_element self.bfo = BibFormatObject('6194') string = format_element(self.bfo) print string string2 = format_element(self.bfo, us="no") print string2
def get_osti_id(recid):
    """Find the OSTI ID of an INSPIRE record.

    Scans the 035__ fields for one whose $9 is "osti" (case-insensitive)
    and returns its $a; if several match, the last one wins. Returns None
    when there is none.
    """
    found = None
    for field in BibFormatObject(int(recid)).fields('035__'):
        if '9' not in field or 'a' not in field:
            continue
        if field['9'].lower() == 'osti':
            found = field['a']
    return found
def get_bfx_record(recID):
    '''
    Load the record recID and convert it with convert_record().

    @param recID: the ID of the record
    @return: the value of convert_record() applied to the record obtained
             from BibFormatObject(recID).get_record()
    '''
    return convert_record(BibFormatObject(recID).get_record())
def testarX(self): """testing arXiv""" from bfe_INSPIRE_arxiv import format_element self.bfo = BibFormatObject('37650') string = format_element(self.bfo) print string self.assert_(re.search(r'3066', string)) self.assert_(not re.search(r'CERN', string)) self.assert_(re.search(r'hep-ph', string))
def _get_report_numbers(record_id):
    """Return the $a values of all 037__ fields of the record, in order.

    Fields without an $a subfield are skipped.
    """
    from invenio.bibformat_engine import BibFormatObject
    report_fields = BibFormatObject(record_id).fields('037__')
    return [field['a'] for field in report_fields if 'a' in field]
def get_widget_html(language, max_photos, collections, separator, ln):
    """
    Returns the content of the widget: linked thumbnails of the latest
    photo records.

    @param language: language of the titles; 'fr' prefers 246_1a and
                     falls back to 245__a
    @param max_photos: maximum number of records to consider
    @param collections: collections searched for the latest photos
    @param separator: string inserted between two thumbnails
    @param ln: language code appended to each record link
    @return: the HTML of the widget; records without an icon URL starting
             with "http://" are left out
    """
    latest_photo_ids = perform_request_search(c=collections,
                                              rg=max_photos,
                                              of='id')
    images_urls = []
    for recid in latest_photo_ids[:max_photos]:
        try:
            photo_record = BibFormatObject(recid)
        except Exception:
            # Record cannot be loaded: leave it out of the selection
            continue

        if language == "fr":
            title_tags = ('246_1a', '245__a')
        else:
            title_tags = ('245__a',)
        title = ""
        for tag in title_tags:
            try:
                title = photo_record.fields(tag, escape=1)[0]
                break
            except (KeyError, IndexError):
                # Indexing an empty result raises IndexError, which the
                # original "except KeyError" did not catch: a picture
                # without a title made the whole widget fail.
                continue

        dfs_images = CFG_CERN_SITE and photo_record.fields('8567_')
        if dfs_images:
            # Get from 8567_
            for image_block in dfs_images:
                if image_block.get("y", '') == "Icon":
                    if image_block.get("u", '').startswith("http://"):
                        images_urls.append((recid, image_block["u"], title))
                        break  # Just one image per record
        else:
            # Get from 8564_
            for image_block in photo_record.fields('8564_'):
                if image_block.get("x", '').lower() == "icon":
                    if image_block.get("q", '').startswith("http://"):
                        images_urls.append((recid, image_block["q"], title))
                        break  # Just one image per record

    # Build output
    return separator.join([
        '<a href="%s/%s/%i?ln=%s"><img class="phr" width="100" height="67" src="%s"/>%s</a>' %
        (CFG_SITE_URL, CFG_SITE_RECORD, recid, ln, photo_url, title)
        for (recid, photo_url, title) in images_urls
    ])
def cite_as(bfo, publisher):
    """
    Build the "Cite as" HTML line of a dataset record.

    - publisher "Dataverse": delegated to dataverse_cite_as(bfo).
    - publisher 'authors': nothing is displayed ("" is returned).
    - otherwise the line is made of: the first collaboration (710__g), the
      year of the parent record (786__w) or of the record itself when
      there is no parent, the publisher name, and a link built from the
      persistent identifier (0247_2 / 0247_a, types DOI or HDL), e.g.

        Cite as: The ATLAS Collaboration ( 2013 ) HepData, http://doi.org/10.1234/123456

    The publisher 'HEPDATA' is displayed as 'HepData' and 'INSPIRE' as
    'INSPIRE-HEP'.

    @param bfo: BibFormatObject of the dataset record
    @param publisher: name of the publisher of the dataset
    @return: HTML string
    """
    if publisher == "Dataverse":
        return dataverse_cite_as(bfo)
    elif publisher == 'authors':
        return ""

    # Imported here so the early exits above do not need it
    from invenio.bibformat_engine import BibFormatObject

    colls = bfo.fields("710__g")

    try:
        parent_recid = int(bfo.field("786__w"))
        year = get_year(BibFormatObject(parent_recid))
    except ValueError:
        # No parent record available
        year = get_year(bfo)

    if publisher == 'HEPDATA':
        publisher = 'HepData'
    elif publisher == "INSPIRE":
        # Was 'publisher == "INSPIRE-HEP"': a comparison whose result was
        # thrown away, so the name was never changed.
        publisher = "INSPIRE-HEP"

    pid_type = bfo.field("0247_2")
    pid = bfo.field("0247_a")

    out = "<b>Cite as: </b>"
    if colls:
        out += str(colls[0])
    if year:
        out += ' ( ' + str(year) + ' ) '
    out += publisher + ', '

    if pid_type == 'DOI':
        out += '<a href="http://doi.org/' + pid + '" target="_blank" > http://doi.org/' + pid + '</a>'
    elif pid_type == 'HDL':
        out += '<a href="http://hdl.handle.net/' + pid + '" target="_blank" > http://hdl.handle.net/' + pid + '</a>'
    elif pid_type == '':
        out += '[no persistent identifier assigned]'
    return out
def create_xml(recid=None, osti_id=None, doi=None): osti_exists = False doi_exists = False osti_mismatch = False mismatches = [] osti_subfields = [('9', 'OSTI'), ('a', osti_id)] record = get_record(recid) record_link = '<a href="http://inspirehep.net/record/%s">%s</a>' % ( str(recid), str(recid)) append_record = {} additions = False errors = None for item in BibFormatObject(recid).fields('035__'): if item.has_key('9') and item.has_key('a'): if item['9'] == 'OSTI' and item['a'] == osti_id: osti_exists = True elif item['9'] == 'OSTI' and item['a'] != osti_id: osti_mismatch = True mismatches.append(item['a']) for item in BibFormatObject(recid).fields('0247_'): if item.has_key('2') and item.has_key('a'): if item['2'] == 'DOI' and item['a'] == doi: doi_exists = True if osti_exists is False and osti_mismatch is True: print str(recid), "already has a different OSTI ID" errors = "doi %s in record %s should match OSTI ID %s, but the record already contains OSTI ID(s) %s<br />" % ( doi, record_link, osti_id, ','.join(mismatches)) return errors if doi_exists is False and osti_exists is True: print str(recid), "contains an OSTI ID but no doi" no_doi = "%s contains OSTI ID %s but not doi %s<br />" % (record_link, osti_id, doi) return no_doi if osti_exists is False and osti_mismatch is False: record_add_field(append_record, '001', controlfield_value=str(recid)) record_add_field(append_record, '035', '', '', subfields=osti_subfields) print "%s: added 035__a:%s" % (str(recid), osti_id) return print_rec(append_record)
def testLinks(self):
    """testing INSPIRE Links

    Formats record 1 as an HTML list and checks the arXiv abstract and
    PDF links.
    """
    from bfe_INSPIRE_links import format_element
    self.bfo = BibFormatObject('1')
    list_options = {
        'separator': '</li>\n<li>',
        'prefix': "<ul><li>",
        'suffix': "</li></ul>",
    }
    rendered = format_element(self.bfo, **list_options)
    for pattern in (r'1012.0299">Abstract<',
                    r'arXiv:1012.0299">PDF</a> from arXiv.org'):
        self.assert_(re.search(pattern, rendered))
def testLinks(self): """testing Links""" from bfe_INSPIRE_links import format_element self.bfo = BibFormatObject('37650') string = format_element(self.bfo, separator='</li>\n<li>', prefix="<ul><li>", suffix="</li></ul>") print string self.assert_(re.search(r'065201">Journal', string)) self.assert_(re.search(r'\?bibcode=2004', string))
def get_hepnames_anyid_from_recid(record, id_type): """ Returns any id with a HEPNames recid """ record = int(record) author_id = None for item in BibFormatObject(record).fields('035__'): if item.has_key('9') and item['9'] == id_type and item.has_key('a'): author_id = item['a'] if VERBOSE and not author_id: print "WARNING: no %s ID found for %s: " % (id_type, record) return author_id
def format_element(bfo, separator='; '):
    """
    Prints the list of the "children" institutions

    A child is a record with a 510__ field whose $0 is the ID of the
    current record and whose $w is 't'. Each child is shown as a link
    labelled with the first non-empty of 110__t, 110__u, 110__a, or with
    its record ID when it has no name.

    @param separator: string inserted between two institutions
    @return: "Subsidiary Institution: " followed by the links, or "" when
             the record has no ID or no children
    """
    from invenio.search_engine import search_pattern
    from invenio.bibformat_engine import BibFormatObject

    # Test the raw value: str(None) is the non-empty string "None", so
    # checking the converted string could never detect a missing ID.
    if bfo.recID in (None, ''):
        # Something is wrong, return empty string
        return ""
    recID = str(bfo.recID)

    children = []
    for institution_id in search_pattern(p="510__0:" + recID):
        for field in BibFormatObject(institution_id).fields('510__'):
            if field.get('0') == recID and field.get('w') == 't':
                children.append(institution_id)
    if not children:
        return ""

    links = []
    for child_id in children:
        child = BibFormatObject(child_id)
        # abbreviated name first, then the other names, then the record ID
        label = (child.field('110__t') or child.field('110__u') or
                 child.field('110__a') or child_id)
        links.append('<a href="/record/' + str(child_id) + '">' +
                     str(label) + '</a>')

    # Joining replaces the former 'out[:-2]', which removed exactly two
    # characters and so was right only for two-character separators.
    return "Subsidiary Institution: " + separator.join(links)
def get_hepnames_aff_from_recid(record, id_type): """ Returns the current affiliation """ record = int(record) affiliation = None for item in BibFormatObject(record).fields('371__'): if item.has_key('z') and item['z'].lower() == id_type.lower() \ and item.has_key('a'): affiliation = item['a'] if VERBOSE and not affiliation: print "WARNING: no %s ID found for %s: " % (id_type, record) return affiliation
def call_function(self, function_name, parameters=None):
    '''
    Evaluate a BibFormat format element on the record self.recID.

    @param function_name: the name of the format element to call
    @param parameters: a dictionary of the parameters to pass as key=value pairs
                       (an empty dictionary is used when omitted)
    @return: the value produced by the element; the errors reported by
             eval_format_element() are discarded
    '''
    if parameters is None:
        parameters = {}
    record_bfo = BibFormatObject(self.recID)
    element = get_format_element(function_name)
    # to do: check errors from function call
    value, _errors = eval_format_element(element, record_bfo, parameters)
    return value
def eval_bibformat_lxml(ctx, recID, template_code): """ libxslt extension function: Bridge between BibFormat and XSL stylesheets. Returns the evaluation of the given piece of format template Can be used in that way in XSL stylesheet (provided xmlns:fn="http://cdsweb.cern.ch/bibformat/fn" has been declared): <xsl:value-of select="fn:eval_bibformat(marc:controlfield[@tag='001'],'<BFE_SERVER_INFO var="recurl">')" /> if recID is string, value is converted to int if recID is Node, first child node (text node) is taken as value template_code is evaluated as a format template piece of code. '<' and '"' need to be escaped with '<' and '"' @param ctx: context as passed by lxml @param recID: record ID @param template_code: the code calling a BFE_ as it would be use in format template @return: the evalued call to a format template (usually a call to a format element) @rtype: string """ #' from invenio.bibformat_engine import \ format_with_format_template, \ BibFormatObject try: if isinstance(recID, str): recID_int = int(recID) elif isinstance(recID, (int, long)): recID_int = recID elif isinstance(recID, list): recID = recID[0] if isinstance(recID, str): recID_int = int(recID) else: recID_int = int(recID.text) else: recID_int = int(recID.text) bfo = BibFormatObject(recID_int) return format_with_format_template(None, bfo, verbose=0, format_template_code=template_code) except Exception, err: sys.stderr.write("Error during formatting function evaluation: " + \ str(err) + \ '\n') return ''
def schemaorg_type(recid=None, bfo=None):
    """Return the schema.org type URL matching the record's collections.

    The record is given either by recid (which takes precedence) or as an
    already built bfo. For each 980__ field, $b -- or $a when $b is empty
    -- is looked up in CFG_OPENAIRE_SCHEMAORG_MAP and the first non-empty
    result is returned. 'http://schema.org/CreativeWork' is the default.
    """
    default_type = 'http://schema.org/CreativeWork'
    if recid:
        from invenio.bibformat_engine import BibFormatObject
        bfo = BibFormatObject(recid)
    if not bfo:
        return default_type

    from invenio.openaire_deposit_config import CFG_OPENAIRE_SCHEMAORG_MAP
    for collection in bfo.fields('980__'):
        primary = collection.get('a', None)
        secondary = collection.get('b', None)
        if secondary:
            key = secondary
        else:
            key = primary
        mapped = CFG_OPENAIRE_SCHEMAORG_MAP.get(key, None)
        if mapped:
            return mapped
    return default_type
def get_author_details(recid, authors, tag):
    """Get authors broken out as individuals.

    For every field 'tag' of record recid an 'authors_detail' element is
    appended to the element 'authors', with the children first_name,
    middle_name, last_name, affiliation, private_email and orcid_id (each
    left empty when unknown).

    - $a "Last, Fore" is split into the names; with a third
      comma-separated part ("Last, Fore, Title") that part stays attached
      to the fore name. The fore name is then split into first and middle
      name at spaces, or after the first initial for forms like "J.R.".
    - $u is the affiliation and $m the e-mail (prefix "email:" removed).
    - $j and $k are searched for an "ORCID:" value; $k wins when both
      contain one.
    """
    for item in BibFormatObject(int(recid)).fields(tag):
        authors_detail = ET.SubElement(authors, 'authors_detail')

        last_name = None
        first_name = None
        middle_name = None
        if 'a' in item:
            author = item['a']
            with_title = re.match(r'(.*)\, (.*)\, (.*)', author)
            if with_title:
                last_name = with_title.group(1)
                fore_name = with_title.group(2) + ', ' + with_title.group(3)
            else:
                last_name = re.sub(r'\,.*', '', author)
                fore_name = re.sub(r'.*\, ', '', author)
            if re.search(r' ', fore_name):
                first_name = re.sub(r' .*', '', fore_name)
                middle_name = re.sub(r'.* ', '', fore_name)
            elif re.search(r'^\w\.\w\.', fore_name):
                first_name = re.sub(r'^(\w\.).*', r'\1', fore_name)
                middle_name = re.sub(r'^\w\.', '', fore_name)
            else:
                first_name = fore_name

        affiliation = item.get('u')

        email = None
        if 'm' in item:
            email = item['m'].replace('email:', '')

        orcid = None
        for code in ('j', 'k'):
            # Only a value that really is an ORCID may replace the
            # current one: previously a non-ORCID $k reset an ORCID
            # found in $j back to None.
            if code in item and re.search(r'ORCID:', item[code]):
                orcid = re.sub(r'ORCID:', '', item[code])

        for child_name, text in (('first_name', first_name),
                                 ('middle_name', middle_name),
                                 ('last_name', last_name),
                                 ('affiliation', affiliation),
                                 ('private_email', email),
                                 ('orcid_id', orcid)):
            ET.SubElement(authors_detail, child_name).text = text
def testDate(self):
    """INSPIRE date format

    Checks parse_date() on invalid and valid inputs, then the formatted
    date of record 1.
    """
    from bfe_INSPIRE_date import format_element, parse_date

    # Test parse date function: invalid inputs give a false value...
    invalid_inputs = (None, "", "This is bad input", [1, 2, 4, "test"])
    for bad_input in invalid_inputs:
        self.assert_(not parse_date(bad_input))

    # ...and dates of any precision are parsed, with or without dashes
    valid_inputs = (
        ("2003-05-02", (2003, 5, 2)),
        ("20030502", (2003, 5, 2)),
        ("2003-05", (2003, 5)),
        ("200305", (2003, 5)),
        ("2003", (2003, )),
    )
    for text, expected in valid_inputs:
        self.assert_(parse_date(text) == expected)

    # Expect date from 269__$$c
    self.bfo = BibFormatObject('1')
    rendered = format_element(self.bfo)
    self.assert_(re.search(r'Dec 2010', rendered))
def _get_breaking_news(lang, journal_name): """ Gets the 'Breaking News' articles that are currently active according to start and end dates. """ # CERN Bulletin only if not journal_name.lower() == 'cernbulletin': return '' # Look for active breaking news breaking_news_recids = [recid for recid in search_pattern(p='980__a:BULLETINBREAKING') \ if record_exists(recid) == 1] today = time.mktime(time.localtime()) breaking_news = "" for recid in breaking_news_recids: temp_rec = BibFormatObject(recid) try: end_date = time.mktime( time.strptime(temp_rec.field("925__b"), "%m/%d/%Y")) except: end_date = time.mktime(time.strptime("01/01/1970", "%m/%d/%Y")) if end_date < today: continue try: start_date = time.mktime( time.strptime(temp_rec.field("925__a"), "%m/%d/%Y")) except: start_date = time.mktime(time.strptime("01/01/2050", "%m/%d/%Y")) if start_date > today: continue publish_date = temp_rec.field("269__c") if lang == 'fr': title = temp_rec.field("246_1a") else: title = temp_rec.field("245__a") breaking_news += ''' <h2 class="%s">%s<br/> <strong> <a href="%s/journal/popup?name=%s&type=breaking_news&record=%s&ln=%s" target="_blank">%s</a> </strong> </h2> ''' % ("", publish_date, CFG_SITE_URL, journal_name, recid, lang, title) if breaking_news: breaking_news = '<li>%s</li>' % breaking_news return breaking_news
def format_element(bfo):
    """
    Prints the publication that the dataset complements, as a link
    labelled with its title.

    The parent publication is the record whose ID is stored in 786__w.

    @return: an HTML sentence with the link, or "" when the dataset has
             no (numeric) parent record ID
    """
    from invenio.bibformat_engine import BibFormatObject
    from invenio.config import CFG_BASE_URL, CFG_SITE_RECORD

    try:
        parent_recid = int(bfo.field("786__w"))
    except (TypeError, ValueError):
        # No parent record available: display nothing instead of failing
        return ""

    bfo_parent = BibFormatObject(parent_recid)
    title = bfo_parent.field("245__a")
    url = CFG_BASE_URL + '/' + CFG_SITE_RECORD + '/' + str(bfo_parent.recID)

    out = "This dataset complements the following publication: <br />"
    out += "<a href=\"" + url + "\">" + title + "</a>"
    return out
def create_xml(osti_id, inspire_id): """ The function checks if the OSTI ID should be added to INSPIRE. If so, it builds up that information. """ osti_id = str(osti_id) recid = str(inspire_id) recid = recid.replace('oai:inspirehep.net:', '') search = "001:" + recid result = perform_request_search(p=search, cc='Fermilab') if len(result) != 1: print 'No such INSPIRE record', recid return None create_osti_id_pdf(recid, osti_id) search = "001:" + recid + " 035__a:" + osti_id result = perform_request_search(p=search, cc='Fermilab') if len(result) == 1: return None search = "035__9:osti 035__a:" + str(osti_id) result = perform_request_search(p=search, cc='Fermilab') if len(result) == 1: for item in BibFormatObject(int(recid)).fields('035__'): if item.has_key('9') and item.has_key('a'): if item['9'] == 'OSTI' and item['a'] == osti_id: print 'OSTI ID', osti_id, 'already on', result[0] return None search = "001:" + recid + " -035__9:OSTI" if TEST: print search result = perform_request_search(p=search, cc='Fermilab') if not len(result) == 1: print search, result print 'Problem with', recid, osti_id return False if TEST: print result record = {} record_add_field(record, '001', controlfield_value=str(recid)) new_id = [('a', osti_id), ('9', 'OSTI')] record_add_field(record, '035', '', '', subfields=new_id) #create_osti_id_pdf(recid, osti_id) return print_rec(record)
def get_doi_from_record(recid):
    """
    Given a record ID we fetch it from the DB and return
    the first DOI found as specified by the config variable
    CFG_APSHARVEST_RECORD_DOI_TAG (used without its last character to
    select the fields).

    A field counts as a DOI when its $2 is "DOI" and it has an $a.

    @param recid: record id record containing a DOI
    @type recid: string/int

    @return: first DOI found in record, None when there is none
    @rtype: string
    """
    doi_tag = CFG_APSHARVEST_RECORD_DOI_TAG[:-1]
    for field in BibFormatObject(int(recid)).fields(doi_tag):
        if field.get('2') != "DOI":
            continue
        if 'a' in field:
            # Valid DOI present
            return field['a']
    return None