def get_records_status(verbose=False):
    """
    Compare the ADS and the Invenio holdings and classify the bibcodes.

    Returns a tuple (added, modified, deleted):
      * added: bibcodes that are in ADS and not in Invenio (set difference).
      * modified: list of the bibcodes that are both in ADS and in Invenio
        and whose Invenio timestamp (field 995__a) differs from the ADS one.
      * deleted: bibcodes that are in Invenio but not in ADS (set
        difference).

    The process is terminated with exit status 1 as soon as a record
    present on both sides has no timestamp in Invenio.
    """
    printmsg(verbose, "Getting ADS timestamps. \n")
    ads_timestamps = _get_ads_timestamps()
    printmsg(verbose, "Getting ADS bibcodes. \n")
    ads_bibcodes = set(ads_timestamps.keys())
    printmsg(verbose, "Getting Invenio bibcodes. \n")
    invenio_bibcodes = _get_invenio_bibcodes()

    printmsg(verbose, "Deducting the added records. \n")
    added = ads_bibcodes - invenio_bibcodes
    printmsg(verbose, "    %d records to add." % len(added))

    printmsg(verbose, "Deducting the deleted records. \n")
    deleted = invenio_bibcodes - ads_bibcodes
    printmsg(verbose, "    %d records to delete." % len(deleted))

    # Whatever Invenio holds and is not scheduled for deletion also exists
    # in ADS, so its timestamp can be looked up below.
    shared = invenio_bibcodes - deleted
    printmsg(verbose, "Checking timestamps for %d records. \n" % len(shared))

    modified = []
    # TODO: This can probably be sped up by working with chunks of bibcodes
    # instead of single bibcodes.
    for bibcode in shared:
        recid = get_mysql_recid_from_aleph_sysno(bibcode)
        invenio_timestamp = get_fieldvalues(recid, "995__a")

        if not invenio_timestamp:
            # Maybe we could add instead of exiting.
            printmsg(True, "ERROR: Record %s in Invenio does not "
                           "have a timestamp. \n" % bibcode)
            sys.exit(1)

        # NOTE(review): get_fieldvalues() presumably returns a list of
        # values while the ADS timestamp may be a single value; if so this
        # comparison is always true - confirm against both helpers.
        if invenio_timestamp != ads_timestamps[bibcode]:
            modified.append(bibcode)

    printmsg(verbose, "Done.")

    return added, modified, deleted
def _lookup(self, component, path): """ This handler is invoked for the dynamic URLs (for collections and records)""" if component == 'collection': c = '/'.join(path) def answer(req, form): """Accessing collections cached pages.""" # Accessing collections: this is for accessing the # cached page on top of each collection. argd = wash_urlargd(form, search_interface_default_urlargd) # We simply return the cached page of the collection argd['c'] = c if not argd['c']: # collection argument not present; display # home collection by default argd['c'] = CFG_SITE_NAME # Treat `as' argument specially: if argd.has_key('as'): argd['aas'] = argd['as'] del argd['as'] if argd.get('aas', CFG_WEBSEARCH_DEFAULT_SEARCH_INTERFACE) not in CFG_WEBSEARCH_ENABLED_SEARCH_INTERFACES: argd['aas'] = CFG_WEBSEARCH_DEFAULT_SEARCH_INTERFACE return display_collection(req, **argd) return answer, [] elif component == CFG_SITE_RECORD and path and path[0] == 'merge': return WebInterfaceMergePages(), path[1:] elif component == CFG_SITE_RECORD and path and path[0] == 'edit': return WebInterfaceEditPages(), path[1:] elif component == CFG_SITE_RECORD and path and path[0] == 'multiedit': return WebInterfaceMultiEditPages(), path[1:] elif component == CFG_SITE_RECORD and path and path[0] in ('managedocfiles', 'managedocfilesasync'): return WebInterfaceManageDocFilesPages(), path elif component == CFG_SITE_RECORD or component == 'record-restricted': try: if CFG_WEBSEARCH_USE_ALEPH_SYSNOS: # let us try to recognize /<CFG_SITE_RECORD>/<SYSNO> style of URLs: # check for SYSNOs with an embedded slash; needed for [ARXIVINV-15] if len(path) > 1 and get_mysql_recid_from_aleph_sysno(path[0] + "/" + path[1]): path[0] = path[0] + "/" + path[1] del path[1] x = get_mysql_recid_from_aleph_sysno(path[0]) if x: recid = x else: recid = int(path[0]) else: recid = int(path[0]) except IndexError: # display record #1 for URL /CFG_SITE_RECORD without a number recid = 1 except ValueError: if path[0] == '': # display record #1 
for URL /CFG_SITE_RECORD/ without a number recid = 1 else: # display page not found for URLs like /CFG_SITE_RECORD/foo return None, [] from invenio.intbitset import __maxelem__ if recid <= 0 or recid > __maxelem__: # __maxelem__ = 2147483647 # display page not found for URLs like /CFG_SITE_RECORD/-5 or /CFG_SITE_RECORD/0 or /CFG_SITE_RECORD/2147483649 return None, [] format = None tab = '' try: if path[1] in ['', 'files', 'reviews', 'comments', 'usage', 'references', 'citations', 'holdings', 'edit', 'keywords', 'multiedit', 'merge', 'plots', 'linkbacks', 'hepdata']: tab = path[1] elif path[1] == 'export': tab = '' format = path[2] # format = None # elif path[1] in output_formats: # tab = '' # format = path[1] else: # display page not found for URLs like /CFG_SITE_RECORD/references # for a collection where 'references' tabs is not visible return None, [] except IndexError: # Keep normal url if tabs is not specified pass #if component == 'record-restricted': #return WebInterfaceRecordRestrictedPages(recid, tab, format), path[1:] #else: return WebInterfaceRecordPages(recid, tab, format), path[1:] elif component == 'sslredirect': ## Fallback solution for sslredirect special path that should ## be rather implemented as an Apache level redirection def redirecter(req, form): real_url = "http://" + '/'.join(path) redirect_to_url(req, real_url) return redirecter, [] elif component == 'doi': doi = '/'.join(path) def doi_answer(req, form): """Resolve DOI""" argd = wash_urlargd(form, {'verbose': (int, 0),}) return resolve_doi(req, doi, verbose=argd['verbose'], ln=argd['ln']) return doi_answer, [] return None, []
def format_element(bfo, reference_prefix, reference_suffix):
    """
    Prints the references of this record

    Every 999C5 field instance contributes an HTML fragment built from its
    subfields: $o opens a new <li> item, $m is printed as is, $r is linked
    as a report number search (or, on ADS sites, resolved as an external
    sysno) and $t, $v, $y, $p are rendered as publication, volume, year
    and page.

    @param bfo: the format object of the record; only bfo.fields(),
        bfo.kb() and bfo.lang are used here
    @param reference_prefix: a prefix displayed before each reference
    @param reference_suffix: a suffix displayed after each reference
    @return: the HTML of the references, or an empty string when no
        reference produced any output
    """
    from invenio.config import CFG_BASE_URL, CFG_ADS_SITE
    from invenio.search_engine import get_mysql_recid_from_aleph_sysno, \
         print_record

    if CFG_ADS_SITE:
        ## FIXME: store external sysno into 999 $e, not into 999 $r
        # do not escape field values for now because of things like A&A in
        # 999 $r that are going to be resolved further down:
        references = bfo.fields("999C5", escape=0)
    else:
        references = bfo.fields("999C5", escape=1)

    out = ""

    for reference in references:
        ref_out = ''

        if 'o' in reference:
            # A new item starts here: close the previous one, if any.
            if out != "":
                ref_out = '</li>'
            ref_out += "<li><small>" + reference['o'] + "</small> "

        if 'm' in reference:
            ref_out += "<small>" + reference['m'] + "</small> "

        if 'r' in reference:
            if CFG_ADS_SITE:
                # 999 $r contains external sysno to be resolved:
                recid_to_display = get_mysql_recid_from_aleph_sysno(reference['r'])
                if recid_to_display:
                    ref_out += print_record(recid_to_display, 'hs')
                else:
                    ref_out += '<small>' + reference['r'] + ' (not in ADS)</small>'
            else:
                ref_out += '<small> [<a href="' + CFG_BASE_URL + \
                           '/search?f=reportnumber&p=' + reference['r'] + \
                           '&ln=' + bfo.lang + \
                           '">' + reference['r'] + "</a>] </small> <br />"

        if 't' in reference:
            # A non-empty "ejournals" knowledge base answer means that the
            # publication can be linked.
            ejournal = bfo.kb("ejournals", reference['t'])
            if ejournal != "":
                ref_out += ' <small> <a href="https://cds.cern.ch/ejournals.py?publication=' \
                           + reference['t'].replace(" ", "+") \
                           + "&volume=" + reference.get('v', "") \
                           + "&year=" + reference.get('y', "") \
                           + "&page=" + reference.get('p', "").split("-")[0] \
                           + '">'
                ref_out += reference['t'] + ": " + reference.get('v', "") + \
                           " (" + reference.get('y', "") + ") "
                ref_out += reference.get('p', "") + "</a> </small> <br />"
            else:
                ref_out += " <small> " + reference['t'] + reference.get('v', "") + \
                           reference.get('y', "") + reference.get('p', "") + \
                           " </small> <br />"

        # Prefix and suffix only wrap references that printed something.
        if ref_out != '':
            if reference_prefix is not None:
                ref_out = reference_prefix + ref_out
            if reference_suffix is not None:
                ref_out += reference_suffix

        out += ref_out

    if out != '':
        out += '</li>'

    return out
def _lookup(self, component, path): """ This handler is invoked for the dynamic URLs (for collections and records)""" if component == 'collection': c = '/'.join(path) def answer(req, form): """Accessing collections cached pages.""" # Accessing collections: this is for accessing the # cached page on top of each collection. argd = wash_urlargd(form, search_interface_default_urlargd) # We simply return the cached page of the collection argd['c'] = c if not argd['c']: # collection argument not present; display # home collection by default argd['c'] = CFG_SITE_NAME # Treat `as' argument specially: if argd.has_key('as'): argd['aas'] = argd['as'] del argd['as'] return display_collection(req, **argd) return answer, [] elif component == CFG_SITE_RECORD and path and path[0] == 'merge': return WebInterfaceMergePages(), path[1:] elif component == CFG_SITE_RECORD and path and path[0] == 'edit': return WebInterfaceEditPages(), path[1:] elif component == CFG_SITE_RECORD and path and path[0] == 'multiedit': return WebInterfaceMultiEditPages(), path[1:] elif component == CFG_SITE_RECORD and path and path[0] in ('managedocfiles', 'managedocfilesasync'): return WebInterfaceManageDocFilesPages(), path elif component == CFG_SITE_RECORD or component == 'record-restricted': try: if CFG_WEBSEARCH_USE_ALEPH_SYSNOS: # let us try to recognize /<CFG_SITE_RECORD>/<SYSNO> style of URLs: # check for SYSNOs with an embedded slash; needed for [ARXIVINV-15] if len(path) > 1 and get_mysql_recid_from_aleph_sysno(path[0] + "/" + path[1]): path[0] = path[0] + "/" + path[1] del path[1] x = get_mysql_recid_from_aleph_sysno(path[0]) if x: recid = x else: recid = int(path[0]) else: recid = int(path[0]) except IndexError: # display record #1 for URL /CFG_SITE_RECORD without a number recid = 1 except ValueError: if path[0] == '': # display record #1 for URL /CFG_SITE_RECORD/ without a number recid = 1 else: # display page not found for URLs like /CFG_SITE_RECORD/foo return None, [] from invenio.intbitset 
import __maxelem__ if recid <= 0 or recid > __maxelem__: # __maxelem__ = 2147483647 # display page not found for URLs like /CFG_SITE_RECORD/-5 or /CFG_SITE_RECORD/0 or /CFG_SITE_RECORD/2147483649 return None, [] format = None tab = '' try: if path[1] in ['', 'files', 'reviews', 'comments', 'usage', 'references', 'citations', 'holdings', 'edit', 'keywords', 'multiedit', 'merge', 'plots', 'linkbacks']: tab = path[1] elif path[1] == 'export': tab = '' format = path[2] # format = None # elif path[1] in output_formats: # tab = '' # format = path[1] else: # display page not found for URLs like /CFG_SITE_RECORD/references # for a collection where 'references' tabs is not visible return None, [] except IndexError: # Keep normal url if tabs is not specified pass #if component == 'record-restricted': #return WebInterfaceRecordRestrictedPages(recid, tab, format), path[1:] #else: return WebInterfaceRecordPages(recid, tab, format), path[1:] elif component == 'sslredirect': ## Fallback solution for sslredirect special path that should ## be rather implemented as an Apache level redirection def redirecter(req, form): real_url = "http://" + '/'.join(path) redirect_to_url(req, real_url) return redirecter, [] return None, []
def format_element(bfo, reference_prefix, reference_suffix):
    """
    Prints the references of this record

    The 999C5 field instances are rendered one after the other. The
    subfield $o opens a new <li> item, $m is printed as is, $r is linked
    as a report number search (or, on ADS sites, resolved as an external
    sysno) and $t, $v, $y, $p are rendered as publication, volume, year
    and page.

    @param bfo: the format object of the record; only bfo.fields(),
        bfo.kb() and bfo.lang are used here
    @param reference_prefix: a prefix displayed before each reference
    @param reference_suffix: a suffix displayed after each reference
    @return: the HTML of the references, or an empty string when no
        reference produced any output
    """
    from invenio.config import CFG_SITE_URL, CFG_ADS_SITE
    from invenio.search_engine import get_mysql_recid_from_aleph_sysno, \
         print_record

    if CFG_ADS_SITE:
        ## FIXME: store external sysno into 999 $e, not into 999 $r
        # do not escape field values for now because of things like A&A in
        # 999 $r that are going to be resolved further down:
        references = bfo.fields("999C5", escape=0)
    else:
        references = bfo.fields("999C5", escape=1)

    out = ""

    for reference in references:
        # Fragments of the current reference, joined once it is complete.
        pieces = []

        if 'o' in reference:
            # A new item starts here: close the previous one, if any.
            if out != "":
                pieces.append('</li>')
            pieces.append("<li><small>" + reference['o'] + "</small> ")

        if 'm' in reference:
            pieces.append("<small>" + reference['m'] + "</small> ")

        if 'r' in reference:
            if CFG_ADS_SITE:
                # 999 $r contains external sysno to be resolved:
                recid_to_display = get_mysql_recid_from_aleph_sysno(
                    reference['r'])
                if recid_to_display:
                    pieces.append(print_record(recid_to_display, 'hs'))
                else:
                    pieces.append('<small>' + reference['r'] +
                                  ' (not in ADS)</small>')
            else:
                pieces.append('<small> [<a href="' + CFG_SITE_URL +
                              '/search?f=reportnumber&p=' + reference['r'] +
                              '&ln=' + bfo.lang +
                              '">' + reference['r'] + "</a>] </small> <br />")

        if 't' in reference:
            # A non-empty "ejournals" knowledge base answer means that the
            # publication can be linked.
            ejournal = bfo.kb("ejournals", reference['t'])
            if ejournal != "":
                pieces.append(' <small> <a href="https://cdsweb.cern.ch/ejournals.py?publication='
                              + reference['t'].replace(" ", "+")
                              + "&volume=" + reference.get('v', "")
                              + "&year=" + reference.get('y', "")
                              + "&page=" + reference.get('p', "").split("-")[0]
                              + '">')
                pieces.append(reference['t'] + ": " + reference.get('v', "") +
                              " (" + reference.get('y', "") + ") ")
                pieces.append(reference.get('p', "") + "</a> </small> <br />")
            else:
                pieces.append(" <small> " + reference['t'] +
                              reference.get('v', "") + reference.get('y', "") +
                              reference.get('p', "") + " </small> <br />")

        ref_out = ''.join(pieces)

        # Prefix and suffix only wrap references that printed something.
        if ref_out != '':
            if reference_prefix is not None:
                ref_out = reference_prefix + ref_out
            if reference_suffix is not None:
                ref_out += reference_suffix

        out += ref_out

    if out != '':
        out += '</li>'

    return out
def format_element(bfo, reference_prefix, reference_suffix):
    """
    Prints the references of this record

    The 999C5 field instances are rendered one after the other. The
    subfield $o opens a new <li> item, $m is printed as is, $r is linked
    as a report number search (or, on ADS sites, resolved as an external
    sysno) and $t, $v, $y, $p are rendered as publication, volume, year
    and page.

    @param bfo the format object of the record; only bfo.fields(),
        bfo.kb() and bfo.lang are used here
    @param reference_prefix a prefix displayed before each reference
    @param reference_suffix a suffix displayed after each reference
    @return the HTML of the references, or an empty string when no
        reference produced any output
    """
    from invenio.config import CFG_SITE_URL, CFG_ADS_SITE
    from invenio.search_engine import get_mysql_recid_from_aleph_sysno, print_record

    if CFG_ADS_SITE:
        ## FIXME: store external sysno into 999 $e, not into 999 $r
        # do not escape field values for now because of things like A&A in
        # 999 $r that are going to be resolved further down:
        references = bfo.fields("999C5", escape=0)
    else:
        references = bfo.fields("999C5", escape=1)

    out = ""

    for reference in references:
        ref_out = ""

        if "o" in reference:
            # A new item starts here: close the previous one, if any.
            if out != "":
                ref_out = "</li>"
            ref_out += "<li><small>" + reference["o"] + "</small> "

        if "m" in reference:
            ref_out += "<small>" + reference["m"] + "</small> "

        if "r" in reference:
            report = reference["r"]
            if CFG_ADS_SITE:
                # 999 $r contains external sysno to be resolved:
                recid_to_display = get_mysql_recid_from_aleph_sysno(report)
                if recid_to_display:
                    ref_out += print_record(recid_to_display, "hs")
                else:
                    ref_out += "<small>" + report + " (not in ADS)</small>"
            else:
                ref_out += (
                    '<small> [<a href="'
                    + CFG_SITE_URL
                    + "/search?f=reportnumber&p="
                    + report
                    + "&ln="
                    + bfo.lang
                    + '">'
                    + report
                    + "</a>] </small> <br />"
                )

        if "t" in reference:
            title = reference["t"]
            volume = reference.get("v", "")
            year = reference.get("y", "")
            pages = reference.get("p", "")

            # A non-empty "ejournals" knowledge base answer means that the
            # publication can be linked.
            ejournal = bfo.kb("ejournals", title)
            if ejournal != "":
                ref_out += (
                    ' <small> <a href="http://weblib.cern.ch/cgi-bin/ejournals?publication='
                    + title.replace(" ", "+")
                    + "&volume="
                    + volume
                    + "&year="
                    + year
                    + "&page="
                    + pages.split("-")[0]
                    + '">'
                )
                ref_out += title + ": " + volume + " (" + year + ") "
                ref_out += pages + "</a> </small> <br />"
            else:
                ref_out += " <small> " + title + volume + year + pages + " </small> <br />"

        # Prefix and suffix only wrap references that printed something.
        if ref_out != "":
            if reference_prefix is not None:
                ref_out = reference_prefix + ref_out
            if reference_suffix is not None:
                ref_out += reference_suffix

        out += ref_out

    if out != "":
        out += "</li>"

    return out