def get_records_status(verbose=False):
    """
    Compare the bibcodes known to ADS with the ones known to Invenio.

    Returns a 3-tuple (added, modified, deleted):
    * added: set of bibcodes that are in ADS and not in Invenio.
    * modified: list of bibcodes that are in both ADS and Invenio and whose
      Invenio "995__a" value differs from the ADS timestamp.
    * deleted: set of bibcodes that are in Invenio but not in ADS.

    The process is terminated with exit status 1 as soon as a record shared
    by both systems has no "995__a" value in Invenio.
    """
    printmsg(verbose, "Getting ADS timestamps. \n")
    timestamps_by_bibcode = _get_ads_timestamps()
    printmsg(verbose, "Getting ADS bibcodes. \n")
    in_ads = set(timestamps_by_bibcode.keys())
    printmsg(verbose, "Getting Invenio bibcodes. \n")
    in_invenio = _get_invenio_bibcodes()

    printmsg(verbose, "Deducting the added records. \n")
    records_added = in_ads - in_invenio
    printmsg(verbose, "    %d records to add." % len(records_added))
    printmsg(verbose, "Deducting the deleted records. \n")
    records_deleted = in_invenio - in_ads
    printmsg(verbose, "    %d records to delete." % len(records_deleted))

    # Whatever Invenio holds and is not scheduled for deletion exists on
    # both sides, so only those records need a timestamp comparison.
    shared = in_invenio - records_deleted
    printmsg(verbose, "Checking timestamps for %d records. \n" % len(shared))

    # TODO: This can probably be sped up by working with chunks of bibcodes
    # instead of single bibcodes.
    records_modified = []
    for bibcode in shared:
        expected = timestamps_by_bibcode[bibcode]
        recid = get_mysql_recid_from_aleph_sysno(bibcode)
        stored = get_fieldvalues(recid, "995__a")

        if stored:
            # NOTE(review): `stored` presumably is a list of field values;
            # if the ADS timestamps are plain strings this inequality always
            # holds -- confirm against _get_ads_timestamps().
            if stored != expected:
                records_modified.append(bibcode)
            continue

        # Maybe we could add the record instead of exiting.
        printmsg(True, "ERROR: Record %s in Invenio does not have a timestamp. \n" % bibcode)
        sys.exit(1)

    printmsg(verbose, "Done.")

    return records_added, records_modified, records_deleted
Beispiel #2
0
    def _lookup(self, component, path):
        """ This handler is invoked for the dynamic URLs (for
        collections and records)"""

        if component == 'collection':
            c = '/'.join(path)

            def answer(req, form):
                """Accessing collections cached pages."""
                # Accessing collections: this is for accessing the
                # cached page on top of each collection.

                argd = wash_urlargd(form, search_interface_default_urlargd)

                # We simply return the cached page of the collection
                argd['c'] = c

                if not argd['c']:
                    # collection argument not present; display
                    # home collection by default
                    argd['c'] = CFG_SITE_NAME

                # Treat `as' argument specially:
                if argd.has_key('as'):
                    argd['aas'] = argd['as']
                    del argd['as']
                if argd.get('aas', CFG_WEBSEARCH_DEFAULT_SEARCH_INTERFACE) not in CFG_WEBSEARCH_ENABLED_SEARCH_INTERFACES:
                    argd['aas'] = CFG_WEBSEARCH_DEFAULT_SEARCH_INTERFACE

                return display_collection(req, **argd)

            return answer, []


        elif component == CFG_SITE_RECORD and path and path[0] == 'merge':
            return WebInterfaceMergePages(), path[1:]

        elif component == CFG_SITE_RECORD and path and path[0] == 'edit':
            return WebInterfaceEditPages(), path[1:]

        elif component == CFG_SITE_RECORD and path and path[0] == 'multiedit':
            return WebInterfaceMultiEditPages(), path[1:]

        elif component == CFG_SITE_RECORD and path and path[0] in ('managedocfiles', 'managedocfilesasync'):
            return WebInterfaceManageDocFilesPages(), path

        elif component == CFG_SITE_RECORD or component == 'record-restricted':
            try:
                if CFG_WEBSEARCH_USE_ALEPH_SYSNOS:
                    # let us try to recognize /<CFG_SITE_RECORD>/<SYSNO> style of URLs:
                    # check for SYSNOs with an embedded slash; needed for [ARXIVINV-15]
                    if len(path) > 1 and get_mysql_recid_from_aleph_sysno(path[0] + "/" + path[1]):
                        path[0] = path[0] + "/" + path[1]
                        del path[1]
                    x = get_mysql_recid_from_aleph_sysno(path[0])
                    if x:
                        recid = x
                    else:
                        recid = int(path[0])
                else:
                    recid = int(path[0])
            except IndexError:
                # display record #1 for URL /CFG_SITE_RECORD without a number
                recid = 1
            except ValueError:
                if path[0] == '':
                    # display record #1 for URL /CFG_SITE_RECORD/ without a number
                    recid = 1
                else:
                    # display page not found for URLs like /CFG_SITE_RECORD/foo
                    return None, []

            from invenio.intbitset import __maxelem__
            if recid <= 0 or recid > __maxelem__:
                # __maxelem__ = 2147483647
                # display page not found for URLs like /CFG_SITE_RECORD/-5 or /CFG_SITE_RECORD/0 or /CFG_SITE_RECORD/2147483649
                return None, []

            format = None
            tab = ''
            try:
                if path[1] in ['', 'files', 'reviews', 'comments', 'usage',
                               'references', 'citations', 'holdings', 'edit',
                               'keywords', 'multiedit', 'merge', 'plots',
                               'linkbacks', 'hepdata']:
                    tab = path[1]
                elif path[1] == 'export':
                    tab = ''
                    format = path[2]
#                    format = None
#                elif path[1] in output_formats:
#                    tab = ''
#                    format = path[1]
                else:
                    # display page not found for URLs like /CFG_SITE_RECORD/references
                    # for a collection where 'references' tabs is not visible
                    return None, []

            except IndexError:
                # Keep normal url if tabs is not specified
                pass

            #if component == 'record-restricted':
                #return WebInterfaceRecordRestrictedPages(recid, tab, format), path[1:]
            #else:
            return WebInterfaceRecordPages(recid, tab, format), path[1:]
        elif component == 'sslredirect':
            ## Fallback solution for sslredirect special path that should
            ## be rather implemented as an Apache level redirection
            def redirecter(req, form):
                real_url = "http://" + '/'.join(path)
                redirect_to_url(req, real_url)
            return redirecter, []
        elif component == 'doi':
            doi = '/'.join(path)
            def doi_answer(req, form):
                """Resolve DOI"""
                argd = wash_urlargd(form, {'verbose': (int, 0),})
                return resolve_doi(req, doi, verbose=argd['verbose'], ln=argd['ln'])

            return doi_answer, []

        return None, []
Beispiel #3
0
def format_element(bfo, reference_prefix, reference_suffix):
    """
    Prints the references of this record

    @param bfo: object giving access to the record fields (bfo.fields), the
        knowledge bases (bfo.kb) and the current language (bfo.lang)
    @param reference_prefix: a prefix displayed before each reference
    @param reference_suffix: a suffix displayed after each reference
    @return: the HTML built for the references; an empty string when no
        reference produced any output
    """
    from invenio.config import CFG_BASE_URL, CFG_ADS_SITE
    from invenio.search_engine import get_mysql_recid_from_aleph_sysno, \
         print_record

    if CFG_ADS_SITE:
        ## FIXME: store external sysno into 999 $e, not into 999 $r
        # do not escape field values for now because of things like A&A in
        # 999 $r that are going to be resolved further down:
        # NOTE(review): with escape=0 the other subfields ($o, $m, $t, ...)
        # also reach the output unescaped -- confirm this is acceptable.
        references = bfo.fields("999C5", escape=0)
    else:
        references = bfo.fields("999C5", escape=1)

    # Non-empty chunks emitted so far; joined once at the end instead of
    # growing a string with repeated concatenation.
    pieces = []

    for reference in references:
        ref_out = ''

        if 'o' in reference:
            # $o opens a new list item; close the previous one only when
            # something has already been emitted.
            if pieces:
                ref_out = '</li>'
            ref_out += "<li><small>" + reference['o'] + "</small> "

        if 'm' in reference:
            ref_out += "<small>" + reference['m'] + "</small> "

        if 'r' in reference:
            if CFG_ADS_SITE:
                # 999 $r contains external sysno to be resolved:
                recid_to_display = get_mysql_recid_from_aleph_sysno(reference['r'])
                if recid_to_display:
                    ref_out += print_record(recid_to_display, 'hs')
                else:
                    ref_out += '<small>' + reference['r'] + ' (not in ADS)</small>'
            else:
                ref_out += '<small> [<a href="' + CFG_BASE_URL + \
                           '/search?f=reportnumber&amp;p=' + reference['r'] + \
                           '&amp;ln=' + bfo.lang + \
                           '">' + reference['r'] + "</a>] </small> <br />"

        if 't' in reference:
            title = reference['t']
            volume = reference.get('v', "")
            year = reference.get('y', "")
            pages = reference.get('p', "")
            ejournal = bfo.kb("ejournals", title)
            if ejournal != "":
                # Link to the e-journal; only the first page of a page
                # range is used in the URL.
                ref_out += ' <small> <a href="https://cds.cern.ch/ejournals.py?publication=' + \
                           title.replace(" ", "+") + \
                           "&amp;volume=" + volume + "&amp;year=" + year + \
                           "&amp;page=" + pages.split("-")[0] + '">'
                ref_out += title + ": " + volume + " (" + year + ") "
                ref_out += pages + "</a> </small> <br />"
            else:
                ref_out += " <small> " + title + volume + year + pages + \
                           " </small> <br />"

        if ref_out != '':
            # Prefix and suffix are applied only to references that
            # produced output.
            if reference_prefix is not None:
                ref_out = reference_prefix + ref_out
            if reference_suffix is not None:
                ref_out += reference_suffix
            pieces.append(ref_out)

    if pieces:
        pieces.append('</li>')

    return ''.join(pieces)
Beispiel #4
0
    def _lookup(self, component, path):
        """ This handler is invoked for the dynamic URLs (for
        collections and records)"""

        if component == 'collection':
            c = '/'.join(path)

            def answer(req, form):
                """Accessing collections cached pages."""
                # Accessing collections: this is for accessing the
                # cached page on top of each collection.

                argd = wash_urlargd(form, search_interface_default_urlargd)

                # We simply return the cached page of the collection
                argd['c'] = c

                if not argd['c']:
                    # collection argument not present; display
                    # home collection by default
                    argd['c'] = CFG_SITE_NAME

                # Treat `as' argument specially:
                if argd.has_key('as'):
                    argd['aas'] = argd['as']
                    del argd['as']

                return display_collection(req, **argd)

            return answer, []


        elif component == CFG_SITE_RECORD and path and path[0] == 'merge':
            return WebInterfaceMergePages(), path[1:]

        elif component == CFG_SITE_RECORD and path and path[0] == 'edit':
            return WebInterfaceEditPages(), path[1:]

        elif component == CFG_SITE_RECORD and path and path[0] == 'multiedit':
            return WebInterfaceMultiEditPages(), path[1:]

        elif component == CFG_SITE_RECORD and path and path[0] in ('managedocfiles', 'managedocfilesasync'):
            return WebInterfaceManageDocFilesPages(), path

        elif component == CFG_SITE_RECORD or component == 'record-restricted':
            try:
                if CFG_WEBSEARCH_USE_ALEPH_SYSNOS:
                    # let us try to recognize /<CFG_SITE_RECORD>/<SYSNO> style of URLs:
                    # check for SYSNOs with an embedded slash; needed for [ARXIVINV-15]
                    if len(path) > 1 and get_mysql_recid_from_aleph_sysno(path[0] + "/" + path[1]):
                        path[0] = path[0] + "/" + path[1]
                        del path[1]
                    x = get_mysql_recid_from_aleph_sysno(path[0])
                    if x:
                        recid = x
                    else:
                        recid = int(path[0])
                else:
                    recid = int(path[0])
            except IndexError:
                # display record #1 for URL /CFG_SITE_RECORD without a number
                recid = 1
            except ValueError:
                if path[0] == '':
                    # display record #1 for URL /CFG_SITE_RECORD/ without a number
                    recid = 1
                else:
                    # display page not found for URLs like /CFG_SITE_RECORD/foo
                    return None, []

            from invenio.intbitset import __maxelem__
            if recid <= 0 or recid > __maxelem__:
                # __maxelem__ = 2147483647
                # display page not found for URLs like /CFG_SITE_RECORD/-5 or /CFG_SITE_RECORD/0 or /CFG_SITE_RECORD/2147483649
                return None, []

            format = None
            tab = ''
            try:
                if path[1] in ['', 'files', 'reviews', 'comments', 'usage',
                               'references', 'citations', 'holdings', 'edit',
                               'keywords', 'multiedit', 'merge', 'plots', 'linkbacks']:
                    tab = path[1]
                elif path[1] == 'export':
                    tab = ''
                    format = path[2]
#                    format = None
#                elif path[1] in output_formats:
#                    tab = ''
#                    format = path[1]
                else:
                    # display page not found for URLs like /CFG_SITE_RECORD/references
                    # for a collection where 'references' tabs is not visible
                    return None, []

            except IndexError:
                # Keep normal url if tabs is not specified
                pass

            #if component == 'record-restricted':
                #return WebInterfaceRecordRestrictedPages(recid, tab, format), path[1:]
            #else:
            return WebInterfaceRecordPages(recid, tab, format), path[1:]
        elif component == 'sslredirect':
            ## Fallback solution for sslredirect special path that should
            ## be rather implemented as an Apache level redirection
            def redirecter(req, form):
                real_url = "http://" + '/'.join(path)
                redirect_to_url(req, real_url)
            return redirecter, []

        return None, []
Beispiel #5
0
def format_element(bfo, reference_prefix, reference_suffix):
    """
    Prints the references of this record

    @param bfo: object giving access to the record fields (bfo.fields), the
        knowledge bases (bfo.kb) and the current language (bfo.lang)
    @param reference_prefix: a prefix displayed before each reference
    @param reference_suffix: a suffix displayed after each reference
    @return: the HTML built for the references; an empty string when no
        reference produced any output
    """
    from invenio.config import CFG_SITE_URL, CFG_ADS_SITE
    from invenio.search_engine import get_mysql_recid_from_aleph_sysno, \
         print_record

    if CFG_ADS_SITE:
        ## FIXME: store external sysno into 999 $e, not into 999 $r
        # do not escape field values for now because of things like A&A in
        # 999 $r that are going to be resolved further down:
        # NOTE(review): with escape=0 the other subfields ($o, $m, $t, ...)
        # also reach the output unescaped -- confirm this is acceptable.
        references = bfo.fields("999C5", escape=0)
    else:
        references = bfo.fields("999C5", escape=1)

    # Non-empty chunks emitted so far; joined once at the end instead of
    # growing a string with repeated concatenation.
    pieces = []

    for reference in references:
        ref_out = ''

        if 'o' in reference:
            # $o opens a new list item; close the previous one only when
            # something has already been emitted.
            if pieces:
                ref_out = '</li>'
            ref_out += "<li><small>" + reference['o'] + "</small> "

        if 'm' in reference:
            ref_out += "<small>" + reference['m'] + "</small> "

        if 'r' in reference:
            if CFG_ADS_SITE:
                # 999 $r contains external sysno to be resolved:
                recid_to_display = get_mysql_recid_from_aleph_sysno(
                    reference['r'])
                if recid_to_display:
                    ref_out += print_record(recid_to_display, 'hs')
                else:
                    ref_out += '<small>' + reference['r'] + ' (not in ADS)</small>'
            else:
                ref_out += '<small> [<a href="' + CFG_SITE_URL + \
                           '/search?f=reportnumber&amp;p=' + reference['r'] + \
                           '&amp;ln=' + bfo.lang + \
                           '">' + reference['r'] + "</a>] </small> <br />"

        if 't' in reference:
            title = reference['t']
            volume = reference.get('v', "")
            year = reference.get('y', "")
            pages = reference.get('p', "")
            ejournal = bfo.kb("ejournals", title)
            if ejournal != "":
                # Link to the e-journal; only the first page of a page
                # range is used in the URL.
                ref_out += ' <small> <a href="https://cdsweb.cern.ch/ejournals.py?publication=' + \
                           title.replace(" ", "+") + \
                           "&amp;volume=" + volume + "&amp;year=" + year + \
                           "&amp;page=" + pages.split("-")[0] + '">'
                ref_out += title + ": " + volume + " (" + year + ") "
                ref_out += pages + "</a> </small> <br />"
            else:
                ref_out += " <small> " + title + volume + year + pages + \
                           " </small> <br />"

        if ref_out != '':
            # Prefix and suffix are applied only to references that
            # produced output.
            if reference_prefix is not None:
                ref_out = reference_prefix + ref_out
            if reference_suffix is not None:
                ref_out += reference_suffix
            pieces.append(ref_out)

    if pieces:
        pieces.append('</li>')

    return ''.join(pieces)
Beispiel #6
0
def format_element(bfo, reference_prefix, reference_suffix):
    """
    Prints the references of this record

    @param bfo: object giving access to the record fields (bfo.fields), the
        knowledge bases (bfo.kb) and the current language (bfo.lang)
    @param reference_prefix: a prefix displayed before each reference
    @param reference_suffix: a suffix displayed after each reference
    @return: the HTML built for the references; an empty string when no
        reference produced any output
    """
    from invenio.config import CFG_SITE_URL, CFG_ADS_SITE
    from invenio.search_engine import get_mysql_recid_from_aleph_sysno, print_record

    if CFG_ADS_SITE:
        ## FIXME: store external sysno into 999 $e, not into 999 $r
        # do not escape field values for now because of things like A&A in
        # 999 $r that are going to be resolved further down:
        # NOTE(review): with escape=0 the other subfields ($o, $m, $t, ...)
        # also reach the output unescaped -- confirm this is acceptable.
        references = bfo.fields("999C5", escape=0)
    else:
        references = bfo.fields("999C5", escape=1)

    # Non-empty chunks emitted so far; joined once at the end instead of
    # growing a string with repeated concatenation.
    pieces = []

    for reference in references:
        ref_out = ""

        if "o" in reference:
            # $o opens a new list item; close the previous one only when
            # something has already been emitted.
            if pieces:
                ref_out = "</li>"
            ref_out += "<li><small>" + reference["o"] + "</small> "

        if "m" in reference:
            ref_out += "<small>" + reference["m"] + "</small> "

        if "r" in reference:
            if CFG_ADS_SITE:
                # 999 $r contains external sysno to be resolved:
                recid_to_display = get_mysql_recid_from_aleph_sysno(reference["r"])
                if recid_to_display:
                    ref_out += print_record(recid_to_display, "hs")
                else:
                    ref_out += "<small>" + reference["r"] + " (not in ADS)</small>"
            else:
                ref_out += (
                    '<small> [<a href="'
                    + CFG_SITE_URL
                    + "/search?f=reportnumber&amp;p="
                    + reference["r"]
                    + "&amp;ln="
                    + bfo.lang
                    + '">'
                    + reference["r"]
                    + "</a>] </small> <br />"
                )

        if "t" in reference:
            title = reference["t"]
            volume = reference.get("v", "")
            year = reference.get("y", "")
            pages = reference.get("p", "")
            ejournal = bfo.kb("ejournals", title)
            if ejournal != "":
                # Link to the e-journal; only the first page of a page
                # range is used in the URL.
                ref_out += (
                    ' <small> <a href="http://weblib.cern.ch/cgi-bin/ejournals?publication='
                    + title.replace(" ", "+")
                    + "&amp;volume="
                    + volume
                    + "&amp;year="
                    + year
                    + "&amp;page="
                    + pages.split("-")[0]
                    + '">'
                )
                ref_out += title + ": " + volume + " (" + year + ") "
                ref_out += pages + "</a> </small> <br />"
            else:
                ref_out += " <small> " + title + volume + year + pages + " </small> <br />"

        if ref_out != "":
            # Prefix and suffix are applied only to references that
            # produced output.
            if reference_prefix is not None:
                ref_out = reference_prefix + ref_out
            if reference_suffix is not None:
                ref_out += reference_suffix
            pieces.append(ref_out)

    if pieces:
        pieces.append("</li>")

    return "".join(pieces)