def prepare_data_cache(self):
        """
        "Index" submission names
        """
        from invenio.websubmit_dblayer import get_categories_of_doctype
        res = run_sql("SELECT sdocname, ldocname FROM sbmDOCTYPE")

        # TODO: only consider submissions that are attached to the tree

        if CFG_CERN_SITE:
            for submission_name, submission_config in CERN_GENSBM_SUBMISSIONS_CONFIG.iteritems(
            ):
                if not submission_config.has_key('redirect'):
                    res += (('GENSBM#' +
                             nmtoken_from_string(cgi.escape(submission_name)),
                             submission_name), )

        cache = {}
        for doctype, submission_name in res:
            ## categories_and_submission_name = ' '.join(get_categories_of_doctype(doctype)) + \
            ##                                  ' ' + submission_name

            # Add submission name info
            if CFG_CERN_SITE and doctype in ('ALIPH', 'BULIS', 'CMSREL',
                                             'BULBN', 'BSA'):
                # These submissions are not interesting here
                continue
            for word in clean_and_split_words_and_stem(submission_name):
                if not word.strip():
                    continue
                if not cache.has_key(word):
                    cache[word] = []
                item = (doctype, submission_name, '*')
                if not item in cache[word]:
                    cache[word].append(item)

            # Add submission categories info
            if CFG_CERN_SITE and doctype in ('CMSPUB', 'CMSCOM', 'CMSCMC',
                                             'ATLPUB', 'ATLCOM', 'ATLCMC',
                                             'LHCBPB', 'LHCPCM', 'LHCBCC'):
                # These categories are not interesting here
                continue
            categories = get_categories_of_doctype(doctype)
            for dummy, category, dummy in categories:
                for word in clean_and_split_words_and_stem(submission_name +
                                                           ' ' + category):
                    if not word.strip():
                        continue
                    if not cache.has_key(word):
                        cache[word] = []
                    item = (doctype, "%s (%s)" % (category, submission_name),
                            category)
                    if not item in cache[word]:
                        cache[word].append(item)

        return cache
    def test_convert_string_to_nmtoken(self):
        """htmlutils - converting string to Nmtoken"""

        # TODO: possibly extend this test to include 'extenders' and
        # 'combining characters' as defined in
        # http://www.w3.org/TR/2000/REC-xml-20001006#NT-Nmtoken

        ascii_str = "".join([chr(i) for i in range(0, 256)])
        nmtoken = nmtoken_from_string(ascii_str)
        for char in nmtoken:
            self.assert_(char in ['.', '-', '_', ':'] or char.isalnum())
Exemple #3
0
    def test_convert_string_to_nmtoken(self):
        """htmlutils - converting string to Nmtoken"""

        # TODO: possibly extend this test to include 'extenders' and
        # 'combining characters' as defined in
        # http://www.w3.org/TR/2000/REC-xml-20001006#NT-Nmtoken

        ascii_str = "".join([chr(i) for i in range(0, 256)])
        nmtoken = nmtoken_from_string(ascii_str)
        for char in nmtoken:
            self.assert_(char in ['.', '-', '_', ':'] or char.isalnum())
    def tmpl_draw_subtitle(self, ln, title, subtitle, guideurl):
        """Draws an html title bar
          - 'title' *string* - The name of the titlebar
          - 'subtitle' *string* - The header name of the subtitle
          - 'guideurl' *string* - The relative url of the guide relative to this section
          """
        _ = gettext_set_language(ln)
        guidetitle = _("See Guide")

        titlebar = """<a name="%s">""" % nmtoken_from_string(title)
        titlebar += """ </a>%s&nbsp;&nbsp;&nbsp;<small>""" % subtitle
        titlebar += """ [<a title="%s" href="%s/%s">?</a>]</small>""" % (guidetitle, CFG_SITE_URL, guideurl)
        return titlebar
Exemple #5
0
    def tmpl_draw_subtitle(self, ln, title, subtitle, guideurl):
        """Draws an html title bar
          - 'title' *string* - The name of the titlebar
          - 'subtitle' *string* - The header name of the subtitle
          - 'guideurl' *string* - The relative url of the guide relative to this section
          """
        _ = gettext_set_language(ln)
        guidetitle = _("See Guide")

        titlebar = """<a name="%s">""" % nmtoken_from_string(title)
        titlebar += """ </a>%s&nbsp;&nbsp;&nbsp;<small>""" % subtitle
        titlebar += """ [<a title="%s" href="%s/%s">?</a>]</small>""" % (guidetitle, CFG_SITE_URL, guideurl)
        return titlebar
    def prepare_data_cache(self):
        """
        "Index" submission names
        """
        from invenio.websubmit_dblayer import get_categories_of_doctype
        res = run_sql("SELECT sdocname, ldocname FROM sbmDOCTYPE")

        # TODO: only consider submissions that are attached to the tree

        if CFG_CERN_SITE:
            for submission_name, submission_config in CERN_GENSBM_SUBMISSIONS_CONFIG.iteritems():
                if not submission_config.has_key('redirect'):
                    res += (('GENSBM#' + nmtoken_from_string(cgi.escape(submission_name)), submission_name),)

        cache = {}
        for doctype, submission_name in res:
            ## categories_and_submission_name = ' '.join(get_categories_of_doctype(doctype)) + \
            ##                                  ' ' + submission_name


            # Add submission name info
            if CFG_CERN_SITE and doctype in ('ALIPH', 'BULIS', 'CMSREL', 'BULBN', 'BSA'):
                # These submissions are not interesting here
                continue
            for word in clean_and_split_words_and_stem(submission_name):
                if not word.strip():
                    continue
                if not cache.has_key(word):
                    cache[word] = []
                item = (doctype, submission_name, '*')
                if not item in cache[word]:
                    cache[word].append(item)

            # Add submission categories info
            if CFG_CERN_SITE and doctype in ('CMSPUB', 'CMSCOM', 'CMSCMC',
                                             'ATLPUB', 'ATLCOM', 'ATLCMC',
                                             'LHCBPB', 'LHCPCM', 'LHCBCC'):
                # These categories are not interesting here
                continue
            categories = get_categories_of_doctype(doctype)
            for dummy, category, dummy in categories:
                for word in clean_and_split_words_and_stem(submission_name + ' ' + category):
                    if not word.strip():
                        continue
                    if not cache.has_key(word):
                        cache[word] = []
                    item = (doctype, "%s (%s)" % (category, submission_name), category)
                    if not item in cache[word]:
                        cache[word].append(item)

        return cache
Exemple #7
0
def display_collection(req, c, aas, verbose, ln, em=""):
    """Display search interface page for collection c by looking
    in the collection cache."""
    _ = gettext_set_language(ln)

    req.argd = drop_default_urlargd({'aas': aas, 'verbose': verbose, 'ln': ln, 'em' : em},
                                    search_interface_default_urlargd)

    if em != "":
        em = em.split(",")
    # get user ID:
    try:
        uid = getUid(req)
        user_preferences = {}
        if uid == -1:
            return page_not_authorized(req, "../",
                text="You are not authorized to view this collection",
                                       navmenuid='search')
        elif uid > 0:
            user_preferences = get_user_preferences(uid)
    except Error:
        register_exception(req=req, alert_admin=True)
        return page(title=_("Internal Error"),
                    body=create_error_box(req, verbose=verbose, ln=ln),
                    description="%s - Internal Error" % CFG_SITE_NAME,
                    keywords="%s, Internal Error" % CFG_SITE_NAME,
                    language=ln,
                    req=req,
                    navmenuid='search')

    # deduce collection id:
    normalised_name = get_coll_normalised_name(c)
    colID = get_colID(normalised_name)
    if type(colID) is not int:
        page_body = '<p>' + (_("Sorry, collection %s does not seem to exist.") % ('<strong>' + str(c) + '</strong>')) + '</p>'
        page_body = '<p>' + (_("You may want to start browsing from %s.") % ('<a href="' + CFG_SITE_URL + '?ln=' + ln + '">' + get_coll_i18nname(CFG_SITE_NAME, ln) + '</a>')) + '</p>'
        if req.header_only:
            raise apache.SERVER_RETURN, apache.HTTP_NOT_FOUND
        return page(title=_("Collection %s Not Found") % cgi.escape(c),
                    body=page_body,
                    description=(CFG_SITE_NAME + ' - ' + _("Not found") + ': ' + cgi.escape(str(c))),
                    keywords="%s" % CFG_SITE_NAME,
                    uid=uid,
                    language=ln,
                    req=req,
                    navmenuid='search')

    if normalised_name != c:
        redirect_to_url(req, normalised_name, apache.HTTP_MOVED_PERMANENTLY)

    # start display:
    req.content_type = "text/html"
    req.send_http_header()

    c_body, c_navtrail, c_portalbox_lt, c_portalbox_rt, c_portalbox_tp, c_portalbox_te, \
        c_last_updated = perform_display_collection(colID, c, aas, ln, em,
                                            user_preferences.get('websearch_helpbox', 1))

    if em == "" or EM_REPOSITORY["body"] in em:
        try:
            title = get_coll_i18nname(c, ln)
        except:
            title = ""
    else:
        title = ""
    show_title_p = True
    body_css_classes = []
    if c == CFG_SITE_NAME:
        # Do not display title on home collection
        show_title_p = False
        body_css_classes.append('home')

    if len(collection_reclist_cache.cache.keys()) == 1:
        # if there is only one collection defined, do not print its
        # title on the page as it would be displayed repetitively.
        show_title_p = False

    if aas == -1:
        show_title_p = False

    if CFG_INSPIRE_SITE == 1:
        # INSPIRE should never show title, but instead use css to
        # style collections
        show_title_p = False
        body_css_classes.append(nmtoken_from_string(c))

    # RSS:
    rssurl = CFG_SITE_URL + '/rss'
    rssurl_params = []
    if c != CFG_SITE_NAME:
        rssurl_params.append('cc=' + quote(c))
    if ln != CFG_SITE_LANG and \
           c in CFG_WEBSEARCH_RSS_I18N_COLLECTIONS:
        rssurl_params.append('ln=' + ln)

    if rssurl_params:
        rssurl += '?' + '&amp;'.join(rssurl_params)

    if 'hb' in CFG_WEBSEARCH_USE_MATHJAX_FOR_FORMATS:
        metaheaderadd = get_mathjax_header(req.is_https())
    else:
        metaheaderadd = ''

    return page(title=title,
                body=c_body,
                navtrail=c_navtrail,
                description="%s - %s" % (CFG_SITE_NAME, c),
                keywords="%s, %s" % (CFG_SITE_NAME, c),
                metaheaderadd=metaheaderadd,
                uid=uid,
                language=ln,
                req=req,
                cdspageboxlefttopadd=c_portalbox_lt,
                cdspageboxrighttopadd=c_portalbox_rt,
                titleprologue=c_portalbox_tp,
                titleepilogue=c_portalbox_te,
                lastupdated=c_last_updated,
                navmenuid='search',
                rssurl=rssurl,
                body_css_classes=body_css_classes,
                show_title_p=show_title_p,
                show_header=em == "" or EM_REPOSITORY["header"] in em,
                show_footer=em == "" or EM_REPOSITORY["footer"] in em)
Exemple #8
0
def display_collection(req, c, aas, verbose, ln, em=""):
    """Display search interface page for collection c by looking
    in the collection cache."""
    _ = gettext_set_language(ln)

    req.argd = drop_default_urlargd({'aas': aas, 'verbose': verbose, 'ln': ln, 'em' : em},
                                    search_interface_default_urlargd)

    if em != "":
        em = em.split(",")
    # get user ID:
    try:
        uid = getUid(req)
        user_preferences = {}
        if uid == -1:
            return page_not_authorized(req, "../",
                text="You are not authorized to view this collection",
                                       navmenuid='search')
        elif uid > 0:
            user_preferences = get_user_preferences(uid)
    except Error:
        register_exception(req=req, alert_admin=True)
        return page(title=_("Internal Error"),
                    body=create_error_box(req, verbose=verbose, ln=ln),
                    description="%s - Internal Error" % CFG_SITE_NAME,
                    keywords="%s, Internal Error" % CFG_SITE_NAME,
                    language=ln,
                    req=req,
                    navmenuid='search')
    # start display:
    req.content_type = "text/html"
    req.send_http_header()
    # deduce collection id:
    colID = get_colID(get_coll_normalised_name(c))
    if type(colID) is not int:
        page_body = '<p>' + (_("Sorry, collection %s does not seem to exist.") % ('<strong>' + str(c) + '</strong>')) + '</p>'
        page_body = '<p>' + (_("You may want to start browsing from %s.") % ('<a href="' + CFG_SITE_URL + '?ln=' + ln + '">' + get_coll_i18nname(CFG_SITE_NAME, ln) + '</a>')) + '</p>'
        if req.header_only:
            raise apache.SERVER_RETURN, apache.HTTP_NOT_FOUND
        return page(title=_("Collection %s Not Found") % cgi.escape(c),
                    body=page_body,
                    description=(CFG_SITE_NAME + ' - ' + _("Not found") + ': ' + cgi.escape(str(c))),
                    keywords="%s" % CFG_SITE_NAME,
                    uid=uid,
                    language=ln,
                    req=req,
                    navmenuid='search')

    c_body, c_navtrail, c_portalbox_lt, c_portalbox_rt, c_portalbox_tp, c_portalbox_te, \
        c_last_updated = perform_display_collection(colID, c, aas, ln, em,
                                            user_preferences.get('websearch_helpbox', 1))

    if em == "" or EM_REPOSITORY["body"] in em:
        try:
            title = get_coll_i18nname(c, ln)
        except:
            title = ""
    else:
        title = ""
    show_title_p = True
    body_css_classes = []
    if c == CFG_SITE_NAME:
        # Do not display title on home collection
        show_title_p = False
        body_css_classes.append('home')

    if len(collection_reclist_cache.cache.keys()) == 1:
        # if there is only one collection defined, do not print its
        # title on the page as it would be displayed repetitively.
        show_title_p = False

    if aas == -1:
        show_title_p = False

    if CFG_INSPIRE_SITE == 1:
        # INSPIRE should never show title, but instead use css to
        # style collections
        show_title_p = False
        body_css_classes.append(nmtoken_from_string(c))

    # RSS:
    rssurl = CFG_SITE_URL + '/rss'
    rssurl_params = []
    if c != CFG_SITE_NAME:
        rssurl_params.append('cc=' + quote(c))
    if ln != CFG_SITE_LANG and \
           c in CFG_WEBSEARCH_RSS_I18N_COLLECTIONS:
        rssurl_params.append('ln=' + ln)

    if rssurl_params:
        rssurl += '?' + '&amp;'.join(rssurl_params)

    if 'hb' in CFG_WEBSEARCH_USE_MATHJAX_FOR_FORMATS:
        metaheaderadd = get_mathjax_header(req.is_https())
    else:
        metaheaderadd = ''

    return page(title=title,
                body=c_body,
                navtrail=c_navtrail,
                description="%s - %s" % (CFG_SITE_NAME, c),
                keywords="%s, %s" % (CFG_SITE_NAME, c),
                metaheaderadd=metaheaderadd,
                uid=uid,
                language=ln,
                req=req,
                cdspageboxlefttopadd=c_portalbox_lt,
                cdspageboxrighttopadd=c_portalbox_rt,
                titleprologue=c_portalbox_tp,
                titleepilogue=c_portalbox_te,
                lastupdated=c_last_updated,
                navmenuid='search',
                rssurl=rssurl,
                body_css_classes=body_css_classes,
                show_title_p=show_title_p,
                show_header=em == "" or EM_REPOSITORY["header"] in em,
                show_footer=em == "" or EM_REPOSITORY["footer"] in em)
def display_collection(req, c, aas, verbose, ln):
    """Display search interface page for collection c by looking
    in the collection cache."""
    _ = gettext_set_language(ln)

    req.argd = drop_default_urlargd({
        'aas': aas,
        'verbose': verbose,
        'ln': ln
    }, search_interface_default_urlargd)

    # get user ID:
    try:
        uid = getUid(req)
        user_preferences = {}
        if uid == -1:
            return page_not_authorized(
                req,
                "../",
                text="You are not authorized to view this collection",
                navmenuid='search')
        elif uid > 0:
            user_preferences = get_user_preferences(uid)
    except Error:
        register_exception(req=req, alert_admin=True)
        return page(title=_("Internal Error"),
                    body=create_error_box(req, verbose=verbose, ln=ln),
                    description="%s - Internal Error" % CFG_SITE_NAME,
                    keywords="%s, Internal Error" % CFG_SITE_NAME,
                    language=ln,
                    req=req,
                    navmenuid='search')
    # start display:
    req.content_type = "text/html"
    req.send_http_header()
    # deduce collection id:
    colID = get_colID(get_coll_normalised_name(c))
    if type(colID) is not int:
        page_body = '<p>' + (
            _("Sorry, collection %s does not seem to exist.") %
            ('<strong>' + str(c) + '</strong>')) + '</p>'
        page_body = '<p>' + (
            _("You may want to start browsing from %s.") %
            ('<a href="' + CFG_SITE_URL + '?ln=' + ln + '">' +
             get_coll_i18nname(CFG_SITE_NAME, ln) + '</a>')) + '</p>'
        if req.header_only:
            raise apache.SERVER_RETURN, apache.HTTP_NOT_FOUND
        return page(title=_("Collection %s Not Found") % cgi.escape(c),
                    body=page_body,
                    description=(CFG_SITE_NAME + ' - ' + _("Not found") +
                                 ': ' + cgi.escape(str(c))),
                    keywords="%s" % CFG_SITE_NAME,
                    uid=uid,
                    language=ln,
                    req=req,
                    navmenuid='search')
    # wash `aas' argument:
    if not os.path.exists("%s/collections/%d/body-as=%d-ln=%s.html" % \
                          (CFG_CACHEDIR, colID, aas, ln)):
        # nonexistent `aas' asked for, fall back to Simple Search:
        aas = 0
    # display collection interface page:
    try:
        filedesc = open("%s/collections/%d/navtrail-as=%d-ln=%s.html" % \
                        (CFG_CACHEDIR, colID, aas, ln), "r")
        c_navtrail = filedesc.read()
        filedesc.close()
    except:
        c_navtrail = ""
    try:
        filedesc = open("%s/collections/%d/body-as=%d-ln=%s.html" % \
                        (CFG_CACHEDIR, colID, aas, ln), "r")
        c_body = filedesc.read()
        filedesc.close()
    except:
        c_body = ""
    try:
        filedesc = open("%s/collections/%d/portalbox-tp-ln=%s.html" % \
                        (CFG_CACHEDIR, colID, ln), "r")
        c_portalbox_tp = filedesc.read()
        filedesc.close()
    except:
        c_portalbox_tp = ""
    try:
        filedesc = open("%s/collections/%d/portalbox-te-ln=%s.html" % \
                        (CFG_CACHEDIR, colID, ln), "r")
        c_portalbox_te = filedesc.read()
        filedesc.close()
    except:
        c_portalbox_te = ""
    try:
        filedesc = open("%s/collections/%d/portalbox-lt-ln=%s.html" % \
                        (CFG_CACHEDIR, colID, ln), "r")
        c_portalbox_lt = filedesc.read()
        filedesc.close()
    except:
        c_portalbox_lt = ""
    try:
        # show help boxes (usually located in "tr", "top right")
        # if users have not banned them in their preferences:
        c_portalbox_rt = ""
        if user_preferences.get('websearch_helpbox', 1) > 0:
            filedesc = open("%s/collections/%d/portalbox-rt-ln=%s.html" % \
                            (CFG_CACHEDIR, colID, ln), "r")
            c_portalbox_rt = filedesc.read()
            filedesc.close()
    except:
        c_portalbox_rt = ""
    try:
        filedesc = open("%s/collections/%d/last-updated-ln=%s.html" % \
                        (CFG_CACHEDIR, colID, ln), "r")
        c_last_updated = filedesc.read()
        filedesc.close()
    except:
        c_last_updated = ""
    try:
        title = get_coll_i18nname(c, ln)
    except:
        title = ""

    show_title_p = True
    body_css_classes = []
    if c == CFG_SITE_NAME:
        # Do not display title on home collection
        show_title_p = False
        body_css_classes.append('home')

    if len(collection_reclist_cache.cache.keys()) == 1:
        # if there is only one collection defined, do not print its
        # title on the page as it would be displayed repetitively.
        show_title_p = False

    if aas == -1:
        show_title_p = False

    if CFG_INSPIRE_SITE == 1:
        # INSPIRE should never show title, but instead use css to
        # style collections
        show_title_p = False
        body_css_classes.append(nmtoken_from_string(c))

    # RSS:
    rssurl = CFG_SITE_URL + '/rss'
    rssurl_params = []
    if c != CFG_SITE_NAME:
        rssurl_params.append('cc=' + quote(c))
    if ln != CFG_SITE_LANG and \
           c in CFG_WEBSEARCH_RSS_I18N_COLLECTIONS:
        rssurl_params.append('ln=' + ln)

    if rssurl_params:
        rssurl += '?' + '&amp;'.join(rssurl_params)

    if 'hb' in CFG_WEBSEARCH_USE_MATHJAX_FOR_FORMATS:
        metaheaderadd = get_mathjax_header(req.is_https())
    else:
        metaheaderadd = ''

    return page(title=title,
                body=c_body,
                navtrail=c_navtrail,
                description="%s - %s" % (CFG_SITE_NAME, c),
                keywords="%s, %s" % (CFG_SITE_NAME, c),
                metaheaderadd=metaheaderadd,
                uid=uid,
                language=ln,
                req=req,
                cdspageboxlefttopadd=c_portalbox_lt,
                cdspageboxrighttopadd=c_portalbox_rt,
                titleprologue=c_portalbox_tp,
                titleepilogue=c_portalbox_te,
                lastupdated=c_last_updated,
                navmenuid='search',
                rssurl=rssurl,
                body_css_classes=body_css_classes,
                show_title_p=show_title_p)