def prepare_data_cache(self):
    """
    "Index" submission names

    Build a dictionary that maps every non-blank word returned by
    clean_and_split_words_and_stem() to the list of submissions it
    belongs to.  Each list entry is a (doctype, label, category) tuple:
      - submission name entry: (doctype, submission_name, '*')
      - category entry: (doctype, "category (submission_name)", category)

    @return: the word -> list of tuples dictionary
    """
    from invenio.websubmit_dblayer import get_categories_of_doctype

    def _index(cache, text, item):
        """Register item once under each non-blank word found in text."""
        for word in clean_and_split_words_and_stem(text):
            if not word.strip():
                continue
            items = cache.setdefault(word, [])
            if item not in items:
                items.append(item)

    res = run_sql("SELECT sdocname, ldocname FROM sbmDOCTYPE")
    # TODO: only consider submissions that are attached to the tree
    if CFG_CERN_SITE:
        for submission_name, submission_config in CERN_GENSBM_SUBMISSIONS_CONFIG.iteritems():
            if 'redirect' not in submission_config:
                res += (('GENSBM#' + nmtoken_from_string(cgi.escape(submission_name)),
                         submission_name),)

    # Doctypes that are skipped on CERN sites only
    ignored_submissions = ('ALIPH', 'BULIS', 'CMSREL', 'BULBN', 'BSA')
    ignored_categories = ('CMSPUB', 'CMSCOM', 'CMSCMC', 'ATLPUB', 'ATLCOM',
                          'ATLCMC', 'LHCBPB', 'LHCPCM', 'LHCBCC')

    cache = {}
    for doctype, submission_name in res:
        # Add submission name info
        if CFG_CERN_SITE and doctype in ignored_submissions:
            # These submissions are not interesting here
            continue
        _index(cache, submission_name, (doctype, submission_name, '*'))

        # Add submission categories info
        if CFG_CERN_SITE and doctype in ignored_categories:
            # These categories are not interesting here
            continue
        categories = get_categories_of_doctype(doctype)
        for dummy_first, category, dummy_last in categories:
            _index(cache,
                   submission_name + ' ' + category,
                   (doctype, "%s (%s)" % (category, submission_name), category))

    return cache
def test_convert_string_to_nmtoken(self):
    """htmlutils - converting string to Nmtoken"""
    # TODO: possibly extend this test to include 'extenders' and
    # 'combining characters' as defined in
    # http://www.w3.org/TR/2000/REC-xml-20001006#NT-Nmtoken

    # Feed every character with code 0..255 through the converter and
    # check that only '.', '-', '_', ':' or alphanumerics come out.
    ascii_str = "".join([chr(i) for i in range(0, 256)])
    nmtoken = nmtoken_from_string(ascii_str)
    for char in nmtoken:
        # assertTrue replaces the deprecated assert_ alias; the message
        # names the offending character when the check fails.
        self.assertTrue(char in ['.', '-', '_', ':'] or char.isalnum(),
                        "unexpected character %r in Nmtoken" % char)
def tmpl_draw_subtitle(self, ln, title, subtitle, guideurl):
    """Draws an html title bar

      - 'ln' *string* - The language used to translate the guide link title

      - 'title' *string* - The name of the titlebar (turned into the anchor name)

      - 'subtitle' *string* - The header name of the subtitle

      - 'guideurl' *string* - The relative url of the guide relative to this section

    Returns the HTML fragment as a string.
    """
    _ = gettext_set_language(ln)
    guide_label = _("See Guide")

    # Named anchor derived from the title, then the visible subtitle,
    # then the small "[?]" link pointing to the guide.
    anchor_part = """<a name="%s">""" % nmtoken_from_string(title)
    heading_part = """ </a>%s <small>""" % subtitle
    guide_part = """ [<a title="%s" href="%s/%s">?</a>]</small>""" % (guide_label, CFG_SITE_URL, guideurl)

    return anchor_part + heading_part + guide_part
def prepare_data_cache(self):
    """
    "Index" submission names

    Build a dictionary that maps every non-blank word returned by
    clean_and_split_words_and_stem() to the list of submissions it
    belongs to.  Each list entry is a (doctype, label, category) tuple:
      - submission name entry: (doctype, submission_name, '*')
      - category entry: (doctype, "category (submission_name)", category)

    @return: the word -> list of tuples dictionary
    """
    from invenio.websubmit_dblayer import get_categories_of_doctype

    def _index(cache, text, item):
        """Register item once under each non-blank word found in text."""
        for word in clean_and_split_words_and_stem(text):
            if not word.strip():
                continue
            items = cache.setdefault(word, [])
            if item not in items:
                items.append(item)

    res = run_sql("SELECT sdocname, ldocname FROM sbmDOCTYPE")
    # TODO: only consider submissions that are attached to the tree
    if CFG_CERN_SITE:
        for submission_name, submission_config in CERN_GENSBM_SUBMISSIONS_CONFIG.iteritems():
            if 'redirect' not in submission_config:
                res += (('GENSBM#' + nmtoken_from_string(cgi.escape(submission_name)),
                         submission_name),)

    # Doctypes that are skipped on CERN sites only
    ignored_submissions = ('ALIPH', 'BULIS', 'CMSREL', 'BULBN', 'BSA')
    ignored_categories = ('CMSPUB', 'CMSCOM', 'CMSCMC', 'ATLPUB', 'ATLCOM',
                          'ATLCMC', 'LHCBPB', 'LHCPCM', 'LHCBCC')

    cache = {}
    for doctype, submission_name in res:
        # Add submission name info
        if CFG_CERN_SITE and doctype in ignored_submissions:
            # These submissions are not interesting here
            continue
        _index(cache, submission_name, (doctype, submission_name, '*'))

        # Add submission categories info
        if CFG_CERN_SITE and doctype in ignored_categories:
            # These categories are not interesting here
            continue
        categories = get_categories_of_doctype(doctype)
        for dummy_first, category, dummy_last in categories:
            _index(cache,
                   submission_name + ' ' + category,
                   (doctype, "%s (%s)" % (category, submission_name), category))

    return cache
def display_collection(req, c, aas, verbose, ln, em=""):
    """Display search interface page for collection c by looking
    in the collection cache.

    @param req: request object; its argd and content_type attributes are
        set here and its HTTP header is sent before the page is built
    @param c: name of the collection to display
    @param aas: value handed to perform_display_collection(); the page
        title is hidden when it equals -1
    @param verbose: verbosity handed to create_error_box() on errors
    @param ln: language handed to gettext_set_language() and page()
    @param em: comma-separated string of page elements to show, matched
        against EM_REPOSITORY; the empty string shows everything
    @return: the output of page(), or of page_not_authorized() when
        getUid() returns -1
    @raise apache.SERVER_RETURN: with apache.HTTP_NOT_FOUND when the
        collection is unknown and req.header_only is set
    """
    _ = gettext_set_language(ln)

    req.argd = drop_default_urlargd({'aas': aas, 'verbose': verbose, 'ln': ln, 'em' : em},
                                    search_interface_default_urlargd)

    if em != "":
        em = em.split(",")

    # get user ID:
    try:
        uid = getUid(req)
        user_preferences = {}
        if uid == -1:
            return page_not_authorized(req, "../",
                                       text="You are not authorized to view this collection",
                                       navmenuid='search')
        elif uid > 0:
            user_preferences = get_user_preferences(uid)
    except Error:
        register_exception(req=req, alert_admin=True)
        return page(title=_("Internal Error"),
                    body=create_error_box(req, verbose=verbose, ln=ln),
                    description="%s - Internal Error" % CFG_SITE_NAME,
                    keywords="%s, Internal Error" % CFG_SITE_NAME,
                    language=ln,
                    req=req,
                    navmenuid='search')

    # deduce collection id:
    normalised_name = get_coll_normalised_name(c)
    colID = get_colID(normalised_name)
    if type(colID) is not int:
        # Escape the caller-supplied name before embedding it in HTML.
        escaped_name = cgi.escape(str(c))
        page_body = '<p>' + (_("Sorry, collection %s does not seem to exist.") % \
                             ('<strong>' + escaped_name + '</strong>')) + '</p>'
        # Append (not overwrite) so that both sentences are displayed.
        page_body += '<p>' + (_("You may want to start browsing from %s.") % \
                              ('<a href="' + CFG_SITE_URL + '?ln=' + ln + '">' + \
                               get_coll_i18nname(CFG_SITE_NAME, ln) + '</a>')) + '</p>'
        if req.header_only:
            # Call form is equivalent to "raise cls, value" and is valid
            # in every Python version.
            raise apache.SERVER_RETURN(apache.HTTP_NOT_FOUND)
        return page(title=_("Collection %s Not Found") % cgi.escape(c),
                    body=page_body,
                    description=(CFG_SITE_NAME + ' - ' + _("Not found") + ': ' + cgi.escape(str(c))),
                    keywords="%s" % CFG_SITE_NAME,
                    uid=uid,
                    language=ln,
                    req=req,
                    navmenuid='search')

    # Send callers using a non-normalised name to the normalised one.
    if normalised_name != c:
        redirect_to_url(req, normalised_name, apache.HTTP_MOVED_PERMANENTLY)

    # start display:
    req.content_type = "text/html"
    req.send_http_header()

    c_body, c_navtrail, c_portalbox_lt, c_portalbox_rt, c_portalbox_tp, c_portalbox_te, \
        c_last_updated = perform_display_collection(colID, c, aas, ln, em,
                                                    user_preferences.get('websearch_helpbox', 1))

    title = ""
    if em == "" or EM_REPOSITORY["body"] in em:
        try:
            title = get_coll_i18nname(c, ln)
        except Exception:
            # best effort: fall back to an empty title
            title = ""

    show_title_p = True
    body_css_classes = []
    if c == CFG_SITE_NAME:
        # Do not display title on home collection
        show_title_p = False
        body_css_classes.append('home')

    if len(collection_reclist_cache.cache.keys()) == 1:
        # if there is only one collection defined, do not print its
        # title on the page as it would be displayed repetitively.
        show_title_p = False

    if aas == -1:
        show_title_p = False

    if CFG_INSPIRE_SITE == 1:
        # INSPIRE should never show title, but instead use css to
        # style collections
        show_title_p = False
        body_css_classes.append(nmtoken_from_string(c))

    # RSS:
    rssurl = CFG_SITE_URL + '/rss'
    rssurl_params = []
    if c != CFG_SITE_NAME:
        rssurl_params.append('cc=' + quote(c))
    if ln != CFG_SITE_LANG and \
           c in CFG_WEBSEARCH_RSS_I18N_COLLECTIONS:
        rssurl_params.append('ln=' + ln)

    if rssurl_params:
        rssurl += '?' + '&amp;'.join(rssurl_params)

    if 'hb' in CFG_WEBSEARCH_USE_MATHJAX_FOR_FORMATS:
        metaheaderadd = get_mathjax_header(req.is_https())
    else:
        metaheaderadd = ''

    return page(title=title,
                body=c_body,
                navtrail=c_navtrail,
                description="%s - %s" % (CFG_SITE_NAME, c),
                keywords="%s, %s" % (CFG_SITE_NAME, c),
                metaheaderadd=metaheaderadd,
                uid=uid,
                language=ln,
                req=req,
                cdspageboxlefttopadd=c_portalbox_lt,
                cdspageboxrighttopadd=c_portalbox_rt,
                titleprologue=c_portalbox_tp,
                titleepilogue=c_portalbox_te,
                lastupdated=c_last_updated,
                navmenuid='search',
                rssurl=rssurl,
                body_css_classes=body_css_classes,
                show_title_p=show_title_p,
                show_header=em == "" or EM_REPOSITORY["header"] in em,
                show_footer=em == "" or EM_REPOSITORY["footer"] in em)
def display_collection(req, c, aas, verbose, ln, em=""):
    """Display search interface page for collection c by looking
    in the collection cache.

    @param req: request object; its argd and content_type attributes are
        set here and its HTTP header is sent before the collection is
        looked up
    @param c: name of the collection to display
    @param aas: value handed to perform_display_collection(); the page
        title is hidden when it equals -1
    @param verbose: verbosity handed to create_error_box() on errors
    @param ln: language handed to gettext_set_language() and page()
    @param em: comma-separated string of page elements to show, matched
        against EM_REPOSITORY; the empty string shows everything
    @return: the output of page(), or of page_not_authorized() when
        getUid() returns -1
    @raise apache.SERVER_RETURN: with apache.HTTP_NOT_FOUND when the
        collection is unknown and req.header_only is set
    """
    _ = gettext_set_language(ln)

    req.argd = drop_default_urlargd({'aas': aas, 'verbose': verbose, 'ln': ln, 'em' : em},
                                    search_interface_default_urlargd)

    if em != "":
        em = em.split(",")

    # get user ID:
    try:
        uid = getUid(req)
        user_preferences = {}
        if uid == -1:
            return page_not_authorized(req, "../",
                                       text="You are not authorized to view this collection",
                                       navmenuid='search')
        elif uid > 0:
            user_preferences = get_user_preferences(uid)
    except Error:
        register_exception(req=req, alert_admin=True)
        return page(title=_("Internal Error"),
                    body=create_error_box(req, verbose=verbose, ln=ln),
                    description="%s - Internal Error" % CFG_SITE_NAME,
                    keywords="%s, Internal Error" % CFG_SITE_NAME,
                    language=ln,
                    req=req,
                    navmenuid='search')

    # start display:
    req.content_type = "text/html"
    req.send_http_header()

    # deduce collection id:
    colID = get_colID(get_coll_normalised_name(c))
    if type(colID) is not int:
        # Escape the caller-supplied name before embedding it in HTML.
        escaped_name = cgi.escape(str(c))
        page_body = '<p>' + (_("Sorry, collection %s does not seem to exist.") % \
                             ('<strong>' + escaped_name + '</strong>')) + '</p>'
        # Append (not overwrite) so that both sentences are displayed.
        page_body += '<p>' + (_("You may want to start browsing from %s.") % \
                              ('<a href="' + CFG_SITE_URL + '?ln=' + ln + '">' + \
                               get_coll_i18nname(CFG_SITE_NAME, ln) + '</a>')) + '</p>'
        if req.header_only:
            # NOTE(review): the HTTP header has already been sent above,
            # so this status may not reach the client -- confirm.
            # Call form is equivalent to "raise cls, value" and is valid
            # in every Python version.
            raise apache.SERVER_RETURN(apache.HTTP_NOT_FOUND)
        return page(title=_("Collection %s Not Found") % cgi.escape(c),
                    body=page_body,
                    description=(CFG_SITE_NAME + ' - ' + _("Not found") + ': ' + cgi.escape(str(c))),
                    keywords="%s" % CFG_SITE_NAME,
                    uid=uid,
                    language=ln,
                    req=req,
                    navmenuid='search')

    c_body, c_navtrail, c_portalbox_lt, c_portalbox_rt, c_portalbox_tp, c_portalbox_te, \
        c_last_updated = perform_display_collection(colID, c, aas, ln, em,
                                                    user_preferences.get('websearch_helpbox', 1))

    title = ""
    if em == "" or EM_REPOSITORY["body"] in em:
        try:
            title = get_coll_i18nname(c, ln)
        except Exception:
            # best effort: fall back to an empty title
            title = ""

    show_title_p = True
    body_css_classes = []
    if c == CFG_SITE_NAME:
        # Do not display title on home collection
        show_title_p = False
        body_css_classes.append('home')

    if len(collection_reclist_cache.cache.keys()) == 1:
        # if there is only one collection defined, do not print its
        # title on the page as it would be displayed repetitively.
        show_title_p = False

    if aas == -1:
        show_title_p = False

    if CFG_INSPIRE_SITE == 1:
        # INSPIRE should never show title, but instead use css to
        # style collections
        show_title_p = False
        body_css_classes.append(nmtoken_from_string(c))

    # RSS:
    rssurl = CFG_SITE_URL + '/rss'
    rssurl_params = []
    if c != CFG_SITE_NAME:
        rssurl_params.append('cc=' + quote(c))
    if ln != CFG_SITE_LANG and \
           c in CFG_WEBSEARCH_RSS_I18N_COLLECTIONS:
        rssurl_params.append('ln=' + ln)

    if rssurl_params:
        rssurl += '?' + '&amp;'.join(rssurl_params)

    if 'hb' in CFG_WEBSEARCH_USE_MATHJAX_FOR_FORMATS:
        metaheaderadd = get_mathjax_header(req.is_https())
    else:
        metaheaderadd = ''

    return page(title=title,
                body=c_body,
                navtrail=c_navtrail,
                description="%s - %s" % (CFG_SITE_NAME, c),
                keywords="%s, %s" % (CFG_SITE_NAME, c),
                metaheaderadd=metaheaderadd,
                uid=uid,
                language=ln,
                req=req,
                cdspageboxlefttopadd=c_portalbox_lt,
                cdspageboxrighttopadd=c_portalbox_rt,
                titleprologue=c_portalbox_tp,
                titleepilogue=c_portalbox_te,
                lastupdated=c_last_updated,
                navmenuid='search',
                rssurl=rssurl,
                body_css_classes=body_css_classes,
                show_title_p=show_title_p,
                show_header=em == "" or EM_REPOSITORY["header"] in em,
                show_footer=em == "" or EM_REPOSITORY["footer"] in em)
def display_collection(req, c, aas, verbose, ln):
    """Display search interface page for collection c by looking
    in the collection cache.

    The page fragments are read from files found under
    CFG_CACHEDIR/collections/<colID>/; a fragment whose file cannot be
    read is replaced by the empty string.

    @param req: request object; its argd and content_type attributes are
        set here and its HTTP header is sent before the collection is
        looked up
    @param c: name of the collection to display
    @param aas: selects which cached body/navtrail files are read; reset
        to 0 when no cached body exists for the requested value, and the
        page title is hidden when it equals -1
    @param verbose: verbosity handed to create_error_box() on errors
    @param ln: language handed to gettext_set_language() and page(), and
        used to pick the cached files
    @return: the output of page(), or of page_not_authorized() when
        getUid() returns -1
    @raise apache.SERVER_RETURN: with apache.HTTP_NOT_FOUND when the
        collection is unknown and req.header_only is set
    """
    _ = gettext_set_language(ln)

    req.argd = drop_default_urlargd({'aas': aas, 'verbose': verbose, 'ln': ln},
                                    search_interface_default_urlargd)

    # get user ID:
    try:
        uid = getUid(req)
        user_preferences = {}
        if uid == -1:
            return page_not_authorized(req, "../",
                                       text="You are not authorized to view this collection",
                                       navmenuid='search')
        elif uid > 0:
            user_preferences = get_user_preferences(uid)
    except Error:
        register_exception(req=req, alert_admin=True)
        return page(title=_("Internal Error"),
                    body=create_error_box(req, verbose=verbose, ln=ln),
                    description="%s - Internal Error" % CFG_SITE_NAME,
                    keywords="%s, Internal Error" % CFG_SITE_NAME,
                    language=ln,
                    req=req,
                    navmenuid='search')

    # start display:
    req.content_type = "text/html"
    req.send_http_header()

    # deduce collection id:
    colID = get_colID(get_coll_normalised_name(c))
    if type(colID) is not int:
        # Escape the caller-supplied name before embedding it in HTML.
        escaped_name = cgi.escape(str(c))
        page_body = '<p>' + (_("Sorry, collection %s does not seem to exist.") % \
                             ('<strong>' + escaped_name + '</strong>')) + '</p>'
        # Append (not overwrite) so that both sentences are displayed.
        page_body += '<p>' + (_("You may want to start browsing from %s.") % \
                              ('<a href="' + CFG_SITE_URL + '?ln=' + ln + '">' + \
                               get_coll_i18nname(CFG_SITE_NAME, ln) + '</a>')) + '</p>'
        if req.header_only:
            # NOTE(review): the HTTP header has already been sent above,
            # so this status may not reach the client -- confirm.
            # Call form is equivalent to "raise cls, value" and is valid
            # in every Python version.
            raise apache.SERVER_RETURN(apache.HTTP_NOT_FOUND)
        return page(title=_("Collection %s Not Found") % cgi.escape(c),
                    body=page_body,
                    description=(CFG_SITE_NAME + ' - ' + _("Not found") + ': ' + cgi.escape(str(c))),
                    keywords="%s" % CFG_SITE_NAME,
                    uid=uid,
                    language=ln,
                    req=req,
                    navmenuid='search')

    # wash `aas' argument:
    if not os.path.exists("%s/collections/%d/body-as=%d-ln=%s.html" % \
                          (CFG_CACHEDIR, colID, aas, ln)):
        # nonexistent `aas' asked for, fall back to Simple Search:
        aas = 0

    # display collection interface page:
    c_navtrail = _read_collection_cache_file(colID, "navtrail-as=%d-ln=%s.html" % (aas, ln))
    c_body = _read_collection_cache_file(colID, "body-as=%d-ln=%s.html" % (aas, ln))
    c_portalbox_tp = _read_collection_cache_file(colID, "portalbox-tp-ln=%s.html" % (ln,))
    c_portalbox_te = _read_collection_cache_file(colID, "portalbox-te-ln=%s.html" % (ln,))
    c_portalbox_lt = _read_collection_cache_file(colID, "portalbox-lt-ln=%s.html" % (ln,))

    # show help boxes (usually located in "tr", "top right")
    # if users have not banned them in their preferences:
    c_portalbox_rt = ""
    if user_preferences.get('websearch_helpbox', 1) > 0:
        c_portalbox_rt = _read_collection_cache_file(colID, "portalbox-rt-ln=%s.html" % (ln,))

    c_last_updated = _read_collection_cache_file(colID, "last-updated-ln=%s.html" % (ln,))

    try:
        title = get_coll_i18nname(c, ln)
    except Exception:
        # best effort: fall back to an empty title
        title = ""

    show_title_p = True
    body_css_classes = []
    if c == CFG_SITE_NAME:
        # Do not display title on home collection
        show_title_p = False
        body_css_classes.append('home')

    if len(collection_reclist_cache.cache.keys()) == 1:
        # if there is only one collection defined, do not print its
        # title on the page as it would be displayed repetitively.
        show_title_p = False

    if aas == -1:
        show_title_p = False

    if CFG_INSPIRE_SITE == 1:
        # INSPIRE should never show title, but instead use css to
        # style collections
        show_title_p = False
        body_css_classes.append(nmtoken_from_string(c))

    # RSS:
    rssurl = CFG_SITE_URL + '/rss'
    rssurl_params = []
    if c != CFG_SITE_NAME:
        rssurl_params.append('cc=' + quote(c))
    if ln != CFG_SITE_LANG and \
           c in CFG_WEBSEARCH_RSS_I18N_COLLECTIONS:
        rssurl_params.append('ln=' + ln)

    if rssurl_params:
        rssurl += '?' + '&amp;'.join(rssurl_params)

    if 'hb' in CFG_WEBSEARCH_USE_MATHJAX_FOR_FORMATS:
        metaheaderadd = get_mathjax_header(req.is_https())
    else:
        metaheaderadd = ''

    return page(title=title,
                body=c_body,
                navtrail=c_navtrail,
                description="%s - %s" % (CFG_SITE_NAME, c),
                keywords="%s, %s" % (CFG_SITE_NAME, c),
                metaheaderadd=metaheaderadd,
                uid=uid,
                language=ln,
                req=req,
                cdspageboxlefttopadd=c_portalbox_lt,
                cdspageboxrighttopadd=c_portalbox_rt,
                titleprologue=c_portalbox_tp,
                titleepilogue=c_portalbox_te,
                lastupdated=c_last_updated,
                navmenuid='search',
                rssurl=rssurl,
                body_css_classes=body_css_classes,
                show_title_p=show_title_p)


def _read_collection_cache_file(colID, filename):
    """Return the content of CFG_CACHEDIR/collections/<colID>/<filename>.

    @param colID: integer collection ID
    @param filename: name of the cached file inside the collection directory
    @return: the file content, or "" when the file cannot be opened or read
    """
    path = "%s/collections/%d/%s" % (CFG_CACHEDIR, colID, filename)
    try:
        filedesc = open(path, "r")
        try:
            return filedesc.read()
        finally:
            # always release the handle, even when read() fails
            filedesc.close()
    except (IOError, OSError):
        # a missing or unreadable cache file simply yields no content
        return ""