def display_collection(req, c, aas, verbose, ln, em=""):
    """Redirect to the page of collection ``c``, or report that it is missing.

    :param req: request object; ``req.argd`` is overwritten with the
        non-default URL arguments and ``req.method`` is read.
    :param c: collection name as received from the caller.
    :param aas: search interface selector, only recorded in ``req.argd`` here.
    :param verbose: verbosity level, forwarded to the error box.
    :param ln: language code used for messages and links.
    :param em: comma-separated list of page elements, only recorded in
        ``req.argd`` here.
    :return: a "not authorized" page for guests with uid -1, an
        "Internal Error" page when the user lookup raises ``Error``, a
        "Not Found" page when no collection is named ``c``, otherwise a
        redirect to the ``collections.collection`` endpoint.
    :raises apache.SERVER_RETURN: with ``HTTP_NOT_FOUND`` for a HEAD request
        on an unknown collection.
    """
    _ = gettext_set_language(ln)

    req.argd = drop_default_urlargd({'aas': aas, 'verbose': verbose, 'ln': ln, 'em' : em},
                                    search_interface_default_urlargd)

    # get user ID:
    try:
        uid = getUid(req)
        if uid == -1:
            return page_not_authorized(req, "../",
                                       text="You are not authorized to view this collection",
                                       navmenuid='search')
        elif uid > 0:
            # The preferences are not used below; the lookup is kept so that
            # an Error raised by it still produces the Internal Error page.
            get_user_preferences(uid)
    except Error:
        register_exception(req=req, alert_admin=True)
        return page(title=_("Internal Error"),
                    body=create_error_box(req, verbose=verbose, ln=ln),
                    description="%s - Internal Error" % CFG_SITE_NAME,
                    keywords="%s, Internal Error" % CFG_SITE_NAME,
                    language=ln,
                    req=req,
                    navmenuid='search')

    # deduce collection:
    collection = Collection.query.filter_by(name=c).first()
    if collection is None:
        # The name comes from the request, so escape it before embedding it
        # in markup (the title and description below already do).
        escaped_name = cgi.escape(str(c))
        page_body = '<p>' + (_("Sorry, collection %(x_colname)s does not seem to exist.",
                               x_colname='<strong>' + escaped_name + '</strong>')) + '</p>'
        # Append rather than assign: both paragraphs belong on the page.
        page_body += '<p>' + (_("You may want to start browsing from %(x_sitehref)s.",
                                x_sitehref='<a href="' + CFG_SITE_URL + '?ln=' + ln + '">' +
                                get_coll_i18nname(CFG_SITE_NAME, ln) + '</a>')) + '</p>'
        if req.method == 'HEAD':
            raise apache.SERVER_RETURN(apache.HTTP_NOT_FOUND)
        return page(title=_("Collection %(x_colname)s Not Found", x_colname=cgi.escape(c)),
                    body=page_body,
                    description=(CFG_SITE_NAME + ' - ' + _("Not found") + ': ' + cgi.escape(str(c))),
                    keywords="%s" % CFG_SITE_NAME,
                    uid=uid,
                    language=ln,
                    req=req,
                    navmenuid='search')

    from flask import redirect, url_for
    return redirect(url_for('collections.collection', name=collection.name))
def name_ln(self):
    """Return ``get_coll_i18nname`` of ``self.name`` for the current language.

    The language is read from ``g.ln``; when ``g`` carries no ``ln``
    attribute the site default ``cfg['CFG_SITE_LANG']`` is used instead of
    raising ``AttributeError``.
    """
    from invenio.legacy.search_engine import get_coll_i18nname
    return get_coll_i18nname(self.name,
                             getattr(g, 'ln', cfg['CFG_SITE_LANG']))
def name_ln(self):
    """Return ``get_coll_i18nname`` of ``self.name`` for the active language.

    The language is taken from ``g.ln`` when that attribute exists and from
    ``cfg['CFG_SITE_LANG']`` otherwise.
    """
    from invenio.legacy.search_engine import get_coll_i18nname

    own_name = self.name
    # Looked up unconditionally, exactly as a getattr() default would be.
    site_language = cfg['CFG_SITE_LANG']
    language = getattr(g, 'ln', site_language)
    return get_coll_i18nname(own_name, language)
def answer(self, req, user_info, of, cc, colls_to_search, p, f, search_units, ln):
    """
    Answer question given by context.

    Return (relevance, html_string) where relevance is a number from 0
    to 100 indicating how relevant to the question the answer is (see
    C{CFG_WEBSEARCH_SERVICE_MAX_SERVICE_ANSWER_RELEVANCE} for details),
    and html_string being a formatted answer.  ``(0, '')`` is returned
    when the service does not apply or nothing matches.

    @param user_info: passed to get_permitted_restricted_collections to
        find the restricted collections the user may see.
    @param cc: current collection; when
        CFG_WEBSEARCH_COLLECTION_NAMES_SEARCH is 0 the service only
        answers if this equals CFG_SITE_NAME.
    @param p: query pattern, inspected for the word 'submit'.
    @param f: when truthy the service does not answer.
    @param search_units: sequence of tuples; element [1] is stemmed and
        element [2] must be '' or 'collection' for the unit to be used
        (presumably the unit's field -- verify against caller).
    @param ln: language used for stemming, names and the result links.

    req, of and colls_to_search are not used by this implementation.
    """
    from invenio.legacy.search_engine import \
        get_permitted_restricted_collections, \
        get_coll_i18nname, \
        collection_restricted_p

    _ = gettext_set_language(ln)
    # stem search units. remove those with field
    # TODO: search in hosted collection names too
    # TODO: ignore unattached trees
    # TODO: use synonyms
    if f or (CFG_WEBSEARCH_COLLECTION_NAMES_SEARCH < 0) or \
       (CFG_WEBSEARCH_COLLECTION_NAMES_SEARCH == 0 and cc != CFG_SITE_NAME):
        return (0, '')

    # Stemming
    words = [stem(unit[1], ln) for unit in search_units
             if unit[2] in ('', 'collection')]
    if not words:
        return (0, '')

    permitted_restricted_collections = get_permitted_restricted_collections(user_info)
    cache = self.get_data_cache()

    # Count, per collection, how many query words hit it.
    matching_collections = {}
    for word in words:
        if CFG_CERN_SITE and word == 'cern':
            # This keyword is useless here...
            continue
        for coll in cache.get(word.lower(), []):
            if collection_restricted_p(coll) and \
               coll not in permitted_restricted_collections:
                # Skip restricted collection user do not have access
                continue
            matching_collections[coll] = matching_collections.get(coll, 0) + 1

    if not matching_collections:
        return (0, '')

    # Best score first; ties are broken by collection name, descending.
    matching_collections_sorted = sorted(matching_collections.items(),
                                         key=lambda item: (item[1], item[0]),
                                         reverse=True)

    # Link to each collection in the requested language instead of a
    # hard-coded English page.
    ln_argument = '?ln=' + urllib.quote(ln, safe='')
    matching_collections_names = [
        (get_coll_i18nname(coll, ln, False),
         CFG_SITE_URL + '/collection/' + urllib.quote(coll, safe='') + ln_argument)
        for coll, score in matching_collections_sorted]

    best_score = matching_collections_sorted[0][1]
    best_coll_words = whitespace_re.split(matching_collections_sorted[0][0])

    # Twice the hit count over the combined word count of the best
    # collection name and the query, as a percentage, minus 10, clamped
    # to the range 0..100.
    relevance = min(100, max(0, (100 * float(2 * best_score) /
                                 float(len(best_coll_words) + len(words)) - 10)))

    if (('submit' in p.lower()) or (_('submit') in p.lower())) and \
       not (('submit' in best_coll_words) or (_('submit') in best_coll_words)):
        # User is probably looking for a submission. Decrease relevance
        relevance = max(0, relevance - 30)

    return (relevance, self.display_answer_helper(matching_collections_names, ln))
def display_collection(req, c, aas, verbose, ln, em=""):
    """Display search interface page for collection c by looking in the collection cache.

    :param req: request object; ``req.argd`` and ``req.content_type`` are
        set, the HTTP header is sent, and ``req.method`` is read.
    :param c: collection name as received from the caller.
    :param aas: search interface selector; ``-1`` hides the page title.
    :param verbose: verbosity level, forwarded to the error box.
    :param ln: language code used for messages, links and the RSS URL.
    :param em: comma-separated list of page elements to show; the empty
        string means the body, header and footer are all shown.
    :return: a "not authorized" page for guests with uid -1, an
        "Internal Error" page when the user lookup raises ``Error``, a
        "Not Found" page when no collection id can be resolved for ``c``,
        otherwise the rendered collection page.
    :raises apache.SERVER_RETURN: with ``HTTP_NOT_FOUND`` for a HEAD request
        on an unknown collection.
    """
    _ = gettext_set_language(ln)

    req.argd = drop_default_urlargd({'aas': aas, 'verbose': verbose, 'ln': ln, 'em' : em},
                                    search_interface_default_urlargd)

    if em != "":
        em = em.split(",")
    # get user ID:
    try:
        uid = getUid(req)
        user_preferences = {}
        if uid == -1:
            return page_not_authorized(req, "../",
                                       text="You are not authorized to view this collection",
                                       navmenuid='search')
        elif uid > 0:
            user_preferences = get_user_preferences(uid)
    except Error:
        register_exception(req=req, alert_admin=True)
        return page(title=_("Internal Error"),
                    body=create_error_box(req, verbose=verbose, ln=ln),
                    description="%s - Internal Error" % CFG_SITE_NAME,
                    keywords="%s, Internal Error" % CFG_SITE_NAME,
                    language=ln,
                    req=req,
                    navmenuid='search')
    # start display:
    req.content_type = "text/html"
    req.send_http_header()
    # deduce collection id:
    colID = get_colID(get_coll_normalised_name(c))
    if not isinstance(colID, int):
        # The name comes from the request, so escape it before embedding it
        # in markup (the title and description below already do).
        escaped_name = cgi.escape(str(c))
        page_body = '<p>' + (_("Sorry, collection %(x_colname)s does not seem to exist.",
                               x_colname='<strong>' + escaped_name + '</strong>')) + '</p>'
        # Append rather than assign: both paragraphs belong on the page.
        page_body += '<p>' + (_("You may want to start browsing from %(x_sitehref)s.",
                                x_sitehref='<a href="' + CFG_SITE_URL + '?ln=' + ln + '">' +
                                get_coll_i18nname(CFG_SITE_NAME, ln) + '</a>')) + '</p>'
        if req.method == 'HEAD':
            raise apache.SERVER_RETURN(apache.HTTP_NOT_FOUND)
        return page(title=_("Collection %(x_colname)s Not Found", x_colname=cgi.escape(c)),
                    body=page_body,
                    description=(CFG_SITE_NAME + ' - ' + _("Not found") + ': ' + cgi.escape(str(c))),
                    keywords="%s" % CFG_SITE_NAME,
                    uid=uid,
                    language=ln,
                    req=req,
                    navmenuid='search')

    c_body, c_navtrail, c_portalbox_lt, c_portalbox_rt, c_portalbox_tp, c_portalbox_te, \
        c_last_updated = perform_display_collection(colID, c, aas, ln, em,
                                                    user_preferences.get('websearch_helpbox', 1))

    if em == "" or EM_REPOSITORY["body"] in em:
        try:
            title = get_coll_i18nname(c, ln)
        except Exception:
            # Best effort: fall back to an empty title, but let
            # SystemExit / KeyboardInterrupt propagate.
            title = ""
    else:
        title = ""
    show_title_p = True
    body_css_classes = []
    if c == CFG_SITE_NAME:
        # Do not display title on home collection
        show_title_p = False
        body_css_classes.append('home')

    if len(collection_reclist_cache.cache.keys()) == 1:
        # if there is only one collection defined, do not print its
        # title on the page as it would be displayed repetitively.
        show_title_p = False

    if aas == -1:
        show_title_p = False

    if CFG_INSPIRE_SITE == 1:
        # INSPIRE should never show title, but instead use css to
        # style collections
        show_title_p = False
        body_css_classes.append(nmtoken_from_string(c))

    # RSS:
    rssurl = CFG_SITE_URL + '/rss'
    rssurl_params = []
    if c != CFG_SITE_NAME:
        rssurl_params.append('cc=' + quote(c))
    if ln != CFG_SITE_LANG and \
           c in CFG_WEBSEARCH_RSS_I18N_COLLECTIONS:
        rssurl_params.append('ln=' + ln)

    if rssurl_params:
        rssurl += '?' + '&'.join(rssurl_params)

    if 'hb' in CFG_WEBSEARCH_USE_MATHJAX_FOR_FORMATS:
        metaheaderadd = get_mathjax_header(req.is_https())
    else:
        metaheaderadd = ''

    return page(title=title,
                body=c_body,
                navtrail=c_navtrail,
                description="%s - %s" % (CFG_SITE_NAME, c),
                keywords="%s, %s" % (CFG_SITE_NAME, c),
                metaheaderadd=metaheaderadd,
                uid=uid,
                language=ln,
                req=req,
                cdspageboxlefttopadd=c_portalbox_lt,
                cdspageboxrighttopadd=c_portalbox_rt,
                titleprologue=c_portalbox_tp,
                titleepilogue=c_portalbox_te,
                lastupdated=c_last_updated,
                navmenuid='search',
                rssurl=rssurl,
                body_css_classes=body_css_classes,
                show_title_p=show_title_p,
                show_header=em == "" or EM_REPOSITORY["header"] in em,
                show_footer=em == "" or EM_REPOSITORY["footer"] in em)
def answer(self, req, user_info, of, cc, colls_to_search, p, f, search_units, ln):
    """
    Answer question given by context.

    Return (relevance, html_string) where relevance is a number from 0
    to 100 indicating how relevant to the question the answer is (see
    C{CFG_WEBSEARCH_SERVICE_MAX_SERVICE_ANSWER_RELEVANCE} for details),
    and html_string being a formatted answer.  ``(0, '')`` is returned
    when the service does not apply or nothing matches.

    @param user_info: passed to get_permitted_restricted_collections to
        find the restricted collections the user may see.
    @param cc: current collection; when
        CFG_WEBSEARCH_COLLECTION_NAMES_SEARCH is 0 the service only
        answers if this equals CFG_SITE_NAME.
    @param p: query pattern, inspected for the word 'submit'.
    @param f: when truthy the service does not answer.
    @param search_units: sequence of tuples; element [1] is stemmed and
        element [2] must be '' or 'collection' for the unit to be used
        (presumably the unit's field -- verify against caller).
    @param ln: language used for stemming, names and the result links.

    req, of and colls_to_search are not used by this implementation.
    """
    from invenio.legacy.search_engine import \
        get_permitted_restricted_collections, \
        get_coll_i18nname, \
        collection_restricted_p

    _ = gettext_set_language(ln)
    # stem search units. remove those with field
    # TODO: search in hosted collection names too
    # TODO: ignore unattached trees
    # TODO: use synonyms
    if f or (CFG_WEBSEARCH_COLLECTION_NAMES_SEARCH < 0) or \
       (CFG_WEBSEARCH_COLLECTION_NAMES_SEARCH == 0 and cc != CFG_SITE_NAME):
        return (0, '')

    words = [
        stem(unit[1], ln) for unit in search_units
        if unit[2] in ('', 'collection')
    ]  # Stemming

    if not words:
        return (0, '')

    permitted_restricted_collections = get_permitted_restricted_collections(
        user_info)
    cache = self.get_data_cache()

    # Number of query words that hit each collection.
    matching_collections = {}
    for word in words:
        if CFG_CERN_SITE and word == 'cern':
            # This keyword is useless here...
            continue

        for coll in cache.get(word.lower(), []):
            if collection_restricted_p(coll) and \
               coll not in permitted_restricted_collections:
                # Skip restricted collection user do not have access
                continue
            matching_collections[coll] = matching_collections.get(coll, 0) + 1

    if not matching_collections:
        return (0, '')

    # Highest hit count first; equal counts are ordered by name, descending.
    matching_collections_sorted = sorted(
        matching_collections.items(),
        key=lambda item: (item[1], item[0]),
        reverse=True)

    # Build the links in the requested language rather than always
    # pointing at the English page.
    ln_argument = '?ln=' + urllib.quote(ln, safe='')
    matching_collections_names = [
        (get_coll_i18nname(coll, ln, False),
         CFG_SITE_URL + '/collection/' + urllib.quote(coll, safe='') + ln_argument)
        for coll, score in matching_collections_sorted
    ]

    best_score = matching_collections_sorted[0][1]
    best_coll_words = whitespace_re.split(
        matching_collections_sorted[0][0])

    # Twice the hit count over the combined word count of the best
    # collection name and the query, as a percentage, minus 10, clamped
    # to the range 0..100.
    relevance = min(
        100,
        max(0, (100 * float(2 * best_score) /
                float(len(best_coll_words) + len(words)) - 10)))

    if (('submit' in p.lower()) or (_('submit') in p.lower())) and \
       not (('submit' in best_coll_words) or (_('submit') in best_coll_words)):
        # User is probably looking for a submission. Decrease relevance
        relevance = max(0, relevance - 30)

    return (relevance,
            self.display_answer_helper(matching_collections_names, ln))