Example #1
0
def display_collection(req, c, aas, verbose, ln, em=""):
    """Display search interface page for collection c by looking
    in the collection cache."""
    _ = gettext_set_language(ln)

    req.argd = drop_default_urlargd({'aas': aas, 'verbose': verbose, 'ln': ln, 'em' : em},
                                    search_interface_default_urlargd)

    if em != "":
        em = em.split(",")
    # get user ID:
    try:
        uid = getUid(req)
        user_preferences = {}
        if uid == -1:
            return page_not_authorized(req, "../",
                text="You are not authorized to view this collection",
                                       navmenuid='search')
        elif uid > 0:
            user_preferences = get_user_preferences(uid)
    except Error:
        register_exception(req=req, alert_admin=True)
        return page(title=_("Internal Error"),
                    body=create_error_box(req, verbose=verbose, ln=ln),
                    description="%s - Internal Error" % CFG_SITE_NAME,
                    keywords="%s, Internal Error" % CFG_SITE_NAME,
                    language=ln,
                    req=req,
                    navmenuid='search')

    # deduce collection id:
    collection = Collection.query.filter_by(name=c).first()
    colID = collection.id if collection else None
    normalised_name = collection.name if collection else c
    if type(colID) is not int:
        page_body = '<p>' + (_("Sorry, collection %(x_colname)s does not seem to exist.", x_colname='<strong>' + str(c) + '</strong>',)) + '</p>'
        page_body = '<p>' + (_("You may want to start browsing from %(x_sitehref)s.", x_sitehref='<a href="' + CFG_SITE_URL + '?ln=' + ln + '">' + get_coll_i18nname(CFG_SITE_NAME, ln) + '</a>')) + '</p>'
        if req.method == 'HEAD':
            raise apache.SERVER_RETURN, apache.HTTP_NOT_FOUND
        return page(title=_("Collection %(x_colname)s Not Found", x_colname=cgi.escape(c)),
                    body=page_body,
                    description=(CFG_SITE_NAME + ' - ' + _("Not found") + ': ' + cgi.escape(str(c))),
                    keywords="%s" % CFG_SITE_NAME,
                    uid=uid,
                    language=ln,
                    req=req,
                    navmenuid='search')

    from flask import redirect, url_for
    return redirect(url_for('collections.collection', name=collection.name))
Example #2
0
 def name_ln(self):
     from invenio.legacy.search_engine import get_coll_i18nname
     return get_coll_i18nname(self.name, g.ln)
Example #3
0
 def name_ln(self):
     """Name ln."""
     from invenio.legacy.search_engine import get_coll_i18nname
     return get_coll_i18nname(self.name,
                              getattr(g, 'ln', cfg['CFG_SITE_LANG']))
Example #4
0
 def name_ln(self):
     """Name ln."""
     from invenio.legacy.search_engine import get_coll_i18nname
     return get_coll_i18nname(self.name,
                              getattr(g, 'ln', cfg['CFG_SITE_LANG']))
    def answer(self, req, user_info, of, cc, colls_to_search, p, f, search_units, ln):
        """
        Answer question given by context.

        Return (relevance, html_string) where relevance is integer
        from 0 to 100 indicating how relevant to the question the
        answer is (see C{CFG_WEBSEARCH_SERVICE_MAX_SERVICE_ANSWER_RELEVANCE} for details) ,
        and html_string being a formatted answer.
        """
        from invenio.legacy.search_engine import \
             get_permitted_restricted_collections, \
             get_coll_i18nname, \
             collection_i18nname_cache, \
             collection_restricted_p
        _ = gettext_set_language(ln)
        # stem search units. remove those with field
        # TODO: search in hosted collection names too
        # TODO: ignore unattached trees
        # TODO: use synonyms
        if f or (CFG_WEBSEARCH_COLLECTION_NAMES_SEARCH < 0) or \
               (CFG_WEBSEARCH_COLLECTION_NAMES_SEARCH == 0 and cc != CFG_SITE_NAME):
            return (0, '')

        words = [stem(unit[1], ln) for unit in search_units if unit[2] in ('', 'collection')] # Stemming

        if not words:
            return (0, '')

        permitted_restricted_collections = get_permitted_restricted_collections(user_info)
        cache = self.get_data_cache()

        matching_collections = {}
        for word in words:
            if CFG_CERN_SITE and word == 'cern':
                # This keyword is useless here...
                continue

            colls = cache.get(word.lower(), [])
            for coll in colls:
                if collection_restricted_p(coll) and \
                       not coll in permitted_restricted_collections:
                    # Skip restricted collection user do not have access
                    continue
                if not matching_collections.has_key(coll):
                    matching_collections[coll] = 0
                matching_collections[coll] += 1


        matching_collections_sorted = sorted(matching_collections.iteritems(), key=lambda (k, v): (v, k), reverse=True)
        if not matching_collections_sorted:
            return (0, '')

        matching_collections_names = [(get_coll_i18nname(coll, ln, False), CFG_SITE_URL + '/collection/' + urllib.quote(coll, safe='') + '?ln=en') \
                                      for coll, score in matching_collections_sorted]

        best_score = matching_collections_sorted[0][1]
        best_coll_words = whitespace_re.split(matching_collections_sorted[0][0])

        relevance = min(100, max(0, (100 * float(2 * best_score) /  float(len(best_coll_words) + len(words)) - 10)))

        if (('submit' in p.lower()) or (_('submit') in p.lower())) and \
               not (('submit' in best_coll_words) or (_('submit') in best_coll_words)):
            # User is probably looking for a submission. Decrease relevance
            relevance = max(0, relevance - 30)

        return (relevance, self.display_answer_helper(matching_collections_names, ln))
Example #6
0
 def name_ln(self):
     from invenio.legacy.search_engine import get_coll_i18nname
     return get_coll_i18nname(self.name, g.ln)
Example #7
0
def display_collection(req, c, aas, verbose, ln, em=""):
    """Display search interface page for collection c by looking
    in the collection cache."""
    _ = gettext_set_language(ln)

    req.argd = drop_default_urlargd({'aas': aas, 'verbose': verbose, 'ln': ln, 'em' : em},
                                    search_interface_default_urlargd)

    if em != "":
        em = em.split(",")
    # get user ID:
    try:
        uid = getUid(req)
        user_preferences = {}
        if uid == -1:
            return page_not_authorized(req, "../",
                text="You are not authorized to view this collection",
                                       navmenuid='search')
        elif uid > 0:
            user_preferences = get_user_preferences(uid)
    except Error:
        register_exception(req=req, alert_admin=True)
        return page(title=_("Internal Error"),
                    body=create_error_box(req, verbose=verbose, ln=ln),
                    description="%s - Internal Error" % CFG_SITE_NAME,
                    keywords="%s, Internal Error" % CFG_SITE_NAME,
                    language=ln,
                    req=req,
                    navmenuid='search')
    # start display:
    req.content_type = "text/html"
    req.send_http_header()
    # deduce collection id:
    colID = get_colID(get_coll_normalised_name(c))
    if type(colID) is not int:
        page_body = '<p>' + (_("Sorry, collection %(x_colname)s does not seem to exist.", x_colname='<strong>' + str(c) + '</strong>',)) + '</p>'
        page_body = '<p>' + (_("You may want to start browsing from %(x_sitehref)s.", x_sitehref='<a href="' + CFG_SITE_URL + '?ln=' + ln + '">' + get_coll_i18nname(CFG_SITE_NAME, ln) + '</a>')) + '</p>'
        if req.method == 'HEAD':
            raise apache.SERVER_RETURN, apache.HTTP_NOT_FOUND
        return page(title=_("Collection %(x_colname)s Not Found", x_colname=cgi.escape(c)),
                    body=page_body,
                    description=(CFG_SITE_NAME + ' - ' + _("Not found") + ': ' + cgi.escape(str(c))),
                    keywords="%s" % CFG_SITE_NAME,
                    uid=uid,
                    language=ln,
                    req=req,
                    navmenuid='search')

    c_body, c_navtrail, c_portalbox_lt, c_portalbox_rt, c_portalbox_tp, c_portalbox_te, \
        c_last_updated = perform_display_collection(colID, c, aas, ln, em,
                                            user_preferences.get('websearch_helpbox', 1))

    if em == "" or EM_REPOSITORY["body"] in em:
        try:
            title = get_coll_i18nname(c, ln)
        except:
            title = ""
    else:
        title = ""
    show_title_p = True
    body_css_classes = []
    if c == CFG_SITE_NAME:
        # Do not display title on home collection
        show_title_p = False
        body_css_classes.append('home')

    if len(collection_reclist_cache.cache.keys()) == 1:
        # if there is only one collection defined, do not print its
        # title on the page as it would be displayed repetitively.
        show_title_p = False

    if aas == -1:
        show_title_p = False

    if CFG_INSPIRE_SITE == 1:
        # INSPIRE should never show title, but instead use css to
        # style collections
        show_title_p = False
        body_css_classes.append(nmtoken_from_string(c))

    # RSS:
    rssurl = CFG_SITE_URL + '/rss'
    rssurl_params = []
    if c != CFG_SITE_NAME:
        rssurl_params.append('cc=' + quote(c))
    if ln != CFG_SITE_LANG and \
           c in CFG_WEBSEARCH_RSS_I18N_COLLECTIONS:
        rssurl_params.append('ln=' + ln)

    if rssurl_params:
        rssurl += '?' + '&amp;'.join(rssurl_params)

    if 'hb' in CFG_WEBSEARCH_USE_MATHJAX_FOR_FORMATS:
        metaheaderadd = get_mathjax_header(req.is_https())
    else:
        metaheaderadd = ''

    return page(title=title,
                body=c_body,
                navtrail=c_navtrail,
                description="%s - %s" % (CFG_SITE_NAME, c),
                keywords="%s, %s" % (CFG_SITE_NAME, c),
                metaheaderadd=metaheaderadd,
                uid=uid,
                language=ln,
                req=req,
                cdspageboxlefttopadd=c_portalbox_lt,
                cdspageboxrighttopadd=c_portalbox_rt,
                titleprologue=c_portalbox_tp,
                titleepilogue=c_portalbox_te,
                lastupdated=c_last_updated,
                navmenuid='search',
                rssurl=rssurl,
                body_css_classes=body_css_classes,
                show_title_p=show_title_p,
                show_header=em == "" or EM_REPOSITORY["header"] in em,
                show_footer=em == "" or EM_REPOSITORY["footer"] in em)
    def answer(self, req, user_info, of, cc, colls_to_search, p, f,
               search_units, ln):
        """
        Answer question given by context.

        Return (relevance, html_string) where relevance is integer
        from 0 to 100 indicating how relevant to the question the
        answer is (see C{CFG_WEBSEARCH_SERVICE_MAX_SERVICE_ANSWER_RELEVANCE} for details) ,
        and html_string being a formatted answer.
        """
        from invenio.legacy.search_engine import \
             get_permitted_restricted_collections, \
             get_coll_i18nname, \
             collection_i18nname_cache, \
             collection_restricted_p
        _ = gettext_set_language(ln)
        # stem search units. remove those with field
        # TODO: search in hosted collection names too
        # TODO: ignore unattached trees
        # TODO: use synonyms
        if f or (CFG_WEBSEARCH_COLLECTION_NAMES_SEARCH < 0) or \
               (CFG_WEBSEARCH_COLLECTION_NAMES_SEARCH == 0 and cc != CFG_SITE_NAME):
            return (0, '')

        words = [
            stem(unit[1], ln) for unit in search_units
            if unit[2] in ('', 'collection')
        ]  # Stemming

        if not words:
            return (0, '')

        permitted_restricted_collections = get_permitted_restricted_collections(
            user_info)
        cache = self.get_data_cache()

        matching_collections = {}
        for word in words:
            if CFG_CERN_SITE and word == 'cern':
                # This keyword is useless here...
                continue

            colls = cache.get(word.lower(), [])
            for coll in colls:
                if collection_restricted_p(coll) and \
                       not coll in permitted_restricted_collections:
                    # Skip restricted collection user do not have access
                    continue
                if not matching_collections.has_key(coll):
                    matching_collections[coll] = 0
                matching_collections[coll] += 1

        matching_collections_sorted = sorted(matching_collections.iteritems(),
                                             key=lambda (k, v): (v, k),
                                             reverse=True)
        if not matching_collections_sorted:
            return (0, '')

        matching_collections_names = [(get_coll_i18nname(coll, ln, False), CFG_SITE_URL + '/collection/' + urllib.quote(coll, safe='') + '?ln=en') \
                                      for coll, score in matching_collections_sorted]

        best_score = matching_collections_sorted[0][1]
        best_coll_words = whitespace_re.split(
            matching_collections_sorted[0][0])

        relevance = min(
            100,
            max(0, (100 * float(2 * best_score) /
                    float(len(best_coll_words) + len(words)) - 10)))

        if (('submit' in p.lower()) or (_('submit') in p.lower())) and \
               not (('submit' in best_coll_words) or (_('submit') in best_coll_words)):
            # User is probably looking for a submission. Decrease relevance
            relevance = max(0, relevance - 30)

        return (relevance,
                self.display_answer_helper(matching_collections_names, ln))