Ejemplo n.º 1
0
def late(req):
    """Stream an HTML report of record arrival dates versus DOI dates.

    For every journal in CFG_JOURNALS one table is written to ``req`` with
    a row per record: the DOI (linked to dx.doi.org), the title, the
    ``creation_date`` found in the ``doi`` table and the ``creation_date``
    found in ``bibrec``.

    Row background colours:
      * ``#eee``    -- no row exists in the ``doi`` table for this DOI;
      * ``#66FF00`` -- the record was created before the DOI row;
      * ``#FF6600`` -- the record was created less than one day after it;
      * ``#FF0000`` -- the record was created one day or more after it.

    Nothing is returned; all output is printed to ``req``.
    """
    req.content_type = "text/html"
    print >> req, pageheaderonly("Late journals", req=req)

    # Each DOI anchor is closed with </a> so that the remaining cells of
    # the row are not swallowed by the link.
    row_template = ('<tr style="background-color: %s;">'
                    '<td><a href="http://dx.doi.org/%s" target="_blank">%s</a></td>'
                    '<td>%s</td><td>%s</td><td>%s</td></tr>')

    for journal in CFG_JOURNALS:
        print >> req, "<h2>%s</h2>" % escape(get_coll_i18nname(journal))
        results = get_collection_reclist(journal)
        print >> req, "<table>"
        print >> req, "<tr><th>DOI</th><th>Title</th><th>DOI registration</th><th>Arrival in SCOAP3</th></tr>"
        for recid in results:
            creation_date = run_sql("SELECT creation_date FROM bibrec WHERE id=%s", (recid, ))[0][0]
            record = get_record(recid)
            doi = record_get_field_value(record, '024', '7', code='a')
            # NOTE(review): the title is emitted unescaped; presumably it
            # may legitimately carry markup -- confirm before escaping it.
            title = record_get_field_value(record, '245', code='a')
            doi_rows = run_sql("SELECT creation_date FROM doi WHERE doi=%s", (doi, ))
            if doi_rows:
                doi_date = doi_rows[0][0]
                # Whole days between DOI row creation and record creation.
                delay_days = (creation_date - doi_date).days
                if delay_days < 0:
                    background = "#66FF00"
                elif delay_days < 1:
                    background = "#FF6600"
                else:
                    background = "#FF0000"
            else:
                # Unknown DOI: neutral colour and an empty date cell.
                doi_date = ''
                background = "#eee"
            print >> req, row_template % (
                    background,
                    escape(doi, True),
                    escape(doi),
                    title,
                    doi_date,
                    creation_date)
        print >> req, "</table>"
Ejemplo n.º 2
0
def csv(req):
    """Send a CSV attachment with article counts per nation and journal.

    The first line holds the column titles ('Nation' followed by the
    display name of every journal in CFG_JOURNALS).  Every following line
    starts with the first element of an entry of _CFG_NATION_MAP and
    continues with the number of search hits of that entry's query in
    each journal.  Fields are joined with plain commas (no quoting).
    """
    req.content_type = 'text/csv; charset=utf-8'
    req.headers_out['content-disposition'] = 'attachment; filename=scoap3.csv'

    column_titles = ['Nation']
    for journal in CFG_JOURNALS:
        column_titles.append(get_coll_i18nname(journal))
    print >> req, ','.join(column_titles)

    for nation_entry in _CFG_NATION_MAP:
        query = _build_query(nation_entry)
        cells = [nation_entry[0]]
        for journal in CFG_JOURNALS:
            hits = perform_request_search(p=query, cc=journal, of='intbitset')
            cells.append(str(len(hits)))
        print >> req, ','.join(cells)
Ejemplo n.º 3
0
def articles(req, i, mode='html'):
    """List the articles matching the query built for one affiliation.

    Parameters:
      req  -- request object; headers are set on it and, in text mode,
              the listing is printed to it.
      i    -- index into _AFFILIATIONS (anything accepted by ``int()``).
      mode -- 'text' streams a plain-text attachment of DOI URLs and
              returns ""; any other value returns an HTML page.

    Raises SERVER_RETURN(HTTP_BAD_REQUEST) when ``i`` is not an integer
    or lies outside ``0 <= i < len(_AFFILIATIONS)``.
    """
    # Validate explicitly instead of with `assert`, which is stripped
    # when Python runs with -O and would let bad indexes through.
    try:
        i = int(i)
    except (TypeError, ValueError):
        raise SERVER_RETURN(HTTP_BAD_REQUEST)
    if not 0 <= i < len(_AFFILIATIONS):
        raise SERVER_RETURN(HTTP_BAD_REQUEST)

    nation = _AFFILIATIONS[i]
    text_mode = (mode == 'text')
    page_title = "SCOAP3 Articles by authors from %s" % nation

    if text_mode:
        req.content_type = "text/plain; charset=utf8"
        req.headers_out['content-disposition'] = ('attachment; filename=%s.txt'
                                                  % nation)
        print >> req, page_title
        print >> req, "-" * len(page_title)
    else:
        req.content_type = "text/html"

    li = ("<li><a href='http://dx.doi.org/{0}' "
          "target='_blank'>{1}</a>: {2}</li>")
    ret = []  # HTML fragments; only filled in HTML mode
    query = _build_query(nation)
    for journal in CFG_JOURNALS:
        results = perform_request_search(p=query, cc=journal, of='intbitset')
        if not results:
            continue
        journal_name = get_coll_i18nname(journal)
        if text_mode:
            print >> req, ""
            print >> req, journal_name
        else:
            # The heading is closed with a complete </h2> tag.
            ret.append("<h2>%s (%s)</h2>" % (escape(journal_name),
                                             len(results)))
            ret.append("<p><ul>")
        for recid in results:
            record = get_record(recid)
            doi = record_get_field_value(record, '024', '7', code='a')
            if text_mode:
                print >> req, "http://dx.doi.org/%s" % doi
            else:
                # NOTE(review): the title is inserted unescaped;
                # presumably it may contain markup -- confirm.
                title = record_get_field_value(record, '245', code='a')
                ret.append(li.format(escape(doi, True), escape(doi), title))
        if not text_mode:
            ret.append("</ul></p>")

    if text_mode:
        # Everything was already streamed to req.
        return ""
    return page(req=req, title=page_title, body='\n'.join(ret))
Ejemplo n.º 4
0
def articles(req, i, mode='html'):
    """List the articles matching the query built for one affiliation.

    Parameters:
      req  -- request object; headers are set on it and, in text mode,
              the listing is printed to it.
      i    -- index into _AFFILIATIONS (anything accepted by ``int()``).
      mode -- 'text' streams a plain-text attachment of DOI URLs and
              returns ""; any other value returns an HTML page.

    Raises SERVER_RETURN(HTTP_BAD_REQUEST) when ``i`` is not an integer
    or lies outside ``0 <= i < len(_AFFILIATIONS)``.
    """
    # Validate explicitly instead of with `assert`, which is stripped
    # when Python runs with -O and would let bad indexes through.
    try:
        i = int(i)
    except (TypeError, ValueError):
        raise SERVER_RETURN(HTTP_BAD_REQUEST)
    if not 0 <= i < len(_AFFILIATIONS):
        raise SERVER_RETURN(HTTP_BAD_REQUEST)

    nation = _AFFILIATIONS[i]
    text_mode = (mode == 'text')
    page_title = "SCOAP3 Articles by authors from %s" % nation

    if text_mode:
        req.content_type = "text/plain; charset=utf8"
        req.headers_out['content-disposition'] = ('attachment; filename=%s.txt'
                                                  % nation)
        print >> req, page_title
        print >> req, "-" * len(page_title)
    else:
        req.content_type = "text/html"

    li = ("<li><a href='http://dx.doi.org/{0}' "
          "target='_blank'>{1}</a>: {2}</li>")
    ret = []  # HTML fragments; only filled in HTML mode
    query = _build_query(nation)
    for journal in CFG_JOURNALS:
        results = perform_request_search(p=query, cc=journal, of='intbitset')
        if not results:
            continue
        journal_name = get_coll_i18nname(journal)
        if text_mode:
            print >> req, ""
            print >> req, journal_name
        else:
            # The heading is closed with a complete </h2> tag.
            ret.append("<h2>%s (%s)</h2>" % (escape(journal_name),
                                             len(results)))
            ret.append("<p><ul>")
        for recid in results:
            record = get_record(recid)
            doi = record_get_field_value(record, '024', '7', code='a')
            if text_mode:
                print >> req, "http://dx.doi.org/%s" % doi
            else:
                # NOTE(review): the title is inserted unescaped;
                # presumably it may contain markup -- confirm.
                title = record_get_field_value(record, '245', code='a')
                ret.append(li.format(escape(doi, True), escape(doi), title))
        if not text_mode:
            ret.append("</ul></p>")

    if text_mode:
        # Everything was already streamed to req.
        return ""
    return page(req=req, title=page_title, body='\n'.join(ret))
Ejemplo n.º 5
0
def csv(req):
    """Send a CSV attachment with article counts per nation and journal.

    The first line holds the column titles ('Nation' followed by the
    display name of every journal in CFG_JOURNALS).  Each following line
    holds one entry of _AFFILIATIONS and the number of search hits of
    its query in every journal.  Fields are joined with plain commas
    (no quoting).
    """
    req.content_type = 'text/csv; charset=utf-8'
    req.headers_out['content-disposition'] = 'attachment; filename=scoap3.csv'

    column_titles = ['Nation']
    for journal in CFG_JOURNALS:
        column_titles.append(get_coll_i18nname(journal))
    print >> req, ','.join(column_titles)

    for nation in _AFFILIATIONS:
        query = _build_query(nation)
        cells = [nation]
        for journal in CFG_JOURNALS:
            hits = perform_request_search(p=query, cc=journal, of='intbitset')
            cells.append(str(len(hits)))
        print >> req, ','.join(cells)
Ejemplo n.º 6
0
def us_affiliations_csv(req):
    """Send a semicolon-separated attachment of hit counts per affiliation.

    The first line is 'University', the display name of every journal in
    CFG_JOURNALS and 'sum'.  Each following line holds one entry of
    CFG_SELECTED_AFF (in sorted order), its number of search hits in
    every journal and the total over all journals.
    """
    req.content_type = 'text/csv; charset=utf-8'
    req.headers_out['content-disposition'] = 'attachment; filename=us_aff.csv'

    titles = ['University']
    titles.extend(get_coll_i18nname(journal) for journal in CFG_JOURNALS)
    titles.append('sum')
    print >> req, ';'.join(titles)

    for university in sorted(CFG_SELECTED_AFF):
        # The search expression is wrapped in slashes before being passed
        # as the search pattern.
        pattern = '/%s/' % (create_search_from_affiliation(university),)
        hit_counts = []
        for collection in CFG_JOURNALS:
            hit_counts.append(len(perform_request_search(p=pattern, c=collection)))
        fields = [university]
        fields.extend(str(hits) for hits in hit_counts)
        fields.append(str(sum(hit_counts)))
        print >> req, ';'.join(fields)
Ejemplo n.º 7
0
def main():
    """Print an HTML listing of all records of every journal.

    For each journal in CFG_JOURNALS a heading is printed, followed by
    either "None yet." or a list with one item per record showing the
    DOI (linked to dx.doi.org) and the title stripped of HTML markup.
    """
    item_template = '<li><a href="http://dx.doi.org/%s" target="_blank">%s</a>: %s</li>'
    for journal in CFG_JOURNALS:
        heading = get_coll_i18nname(journal)
        recids = get_collection_reclist(journal)
        print("<h2>%s</h2>" % escape(heading))
        if not recids:
            print("<p>None yet.</p>")
            continue
        print("<p><ul>")
        for recid in recids:
            record = get_record(recid)
            raw_title = record_get_field_value(record, '245', code='a')
            title = remove_html_markup(raw_title, remove_escaped_chars_p=False).strip()
            doi = record_get_field_value(record, '024', '7', code='a')
            print(item_template % (escape(doi, True), escape(doi), title))
        print("</ul></p>")
Ejemplo n.º 8
0
def us_affiliations_csv(req):
    """Send a semicolon-separated attachment of hit counts per affiliation.

    The first line is 'University', the display name of every journal in
    CFG_JOURNALS and 'sum'.  Each following line holds one entry of
    CFG_SELECTED_AFF (in sorted order), its number of search hits in
    every journal and the total over all journals.
    """
    req.content_type = 'text/csv; charset=utf-8'
    req.headers_out['content-disposition'] = 'attachment; filename=us_aff.csv'

    journal_names = [get_coll_i18nname(journal) for journal in CFG_JOURNALS]
    print >> req, ';'.join(['University'] + journal_names + ['sum'])

    for university in sorted(CFG_SELECTED_AFF):
        # The search expression is wrapped in slashes before being passed
        # as the search pattern.
        pattern = '/%s/' % (create_search_from_affiliation(university),)
        total = 0
        fields = [university]
        for collection in CFG_JOURNALS:
            hits = len(perform_request_search(p=pattern, c=collection))
            fields.append(str(hits))
            total += hits
        fields.append(str(total))
        print >> req, ';'.join(fields)
Ejemplo n.º 9
0
def late(req):
    """Stream an HTML report of record arrival dates versus DOI dates.

    For every journal in CFG_JOURNALS one table is written to ``req`` with
    a row per record: the DOI (linked to dx.doi.org), the title, the
    ``creation_date`` found in the ``doi`` table and the ``creation_date``
    found in ``bibrec``.

    Row background colours:
      * ``#eee``    -- no row exists in the ``doi`` table for this DOI;
      * ``#66FF00`` -- the record was created before the DOI row;
      * ``#FF6600`` -- the record was created less than one day after it;
      * ``#FF0000`` -- the record was created one day or more after it.

    Nothing is returned; all output is printed to ``req``.
    """
    req.content_type = "text/html"
    print >> req, pageheaderonly("Late journals", req=req)

    th = ("<tr><th>DOI</th><th>Title</th><th>DOI registration</th>"
          "<th>Arrival in SCOAP3</th></tr>")
    # Each DOI anchor is closed with </a> so that the remaining cells of
    # the row are not swallowed by the link.
    tr = ("<tr style='background-color: {0};'><td>"
          "<a href='http://dx.doi.org/{1}' target='_blank'>{2}</a></td>"
          "<td>{3}</td><td>{4}</td><td>{5}</td></tr>")

    sql_bibrec = "SELECT creation_date FROM bibrec WHERE id=%s"
    sql_doi = "SELECT creation_date FROM doi WHERE doi=%s"

    for journal in CFG_JOURNALS:
        print >> req, "<h2>%s</h2>" % escape(get_coll_i18nname(journal))
        results = get_collection_reclist(journal)
        print >> req, "<table>"
        print >> req, th
        for recid in results:
            creation_date = run_sql(sql_bibrec, (recid, ))[0][0]
            record = get_record(recid)
            doi = record_get_field_value(record, '024', '7', code='a')
            # NOTE(review): the title is emitted unescaped; presumably it
            # may legitimately carry markup -- confirm before escaping it.
            title = record_get_field_value(record, '245', code='a')
            doi_rows = run_sql(sql_doi, (doi, ))
            if doi_rows:
                doi_date = doi_rows[0][0]
                # Whole days between DOI row creation and record creation.
                delay_days = (creation_date - doi_date).days
                if delay_days < 0:
                    background = "#66FF00"
                elif delay_days < 1:
                    background = "#FF6600"
                else:
                    background = "#FF0000"
            else:
                # Unknown DOI: neutral colour and an empty date cell.
                doi_date = ''
                background = "#eee"
            print >> req, tr.format(background,
                                    escape(doi, True),
                                    escape(doi),
                                    title,
                                    doi_date,
                                    creation_date)
        print >> req, "</table>"
Ejemplo n.º 10
0
def display_collection(req, c, aas, verbose, ln, em=""):
    """Display search interface page for collection c by looking
    in the collection cache.

    Parameters (as used below):
      req     -- request object; ``argd`` and ``content_type`` are set.
      c       -- requested collection name.
      aas     -- forwarded to perform_display_collection(); the value -1
                 suppresses the page title.
      verbose -- forwarded to create_error_box() on internal errors.
      ln      -- language code used for translations and names.
      em      -- comma-separated names of the page elements to show; the
                 empty string shows everything.

    Returns the rendered page (collection page, "not found" page,
    "not authorized" page or internal error page).  For header-only
    requests to an unknown collection apache.SERVER_RETURN is raised
    with HTTP_NOT_FOUND.
    """
    _ = gettext_set_language(ln)

    req.argd = drop_default_urlargd({'aas': aas, 'verbose': verbose, 'ln': ln, 'em' : em},
                                    search_interface_default_urlargd)

    if em != "":
        em = em.split(",")
    # get user ID:
    try:
        uid = getUid(req)
        user_preferences = {}
        if uid == -1:
            return page_not_authorized(req, "../",
                text="You are not authorized to view this collection",
                                       navmenuid='search')
        elif uid > 0:
            user_preferences = get_user_preferences(uid)
    except Error:
        register_exception(req=req, alert_admin=True)
        return page(title=_("Internal Error"),
                    body=create_error_box(req, verbose=verbose, ln=ln),
                    description="%s - Internal Error" % CFG_SITE_NAME,
                    keywords="%s, Internal Error" % CFG_SITE_NAME,
                    language=ln,
                    req=req,
                    navmenuid='search')

    # deduce collection id:
    normalised_name = get_coll_normalised_name(c)
    colID = get_colID(normalised_name)
    if type(colID) is not int:
        # `c` is escaped here exactly as it is for the title and the
        # description below, since it ends up inside HTML.
        page_body = '<p>' + (_("Sorry, collection %s does not seem to exist.") % ('<strong>' + cgi.escape(str(c)) + '</strong>')) + '</p>'
        # Append (do not overwrite) so that both paragraphs are shown.
        page_body += '<p>' + (_("You may want to start browsing from %s.") % ('<a href="' + CFG_SITE_URL + '?ln=' + ln + '">' + get_coll_i18nname(CFG_SITE_NAME, ln) + '</a>')) + '</p>'
        if req.header_only:
            raise apache.SERVER_RETURN(apache.HTTP_NOT_FOUND)
        return page(title=_("Collection %s Not Found") % cgi.escape(c),
                    body=page_body,
                    description=(CFG_SITE_NAME + ' - ' + _("Not found") + ': ' + cgi.escape(str(c))),
                    keywords="%s" % CFG_SITE_NAME,
                    uid=uid,
                    language=ln,
                    req=req,
                    navmenuid='search')

    if normalised_name != c:
        redirect_to_url(req, normalised_name, apache.HTTP_MOVED_PERMANENTLY)

    # start display:
    req.content_type = "text/html"
    req.send_http_header()

    c_body, c_navtrail, c_portalbox_lt, c_portalbox_rt, c_portalbox_tp, c_portalbox_te, \
        c_last_updated = perform_display_collection(colID, c, aas, ln, em,
                                            user_preferences.get('websearch_helpbox', 1))

    if em == "" or EM_REPOSITORY["body"] in em:
        # Best effort: fall back to an empty title if the name lookup fails.
        try:
            title = get_coll_i18nname(c, ln)
        except:
            title = ""
    else:
        title = ""
    show_title_p = True
    body_css_classes = []
    if c == CFG_SITE_NAME:
        # Do not display title on home collection
        show_title_p = False
        body_css_classes.append('home')

    if len(collection_reclist_cache.cache.keys()) == 1:
        # if there is only one collection defined, do not print its
        # title on the page as it would be displayed repetitively.
        show_title_p = False

    if aas == -1:
        show_title_p = False

    if CFG_INSPIRE_SITE == 1:
        # INSPIRE should never show title, but instead use css to
        # style collections
        show_title_p = False
        body_css_classes.append(nmtoken_from_string(c))

    # RSS:
    rssurl = CFG_SITE_URL + '/rss'
    rssurl_params = []
    if c != CFG_SITE_NAME:
        rssurl_params.append('cc=' + quote(c))
    if ln != CFG_SITE_LANG and \
           c in CFG_WEBSEARCH_RSS_I18N_COLLECTIONS:
        rssurl_params.append('ln=' + ln)

    if rssurl_params:
        rssurl += '?' + '&amp;'.join(rssurl_params)

    if 'hb' in CFG_WEBSEARCH_USE_MATHJAX_FOR_FORMATS:
        metaheaderadd = get_mathjax_header(req.is_https())
    else:
        metaheaderadd = ''

    return page(title=title,
                body=c_body,
                navtrail=c_navtrail,
                description="%s - %s" % (CFG_SITE_NAME, c),
                keywords="%s, %s" % (CFG_SITE_NAME, c),
                metaheaderadd=metaheaderadd,
                uid=uid,
                language=ln,
                req=req,
                cdspageboxlefttopadd=c_portalbox_lt,
                cdspageboxrighttopadd=c_portalbox_rt,
                titleprologue=c_portalbox_tp,
                titleepilogue=c_portalbox_te,
                lastupdated=c_last_updated,
                navmenuid='search',
                rssurl=rssurl,
                body_css_classes=body_css_classes,
                show_title_p=show_title_p,
                show_header=em == "" or EM_REPOSITORY["header"] in em,
                show_footer=em == "" or EM_REPOSITORY["footer"] in em)
Ejemplo n.º 11
0
def display_collection(req, c, aas, verbose, ln, em=""):
    """Display search interface page for collection c by looking
    in the collection cache.

    Parameters (as used below):
      req     -- request object; ``argd`` and ``content_type`` are set.
      c       -- requested collection name.
      aas     -- forwarded to perform_display_collection(); the value -1
                 suppresses the page title.
      verbose -- forwarded to create_error_box() on internal errors.
      ln      -- language code used for translations and names.
      em      -- comma-separated names of the page elements to show; the
                 empty string shows everything.

    Returns the rendered page (collection page, "not found" page,
    "not authorized" page or internal error page).  For header-only
    requests to an unknown collection apache.SERVER_RETURN is raised
    with HTTP_NOT_FOUND.
    """
    _ = gettext_set_language(ln)

    req.argd = drop_default_urlargd({'aas': aas, 'verbose': verbose, 'ln': ln, 'em' : em},
                                    search_interface_default_urlargd)

    if em != "":
        em = em.split(",")
    # get user ID:
    try:
        uid = getUid(req)
        user_preferences = {}
        if uid == -1:
            return page_not_authorized(req, "../",
                text="You are not authorized to view this collection",
                                       navmenuid='search')
        elif uid > 0:
            user_preferences = get_user_preferences(uid)
    except Error:
        register_exception(req=req, alert_admin=True)
        return page(title=_("Internal Error"),
                    body=create_error_box(req, verbose=verbose, ln=ln),
                    description="%s - Internal Error" % CFG_SITE_NAME,
                    keywords="%s, Internal Error" % CFG_SITE_NAME,
                    language=ln,
                    req=req,
                    navmenuid='search')
    # start display:
    # NOTE(review): the HTTP header is sent before the collection is
    # looked up, so the HTTP_NOT_FOUND raised below may come too late to
    # change the status -- confirm.
    req.content_type = "text/html"
    req.send_http_header()
    # deduce collection id:
    colID = get_colID(get_coll_normalised_name(c))
    if type(colID) is not int:
        # `c` is escaped here exactly as it is for the title and the
        # description below, since it ends up inside HTML.
        page_body = '<p>' + (_("Sorry, collection %s does not seem to exist.") % ('<strong>' + cgi.escape(str(c)) + '</strong>')) + '</p>'
        # Append (do not overwrite) so that both paragraphs are shown.
        page_body += '<p>' + (_("You may want to start browsing from %s.") % ('<a href="' + CFG_SITE_URL + '?ln=' + ln + '">' + get_coll_i18nname(CFG_SITE_NAME, ln) + '</a>')) + '</p>'
        if req.header_only:
            raise apache.SERVER_RETURN(apache.HTTP_NOT_FOUND)
        return page(title=_("Collection %s Not Found") % cgi.escape(c),
                    body=page_body,
                    description=(CFG_SITE_NAME + ' - ' + _("Not found") + ': ' + cgi.escape(str(c))),
                    keywords="%s" % CFG_SITE_NAME,
                    uid=uid,
                    language=ln,
                    req=req,
                    navmenuid='search')

    c_body, c_navtrail, c_portalbox_lt, c_portalbox_rt, c_portalbox_tp, c_portalbox_te, \
        c_last_updated = perform_display_collection(colID, c, aas, ln, em,
                                            user_preferences.get('websearch_helpbox', 1))

    if em == "" or EM_REPOSITORY["body"] in em:
        # Best effort: fall back to an empty title if the name lookup fails.
        try:
            title = get_coll_i18nname(c, ln)
        except:
            title = ""
    else:
        title = ""
    show_title_p = True
    body_css_classes = []
    if c == CFG_SITE_NAME:
        # Do not display title on home collection
        show_title_p = False
        body_css_classes.append('home')

    if len(collection_reclist_cache.cache.keys()) == 1:
        # if there is only one collection defined, do not print its
        # title on the page as it would be displayed repetitively.
        show_title_p = False

    if aas == -1:
        show_title_p = False

    if CFG_INSPIRE_SITE == 1:
        # INSPIRE should never show title, but instead use css to
        # style collections
        show_title_p = False
        body_css_classes.append(nmtoken_from_string(c))

    # RSS:
    rssurl = CFG_SITE_URL + '/rss'
    rssurl_params = []
    if c != CFG_SITE_NAME:
        rssurl_params.append('cc=' + quote(c))
    if ln != CFG_SITE_LANG and \
           c in CFG_WEBSEARCH_RSS_I18N_COLLECTIONS:
        rssurl_params.append('ln=' + ln)

    if rssurl_params:
        rssurl += '?' + '&amp;'.join(rssurl_params)

    if 'hb' in CFG_WEBSEARCH_USE_MATHJAX_FOR_FORMATS:
        metaheaderadd = get_mathjax_header(req.is_https())
    else:
        metaheaderadd = ''

    return page(title=title,
                body=c_body,
                navtrail=c_navtrail,
                description="%s - %s" % (CFG_SITE_NAME, c),
                keywords="%s, %s" % (CFG_SITE_NAME, c),
                metaheaderadd=metaheaderadd,
                uid=uid,
                language=ln,
                req=req,
                cdspageboxlefttopadd=c_portalbox_lt,
                cdspageboxrighttopadd=c_portalbox_rt,
                titleprologue=c_portalbox_tp,
                titleepilogue=c_portalbox_te,
                lastupdated=c_last_updated,
                navmenuid='search',
                rssurl=rssurl,
                body_css_classes=body_css_classes,
                show_title_p=show_title_p,
                show_header=em == "" or EM_REPOSITORY["header"] in em,
                show_footer=em == "" or EM_REPOSITORY["footer"] in em)
Ejemplo n.º 12
0
 def name_ln(self):
     """Return the i18n name of this collection for the language ``g.ln``.

     The name is looked up from ``self.name`` with get_coll_i18nname()
     and the result is decoded from UTF-8.
     """
     # Imported locally, as in the rest of this class' helpers.
     from invenio.search_engine import get_coll_i18nname
     localized_name = get_coll_i18nname(self.name, g.ln)
     return localized_name.decode('utf-8')
    def answer(self, req, user_info, of, cc, colls_to_search, p, f, search_units, ln):
        """
        Answer question given by context.

        Return (relevance, html_string) where relevance is a number
        from 0 to 100 indicating how relevant to the question the
        answer is (see C{CFG_WEBSEARCH_SERVICE_MAX_SERVICE_ANSWER_RELEVANCE}
        for details), and html_string being a formatted answer.
        (0, '') is returned when the service does not apply or when no
        collection matches.
        """
        from invenio.search_engine import \
             get_permitted_restricted_collections, \
             get_coll_i18nname, \
             collection_i18nname_cache, \
             collection_restricted_p
        _ = gettext_set_language(ln)
        no_answer = (0, '')
        # TODO: search in hosted collection names too
        # TODO: ignore unattached trees
        # TODO: use synonyms

        # Not applicable for field searches, when the feature is disabled
        # (< 0), or when it is limited (== 0) to the home collection.
        if f:
            return no_answer
        if CFG_WEBSEARCH_COLLECTION_NAMES_SEARCH < 0:
            return no_answer
        if CFG_WEBSEARCH_COLLECTION_NAMES_SEARCH == 0 and cc != CFG_SITE_NAME:
            return no_answer

        # Stem the search units; units bound to another field are dropped.
        words = []
        for unit in search_units:
            if unit[2] in ('', 'collection'):
                words.append(stem(unit[1], ln))
        if not words:
            return no_answer

        permitted_restricted_collections = get_permitted_restricted_collections(user_info)
        cache = self.get_data_cache()

        # Count, per collection, how many of the words hit it.
        hits_per_collection = {}
        for word in words:
            if CFG_CERN_SITE and word == 'cern':
                # This keyword is useless here...
                continue
            for coll in cache.get(word.lower(), []):
                if collection_restricted_p(coll) and \
                       coll not in permitted_restricted_collections:
                    # Skip restricted collections the user cannot access
                    continue
                hits_per_collection[coll] = hits_per_collection.get(coll, 0) + 1

        if not hits_per_collection:
            return no_answer

        # Best match first: by hit count, then by collection name.
        ranked = sorted(hits_per_collection.items(),
                        key=lambda entry: (entry[1], entry[0]),
                        reverse=True)

        # NOTE(review): the link always carries ln=en rather than the `ln`
        # argument -- confirm this is intended.
        matching_collections_names = []
        for coll, _score in ranked:
            url = CFG_SITE_URL + '/collection/' + urllib.quote(coll, safe='') + '?ln=en'
            matching_collections_names.append((get_coll_i18nname(coll, ln, False), url))

        best_coll, best_score = ranked[0]
        best_coll_words = whitespace_re.split(best_coll)

        overlap = 100 * float(2 * best_score) / float(len(best_coll_words) + len(words))
        relevance = min(100, max(0, overlap - 10))

        lowered_query = p.lower()
        asks_for_submission = ('submit' in lowered_query) or (_('submit') in lowered_query)
        if asks_for_submission and \
               not (('submit' in best_coll_words) or (_('submit') in best_coll_words)):
            # User is probably looking for a submission. Decrease relevance
            relevance = max(0, relevance - 30)

        html = self.display_answer_helper(matching_collections_names, ln)
        return (relevance, html)
Ejemplo n.º 14
0
def _read_collection_cache_file(colID, filename):
    """Return the content of a cached page fragment of collection colID,
    or "" when the file cannot be opened or read."""
    path = "%s/collections/%d/%s" % (CFG_CACHEDIR, colID, filename)
    try:
        # `with` closes the file even when read() fails.
        with open(path, "r") as filedesc:
            return filedesc.read()
    except (IOError, OSError):
        return ""


def display_collection(req, c, aas, verbose, ln):
    """Display search interface page for collection c by looking
    in the collection cache.

    Parameters (as used below):
      req     -- request object; ``argd`` and ``content_type`` are set.
      c       -- requested collection name.
      aas     -- search interface variant; falls back to 0 (Simple
                 Search) when no cached body exists for it, and the
                 value -1 suppresses the page title.
      verbose -- forwarded to create_error_box() on internal errors.
      ln      -- language code used for translations and cache files.

    Returns the rendered page (collection page, "not found" page,
    "not authorized" page or internal error page).  For header-only
    requests to an unknown collection apache.SERVER_RETURN is raised
    with HTTP_NOT_FOUND.
    """
    _ = gettext_set_language(ln)

    req.argd = drop_default_urlargd({
        'aas': aas,
        'verbose': verbose,
        'ln': ln
    }, search_interface_default_urlargd)

    # get user ID:
    try:
        uid = getUid(req)
        user_preferences = {}
        if uid == -1:
            return page_not_authorized(
                req,
                "../",
                text="You are not authorized to view this collection",
                navmenuid='search')
        elif uid > 0:
            user_preferences = get_user_preferences(uid)
    except Error:
        register_exception(req=req, alert_admin=True)
        return page(title=_("Internal Error"),
                    body=create_error_box(req, verbose=verbose, ln=ln),
                    description="%s - Internal Error" % CFG_SITE_NAME,
                    keywords="%s, Internal Error" % CFG_SITE_NAME,
                    language=ln,
                    req=req,
                    navmenuid='search')
    # start display:
    # NOTE(review): the HTTP header is sent before the collection is
    # looked up, so the HTTP_NOT_FOUND raised below may come too late to
    # change the status -- confirm.
    req.content_type = "text/html"
    req.send_http_header()
    # deduce collection id:
    colID = get_colID(get_coll_normalised_name(c))
    if type(colID) is not int:
        # `c` is escaped here exactly as it is for the title and the
        # description below, since it ends up inside HTML.
        page_body = '<p>' + (
            _("Sorry, collection %s does not seem to exist.") %
            ('<strong>' + cgi.escape(str(c)) + '</strong>')) + '</p>'
        # Append (do not overwrite) so that both paragraphs are shown.
        page_body += '<p>' + (
            _("You may want to start browsing from %s.") %
            ('<a href="' + CFG_SITE_URL + '?ln=' + ln + '">' +
             get_coll_i18nname(CFG_SITE_NAME, ln) + '</a>')) + '</p>'
        if req.header_only:
            raise apache.SERVER_RETURN(apache.HTTP_NOT_FOUND)
        return page(title=_("Collection %s Not Found") % cgi.escape(c),
                    body=page_body,
                    description=(CFG_SITE_NAME + ' - ' + _("Not found") +
                                 ': ' + cgi.escape(str(c))),
                    keywords="%s" % CFG_SITE_NAME,
                    uid=uid,
                    language=ln,
                    req=req,
                    navmenuid='search')
    # wash `aas' argument:
    if not os.path.exists("%s/collections/%d/body-as=%d-ln=%s.html" % \
                          (CFG_CACHEDIR, colID, aas, ln)):
        # nonexistent `aas' asked for, fall back to Simple Search:
        aas = 0
    # display collection interface page; every fragment that cannot be
    # read is replaced by an empty string:
    c_navtrail = _read_collection_cache_file(
        colID, "navtrail-as=%d-ln=%s.html" % (aas, ln))
    c_body = _read_collection_cache_file(
        colID, "body-as=%d-ln=%s.html" % (aas, ln))
    c_portalbox_tp = _read_collection_cache_file(
        colID, "portalbox-tp-ln=%s.html" % (ln,))
    c_portalbox_te = _read_collection_cache_file(
        colID, "portalbox-te-ln=%s.html" % (ln,))
    c_portalbox_lt = _read_collection_cache_file(
        colID, "portalbox-lt-ln=%s.html" % (ln,))
    # show help boxes (usually located in "tr", "top right")
    # if users have not banned them in their preferences:
    c_portalbox_rt = ""
    if user_preferences.get('websearch_helpbox', 1) > 0:
        c_portalbox_rt = _read_collection_cache_file(
            colID, "portalbox-rt-ln=%s.html" % (ln,))
    c_last_updated = _read_collection_cache_file(
        colID, "last-updated-ln=%s.html" % (ln,))
    # Best effort: fall back to an empty title if the name lookup fails.
    try:
        title = get_coll_i18nname(c, ln)
    except:
        title = ""

    show_title_p = True
    body_css_classes = []
    if c == CFG_SITE_NAME:
        # Do not display title on home collection
        show_title_p = False
        body_css_classes.append('home')

    if len(collection_reclist_cache.cache.keys()) == 1:
        # if there is only one collection defined, do not print its
        # title on the page as it would be displayed repetitively.
        show_title_p = False

    if aas == -1:
        show_title_p = False

    if CFG_INSPIRE_SITE == 1:
        # INSPIRE should never show title, but instead use css to
        # style collections
        show_title_p = False
        body_css_classes.append(nmtoken_from_string(c))

    # RSS:
    rssurl = CFG_SITE_URL + '/rss'
    rssurl_params = []
    if c != CFG_SITE_NAME:
        rssurl_params.append('cc=' + quote(c))
    if ln != CFG_SITE_LANG and \
           c in CFG_WEBSEARCH_RSS_I18N_COLLECTIONS:
        rssurl_params.append('ln=' + ln)

    if rssurl_params:
        rssurl += '?' + '&amp;'.join(rssurl_params)

    if 'hb' in CFG_WEBSEARCH_USE_MATHJAX_FOR_FORMATS:
        metaheaderadd = get_mathjax_header(req.is_https())
    else:
        metaheaderadd = ''

    return page(title=title,
                body=c_body,
                navtrail=c_navtrail,
                description="%s - %s" % (CFG_SITE_NAME, c),
                keywords="%s, %s" % (CFG_SITE_NAME, c),
                metaheaderadd=metaheaderadd,
                uid=uid,
                language=ln,
                req=req,
                cdspageboxlefttopadd=c_portalbox_lt,
                cdspageboxrighttopadd=c_portalbox_rt,
                titleprologue=c_portalbox_tp,
                titleepilogue=c_portalbox_te,
                lastupdated=c_last_updated,
                navmenuid='search',
                rssurl=rssurl,
                body_css_classes=body_css_classes,
                show_title_p=show_title_p)
    def answer(self, req, user_info, of, cc, colls_to_search, p, f,
               search_units, ln):
        """
        Answer question given by context.

        Return (relevance, html_string) where relevance is a number
        from 0 to 100 indicating how relevant to the question the
        answer is (see C{CFG_WEBSEARCH_SERVICE_MAX_SERVICE_ANSWER_RELEVANCE}
        for details), and html_string being a formatted answer.
        (0, '') is returned when the service does not apply or when no
        collection matches.
        """
        from invenio.search_engine import \
             get_permitted_restricted_collections, \
             get_coll_i18nname, \
             collection_i18nname_cache, \
             collection_restricted_p
        _ = gettext_set_language(ln)
        no_answer = (0, '')
        # TODO: search in hosted collection names too
        # TODO: ignore unattached trees
        # TODO: use synonyms

        # Not applicable for field searches, when the feature is disabled
        # (< 0), or when it is limited (== 0) to the home collection.
        if f:
            return no_answer
        if CFG_WEBSEARCH_COLLECTION_NAMES_SEARCH < 0:
            return no_answer
        if CFG_WEBSEARCH_COLLECTION_NAMES_SEARCH == 0 and cc != CFG_SITE_NAME:
            return no_answer

        # Stem the search units; units bound to another field are dropped.
        words = []
        for unit in search_units:
            if unit[2] in ('', 'collection'):
                words.append(stem(unit[1], ln))
        if not words:
            return no_answer

        permitted_restricted_collections = get_permitted_restricted_collections(
            user_info)
        cache = self.get_data_cache()

        # Count, per collection, how many of the words hit it.
        hits_per_collection = {}
        for word in words:
            if CFG_CERN_SITE and word == 'cern':
                # This keyword is useless here...
                continue
            for coll in cache.get(word.lower(), []):
                if collection_restricted_p(coll) and \
                       coll not in permitted_restricted_collections:
                    # Skip restricted collections the user cannot access
                    continue
                hits_per_collection[coll] = hits_per_collection.get(coll, 0) + 1

        if not hits_per_collection:
            return no_answer

        # Best match first: by hit count, then by collection name.
        ranked = sorted(hits_per_collection.items(),
                        key=lambda entry: (entry[1], entry[0]),
                        reverse=True)

        # NOTE(review): the link always carries ln=en rather than the `ln`
        # argument -- confirm this is intended.
        matching_collections_names = []
        for coll, _score in ranked:
            url = CFG_SITE_URL + '/collection/' + urllib.quote(coll, safe='') + '?ln=en'
            matching_collections_names.append(
                (get_coll_i18nname(coll, ln, False), url))

        best_coll, best_score = ranked[0]
        best_coll_words = whitespace_re.split(best_coll)

        overlap = (100 * float(2 * best_score) /
                   float(len(best_coll_words) + len(words)))
        relevance = min(100, max(0, overlap - 10))

        lowered_query = p.lower()
        asks_for_submission = ('submit' in lowered_query) or (_('submit') in lowered_query)
        if asks_for_submission and \
               not (('submit' in best_coll_words) or (_('submit') in best_coll_words)):
            # User is probably looking for a submission. Decrease relevance
            relevance = max(0, relevance - 30)

        html = self.display_answer_helper(matching_collections_names, ln)
        return (relevance, html)