Esempio n. 1
0
def addexistingportalbox(req, colID, ln=CFG_SITE_LANG, pbxID=-1, score=0, position='', sel_ln='', callback='yes', confirm=0):
    """Admin entry point wrapping wsc.perform_addexistingportalbox in a page.

    Resolves the user id from the request, checks the 'cfgwebsearch'
    authorization and then renders either the "Edit Collection" page or an
    "Authorization failure" page.

    Parameters:
     - req: request object; passed to getUid, check_user and page
     - colID, ln, pbxID, score, position, sel_ln, callback, confirm:
       forwarded unchanged to wsc.perform_addexistingportalbox
       (ln is additionally used as the page language)

    Returns:
     - the value of page(...) in the normal case
     - the value of error_page('Error', req) when getUid fails
    """
    navtrail_previous_links = wsc.getnavtrail() + """&gt; <a class="navtrail" href="%s/admin/websearch/websearchadmin.py/">WebSearch Admin</a> """ % (CFG_SITE_URL)

    try:
        uid = getUid(req)
    except Exception:
        # Only ordinary errors become an error page; SystemExit and
        # KeyboardInterrupt are no longer swallowed here.
        return error_page('Error', req)

    auth = check_user(req,'cfgwebsearch')
    # A falsy first element of the check_user result means "authorized".
    if not auth[0]:
        return page(title="Edit Collection",
                body=wsc.perform_addexistingportalbox(colID=colID,
                                                      ln=ln,
                                                      pbxID=pbxID,
                                                      score=score,
                                                      position=position,
                                                      sel_ln=sel_ln,
                                                      callback=callback,
                                                      confirm=confirm),
                uid=uid,
                language=ln,
                req=req,
                navtrail = navtrail_previous_links,
                lastupdated=__lastupdated__)
    else:
        return page(title='Authorization failure',
                uid=uid,
                body=adderrorbox('try to login first',
                                     datalist=["""You are not a user authorized to perform admin tasks, try to
                                     <a href="%s/youraccount/login?referer=%s/admin/websearch/websearchadmin.py/">login</a> with another account.""" % (CFG_SITE_SECURE_URL, CFG_SITE_URL)]),
                navtrail= navtrail_previous_links,
                lastupdated=__lastupdated__)
Esempio n. 2
0
def addexistingportalbox(req, colID, ln=CFG_SITE_LANG, pbxID=-1, score=0, position='', sel_ln='', callback='yes', confirm=0):
    """Admin entry point wrapping wsc.perform_addexistingportalbox in a page.

    Resolves the user id from the request, checks the 'cfgwebsearch'
    authorization and then renders either the "Edit Collection" page or an
    "Authorization failure" page.

    Parameters:
     - req: request object; passed to getUid, check_user and page
     - colID, ln, pbxID, score, position, sel_ln, callback, confirm:
       forwarded unchanged to wsc.perform_addexistingportalbox
       (ln is additionally used as the page language)

    Returns:
     - the value of page(...) in the normal case
     - the value of error_page('Error', req) when getUid fails
    """
    navtrail_previous_links = wsc.getnavtrail() + """&gt; <a class="navtrail" href="%s/admin/websearch/websearchadmin.py/">WebSearch Admin</a> """ % (CFG_SITE_URL)

    try:
        uid = getUid(req)
    except Exception:
        # Only ordinary errors become an error page; SystemExit and
        # KeyboardInterrupt are no longer swallowed here.
        return error_page('Error', req)

    auth = check_user(req,'cfgwebsearch')
    # A falsy first element of the check_user result means "authorized".
    if not auth[0]:
        return page(title="Edit Collection",
                body=wsc.perform_addexistingportalbox(colID=colID,
                                                      ln=ln,
                                                      pbxID=pbxID,
                                                      score=score,
                                                      position=position,
                                                      sel_ln=sel_ln,
                                                      callback=callback,
                                                      confirm=confirm),
                uid=uid,
                language=ln,
                req=req,
                navtrail = navtrail_previous_links,
                lastupdated=__lastupdated__)
    else:
        return page(title='Authorization failure',
                uid=uid,
                body=adderrorbox('try to login first',
                                     datalist=["""You are not a user authorized to perform admin tasks, try to
                                     <a href="%s/youraccount/login?referer=%s/admin/websearch/websearchadmin.py/">login</a> with another account.""" % (CFG_SITE_SECURE_URL, CFG_SITE_URL)]),
                navtrail= navtrail_previous_links,
                lastupdated=__lastupdated__)
Esempio n. 3
0
def rank_records(rank_method_code, rank_limit_relevance, hitset_global, pattern=[], verbose=0, field='', rg=None, jrec=None):
    """Rank the given hits with the requested rank method.

    Parameters:
     - rank_method_code: e.g. `jif' or `sbr' (word frequency vector model)
     - rank_limit_relevance: e.g. `23' for `nbc' (number of citations)
                             or `0.10' for `vec'
     - hitset_global: search engine hits; a deep copy of it is what gets
                      handed to the ranking functions
     - pattern: search engine query or record ID (the type is checked via
                the "recid:" prefix of pattern[0]); only read here, never
                mutated, so the shared default list is safe
     - verbose: verbose level; values > 0 add debug text to the output
     - field: forwarded to the Solr/Xapian similarity functions
     - rg, jrec: summed into the amount of ranked results requested from
                 Solr/Xapian; a falsy rg falls back to
                 CFG_WEBSEARCH_DEF_RECORDS_IN_GROUPS, a falsy jrec to 0

    Output (5-tuple):
     - list of records (None when nothing was ranked)
     - list of rank values (None when nothing was ranked)
     - prefix
     - postfix
     - verbose_output
    """
    voutput = ""
    configcreated = ""

    starttime = time.time()
    # Elapsed-time placeholders (only reported when verbose > 0). They start
    # at zero so that a skipped mapping step is not shown as a negative time.
    afterfind = 0.0
    aftermap = 0.0

    try:
        hitset = copy.deepcopy(hitset_global) #we are receiving a global hitset
        if 'methods' not in globals():
            create_rnkmethod_cache()

        function = methods[rank_method_code]["function"]
        #we get 'citation' method correctly here
        func_object = globals().get(function)

        if verbose > 0:
            voutput += "function: %s <br/> " % function
            voutput += "pattern:  %s <br/>" % str(pattern)

        if func_object and pattern and pattern[0][0:6] == "recid:" and function == "word_similarity":
            result = find_similar(rank_method_code, pattern[0][6:], hitset, rank_limit_relevance, verbose, methods)
        elif rank_method_code == "citation":
            #we get rank_method_code correctly here. pattern[0] is the search word - not used by find_cit
            p = ""
            if pattern and pattern[0]:
                p = pattern[0][6:]
            result = find_citations(rank_method_code, p, hitset, verbose)

        elif func_object:
            if function == "word_similarity":
                result = func_object(rank_method_code, pattern, hitset, rank_limit_relevance, verbose, methods)
            elif function in ("word_similarity_solr", "word_similarity_xapian"):
                if not rg:
                    rg = CFG_WEBSEARCH_DEF_RECORDS_IN_GROUPS
                if not jrec:
                    jrec = 0
                ranked_result_amount = rg + jrec
                if verbose > 0:
                    voutput += "Ranked result amount: %s<br/><br/>" % ranked_result_amount
                    voutput += "field: %s<br/>" % field

                if function == "word_similarity_solr":
                    if verbose > 0:
                        voutput += "In Solr part:<br/>"
                    result = word_similarity_solr(pattern, hitset, methods[rank_method_code], verbose, field, ranked_result_amount)
                else:
                    # Only "word_similarity_xapian" is left in this branch.
                    if verbose > 0:
                        voutput += "In Xapian part:<br/>"
                    result = word_similarity_xapian(pattern, hitset, methods[rank_method_code], verbose, field, ranked_result_amount)
            else:
                result = func_object(rank_method_code, pattern, hitset, rank_limit_relevance, verbose)
        else:
            result = rank_by_method(rank_method_code, pattern, hitset, rank_limit_relevance, verbose)
    except Exception as e:
        register_exception()
        # The verbose slot stays empty: voutput is prepended exactly once
        # further down, so storing it here too would duplicate the log.
        result = (None, "", adderrorbox("An error occured when trying to rank the search result "+rank_method_code, ["Unexpected error: %s<br />" % (e,)]), "")

    afterfind = time.time() - starttime

    if result[0] and result[1]: #split into two lists for search_engine
        # Real lists (not lazy map objects) so the result can be indexed and
        # iterated more than once on any Python version.
        results_similar_recIDs = [x[0] for x in result[0]]
        results_similar_relevances = [x[1] for x in result[0]]
        result = (results_similar_recIDs, results_similar_relevances, result[1], result[2], "%s" % configcreated + result[3])
        aftermap = time.time() - starttime
    else:
        result = (None, None, result[1], result[2], result[3])

    #add stuff from here into voutput from result
    tmp = voutput+result[4]
    if verbose > 0:
        tmp += "<br/>Elapsed time after finding: "+str(afterfind)+"\nElapsed after mapping: "+str(aftermap)
    result = (result[0],result[1],result[2],result[3],tmp)

    return result
Esempio n. 4
0
def rank_records(rank_method_code, rank_limit_relevance, hitset, related_to=[], verbose=0, field='', rg=None, jrec=None):
    """Sorts given records or related records according to given method

       Parameters:
        - rank_method_code: Sort records using this method
                            e.g. `jif' or `sbr' (word frequency vector model)
        - rank_limit_relevance: A parameter given to the sorting method
                                e.g. `23' for `nbc' (number of citations)
                                     or `0.10' for `vec'
        - hitset: records to sort; an intbitset built from it is what is
                  handed to the ranking functions
        - related_to: if specified, instead of sorting given records,
                      we first fetch the related records ("related" being
                      defined by the method), then we sort these related
                      records. Only read here, never mutated, so the shared
                      default list is safe.
        - verbose: verbose level; values > 0 add debug text to the output
        - field: forwarded to the Solr/Xapian similarity functions
        - rg, jrec: summed into the amount of ranked results requested from
                    Solr/Xapian; a falsy rg falls back to
                    CFG_WEBSEARCH_DEF_RECORDS_IN_GROUPS, a falsy jrec to 0

       Output (5-tuple):
       - list of records (None when nothing was ranked)
       - list of rank values (None when nothing was ranked)
       - prefix
       - postfix
       - verbose_output
    """
    voutput = ""
    configcreated = ""

    starttime = time.time()
    # Elapsed-time placeholders (only reported when verbose > 0). They start
    # at zero so that a skipped mapping step is not shown as a negative time.
    afterfind = 0.0
    aftermap = 0.0

    try:
        # Work on our own intbitset built from the caller's hitset
        hitset = intbitset(hitset)

        # Build the rank method cache only when it is missing or empty. The
        # lookups below read METHODS, so that is the name to test; testing
        # 'methods' never saw the cache and rebuilt it on every call.
        if not globals().get('METHODS'):
            create_rnkmethod_cache()

        function = METHODS[rank_method_code]["function"]
        # Check if we have specific function for sorting by this method
        func_object = globals().get(function)

        if verbose > 0:
            voutput += "function: %s <br/> " % function
            voutput += "related_to:  %s <br/>" % str(related_to)

        if func_object and related_to and related_to[0][0:6] == "recid:" and function == "word_similarity":
            result = find_similar(rank_method_code, related_to[0][6:], hitset, rank_limit_relevance, verbose, METHODS)
        elif func_object:
            if function == "word_similarity":
                result = func_object(rank_method_code, related_to, hitset, rank_limit_relevance, verbose, METHODS)
            elif function in ("word_similarity_solr", "word_similarity_xapian"):
                if not rg:
                    rg = CFG_WEBSEARCH_DEF_RECORDS_IN_GROUPS
                if not jrec:
                    jrec = 0
                ranked_result_amount = rg + jrec
                if verbose > 0:
                    voutput += "Ranked result amount: %s<br/><br/>" % ranked_result_amount
                    voutput += "field: %s<br/>" % field

                if function == "word_similarity_solr":
                    if verbose > 0:
                        voutput += "In Solr part:<br/>"
                    result = word_similarity_solr(related_to, hitset, METHODS[rank_method_code], verbose, field, ranked_result_amount)
                else:
                    # Only "word_similarity_xapian" is left in this branch.
                    if verbose > 0:
                        voutput += "In Xapian part:<br/>"
                    result = word_similarity_xapian(related_to, hitset, METHODS[rank_method_code], verbose, field, ranked_result_amount)
            else:
                result = func_object(rank_method_code, related_to, hitset, rank_limit_relevance, verbose)
        else:
            result = rank_by_method(rank_method_code, related_to, hitset, rank_limit_relevance, verbose)
    except Exception as e:
        register_exception()
        from invenio.legacy.webpage import adderrorbox
        # The verbose slot stays empty: voutput is prepended exactly once
        # further down, so storing it here too would duplicate the log.
        result = (None, "", adderrorbox("An error occured when trying to rank the search result "+rank_method_code, ["Unexpected error: %s<br />" % (e,)]), "")

    afterfind = time.time() - starttime

    if result[0] and result[1]: #split into two lists for search_engine
        results_similar_recIDs = [x[0] for x in result[0]]
        results_similar_relevances = [x[1] for x in result[0]]
        result = (results_similar_recIDs, results_similar_relevances, result[1], result[2], "%s%s" % (configcreated, result[3]))
        aftermap = time.time() - starttime
    else:
        result = (None, None, result[1], result[2], result[3])

    #add stuff from here into voutput from result
    tmp = voutput+result[4]
    if verbose > 0:
        tmp += "<br/>Elapsed time after finding: %s\nElapsed after mapping: %s" % (afterfind, aftermap)
    result = (result[0], result[1], result[2], result[3], tmp)

    return result
Esempio n. 5
0
def rank_records(rank_method_code, rank_limit_relevance, hitset, related_to=[], verbose=0, field='', rg=None, jrec=None):
    """Sorts given records or related records according to given method

       Parameters:
        - rank_method_code: Sort records using this method
                            e.g. `jif' or `sbr' (word frequency vector model)
        - rank_limit_relevance: A parameter given to the sorting method
                                e.g. `23' for `nbc' (number of citations)
                                     or `0.10' for `vec'
        - hitset: records to sort; an intbitset built from it is what is
                  handed to the ranking functions
        - related_to: if specified, instead of sorting given records,
                      we first fetch the related records ("related" being
                      defined by the method), then we sort these related
                      records. Only read here, never mutated, so the shared
                      default list is safe.
        - verbose: verbose level; values > 0 add debug text to the output
        - field: forwarded to the Solr/Xapian similarity functions
        - rg, jrec: summed into the amount of ranked results requested from
                    Solr/Xapian; a falsy rg falls back to
                    CFG_WEBSEARCH_DEF_RECORDS_IN_GROUPS, a falsy jrec to 0

       Output (5-tuple):
       - list of records (None when nothing was ranked)
       - list of rank values (None when nothing was ranked)
       - prefix
       - postfix
       - verbose_output
    """
    voutput = ""
    configcreated = ""

    starttime = time.time()
    # Elapsed-time placeholders (only reported when verbose > 0). They start
    # at zero so that a skipped mapping step is not shown as a negative time.
    afterfind = 0.0
    aftermap = 0.0

    try:
        # Work on our own intbitset built from the caller's hitset
        hitset = intbitset(hitset)

        # Build the rank method cache only when it is missing or empty. The
        # lookups below read METHODS, so that is the name to test; testing
        # 'methods' never saw the cache and rebuilt it on every call.
        if not globals().get('METHODS'):
            create_rnkmethod_cache()

        function = METHODS[rank_method_code]["function"]
        # Check if we have specific function for sorting by this method
        func_object = globals().get(function)

        if verbose > 0:
            voutput += "function: %s <br/> " % function
            voutput += "related_to:  %s <br/>" % str(related_to)

        if func_object and related_to and related_to[0][0:6] == "recid:" and function == "word_similarity":
            result = find_similar(rank_method_code, related_to[0][6:], hitset, rank_limit_relevance, verbose, METHODS)
        elif func_object:
            if function == "word_similarity":
                result = func_object(rank_method_code, related_to, hitset, rank_limit_relevance, verbose, METHODS)
            elif function in ("word_similarity_solr", "word_similarity_xapian"):
                if not rg:
                    rg = CFG_WEBSEARCH_DEF_RECORDS_IN_GROUPS
                if not jrec:
                    jrec = 0
                ranked_result_amount = rg + jrec
                if verbose > 0:
                    voutput += "Ranked result amount: %s<br/><br/>" % ranked_result_amount
                    voutput += "field: %s<br/>" % field

                if function == "word_similarity_solr":
                    if verbose > 0:
                        voutput += "In Solr part:<br/>"
                    result = word_similarity_solr(related_to, hitset, METHODS[rank_method_code], verbose, field, ranked_result_amount)
                else:
                    # Only "word_similarity_xapian" is left in this branch.
                    if verbose > 0:
                        voutput += "In Xapian part:<br/>"
                    result = word_similarity_xapian(related_to, hitset, METHODS[rank_method_code], verbose, field, ranked_result_amount)
            else:
                result = func_object(rank_method_code, related_to, hitset, rank_limit_relevance, verbose)
        else:
            result = rank_by_method(rank_method_code, related_to, hitset, rank_limit_relevance, verbose)
    except Exception as e:
        register_exception()
        from invenio.legacy.webpage import adderrorbox
        # The verbose slot stays empty: voutput is prepended exactly once
        # further down, so storing it here too would duplicate the log.
        result = (None, "", adderrorbox("An error occured when trying to rank the search result "+rank_method_code, ["Unexpected error: %s<br />" % (e,)]), "")

    afterfind = time.time() - starttime

    if result[0] and result[1]: #split into two lists for search_engine
        results_similar_recIDs = [x[0] for x in result[0]]
        results_similar_relevances = [x[1] for x in result[0]]
        result = (results_similar_recIDs, results_similar_relevances, result[1], result[2], "%s%s" % (configcreated, result[3]))
        aftermap = time.time() - starttime
    else:
        result = (None, None, result[1], result[2], result[3])

    #add stuff from here into voutput from result
    tmp = voutput+result[4]
    if verbose > 0:
        tmp += "<br/>Elapsed time after finding: %s\nElapsed after mapping: %s" % (afterfind, aftermap)
    result = (result[0], result[1], result[2], result[3], tmp)

    return result