def addexistingportalbox(req, colID, ln=CFG_SITE_LANG, pbxID=-1, score=0, position='', sel_ln='', callback='yes', confirm=0):
    """Serve the WebSearch admin page for adding an existing portalbox.

    The page body is produced by ``wsc.perform_addexistingportalbox``; the
    arguments ``colID``, ``ln``, ``pbxID``, ``score``, ``position``,
    ``sel_ln``, ``callback`` and ``confirm`` are forwarded to it unchanged.

    Parameters:
      - req: request object, used to resolve the user and passed to ``page``
      - colID: forwarded to ``wsc.perform_addexistingportalbox``
      - ln: interface language, also used as the ``language`` of the page
      - pbxID, score, position, sel_ln, callback, confirm: forwarded as-is

    Returns the result of ``page(...)`` (either the "Edit Collection" page
    or an "Authorization failure" page), or ``error_page('Error', req)``
    when the user id cannot be determined.
    """
    navtrail_previous_links = wsc.getnavtrail() + """> <a class="navtrail" href="%s/admin/websearch/websearchadmin.py/">WebSearch Admin</a> """ % (CFG_SITE_URL)

    try:
        uid = getUid(req)
    except Exception:
        # Any ordinary failure to resolve the user ends in the generic error
        # page; SystemExit / KeyboardInterrupt are deliberately not caught.
        return error_page('Error', req)

    auth = check_user(req, 'cfgwebsearch')
    if auth[0]:
        # A truthy first element is treated as "not authorized".
        return page(title='Authorization failure',
                    uid=uid,
                    body=adderrorbox('try to login first',
                                     datalist=["""You are not a user authorized to perform admin tasks, try to <a href="%s/youraccount/login?referer=%s/admin/websearch/websearchadmin.py/">login</a> with another account.""" % (CFG_SITE_SECURE_URL, CFG_SITE_URL)]),
                    navtrail=navtrail_previous_links,
                    lastupdated=__lastupdated__)

    return page(title="Edit Collection",
                body=wsc.perform_addexistingportalbox(colID=colID,
                                                      ln=ln,
                                                      pbxID=pbxID,
                                                      score=score,
                                                      position=position,
                                                      sel_ln=sel_ln,
                                                      callback=callback,
                                                      confirm=confirm),
                uid=uid,
                language=ln,
                req=req,
                navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)
def rank_records(rank_method_code, rank_limit_relevance, hitset_global, pattern=[], verbose=0, field='', rg=None, jrec=None):
    """Rank the given hits with the named rank method.

    Parameters:
      - rank_method_code: key into the module-level ``methods`` cache,
        e.g. `jif' or `sbr' (word frequency vector model)
      - rank_limit_relevance: forwarded to the ranking function,
        e.g. `23' for `nbc' (number of citations) or `0.10' for `vec'
      - hitset_global: search engine hits; a deep copy is ranked, the
        caller's object is not passed on
      - pattern: search engine query or record ID; when its first element
        starts with "recid:" and the method is "word_similarity", similar
        records are looked up instead.  The default list is never mutated
        in this function.
      - verbose: verbose level; values > 0 add debug text to the output
      - field: forwarded to the Solr/Xapian word similarity functions only
      - rg, jrec: only used for the Solr/Xapian methods, where ``rg + jrec``
        is the number of ranked results requested; ``rg`` falls back to
        CFG_WEBSEARCH_DEF_RECORDS_IN_GROUPS and ``jrec`` to 0

    Output, as a 5-tuple:
      - list of records (or None)
      - list of rank values (or None)
      - prefix
      - postfix (an error box when ranking raised an exception)
      - verbose_output
    """
    voutput = ""
    configcreated = ""

    starttime = time.time()
    # Placeholder timings; overwritten below once the matching phase is done.
    afterfind = starttime - time.time()
    aftermap = starttime - time.time()

    try:
        # We are receiving a global hitset, so work on a private copy.
        hitset = copy.deepcopy(hitset_global)

        if 'methods' not in globals():
            create_rnkmethod_cache()

        function = methods[rank_method_code]["function"]
        # A module-level callable named after the method's function, if any.
        func_object = globals().get(function)

        if verbose > 0:
            voutput += "function: %s <br/> " % function
            voutput += "pattern: %s <br/>" % str(pattern)

        if func_object and pattern and pattern[0][0:6] == "recid:" and function == "word_similarity":
            result = find_similar(rank_method_code, pattern[0][6:], hitset, rank_limit_relevance, verbose, methods)
        elif rank_method_code == "citation":
            # pattern[0] is the search word - not used by find_citations;
            # only what follows its first six characters is passed on.
            p = ""
            if pattern and pattern[0]:
                p = pattern[0][6:]
            result = find_citations(rank_method_code, p, hitset, verbose)
        elif func_object:
            if function == "word_similarity":
                result = func_object(rank_method_code, pattern, hitset, rank_limit_relevance, verbose, methods)
            elif function in ("word_similarity_solr", "word_similarity_xapian"):
                if not rg:
                    rg = CFG_WEBSEARCH_DEF_RECORDS_IN_GROUPS
                if not jrec:
                    jrec = 0
                ranked_result_amount = rg + jrec
                if verbose > 0:
                    voutput += "Ranked result amount: %s<br/><br/>" % ranked_result_amount
                    voutput += "field: %s<br/>" % field

                if function == "word_similarity_solr":
                    if verbose > 0:
                        voutput += "In Solr part:<br/>"
                    result = word_similarity_solr(pattern, hitset, methods[rank_method_code], verbose, field, ranked_result_amount)
                elif function == "word_similarity_xapian":
                    if verbose > 0:
                        voutput += "In Xapian part:<br/>"
                    result = word_similarity_xapian(pattern, hitset, methods[rank_method_code], verbose, field, ranked_result_amount)
            else:
                result = func_object(rank_method_code, pattern, hitset, rank_limit_relevance, verbose)
        else:
            result = rank_by_method(rank_method_code, pattern, hitset, rank_limit_relevance, verbose)
    except Exception as e:
        register_exception()
        result = (None, "", adderrorbox("An error occured when trying to rank the search result "+rank_method_code, ["Unexpected error: %s<br />" % (e,)]), voutput)

    afterfind = time.time() - starttime

    if result[0] and result[1]:
        # Split the (recID, relevance) pairs into two lists for search_engine.
        # Comprehensions give real lists on Python 2 and 3 alike (map() would
        # return a lazy, one-shot iterator on Python 3).
        results_similar_recIDs = [x[0] for x in result[0]]
        results_similar_relevances = [x[1] for x in result[0]]
        result = (results_similar_recIDs, results_similar_relevances, result[1], result[2], "%s" % configcreated + result[3])
        aftermap = time.time() - starttime
    else:
        result = (None, None, result[1], result[2], result[3])

    # Prepend the debug text collected here to the ranker's own output.
    tmp = voutput + result[4]
    if verbose > 0:
        tmp += "<br/>Elapsed time after finding: " + str(afterfind) + "\nElapsed after mapping: " + str(aftermap)
    result = (result[0], result[1], result[2], result[3], tmp)

    return result
def rank_records(rank_method_code, rank_limit_relevance, hitset, related_to=[], verbose=0, field='', rg=None, jrec=None):
    """Sort given records or related records according to given method.

    Parameters:
      - rank_method_code: key into the module-level ``METHODS`` cache,
        e.g. `jif' or `sbr' (word frequency vector model)
      - rank_limit_relevance: forwarded to the ranking function,
        e.g. `23' for `nbc' (number of citations) or `0.10' for `vec'.
        It is not passed to the Solr/Xapian word similarity functions.
      - hitset: records to sort; an ``intbitset`` built from it is handed to
        the ranking functions
      - related_to: if specified, instead of sorting given records, we
        first fetch the related records ("related" being defined by the
        method), then we sort these related records.  When its first
        element starts with "recid:" and the method is "word_similarity",
        ``find_similar`` is used.  The default list is never mutated in
        this function.
      - verbose: verbose level; values > 0 add debug text to the output
      - field: forwarded to the Solr/Xapian word similarity functions only
      - rg, jrec: only used for the Solr/Xapian methods, where ``rg + jrec``
        is the number of ranked results requested; ``rg`` falls back to
        CFG_WEBSEARCH_DEF_RECORDS_IN_GROUPS and ``jrec`` to 0

    Output, as a 5-tuple:
      - list of records (or None)
      - list of rank values (or None)
      - prefix
      - postfix (an error box when ranking raised an exception)
      - verbose_output
    """
    voutput = ""
    configcreated = ""

    starttime = time.time()
    # Placeholder timings; overwritten below once the matching phase is done.
    afterfind = starttime - time.time()
    aftermap = starttime - time.time()

    try:
        # We are receiving a global hitset, so work on a private copy.
        hitset = intbitset(hitset)

        # Build the rank method cache lazily.  The guard must look at the
        # name that is actually read below (METHODS): testing for a global
        # called 'methods' does not inspect that cache at all.
        if not globals().get('METHODS'):
            create_rnkmethod_cache()

        function = METHODS[rank_method_code]["function"]
        # Check if we have specific function for sorting by this method
        func_object = globals().get(function)

        if verbose > 0:
            voutput += "function: %s <br/> " % function
            voutput += "related_to: %s <br/>" % str(related_to)

        if func_object and related_to and related_to[0][0:6] == "recid:" and function == "word_similarity":
            result = find_similar(rank_method_code, related_to[0][6:], hitset, rank_limit_relevance, verbose, METHODS)
        elif func_object:
            if function == "word_similarity":
                result = func_object(rank_method_code, related_to, hitset, rank_limit_relevance, verbose, METHODS)
            elif function in ("word_similarity_solr", "word_similarity_xapian"):
                if not rg:
                    rg = CFG_WEBSEARCH_DEF_RECORDS_IN_GROUPS
                if not jrec:
                    jrec = 0
                ranked_result_amount = rg + jrec
                if verbose > 0:
                    voutput += "Ranked result amount: %s<br/><br/>" % ranked_result_amount
                    voutput += "field: %s<br/>" % field

                if function == "word_similarity_solr":
                    if verbose > 0:
                        voutput += "In Solr part:<br/>"
                    result = word_similarity_solr(related_to, hitset, METHODS[rank_method_code], verbose, field, ranked_result_amount)
                elif function == "word_similarity_xapian":
                    if verbose > 0:
                        voutput += "In Xapian part:<br/>"
                    result = word_similarity_xapian(related_to, hitset, METHODS[rank_method_code], verbose, field, ranked_result_amount)
            else:
                result = func_object(rank_method_code, related_to, hitset, rank_limit_relevance, verbose)
        else:
            result = rank_by_method(rank_method_code, related_to, hitset, rank_limit_relevance, verbose)
    except Exception as e:
        register_exception()
        # Imported here, on the error path only, as in the original code.
        from invenio.legacy.webpage import adderrorbox
        result = (None, "", adderrorbox("An error occured when trying to rank the search result "+rank_method_code, ["Unexpected error: %s<br />" % (e,)]), voutput)

    afterfind = time.time() - starttime

    if result[0] and result[1]:
        # Split the (recID, relevance) pairs into two lists for search_engine.
        results_similar_recIDs = [x[0] for x in result[0]]
        results_similar_relevances = [x[1] for x in result[0]]
        result = (results_similar_recIDs, results_similar_relevances, result[1], result[2], "%s%s" % (configcreated, result[3]))
        aftermap = time.time() - starttime
    else:
        result = (None, None, result[1], result[2], result[3])

    # Prepend the debug text collected here to the ranker's own output.
    tmp = voutput + result[4]
    if verbose > 0:
        tmp += "<br/>Elapsed time after finding: %s\nElapsed after mapping: %s" % (afterfind, aftermap)
    result = (result[0], result[1], result[2], result[3], tmp)

    return result