def rank_records(rank_method_code, rank_limit_relevance, hitset_global, pattern=[], verbose=0, field='', rg=None, jrec=None):
    """Rank search engine hits with the ranking method `rank_method_code`.

    The name of the ranking function is read from the module-level
    ``methods`` cache (``create_rnkmethod_cache()`` is called first when no
    global ``methods`` exists yet) and the call is dispatched on that name.
    Any exception raised while ranking is registered and converted into an
    error-box result instead of propagating to the caller.

    Parameters:
     - rank_method_code: e.g. `jif' or `sbr' (word frequency vector model)
     - rank_limit_relevance: e.g. `23' for `nbc' (number of citations)
       or `0.10' for `vec'; handed to the ranking function
     - hitset_global: search engine hits; deep-copied before being handed
       to the ranking function
     - pattern: search engine query or record ID; a first item starting
       with `recid:' selects the "similar records" path for the
       word_similarity function.  The default list is never mutated here.
     - verbose: verbose level; values > 0 collect debug output
     - field: forwarded to the Solr/Xapian similarity rankers
     - rg: falls back to CFG_WEBSEARCH_DEF_RECORDS_IN_GROUPS when not set
     - jrec: falls back to 0 when not set; rg + jrec is the amount of
       ranked results requested from the Solr/Xapian rankers

    Output (as documented by the original author; the return statement is
    not part of the code visible here):
     - list of records
     - list of rank values
     - prefix
     - postfix
     - verbose_output
    """
    voutput = ""
    configcreated = ""

    starttime = time.time()
    afterfind = starttime - time.time()
    aftermap = starttime - time.time()

    try:
        # We are receiving a global hitset: rank a private copy of it.
        hitset = copy.deepcopy(hitset_global)

        # `in` replaces the Python-2-only dict.has_key().
        if 'methods' not in globals():
            create_rnkmethod_cache()

        function = methods[rank_method_code]["function"]
        # A module-level callable named like the configured function, if any.
        func_object = globals().get(function)

        if verbose > 0:
            voutput += "function: %s <br/> " % function
            voutput += "pattern: %s <br/>" % str(pattern)

        if func_object and pattern and pattern[0][0:6] == "recid:" and function == "word_similarity":
            # "Similar records" request: strip the `recid:' prefix.
            result = find_similar(rank_method_code, pattern[0][6:], hitset,
                                  rank_limit_relevance, verbose, methods)
        elif rank_method_code == "citation":
            # pattern[0] is the search word (original note: "not used by
            # find_cit"); its first six characters are dropped
            # unconditionally before it is passed on.
            p = ""
            if pattern and pattern[0]:
                p = pattern[0][6:]
            result = find_citations(rank_method_code, p, hitset, verbose)
        elif func_object:
            if function == "word_similarity":
                result = func_object(rank_method_code, pattern, hitset,
                                     rank_limit_relevance, verbose, methods)
            elif function in ("word_similarity_solr", "word_similarity_xapian"):
                if not rg:
                    rg = CFG_WEBSEARCH_DEF_RECORDS_IN_GROUPS
                if not jrec:
                    jrec = 0
                ranked_result_amount = rg + jrec
                if verbose > 0:
                    voutput += "Ranked result amount: %s<br/><br/>" % ranked_result_amount
                    voutput += "field: %s<br/>" % field

                # The two back ends are mutually exclusive.
                if function == "word_similarity_solr":
                    if verbose > 0:
                        voutput += "In Solr part:<br/>"
                    result = word_similarity_solr(pattern, hitset, methods[rank_method_code],
                                                  verbose, field, ranked_result_amount)
                elif function == "word_similarity_xapian":
                    if verbose > 0:
                        voutput += "In Xapian part:<br/>"
                    result = word_similarity_xapian(pattern, hitset, methods[rank_method_code],
                                                    verbose, field, ranked_result_amount)
            else:
                result = func_object(rank_method_code, pattern, hitset,
                                     rank_limit_relevance, verbose)
        else:
            # No dedicated function in this module: generic ranking.
            result = rank_by_method(rank_method_code, pattern, hitset,
                                    rank_limit_relevance, verbose)
    except Exception as e:  # `as` form is valid on Python 2.6+ and Python 3
        register_exception()
        result = (None, "",
                  adderrorbox("An error occured when trying to rank the search result " + rank_method_code,
                              ["Unexpected error: %s<br />" % (e,)]),
                  voutput)
    # NOTE(review): the visible definition stops here -- `result`,
    # `configcreated`, `afterfind` and `aftermap` are not consumed and nothing
    # is returned; presumably the post-processing/return lies outside this
    # view.  Confirm before relying on the return value.
def rank_records(rank_method_code, rank_limit_relevance, hitset_global, pattern=[], verbose=0, field='', rg=None, jrec=None):
    """Rank search engine hits with the ranking method `rank_method_code`.

    The name of the ranking function is read from the module-level
    ``methods`` cache (``create_rnkmethod_cache()`` is called first when no
    global ``methods`` exists yet) and the call is dispatched on that name.
    Any exception raised while ranking is registered and converted into an
    error-box result instead of propagating to the caller.

    Parameters:
     - rank_method_code: e.g. `jif' or `sbr' (word frequency vector model)
     - rank_limit_relevance: e.g. `23' for `nbc' (number of citations)
       or `0.10' for `vec'; handed to the ranking function
     - hitset_global: search engine hits; deep-copied before being handed
       to the ranking function
     - pattern: search engine query or record ID; a first item starting
       with `recid:' selects the "similar records" path for the
       word_similarity function.  The default list is never mutated here.
     - verbose: verbose level; values > 0 collect debug output
     - field: forwarded to the Solr/Xapian similarity rankers
     - rg: falls back to CFG_WEBSEARCH_DEF_RECORDS_IN_GROUPS when not set
     - jrec: falls back to 0 when not set; rg + jrec is the amount of
       ranked results requested from the Solr/Xapian rankers

    Output (as documented by the original author; the return statement is
    not part of the code visible here):
     - list of records
     - list of rank values
     - prefix
     - postfix
     - verbose_output
    """
    voutput = ""
    configcreated = ""

    starttime = time.time()
    afterfind = starttime - time.time()
    aftermap = starttime - time.time()

    try:
        # We are receiving a global hitset: rank a private copy of it.
        hitset = copy.deepcopy(hitset_global)

        # `in` replaces the Python-2-only dict.has_key().
        if 'methods' not in globals():
            create_rnkmethod_cache()

        function = methods[rank_method_code]["function"]
        # A module-level callable named like the configured function, if any.
        func_object = globals().get(function)

        if verbose > 0:
            voutput += "function: %s <br/> " % function
            voutput += "pattern: %s <br/>" % str(pattern)

        if func_object and pattern and pattern[0][0:6] == "recid:" and function == "word_similarity":
            # "Similar records" request: strip the `recid:' prefix.
            result = find_similar(rank_method_code, pattern[0][6:], hitset,
                                  rank_limit_relevance, verbose, methods)
        elif rank_method_code == "citation":
            # pattern[0] is the search word (original note: "not used by
            # find_cit"); its first six characters are dropped
            # unconditionally before it is passed on.
            p = ""
            if pattern and pattern[0]:
                p = pattern[0][6:]
            result = find_citations(rank_method_code, p, hitset, verbose)
        elif func_object:
            if function == "word_similarity":
                result = func_object(rank_method_code, pattern, hitset,
                                     rank_limit_relevance, verbose, methods)
            elif function in ("word_similarity_solr", "word_similarity_xapian"):
                if not rg:
                    rg = CFG_WEBSEARCH_DEF_RECORDS_IN_GROUPS
                if not jrec:
                    jrec = 0
                ranked_result_amount = rg + jrec
                if verbose > 0:
                    voutput += "Ranked result amount: %s<br/><br/>" % ranked_result_amount
                    voutput += "field: %s<br/>" % field

                # The two back ends are mutually exclusive.
                if function == "word_similarity_solr":
                    if verbose > 0:
                        voutput += "In Solr part:<br/>"
                    result = word_similarity_solr(pattern, hitset, methods[rank_method_code],
                                                  verbose, field, ranked_result_amount)
                elif function == "word_similarity_xapian":
                    if verbose > 0:
                        voutput += "In Xapian part:<br/>"
                    result = word_similarity_xapian(pattern, hitset, methods[rank_method_code],
                                                    verbose, field, ranked_result_amount)
            else:
                result = func_object(rank_method_code, pattern, hitset,
                                     rank_limit_relevance, verbose)
        else:
            # No dedicated function in this module: generic ranking.
            result = rank_by_method(rank_method_code, pattern, hitset,
                                    rank_limit_relevance, verbose)
    except Exception as e:  # `as` form is valid on Python 2.6+ and Python 3
        register_exception()
        result = (None, "",
                  adderrorbox("An error occured when trying to rank the search result " + rank_method_code,
                              ["Unexpected error: %s<br />" % (e,)]),
                  voutput)
    # NOTE(review): the visible definition stops here -- `result`,
    # `configcreated`, `afterfind` and `aftermap` are not consumed and nothing
    # is returned; presumably the post-processing/return lies outside this
    # view.  Confirm before relying on the return value.
def rank_records(
    rank_method_code, rank_limit_relevance, hitset, related_to=[], verbose=0, field="", rg=None, jrec=None
):
    """Sorts given records or related records according to given method

    The name of the ranking function is read from the module-level
    ``METHODS`` cache and the call is dispatched on that name.  Any
    exception raised while ranking is registered and converted into an
    error-box result instead of propagating to the caller.

    Parameters:
     - rank_method_code: Sort records using this method
       e.g. `jif' or `sbr' (word frequency vector model)
     - rank_limit_relevance: A parameter handed to the sorting method
       e.g. `23' for `nbc' (number of citations) or `0.10' for `vec'
     - hitset: records to sort; converted to a fresh intbitset before use
     - related_to: query or record reference.  When its first item starts
       with `recid:' and the method's function is word_similarity, records
       similar to that record are ranked via find_similar(); otherwise it is
       handed to the ranking function as is.  The default list is never
       mutated here.
     - verbose: verbose level; values > 0 collect debug output
     - field: forwarded to the Solr/Xapian similarity rankers
     - rg: falls back to CFG_WEBSEARCH_DEF_RECORDS_IN_GROUPS when not set
     - jrec: falls back to 0 when not set; rg + jrec is the amount of
       ranked results requested from the Solr/Xapian rankers

    Output (as documented by the original author; the return statement is
    not part of the code visible here):
     - list of records
     - list of rank values
     - prefix
     - postfix
     - verbose_output
    """
    voutput = ""
    configcreated = ""

    starttime = time.time()
    afterfind = starttime - time.time()
    aftermap = starttime - time.time()

    try:
        # We are receiving a global hitset: rank a private intbitset of it.
        hitset_global = hitset
        hitset = intbitset(hitset_global)

        # The cache that is read below is METHODS; the previous guard looked
        # for a global called 'methods' instead, so it did not reflect the
        # state of the cache actually used.  A missing or empty METHODS is
        # treated as "not built yet".
        if not globals().get("METHODS"):
            create_rnkmethod_cache()

        function = METHODS[rank_method_code]["function"]
        # Check if we have specific function for sorting by this method
        func_object = globals().get(function)

        if verbose > 0:
            voutput += "function: %s <br/> " % function
            voutput += "related_to: %s <br/>" % str(related_to)

        if func_object and related_to and related_to[0][0:6] == "recid:" and function == "word_similarity":
            # "Similar records" request: strip the `recid:' prefix.
            result = find_similar(
                rank_method_code, related_to[0][6:], hitset, rank_limit_relevance, verbose, METHODS
            )
        elif func_object:
            if function == "word_similarity":
                result = func_object(
                    rank_method_code, related_to, hitset, rank_limit_relevance, verbose, METHODS
                )
            elif function in ("word_similarity_solr", "word_similarity_xapian"):
                if not rg:
                    rg = CFG_WEBSEARCH_DEF_RECORDS_IN_GROUPS
                if not jrec:
                    jrec = 0
                ranked_result_amount = rg + jrec
                if verbose > 0:
                    voutput += "Ranked result amount: %s<br/><br/>" % ranked_result_amount
                    voutput += "field: %s<br/>" % field

                # The two back ends are mutually exclusive.
                if function == "word_similarity_solr":
                    if verbose > 0:
                        voutput += "In Solr part:<br/>"
                    result = word_similarity_solr(
                        related_to, hitset, METHODS[rank_method_code], verbose, field, ranked_result_amount
                    )
                elif function == "word_similarity_xapian":
                    if verbose > 0:
                        voutput += "In Xapian part:<br/>"
                    result = word_similarity_xapian(
                        related_to, hitset, METHODS[rank_method_code], verbose, field, ranked_result_amount
                    )
            else:
                result = func_object(rank_method_code, related_to, hitset, rank_limit_relevance, verbose)
        else:
            # No dedicated function in this module: generic ranking.
            result = rank_by_method(rank_method_code, related_to, hitset, rank_limit_relevance, verbose)
    except Exception as e:  # `as` form is valid on Python 2.6+ and Python 3
        register_exception()
        result = (
            None,
            "",
            adderrorbox(
                "An error occured when trying to rank the search result " + rank_method_code,
                ["Unexpected error: %s<br />" % (e,)],
            ),
            voutput,
        )
    # NOTE(review): the visible definition stops here -- `result`,
    # `configcreated`, `afterfind` and `aftermap` are not consumed and nothing
    # is returned; presumably the post-processing/return lies outside this
    # view.  Confirm before relying on the return value.
def rank_records(rank_method_code, rank_limit_relevance, hitset, related_to=[], verbose=0, field='', rg=None, jrec=None):
    """Sorts given records or related records according to given method

    The name of the ranking function is read from the module-level
    ``METHODS`` cache and the call is dispatched on that name.  Any
    exception raised while ranking is registered and converted into an
    error-box result instead of propagating to the caller.

    Parameters:
     - rank_method_code: Sort records using this method
       e.g. `jif' or `sbr' (word frequency vector model)
     - rank_limit_relevance: A parameter handed to the sorting method
       e.g. `23' for `nbc' (number of citations) or `0.10' for `vec'
     - hitset: records to sort; converted to a fresh intbitset before use
     - related_to: query or record reference.  When its first item starts
       with `recid:' and the method's function is word_similarity, records
       similar to that record are ranked via find_similar(); otherwise it is
       handed to the ranking function as is.  The default list is never
       mutated here.
     - verbose: verbose level; values > 0 collect debug output
     - field: forwarded to the Solr/Xapian similarity rankers
     - rg: falls back to CFG_WEBSEARCH_DEF_RECORDS_IN_GROUPS when not set
     - jrec: falls back to 0 when not set; rg + jrec is the amount of
       ranked results requested from the Solr/Xapian rankers

    Output (as documented by the original author; the return statement is
    not part of the code visible here):
     - list of records
     - list of rank values
     - prefix
     - postfix
     - verbose_output
    """
    voutput = ""
    configcreated = ""

    starttime = time.time()
    afterfind = starttime - time.time()
    aftermap = starttime - time.time()

    try:
        # We are receiving a global hitset: rank a private intbitset of it.
        hitset_global = hitset
        hitset = intbitset(hitset_global)

        # The cache that is read below is METHODS; the previous guard looked
        # for a global called 'methods' instead, so it did not reflect the
        # state of the cache actually used.  A missing or empty METHODS is
        # treated as "not built yet".
        if not globals().get('METHODS'):
            create_rnkmethod_cache()

        function = METHODS[rank_method_code]["function"]
        # Check if we have specific function for sorting by this method
        func_object = globals().get(function)

        if verbose > 0:
            voutput += "function: %s <br/> " % function
            voutput += "related_to: %s <br/>" % str(related_to)

        if func_object and related_to and related_to[0][0:6] == "recid:" and function == "word_similarity":
            # "Similar records" request: strip the `recid:' prefix.
            result = find_similar(rank_method_code, related_to[0][6:], hitset,
                                  rank_limit_relevance, verbose, METHODS)
        elif func_object:
            if function == "word_similarity":
                result = func_object(rank_method_code, related_to, hitset,
                                     rank_limit_relevance, verbose, METHODS)
            elif function in ("word_similarity_solr", "word_similarity_xapian"):
                if not rg:
                    rg = CFG_WEBSEARCH_DEF_RECORDS_IN_GROUPS
                if not jrec:
                    jrec = 0
                ranked_result_amount = rg + jrec
                if verbose > 0:
                    voutput += "Ranked result amount: %s<br/><br/>" % ranked_result_amount
                    voutput += "field: %s<br/>" % field

                # The two back ends are mutually exclusive.
                if function == "word_similarity_solr":
                    if verbose > 0:
                        voutput += "In Solr part:<br/>"
                    result = word_similarity_solr(related_to, hitset, METHODS[rank_method_code],
                                                  verbose, field, ranked_result_amount)
                elif function == "word_similarity_xapian":
                    if verbose > 0:
                        voutput += "In Xapian part:<br/>"
                    result = word_similarity_xapian(related_to, hitset, METHODS[rank_method_code],
                                                    verbose, field, ranked_result_amount)
            else:
                result = func_object(rank_method_code, related_to, hitset,
                                     rank_limit_relevance, verbose)
        else:
            # No dedicated function in this module: generic ranking.
            result = rank_by_method(rank_method_code, related_to, hitset,
                                    rank_limit_relevance, verbose)
    except Exception as e:  # `as` form is valid on Python 2.6+ and Python 3
        register_exception()
        result = (None, "",
                  adderrorbox("An error occured when trying to rank the search result " + rank_method_code,
                              ["Unexpected error: %s<br />" % (e,)]),
                  voutput)
    # NOTE(review): the visible definition stops here -- `result`,
    # `configcreated`, `afterfind` and `aftermap` are not consumed and nothing
    # is returned; presumably the post-processing/return lies outside this
    # view.  Confirm before relying on the return value.