Example #1
0
def rank_records(rank_method_code,
                 rank_limit_relevance,
                 hitset_global,
                 pattern=[],
                 verbose=0,
                 field='',
                 rg=None,
                 jrec=None):
    """Rank the records of a hitset with the given rank method.

       Parameters:
        - rank_method_code: key into the module-level `methods' cache,
                            e.g. `jif' or `sbr' (word frequency vector
                            model); the code "citation" is routed to
                            find_citations()
        - rank_limit_relevance: e.g. `23' for `nbc' (number of citations)
                                or `0.10' for `vec'; forwarded unchanged
                                to the ranking function
        - hitset_global: search engine hits; the ranking functions are
                         handed a deep copy of it
        - pattern: search engine query or record ID; indexed as a
                   sequence whose first element is a string.  A first
                   element starting with "recid:" selects find_similar()
                   when the method's function is `word_similarity'.
                   The default list is shared between calls but is never
                   mutated in this function.
        - verbose: verbose level; above 0, debug lines are collected
        - field: forwarded to the Solr/Xapian ranking functions only
        - rg, jrec: only used for Solr/Xapian ranking, where rg + jrec is
                    the amount of ranked results requested; a falsy rg
                    falls back to CFG_WEBSEARCH_DEF_RECORDS_IN_GROUPS and
                    a falsy jrec to 0

       Output (as documented by the original author):
        list of records
        list of rank values
        prefix
        postfix
        verbose_output

       On any exception the error is registered and `result' becomes
       (None, "", <error box>, <debug output collected so far>).

       NOTE(review): the visible body ends after computing `result' and
       never returns it; the tail of this function appears to be missing
       from this file -- TODO confirm against the upstream source.
    """

    voutput = ""
    # NOTE(review): configcreated, afterfind and aftermap are not read in
    # the visible part of this function; they are kept untouched in case
    # the missing tail relies on them.
    configcreated = ""

    starttime = time.time()
    afterfind = starttime - time.time()
    aftermap = starttime - time.time()

    try:
        # We are receiving a global hitset: work on a private copy.
        hitset = copy.deepcopy(hitset_global)
        # `in' replaces dict.has_key(), which no longer exists in Python 3.
        if 'methods' not in globals():
            create_rnkmethod_cache()

        function = methods[rank_method_code]["function"]
        # Resolve the configured function name to a callable of this
        # module; None when no function of that name is defined here.
        func_object = globals().get(function)

        if verbose > 0:
            voutput += "function: %s <br/> " % function
            voutput += "pattern:  %s <br/>" % str(pattern)

        if (func_object and pattern and pattern[0][0:6] == "recid:"
                and function == "word_similarity"):
            # "recid:<id>" asks for records similar to one given record.
            result = find_similar(rank_method_code, pattern[0][6:], hitset,
                                  rank_limit_relevance, verbose, methods)
        elif rank_method_code == "citation":
            # pattern[0] is the search word; its first six characters
            # are dropped unconditionally before calling find_citations.
            p = ""
            if pattern and pattern[0]:
                p = pattern[0][6:]
            result = find_citations(rank_method_code, p, hitset, verbose)

        elif func_object:
            if function == "word_similarity":
                result = func_object(rank_method_code, pattern, hitset,
                                     rank_limit_relevance, verbose, methods)
            elif function in ("word_similarity_solr",
                              "word_similarity_xapian"):
                if not rg:
                    rg = CFG_WEBSEARCH_DEF_RECORDS_IN_GROUPS
                if not jrec:
                    jrec = 0
                ranked_result_amount = rg + jrec
                if verbose > 0:
                    voutput += "Ranked result amount: %s<br/><br/>" % ranked_result_amount
                    voutput += "field: %s<br/>" % field

                # Exactly one of the two names matches at this point.
                if function == "word_similarity_solr":
                    if verbose > 0:
                        voutput += "In Solr part:<br/>"
                    result = word_similarity_solr(pattern, hitset,
                                                  methods[rank_method_code],
                                                  verbose, field,
                                                  ranked_result_amount)
                else:
                    if verbose > 0:
                        voutput += "In Xapian part:<br/>"
                    result = word_similarity_xapian(pattern, hitset,
                                                    methods[rank_method_code],
                                                    verbose, field,
                                                    ranked_result_amount)
            else:
                result = func_object(rank_method_code, pattern, hitset,
                                     rank_limit_relevance, verbose)
        else:
            # No dedicated function for this method: generic ranking.
            result = rank_by_method(rank_method_code, pattern, hitset,
                                    rank_limit_relevance, verbose)
    except Exception as e:
        # Top-level boundary: record the failure and hand back an error
        # box instead of letting the exception escape.
        register_exception()
        result = (
            None, "",
            adderrorbox(
                "An error occured when trying to rank the search result " +
                rank_method_code,
                ["Unexpected error: %s<br />" % (e, )]), voutput)
def rank_records(rank_method_code, rank_limit_relevance, hitset_global, pattern=[], verbose=0, field='', rg=None, jrec=None):
    """Dispatch a ranking request to the function configured for a rank method.

       rank_method_code, key of the module-level `methods' cache, e.g. `jif' or
           `sbr' (word frequency vector model); "citation" goes to find_citations()
       rank_limit_relevance, e.g. `23' for `nbc' (number of citations) or `0.10'
           for `vec'; passed through unchanged to the ranking function
       hitset_global, search engine hits; a deep copy is what gets ranked
       pattern, search engine query or record ID; when its first element starts
           with "recid:" and the method function is `word_similarity',
           find_similar() is used.  The shared default list is never mutated here.
       verbose, verbose level; above 0, debug lines are accumulated
       field, only forwarded to the Solr/Xapian ranking functions
       rg, jrec, only used for Solr/Xapian: rg + jrec is the ranked result amount;
           a falsy rg becomes CFG_WEBSEARCH_DEF_RECORDS_IN_GROUPS, a falsy jrec 0
       output (as documented by the original author):
       list of records
       list of rank values
       prefix
       postfix
       verbose_output

       If anything raises, the exception is registered and `result' is set to
       (None, "", <error box>, <debug output collected so far>).

       NOTE(review): the visible body computes `result' but never returns it;
       the end of this function appears to be missing -- TODO confirm upstream.
    """

    voutput = ""
    # NOTE(review): the next four locals are not read in the visible body;
    # left as they were in case the missing tail of the function uses them.
    configcreated = ""

    starttime = time.time()
    afterfind = starttime - time.time()
    aftermap = starttime - time.time()

    try:
        hitset = copy.deepcopy(hitset_global)  # we are receiving a global hitset
        # dict.has_key() is gone in Python 3; `in' works on every version.
        if 'methods' not in globals():
            create_rnkmethod_cache()

        function = methods[rank_method_code]["function"]
        # Look the configured name up among this module's globals (None if absent).
        func_object = globals().get(function)

        if verbose > 0:
            voutput += "function: %s <br/> " % function
            voutput += "pattern:  %s <br/>" % str(pattern)

        if func_object and pattern and pattern[0][0:6] == "recid:" and function == "word_similarity":
            # Similar-records query: strip the "recid:" prefix.
            result = find_similar(rank_method_code, pattern[0][6:], hitset, rank_limit_relevance, verbose, methods)
        elif rank_method_code == "citation":
            # pattern[0] is the search word; its first six characters are dropped.
            p = ""
            if pattern and pattern[0]:
                p = pattern[0][6:]
            result = find_citations(rank_method_code, p, hitset, verbose)

        elif func_object:
            if function == "word_similarity":
                result = func_object(rank_method_code, pattern, hitset, rank_limit_relevance, verbose, methods)
            elif function in ("word_similarity_solr", "word_similarity_xapian"):
                if not rg:
                    rg = CFG_WEBSEARCH_DEF_RECORDS_IN_GROUPS
                if not jrec:
                    jrec = 0
                ranked_result_amount = rg + jrec
                if verbose > 0:
                    voutput += "Ranked result amount: %s<br/><br/>" % ranked_result_amount
                    voutput += "field: %s<br/>" % field

                # Only one of the two back ends can match here.
                if function == "word_similarity_solr":
                    if verbose > 0:
                        voutput += "In Solr part:<br/>"
                    result = word_similarity_solr(pattern, hitset, methods[rank_method_code], verbose, field, ranked_result_amount)
                else:
                    if verbose > 0:
                        voutput += "In Xapian part:<br/>"
                    result = word_similarity_xapian(pattern, hitset, methods[rank_method_code], verbose, field, ranked_result_amount)
            else:
                result = func_object(rank_method_code, pattern, hitset, rank_limit_relevance, verbose)
        else:
            # No module-level function with that name: generic ranking.
            result = rank_by_method(rank_method_code, pattern, hitset, rank_limit_relevance, verbose)
    except Exception as e:
        # Boundary handler: log the error and report it as an error box.
        register_exception()
        result = (None, "", adderrorbox("An error occured when trying to rank the search result "+rank_method_code, ["Unexpected error: %s<br />" % (e,)]), voutput)
def rank_records(
    rank_method_code, rank_limit_relevance, hitset, related_to=[], verbose=0, field="", rg=None, jrec=None
):
    """Sorts given records or related records according to given method

       Parameters:
        - rank_method_code: Sort records using this method; key into the
                            module-level METHODS mapping
                            e.g. `jif' or `sbr' (word frequency vector model)
        - rank_limit_relevance: A parameter forwarded unchanged to the
                                sorting method
                                e.g. `23' for `nbc' (number of citations)
                                     or `0.10' for `vec'
                                The original author notes it is ignored
                                when sorting by citations; its meaning is
                                not evident from this function.
        - hitset: records to sort; an intbitset built from it is what the
                  sorting functions receive
        - related_to: if specified, instead of sorting given records,
                      we first fetch the related records ("related" being
                      defined by the method), then we sort these related
                      records.  A first element starting with "recid:"
                      selects find_similar() for `word_similarity'.
                      The shared default list is never mutated here.
        - verbose: verbose level; above 0, debug lines are collected
        - field: forwarded to the Solr/Xapian sorting functions only
        - rg, jrec: only used for Solr/Xapian, where rg + jrec is the
                    amount of ranked results requested; a falsy rg falls
                    back to CFG_WEBSEARCH_DEF_RECORDS_IN_GROUPS and a
                    falsy jrec to 0

       Output (as documented by the original author):
       - list of records
       - list of rank values
       - prefix, useless it is always '('
       - postfix, useless it is always ')'
       - verbose_output

       On any exception the error is registered and `result' becomes
       (None, "", <error box>, <debug output collected so far>).

       NOTE(review): the visible body ends after computing `result' and
       never returns it; the tail of this function appears to be missing
       from this file -- TODO confirm against the upstream source.
    """
    voutput = ""
    # NOTE(review): configcreated, afterfind, aftermap and hitset_global
    # are not read in the visible body; kept in case the missing tail of
    # the function relies on them.
    configcreated = ""

    starttime = time.time()
    afterfind = starttime - time.time()
    aftermap = starttime - time.time()

    try:
        # We are receiving a global hitset
        hitset_global = hitset
        hitset = intbitset(hitset_global)

        # NOTE(review): this guard tests the name "methods" while the code
        # below reads METHODS.  Unless a global called `methods' exists,
        # the cache is rebuilt on every call.  Left unchanged because the
        # way METHODS is initialised is not visible here -- TODO confirm.
        if "methods" not in globals():
            create_rnkmethod_cache()

        function = METHODS[rank_method_code]["function"]
        # Check if we have specific function for sorting by this method
        func_object = globals().get(function)

        if verbose > 0:
            voutput += "function: %s <br/> " % function
            voutput += "related_to:  %s <br/>" % str(related_to)

        if func_object and related_to and related_to[0][0:6] == "recid:" and function == "word_similarity":
            # "recid:<id>" asks for records similar to one given record.
            result = find_similar(rank_method_code, related_to[0][6:], hitset, rank_limit_relevance, verbose, METHODS)
        elif func_object:
            if function == "word_similarity":
                result = func_object(rank_method_code, related_to, hitset, rank_limit_relevance, verbose, METHODS)
            elif function in ("word_similarity_solr", "word_similarity_xapian"):
                if not rg:
                    rg = CFG_WEBSEARCH_DEF_RECORDS_IN_GROUPS
                if not jrec:
                    jrec = 0
                ranked_result_amount = rg + jrec
                if verbose > 0:
                    voutput += "Ranked result amount: %s<br/><br/>" % ranked_result_amount
                    voutput += "field: %s<br/>" % field

                # Exactly one of the two back ends matches at this point.
                if function == "word_similarity_solr":
                    if verbose > 0:
                        voutput += "In Solr part:<br/>"
                    result = word_similarity_solr(
                        related_to, hitset, METHODS[rank_method_code], verbose, field, ranked_result_amount
                    )
                else:
                    if verbose > 0:
                        voutput += "In Xapian part:<br/>"
                    result = word_similarity_xapian(
                        related_to, hitset, METHODS[rank_method_code], verbose, field, ranked_result_amount
                    )
            else:
                result = func_object(rank_method_code, related_to, hitset, rank_limit_relevance, verbose)
        else:
            # No dedicated function for this method: generic ranking.
            result = rank_by_method(rank_method_code, related_to, hitset, rank_limit_relevance, verbose)
    except Exception as e:
        # Top-level boundary: record the failure and report it as an
        # error box instead of letting the exception escape.
        register_exception()
        result = (
            None,
            "",
            adderrorbox(
                "An error occured when trying to rank the search result " + rank_method_code,
                ["Unexpected error: %s<br />" % (e,)],
            ),
            voutput,
        )
Example #4
0
def rank_records(rank_method_code,
                 rank_limit_relevance,
                 hitset,
                 related_to=[],
                 verbose=0,
                 field='',
                 rg=None,
                 jrec=None):
    """Sorts given records or related records according to given method

       Parameters:
        - rank_method_code: Sort records using this method; key into the
                            module-level METHODS mapping
                            e.g. `jif' or `sbr' (word frequency vector model)
        - rank_limit_relevance: A parameter forwarded unchanged to the
                                sorting method
                                e.g. `23' for `nbc' (number of citations)
                                     or `0.10' for `vec'
                                The original author notes it is ignored
                                when sorting by citations; its meaning is
                                not evident from this function.
        - hitset: records to sort; an intbitset built from it is what the
                  sorting functions receive
        - related_to: if specified, instead of sorting given records,
                      we first fetch the related records ("related" being
                      defined by the method), then we sort these related
                      records.  A first element starting with "recid:"
                      selects find_similar() for `word_similarity'.
        - verbose: verbose level; above 0, debug lines are collected
        - field: forwarded to the Solr/Xapian sorting functions only
        - rg: only used for Solr/Xapian; a falsy value falls back to
              CFG_WEBSEARCH_DEF_RECORDS_IN_GROUPS
        - jrec: only used for Solr/Xapian; a falsy value becomes 0, and
                rg + jrec is the amount of ranked results requested

       Output (as documented by the original author):
       - list of records
       - list of rank values
       - prefix, useless it is always '('
       - postfix, useless it is always ')'
       - verbose_output

       On any exception the error is registered and `result' becomes
       (None, "", <error box>, <debug output collected so far>).

       NOTE(review): the part of the function visible here ends after
       computing `result' and does not return it -- the remainder may lie
       outside this excerpt; TODO confirm.
    """
    voutput = ""
    # NOTE(review): configcreated, afterfind and aftermap are not read in
    # the visible part of this function.
    configcreated = ""

    starttime = time.time()
    afterfind = starttime - time.time()
    aftermap = starttime - time.time()

    try:
        # We are receiving a global hitset
        hitset_global = hitset
        # Build a separate intbitset from the received hitset
        hitset = intbitset(hitset_global)

        # NOTE(review): this guard tests the name 'methods' while the code
        # below reads METHODS; unless a global called `methods' exists the
        # cache is rebuilt on every call -- TODO confirm which is intended.
        if 'methods' not in globals():
            create_rnkmethod_cache()

        function = METHODS[rank_method_code]["function"]
        # Check if we have specific function for sorting by this method
        func_object = globals().get(function)

        if verbose > 0:
            voutput += "function: %s <br/> " % function
            voutput += "related_to:  %s <br/>" % str(related_to)

        # "recid:<id>" combined with word_similarity asks for records
        # similar to one given record; the prefix is stripped off.
        if func_object and related_to and related_to[0][
                0:6] == "recid:" and function == "word_similarity":
            result = find_similar(rank_method_code, related_to[0][6:], hitset,
                                  rank_limit_relevance, verbose, METHODS)
        elif func_object:
            if function == "word_similarity":
                result = func_object(rank_method_code, related_to, hitset,
                                     rank_limit_relevance, verbose, METHODS)
            elif function in ("word_similarity_solr",
                              "word_similarity_xapian"):
                # Fall back to defaults when rg / jrec are missing or zero
                if not rg:
                    rg = CFG_WEBSEARCH_DEF_RECORDS_IN_GROUPS
                if not jrec:
                    jrec = 0
                ranked_result_amount = rg + jrec
                if verbose > 0:
                    voutput += "Ranked result amount: %s<br/><br/>" % ranked_result_amount

                if verbose > 0:
                    voutput += "field: %s<br/>" % field

                # Only one of the next two conditions can hold
                if function == "word_similarity_solr":
                    if verbose > 0:
                        voutput += "In Solr part:<br/>"
                    result = word_similarity_solr(related_to, hitset,
                                                  METHODS[rank_method_code],
                                                  verbose, field,
                                                  ranked_result_amount)
                if function == "word_similarity_xapian":
                    if verbose > 0:
                        voutput += "In Xapian part:<br/>"
                    result = word_similarity_xapian(related_to, hitset,
                                                    METHODS[rank_method_code],
                                                    verbose, field,
                                                    ranked_result_amount)
            else:
                # Any other method-specific function takes five arguments
                result = func_object(rank_method_code, related_to, hitset,
                                     rank_limit_relevance, verbose)
        else:
            # No function of that name in this module: generic ranking
            result = rank_by_method(rank_method_code, related_to, hitset,
                                    rank_limit_relevance, verbose)
    except Exception, e:
        # Record the failure and report it as an error box instead of
        # letting the exception escape
        register_exception()
        result = (
            None, "",
            adderrorbox(
                "An error occured when trying to rank the search result " +
                rank_method_code,
                ["Unexpected error: %s<br />" % (e, )]), voutput)