Beispiel #1
0
from nordlys.core.retrieval.elastic import Elastic

index_name = "cerc-expert"
query = ["climate", "change"]
elas = Elastic(index_name)
model = "LMJelinekMercer"
params = {"lambda": 0.1}
elas.update_similarity(model, params)
pr = elas.search(query, "content")['hits']
print(pr[0]['_score'])
print(pr[1]['_score'])
print(pr[2]['_score'])

mode1 = "BM25"
params1 = {"k1": 1.2, "b": 0.75}
elas1 = Elastic(index_name)
elas1.update_similarity(mode1, params1)
pr1 = elas.search(query, "content")['hits']
print(pr1[0]['_score'])
print(pr1[1]['_score'])
print(pr1[2]['_score'])
Beispiel #2
0
class LateFusionScorer(FusionScorer):
    def __init__(self,
                 index_name,
                 association_file,
                 assoc_mode,
                 retr_model,
                 retr_params,
                 num_docs=None,
                 field="content",
                 run_id="fusion",
                 num=100):
        """

        :param index_name: name of index
        :param association_file: document-object association file
        :param assoc_mode: document-object weight mode, uniform or binary
        :param retr_model: document-object weight mode, uniform or binary
        :param retr_params: parameter in similarity method
        """
        self._index_name = index_name
        self._field = field
        self._num_docs = num_docs
        self._elastic = Elastic(self._index_name)
        self._model = retr_model
        self._params = retr_params
        self._elastic.update_similarity(self._model, self._params)
        self.association_file = association_file
        self.assoc_doc = {}
        self.assoc_obj = {}
        self.run_id = run_id
        self._assoc_mode = assoc_mode
        self._num = num

    def score_query(self, query):
        """
        Scores a given query.

        :param query: query to be searched
        :return: pqo dict
        """
        # retrieving documents
        aquery = self._elastic.analyze_query(query)  # analyzed query
        res = self._elastic.search(aquery, self._field, num=self._num)

        # scoring objects, i.e., computing P(q|o)
        pqo = {}
        for i, item in enumerate(list(res.keys())):
            if self._num_docs is not None and i + 1 == self._num_docs:  # consider only top documents
                break
            doc_id = item
            doc_score = res[doc_id]
            if doc_id in self.assoc_doc:
                for object_id in self.assoc_doc[doc_id]:
                    if self._assoc_mode == FusionScorer.ASSOC_MODE_BINARY:
                        w_do = 1
                    elif self._assoc_mode == FusionScorer.ASSOC_MODE_UNIFORM:
                        w_do = 1 / len(self.assoc_obj[object_id])
                    else:
                        w_do = 0  # this should never happen
                    pqo[object_id] = pqo.get(object_id, 0) + doc_score * w_do

        return RetrievalResults(pqo)