def __type_centric(self, query):
    """Type-centric TTI: rank types directly against the query.

    Dispatches on the configured model: BM25 does a single retrieval pass;
    LM adds a second-pass scorer over the ``content`` field.

    :param query: query string
    :type query: str
    :return: retrieved types (dict); empty dict if the model is unrecognized
    """
    types = {}
    model = self.__config.get("model", TTI_MODEL_BM25)

    if model == TTI_MODEL_BM25:
        PLOGGER.info("TTI, TC, BM25")
        self.__tc_config["model"] = "bm25"
        types = Retrieval(self.__tc_config).retrieve(query)
    elif model == TTI_MODEL_LM:
        PLOGGER.debug("TTI, TC, LM")
        # The elastic cache is only needed by the LM second-pass scorer,
        # so it is constructed here rather than unconditionally.
        elastic = ElasticCache(
            self.__tc_config.get("index", DEFAULT_TTI_TC_INDEX))
        self.__tc_config["model"] = "lm"  # Needed for 2nd-pass
        self.__tc_config["field"] = "content"  # Needed for 2nd-pass
        self.__tc_config["second_pass"] = {"field": "content"}
        # Forward smoothing settings to the second pass only when set.
        for param in ["smoothing_method", "smoothing_param"]:
            if self.__config.get(param, None) is not None:
                self.__tc_config["second_pass"][param] = self.__config.get(
                    param)
        scorer = Scorer.get_scorer(elastic, query, self.__tc_config)
        types = Retrieval(self.__tc_config).retrieve(query, scorer)
    # NOTE(review): any other model value falls through and returns {}
    # silently — presumably validated upstream; confirm with callers.

    PLOGGER.info(types)
    return types
def _first_pass_scoring(self, analyzed_query):
    """Retrieve an initial document ranking for the analyzed query.

    Delegates to the elastic backend using the configured first-pass
    field, result count, and return fields.

    :param analyzed_query: analyzed query
    :return: RetrievalResults object
    """
    PLOGGER.debug("\tFirst pass scoring... ", )
    return self.__elastic.search(
        analyzed_query,
        self.__first_pass_field,
        num=self.__first_pass_num_docs,
        fields_return=self.__first_pass_fields_return,
    )
def entity_linking():
    """HTTP endpoint: entity-link the query given via the ``q`` request arg.

    Optional request args: ``method`` (linker method, passed through as-is)
    and ``threshold`` (score threshold, parsed as float; defaults to 0.1).

    :return: JSON response with the linking result, or an error response
        when ``q`` is missing or ``threshold`` is not a number.
    """
    query = request.args.get("q", None)
    if query is None:
        return error("Query is not specified.")

    # Request args arrive as strings; coerce to float so the config value
    # has a consistent type whether defaulted or client-supplied (the
    # original passed the raw string straight through).
    try:
        threshold = float(request.args.get("threshold", 0.1))
    except ValueError:
        return error("Invalid threshold; expected a number.")

    config = {
        "method": request.args.get("method", None),
        "threshold": threshold,
    }
    el = EL(config, __entity, __elastic, __fcache)
    res = el.link(query)
    PLOGGER.debug(res)
    return jsonify(**res)
def _second_pass_scoring(self, res1, scorer):
    """Re-score the first-pass documents with the supplied scorer.

    :param res1: first pass results
    :param scorer: scorer object
    :return: RetrievalResults object
    """
    PLOGGER.debug("\tSecond pass scoring... ", )
    # Pre-fetch term vectors for every scored field before re-scoring.
    doc_ids = list(res1.keys())
    for field in self.__get_fields():
        self.__elastic.multi_termvector(doc_ids, field)
    rescored = {
        doc_id: {
            "score": scorer.score_doc(doc_id),
            "fields": res1[doc_id].get("fields", {}),
        }
        for doc_id in res1
    }
    PLOGGER.debug("done")
    return rescored
def __get_top_n(self, fields_freq, n):
    """Return the top-n fields by frequency, excluding FSDM fields.

    Fields are ordered by descending (frequency, name); fields already in
    ``self.__fsdm_fields`` are skipped and do not count toward ``n``.

    :param fields_freq: dict mapping field name -> frequency
    :param n: maximum number of fields to return
    :return: dict of at most ``n`` selected fields and their frequencies
    """
    sorted_fields = sorted(fields_freq.items(),
                           key=lambda item: (item[1], item[0]),
                           reverse=True)
    top_fields = {}
    taken = 0
    for field, freq in sorted_fields:
        if taken >= n:
            break
        if field in self.__fsdm_fields:
            continue
        taken += 1
        top_fields[field] = freq
        if self.DEBUG:
            print("(" + field + ", " + str(freq) + ")")
    if self.DEBUG:
        # Collapsed to a single string: the original passed print-style
        # positional args, which a stdlib logging Logger would treat as
        # missing %-format arguments and report a formatting error.
        PLOGGER.debug("\nNumber of fields: " + str(len(top_fields)) + "\n")
    return top_fields
def get_scorer(elastic, query, config):
    """Scorer factory: build the scorer matching ``config["model"]``.

    :param elastic: Elastic object
    :param query: raw query (to be analyzed)
    :param config: dict with models parameters
    :return: Scorer instance, or None when no model is configured
    :raises Exception: if the configured model is unknown
    """
    model = config.get("model", None)
    if model is None:
        return None

    # Lazy dispatch table: (log message, constructor thunk) per model.
    factories = {
        "lm": ("\tLM scoring ... ", lambda: ScorerLM(elastic, query, config)),
        "mlm": ("\tMLM scoring ...", lambda: ScorerMLM(elastic, query, config)),
        "prms": ("\tPRMS scoring ...", lambda: ScorerPRMS(elastic, query, config)),
    }
    if model not in factories:
        raise Exception("Unknown model " + model)

    message, build = factories[model]
    PLOGGER.debug(message)
    return build()