def sort_hits(self, local_pp_scores, urlparams):
    """
    Re-rank the current hits by blending the IR (Information Relevant)
    score given by Elasticsearch with the PP (Page Popularity) scores.

    For each hit three raw values are gathered: the hit's own ``score``
    (0 when absent), the value returned by
    ``PagePopScore.objects.get_score`` for the hit's domain, and the entry
    of ``local_pp_scores`` for that same domain.  Each series is passed
    through ``utils.normalize_on_max`` and the three normalized values are
    merged by ``heuristic_score`` together with ``urlparams``.  The result
    overwrites ``hit['score']`` and ``self.object_list.hits`` is replaced
    by the hits ordered from highest to lowest combined score.

    Does nothing when ``self.object_list`` is falsy.

    :param local_pp_scores: mapping indexed by hit domain; a missing
        domain raises ``KeyError``.
    :param urlparams: forwarded untouched to ``heuristic_score``.
    """
    if not self.object_list:
        return

    hits = self.object_list.hits

    # One (ir, global_pp, local_pp) triple per hit, collected in a single
    # pass so the lookups happen in the same per-hit order as before.
    raw = [
        (
            hit.get('score', 0),
            PagePopScore.objects.get_score(onion=hit['domain']),
            local_pp_scores[hit['domain']],
        )
        for hit in hits
    ]

    relevance = utils.normalize_on_max([row[0] for row in raw])
    global_pop = utils.normalize_on_max([row[1] for row in raw])
    local_pop = utils.normalize_on_max([row[2] for row in raw])

    if settings.DEBUG:
        # zip() below would silently truncate on a length mismatch
        assert len(relevance) == len(global_pop) == \
            len(local_pop) == len(hits)

    for hit, ir, pp_global, pp_local in zip(hits, relevance,
                                            global_pop, local_pop):
        hit['score'] = heuristic_score(ir, pp_global, pp_local, urlparams)

    def combined_score(hit):
        return hit['score']

    self.object_list.hits = sorted(hits, key=combined_score, reverse=True)
def sort_hits(self, local_pp_scores, urlparams):
    """
    Order the hits by a score mixing the IR (Information Relevant) score
    given by Elasticsearch with global and local PP (Page Popularity).

    The method is a no-op when ``self.object_list`` is falsy.  Otherwise
    every hit in ``self.object_list.hits`` gets its ``'score'`` entry
    rewritten with ``heuristic_score(ir, pp_global, pp_local, urlparams)``,
    where the three inputs are the ``utils.normalize_on_max`` outputs for
    that hit, and ``self.object_list.hits`` is then replaced with a new
    list sorted by that score in descending order.

    :param local_pp_scores: container looked up with each hit's
        ``'domain'``; every domain must be present.
    :param urlparams: handed to ``heuristic_score`` as its last argument.
    """
    if not self.object_list:
        return

    found = self.object_list.hits

    es_scores, global_pop, local_pop = [], [], []
    for entry in found:
        es_scores.append(entry.get('score', 0))
        onion = entry['domain']
        global_pop.append(PagePopScore.objects.get_score(onion=onion))
        local_pop.append(local_pp_scores[onion])

    es_norm, global_norm, local_norm = [
        utils.normalize_on_max(series)
        for series in (es_scores, global_pop, local_pop)
    ]

    if settings.DEBUG:
        # Sanity check only in debug: all series must line up with the hits
        assert len(es_norm) == len(global_norm) == \
            len(local_norm) == len(found)

    for entry, ir, pp, pl in zip(found, es_norm, global_norm, local_norm):
        entry['score'] = heuristic_score(ir, pp, pl, urlparams)

    # Copy first, then sort the copy in place (same result as sorted())
    ranked = list(found)
    ranked.sort(key=lambda item: item['score'], reverse=True)
    self.object_list.hits = ranked
def sort_results(self):
    """
    Combine IR (Information Relevant) score given by Elasticsearch, with
    PP (Page Popularity) score, to sort the results.

    ``self.object_list`` is treated as a ``(int, list)`` pair whose second
    item is the list of hits.  Each hit's ``'score'`` entry is overwritten
    with ``heuristic_score(ir, pp)``, computed from the
    ``utils.normalize_on_max`` outputs for the hit's Elasticsearch score
    (0 when absent) and for ``PagePopScore.objects.get_score`` of its
    domain.  The hits list is then reordered in place, highest score
    first.

    Returns ``None`` without doing anything when ``self.object_list`` is
    falsy or when it holds no hits.
    """
    # Same guard as sort_hits: nothing to rank when there is no result set
    if not self.object_list:
        return

    hits = self.object_list[1]  # object_list is tuple(int, list)
    if not hits:
        return

    ir_scores = [h.get('score', 0) for h in hits]
    pp_scores = [PagePopScore.objects.get_score(onion=h['domain'])
                 for h in hits]

    ir_scores_norm = utils.normalize_on_max(ir_scores)
    pp_scores_norm = utils.normalize_on_max(pp_scores)

    # zip() below would silently drop hits on a length mismatch
    assert len(ir_scores_norm) == len(pp_scores_norm) == len(hits)

    for h, ir, pp in zip(hits, ir_scores_norm, pp_scores_norm):
        h['score'] = heuristic_score(ir, pp)

    # Sort the list in place instead of assigning to object_list[1]:
    # object_list is a tuple, and tuple item assignment raises TypeError.
    # The list object inside the tuple is mutable, so the new order is
    # visible through self.object_list[1] all the same.
    hits.sort(key=lambda k: k['score'], reverse=True)