Exemple #1
0
    def sort_hits(self, local_pp_scores, urlparams):
        """
        Re-rank the current hits by a blended relevance/popularity score.

        For every hit three raw values are gathered: the relevance (IR)
        score already stored on the hit by Elasticsearch (0 when absent),
        the global page popularity obtained from ``PagePopScore`` for the
        hit's domain, and the local page popularity looked up in
        ``local_pp_scores`` by that same domain.  Each series is passed
        through ``utils.normalize_on_max``; the normalised triple is merged
        by ``heuristic_score`` and written back to ``hit['score']``.  The
        hits are then stored back on ``self.object_list`` ordered by that
        score, highest first.

        Nothing happens when ``self.object_list`` is falsy.

        :param local_pp_scores: mapping indexed by each hit's ``domain``.
        :param urlparams: forwarded unchanged to ``heuristic_score``.
        """
        if not self.object_list:
            return

        results = self.object_list.hits

        # One tuple per hit: (relevance, global popularity, local popularity).
        raw = [
            (
                hit.get('score', 0),
                PagePopScore.objects.get_score(onion=hit['domain']),
                local_pp_scores[hit['domain']],
            )
            for hit in results
        ]

        relevance = utils.normalize_on_max([row[0] for row in raw])
        global_pop = utils.normalize_on_max([row[1] for row in raw])
        local_pop = utils.normalize_on_max([row[2] for row in raw])

        if settings.DEBUG:
            # Normalisation must hand back one value per hit.
            assert (len(relevance) == len(global_pop)
                    == len(local_pop) == len(results))

        for hit, rel, glob, loc in zip(results, relevance, global_pop,
                                       local_pop):
            hit['score'] = heuristic_score(rel, glob, loc, urlparams)

        ranked = list(results)
        ranked.sort(key=lambda hit: hit['score'], reverse=True)
        self.object_list.hits = ranked
Exemple #2
0
    def sort_hits(self, local_pp_scores, urlparams):
        """
        Order the hits of ``self.object_list`` by a combined score.

        The combined score is produced by ``heuristic_score`` from three
        inputs, each normalised with ``utils.normalize_on_max`` across all
        hits: the IR score given by Elasticsearch (read from the hit's
        ``'score'`` key, defaulting to 0), the global PP (Page Popularity)
        score returned by ``PagePopScore.objects.get_score`` for the hit's
        domain, and the local PP score found in ``local_pp_scores`` under
        that domain.  Each hit's ``'score'`` is overwritten with the
        combined value and the hits are re-stored in descending order of it.

        Returns immediately, changing nothing, if ``self.object_list`` is
        falsy.

        :param local_pp_scores: mapping from hit domain to local PP score.
        :param urlparams: passed straight through to ``heuristic_score``.
        """
        if not self.object_list:
            return

        found = self.object_list.hits

        # Parallel lists, filled in a single pass over the hits.
        es_raw, global_raw, local_raw = [], [], []
        for entry in found:
            es_raw.append(entry.get('score', 0))
            onion = entry['domain']
            global_raw.append(PagePopScore.objects.get_score(onion=onion))
            local_raw.append(local_pp_scores[onion])

        es_norm, global_norm, local_norm = [
            utils.normalize_on_max(series)
            for series in (es_raw, global_raw, local_raw)
        ]

        if settings.DEBUG:
            # Every normalised series must still line up with the hits.
            assert len(es_norm) == len(global_norm) == \
                len(local_norm) == len(found)

        combined = zip(found, es_norm, global_norm, local_norm)
        for entry, es_part, global_part, local_part in combined:
            entry['score'] = heuristic_score(
                es_part, global_part, local_part, urlparams)

        def by_score(entry):
            return entry['score']

        self.object_list.hits = sorted(found, key=by_score, reverse=True)
Exemple #3
0
    def sort_results(self):
        """
        Combine the IR score given by Elasticsearch with the PP (Page
        Popularity) score, and sort the results by the combined score.

        The hits are read from ``self.object_list[1]``.  For each hit the
        stored ``'score'`` (0 when absent) and the popularity returned by
        ``PagePopScore.objects.get_score`` for its domain are collected,
        both series are normalised with ``utils.normalize_on_max``, and the
        hit's ``'score'`` is replaced by ``heuristic_score`` of the two
        normalised values.  Finally the hit list is sorted in place,
        highest score first.

        Returns immediately, changing nothing, when there are no hits.
        """
        hits = self.object_list[1]  # object_list is tuple(int, list)
        if not hits:
            return

        ir_scores = [h.get('score', 0) for h in hits]
        pp_scores = [PagePopScore.objects.get_score(onion=h['domain'])
                     for h in hits]
        ir_scores_norm = utils.normalize_on_max(ir_scores)
        pp_scores_norm = utils.normalize_on_max(pp_scores)
        # Internal sanity check: normalisation must keep one value per hit.
        assert len(ir_scores_norm) == len(pp_scores_norm) == len(hits)

        for h, ir, pp in zip(hits, ir_scores_norm, pp_scores_norm):
            h['score'] = heuristic_score(ir, pp)

        # Sort the contained list in place instead of assigning to
        # ``self.object_list[1]``: a tuple does not support item assignment,
        # so the assignment would raise TypeError.  ``list.sort`` yields the
        # same (stable) ordering as ``sorted``.
        hits.sort(key=lambda k: k['score'], reverse=True)