コード例 #1
0
 def preference(self, ctx: RerankingContext, query: Query, d1: Document,
                d2: Document):
     """Compare two documents by their closest grouping of query terms.

     ``ctx.f.closest_grouping_size_and_count`` is called with the distinct
     query terms and each document's term sequence. Element 0 of the two
     results is compared first; element 1 is only compared when the
     element-0 values are equal.

     NOTE(review): presumably element 0 is the grouping size and element 1
     the number of such groupings, and ``prefs.strictlygreater(a, b)``
     favours ``d1`` when ``a > b`` -- confirm against the helpers.
     """
     distinct_query_terms = set(query.tf.keys())
     grouping_d1 = ctx.f.closest_grouping_size_and_count(
         distinct_query_terms, d1.termseq)
     grouping_d2 = ctx.f.closest_grouping_size_and_count(
         distinct_query_terms, d2.termseq)

     first_elements_match = grouping_d1[0] == grouping_d2[0]
     if not first_elements_match:
         # Arguments are swapped on purpose: a larger element 0 in d2 is
         # what gets passed as the "greater" side.
         return prefs.strictlygreater(grouping_d2[0], grouping_d1[0])

     # Tie on element 0: fall back to element 1, d1 first.
     return prefs.strictlygreater(grouping_d1[1], grouping_d2[1])
コード例 #2
0
    def preference(self, ctx: RerankingContext, query: Query, d1: Document,
                   d2: Document):
        """Compare two documents by their vocabulary overlap with the query.

        ``ctx.f.vocab_overlap`` is evaluated for the distinct query terms
        against the term keys of ``d1`` and then of ``d2``. The result is
        ``prefs.strictlygreater(overlap_d2, overlap_d1)``, i.e. the overlap
        of ``d2`` is passed as the first ("greater") argument.

        NOTE(review): presumably this favours the document with the lower
        overlap -- confirm against ``prefs.strictlygreater``.
        """
        query_vocab = set(query.tf.keys())
        overlap_d1, overlap_d2 = (
            ctx.f.vocab_overlap(query_vocab, doc.tf.keys())
            for doc in (d1, d2)
        )
        return prefs.strictlygreater(overlap_d2, overlap_d1)
コード例 #3
0
    def preference(self, ctx: RerankingContext, query: Query, d1: Document,
                   d2: Document):
        """Compare documents via ``average_between_qterms`` on shared terms.

        Only query terms that occur in both ``d1.termseq`` and
        ``d2.termseq`` are handed to ``ctx.f.average_between_qterms``. The
        value for ``d2`` is passed as the first argument of
        ``prefs.strictlygreater`` and the value for ``d1`` as the second.

        NOTE(review): presumably the helper measures the average number of
        tokens between query terms and a smaller value is better -- confirm.
        """
        # Query terms that appear in both documents.
        shared_query_terms = set(query.termseq).intersection(
            d1.termseq, d2.termseq)

        average_d1 = ctx.f.average_between_qterms(
            shared_query_terms, d1.termseq)
        average_d2 = ctx.f.average_between_qterms(
            shared_query_terms, d2.termseq)

        return prefs.strictlygreater(average_d2, average_d1)
コード例 #4
0
 def preference(self, ctx: RerankingContext, query: Query, d1: Document,
                d2: Document):
     """Compare two documents by their summed query-term frequencies.

     For every entry of ``query.termseq`` (repeated terms are counted
     again) the term frequency in each document is added up. If
     ``prefs.approximatelyEqual`` considers the two totals equal, ``0`` is
     returned; otherwise the result of
     ``prefs.strictlygreater(total_d1, total_d2)``.
     """
     total_tf_d1 = 0
     total_tf_d2 = 0
     for term in query.termseq:
         total_tf_d1 += d1.tf[term]
         total_tf_d2 += d2.tf[term]

     if prefs.approximatelyEqual(total_tf_d1, total_tf_d2):
         # Totals are too close to call: no preference.
         return 0

     # NOTE(review): per the original comment the totals differ by at
     # least 10% here; the threshold lives in prefs.approximatelyEqual.
     return prefs.strictlygreater(total_tf_d1, total_tf_d2)
コード例 #5
0
    def preference(self, ctx: RerankingContext, query: Query, d1: Document,
                   d2: Document):
        """Compare documents by where shared query terms first appear.

        For each distinct query term that occurs in both documents, the
        index of its first occurrence in each document's ``termseq`` is
        added to that document's sum. The sum for ``d2`` is passed as the
        first argument of ``prefs.strictlygreater`` and the sum for ``d1``
        as the second. With no shared query term both sums are 0.
        """
        terms_in_both_docs = set(d1.termseq).intersection(d2.termseq)
        scored_terms = [
            term for term in set(query.termseq)
            if term in terms_in_both_docs
        ]

        # .index() cannot fail: every scored term occurs in both documents.
        first_pos_sum_d1 = sum(d1.termseq.index(term) for term in scored_terms)
        first_pos_sum_d2 = sum(d2.termseq.index(term) for term in scored_terms)

        return prefs.strictlygreater(first_pos_sum_d2, first_pos_sum_d1)
コード例 #6
0
    def preference(self, ctx: RerankingContext, query: Query, d1: Document,
                   d2: Document):
        """Compare documents by the frequency of the most "central" query term.

        For every unordered pair of distinct query terms,
        ``ctx.f.synset_similarity`` is evaluated once and the value is
        credited to both terms. The term with the largest accumulated
        similarity is selected (``np.argmax`` picks the first one on ties),
        and the documents are compared by their term frequency for it via
        ``prefs.strictlygreater(d1.tf[term], d2.tf[term])``.

        A query without terms previously crashed inside ``np.argmax``
        (``ValueError`` on an empty array). It now yields whatever
        ``prefs.strictlygreater`` returns for two equal values.
        """
        qterms = list(query.tf.keys())
        if not qterms:
            # No term to compare on. Delegate to prefs with equal inputs so
            # the tie value follows prefs' own convention.
            return prefs.strictlygreater(0, 0)

        # ssum[i] accumulates the similarity of qterms[i] to all other terms.
        ssum = np.zeros(len(qterms))
        indexed_terms = enumerate(qterms)
        for (i1, t1), (i2, t2) in itertools.combinations(indexed_terms, 2):
            sim = ctx.f.synset_similarity(t1, t2)
            ssum[i1] += sim
            ssum[i2] += sim

        tmax = qterms[int(np.argmax(ssum))]
        return prefs.strictlygreater(d1.tf[tmax], d2.tf[tmax])
コード例 #7
0
    def preference(self, ctx: RerankingContext, query: Query, d1: Document,
                   d2: Document):
        """Compare documents by where the full query sequence first occurs.

        Each document's ``termseq`` is scanned for the first position at
        which a slice of the query's length equals ``query.termseq``.
        A document without such a position gets ``float('inf')``. The
        position for ``d2`` is passed as the first argument of
        ``prefs.strictlygreater`` and the position for ``d1`` as the second.

        Note that ``query.termseq[0]`` is read while scanning, so an empty
        query sequence raises ``IndexError`` as soon as a non-empty document
        is scanned.
        """
        phrase = query.termseq
        phrase_len = len(phrase)

        def first_phrase_start(tokens: typing.Sequence[str]):
            # Only positions holding the phrase's first term can start a match.
            for start, token in enumerate(tokens):
                if token == phrase[0]:
                    end = start + phrase_len
                    if end <= len(tokens) and tokens[start:end] == phrase:
                        return start
            return float('inf')

        start_d2 = first_phrase_start(d2.termseq)
        start_d1 = first_phrase_start(d1.termseq)
        return prefs.strictlygreater(start_d2, start_d1)
コード例 #8
0
    def preference(self, ctx: RerankingContext, query: Query, d1: Document,
                   d2: Document):
        """Compare documents on query-term pairs with similar ``td`` values.

        Only pairs of distinct query terms whose ``ctx.f.td`` values are
        approximately equal (per ``prefs.approximatelyEqual``) are
        considered. For such a pair, ``d1`` collects a vote when it contains
        both terms while ``d2`` contains only the first one, with a
        frequency equal to the sum of ``d1``'s two frequencies. ``d2``
        collects a vote in the mirrored situation. The vote counts are
        compared with ``prefs.strictlygreater(votes_d1, votes_d2)``.
        """
        votes_d1 = 0
        votes_d2 = 0

        for term_a, term_b in itertools.combinations(query.tf.keys(), 2):
            similar_td = prefs.approximatelyEqual(
                ctx.f.td(term_a), ctx.f.td(term_b))
            if not similar_td:
                continue

            a_in_d1, a_in_d2 = d1.tf[term_a], d2.tf[term_a]
            b_in_d1, b_in_d2 = d1.tf[term_b], d2.tf[term_b]

            # d1 spreads the same total over both terms, d2 uses only term_a.
            if (a_in_d2 == a_in_d1 + b_in_d1 and b_in_d2 == 0
                    and a_in_d1 != 0 and b_in_d1 != 0):
                votes_d1 += 1

            # Mirrored case: d2 covers both terms, d1 only term_a.
            if (a_in_d1 == a_in_d2 + b_in_d2 and b_in_d1 == 0
                    and a_in_d2 != 0 and b_in_d2 != 0):
                votes_d2 += 1

        return prefs.strictlygreater(votes_d1, votes_d2)
コード例 #9
0
    def preference(self, ctx: RerankingContext, query: Query, d1: Document,
                   d2: Document):
        """Compare documents whose lengths differ exactly by a term's surplus.

        For each query term, a document collects a vote when it contains
        the term more often than the other document and its length equals
        the other document's length plus exactly that frequency difference.
        At most one document can collect the vote for a given term. The
        vote counts are compared with
        ``prefs.strictlygreater(votes_d1, votes_d2)``.
        """

        def surplus_explains_length(term, dx, dy):
            tf_x = dx.tf[term]
            tf_y = dy.tf[term]
            if not tf_x > tf_y:
                return False
            # dx must be longer than dy by exactly the extra occurrences.
            return len(dx) == (len(dy) + tf_x - tf_y)

        votes_d1 = 0
        votes_d2 = 0
        for term in query.tf.keys():
            if surplus_explains_length(term, d1, d2):
                votes_d1 += 1
            elif surplus_explains_length(term, d2, d1):
                votes_d2 += 1

        return prefs.strictlygreater(votes_d1, votes_d2)
コード例 #10
0
    def preference(self, ctx: RerankingContext, query: Query, d1: Document,
                   d2: Document):
        """Score documents on query-term pairs, favouring the higher-idf term.

        Each pair of distinct query terms is ordered so that ``rare`` is the
        term whose ``ctx.f.idf`` is greater than or equal to the other's.
        A pair counts when the two documents have mirrored frequencies for
        the two terms, or when ``rare`` occurs more often in the query than
        ``common``. For a counting pair the score moves by +1 if ``d1``
        contains ``rare`` more often than ``d2``, by -1 if less often, and
        stays unchanged on a tie. The result is
        ``prefs.strictlygreater(score, 0)``.
        """
        score = 0

        for term_a, term_b in itertools.combinations(query.tf.keys(), 2):
            # Order the pair by idf; on equal idf the original order is kept.
            if ctx.f.idf(term_a) >= ctx.f.idf(term_b):
                rare, common = term_a, term_b
            else:
                rare, common = term_b, term_a

            mirrored = (d1.tf[rare] == d2.tf[common]
                        and d1.tf[common] == d2.tf[rare])
            if mirrored or query.tf[rare] > query.tf[common]:
                # The document with more occurrences of `rare` gets a point.
                diff = d1.tf[rare] - d2.tf[rare]
                if diff > 0:
                    score += 1
                elif diff < 0:
                    score -= 1

        return prefs.strictlygreater(score, 0)
コード例 #11
0
    def preference(self, ctx: RerankingContext, query: Query, d1: Document,
                   d2: Document):
        """Compare documents by length-normalised similarity to the query.

        Each document's score is the summed ``self._similarity`` of its
        terms (shared terms plus terms unique to that document) against
        ``query.tf``, divided by ``ctx.f.wordcount`` of the document. The
        scores are compared with ``prefs.strictlygreater(sim1, sim2)``.

        The sum over the shared terms is identical for both documents, so
        it is computed once and reused. Previously it was evaluated twice
        with the same arguments.

        NOTE(review): assumes ``self._similarity`` is deterministic for
        identical arguments and returns something ``np.sum`` can reduce --
        confirm against its definition.
        """
        d1_terms = set(d1.tf)
        d2_terms = set(d2.tf)

        def sum_sim(ts):
            return np.sum(self._similarity(ctx, ts, query.tf))

        # Shared terms contribute the same amount to both documents.
        shared_sim = sum_sim(d1_terms.intersection(d2_terms))

        sim1 = shared_sim + sum_sim(d1_terms.difference(d2_terms))
        sim2 = shared_sim + sum_sim(d2_terms.difference(d1_terms))

        # Normalise by document length so long documents are not favoured
        # merely for containing more terms.
        sim1 /= ctx.f.wordcount(d1)
        sim2 /= ctx.f.wordcount(d2)

        return prefs.strictlygreater(sim1, sim2)
コード例 #12
0
 def preference(self, ctx: RerankingContext, query: Query, d1: Document,
                d2: Document):
     """Compare two documents via ``ctx.f.average_smallest_span``.

     The helper is evaluated for the distinct query terms against each
     document's term sequence. The value for ``d2`` is passed as the first
     argument of ``prefs.strictlygreater`` and the value for ``d1`` as the
     second.

     NOTE(review): presumably a smaller average span is better, so this
     favours the document with the smaller value -- confirm.
     """
     distinct_query_terms = set(query.tf.keys())

     def average_span(doc):
         return ctx.f.average_smallest_span(
             distinct_query_terms, doc.termseq)

     span_d1 = average_span(d1)
     span_d2 = average_span(d2)
     return prefs.strictlygreater(span_d2, span_d1)
コード例 #13
0
 def preference(self, ctx: RerankingContext, query: Query, d1: Document,
                d2: Document):
     """Compare documents by how many distinct query terms they contain.

     Counts the distinct query terms that appear among each document's
     term keys and returns
     ``prefs.strictlygreater(count_d1, count_d2)``.
     """
     distinct_query_terms = set(query.tf.keys())
     matched_counts = [
         len(distinct_query_terms.intersection(doc.tf.keys()))
         for doc in (d1, d2)
     ]
     return prefs.strictlygreater(matched_counts[0], matched_counts[1])
コード例 #14
0
 def preference(self, ctx: RerankingContext, query: Query, d1: Document,
                d2: Document):
     """Prefer the shorter of the two documents.

     The length of ``d2`` is passed as the first argument of
     ``prefs.strictlygreater`` and the length of ``d1`` as the second, so
     ``d1`` being shorter is the "strictly greater" case.
     """
     length_d2 = len(d2)
     length_d1 = len(d1)
     return prefs.strictlygreater(length_d2, length_d1)