def preference(self, ctx: RerankingContext, query: Query, d1: Document, d2: Document):
    """Compare two documents by their closest grouping of query terms.

    ``ctx.f.closest_grouping_size_and_count`` is evaluated for each document
    with the set of query terms and the document's term sequence. Element 0
    of the result is the primary criterion (the smaller value wins); when
    both documents agree on it, element 1 breaks the tie (the larger value
    wins).

    Returns:
        The value of ``prefs.strictlygreater`` for the deciding pair.
    """
    query_terms = set(query.tf.keys())
    grouping1 = ctx.f.closest_grouping_size_and_count(query_terms, d1.termseq)
    grouping2 = ctx.f.closest_grouping_size_and_count(query_terms, d2.termseq)

    if grouping1[0] != grouping2[0]:
        # Primary criterion differs: arguments are swapped so that the
        # smaller element 0 is favoured.
        return prefs.strictlygreater(grouping2[0], grouping1[0])

    # Tie on element 0: fall back to element 1, larger is favoured.
    return prefs.strictlygreater(grouping1[1], grouping2[1])
def preference(self, ctx: RerankingContext, query: Query, d1: Document, d2: Document):
    """Compare two documents by ``ctx.f.vocab_overlap`` with the query.

    The overlap is computed between the set of query terms and each
    document's term-frequency keys. The arguments to
    ``prefs.strictlygreater`` are swapped (d2 first), so the document with
    the smaller overlap value is the favoured one.

    Returns:
        The value of ``prefs.strictlygreater(overlap_d2, overlap_d1)``.
    """
    query_vocab = set(query.tf.keys())
    overlap_d1, overlap_d2 = (
        ctx.f.vocab_overlap(query_vocab, doc.tf.keys()) for doc in (d1, d2)
    )
    return prefs.strictlygreater(overlap_d2, overlap_d1)
def preference(self, ctx: RerankingContext, query: Query, d1: Document, d2: Document):
    """Compare two documents by ``ctx.f.average_between_qterms``.

    Only query terms that occur in *both* documents are considered. The
    arguments to ``prefs.strictlygreater`` are swapped (d2 first), so the
    document with the smaller average is the favoured one.

    Returns:
        The value of ``prefs.strictlygreater(avg_d2, avg_d1)``.
    """
    # Query terms present in both term sequences.
    shared_query_terms = set(query.termseq).intersection(d1.termseq, d2.termseq)

    avg_d1, avg_d2 = (
        ctx.f.average_between_qterms(shared_query_terms, doc.termseq)
        for doc in (d1, d2)
    )
    return prefs.strictlygreater(avg_d2, avg_d1)
def preference(self, ctx: RerankingContext, query: Query, d1: Document, d2: Document):
    """Compare two documents by their summed query-term frequencies.

    For every position in ``query.termseq`` (repeated query terms count
    repeatedly) the document's term frequency is added up. If the two totals
    are approximately equal according to ``prefs.approximatelyEqual``, no
    preference is expressed.

    Returns:
        ``0`` when the totals are approximately equal, otherwise the value
        of ``prefs.strictlygreater(total_d1, total_d2)``.
    """
    total_d1 = sum(d1.tf[term] for term in query.termseq)
    total_d2 = sum(d2.tf[term] for term in query.termseq)

    if prefs.approximatelyEqual(total_d1, total_d2):
        return 0

    # at least 10% difference
    return prefs.strictlygreater(total_d1, total_d2)
def preference(self, ctx: RerankingContext, query: Query, d1: Document, d2: Document):
    """Compare two documents by where query terms first appear.

    For each distinct query term that occurs in both documents, the index of
    its first occurrence in each document's term sequence is added to that
    document's total. The arguments to ``prefs.strictlygreater`` are swapped
    (d2 first), so the document with the smaller total is the favoured one.

    Returns:
        The value of ``prefs.strictlygreater(total_d2, total_d1)``.
    """
    in_both_docs = set(d1.termseq).intersection(d2.termseq)
    considered = [term for term in set(query.termseq) if term in in_both_docs]

    # ``index`` yields the position of the first occurrence.
    first_pos_total_d1 = sum(d1.termseq.index(term) for term in considered)
    first_pos_total_d2 = sum(d2.termseq.index(term) for term in considered)

    return prefs.strictlygreater(first_pos_total_d2, first_pos_total_d1)
def preference(self, ctx: RerankingContext, query: Query, d1: Document, d2: Document):
    """Compare two documents on the query term most similar to the others.

    For every unordered pair of distinct query terms,
    ``ctx.f.synset_similarity`` is evaluated and credited to both terms. The
    term with the largest accumulated similarity is selected (the first one
    on ties, as ``np.argmax`` returns the first maximum), and the documents
    are compared by their frequency of that term.

    Returns:
        ``0`` if the query has no terms; otherwise the value of
        ``prefs.strictlygreater(d1.tf[term], d2.tf[term])`` for the selected
        term.
    """
    qterms = list(query.tf.keys())
    if not qterms:
        # np.argmax raises ValueError on an empty array. With no query terms
        # there is nothing to compare, so report no preference
        # (assumes 0 is the neutral value -- TODO confirm against ``prefs``).
        return 0

    similarity_totals = np.zeros(len(qterms))
    for i1, i2 in itertools.combinations(range(len(qterms)), 2):
        sim = ctx.f.synset_similarity(qterms[i1], qterms[i2])
        # Similarity is credited symmetrically to both members of the pair.
        similarity_totals[i1] += sim
        similarity_totals[i2] += sim

    selected_term = qterms[int(np.argmax(similarity_totals))]
    return prefs.strictlygreater(d1.tf[selected_term], d2.tf[selected_term])
def preference(self, ctx: RerankingContext, query: Query, d1: Document, d2: Document):
    """Compare two documents by where the exact query phrase first occurs.

    Each document's term sequence is scanned for the first index at which
    the whole of ``query.termseq`` appears contiguously. A document without
    such an occurrence gets ``float('inf')``. The arguments to
    ``prefs.strictlygreater`` are swapped (d2 first), so the document with
    the earlier occurrence is the favoured one.

    Returns:
        The value of ``prefs.strictlygreater(pos_in_d2, pos_in_d1)``.
    """
    phrase = query.termseq
    phrase_len = len(phrase)

    def first_phrase_position(seq: typing.Sequence[str]):
        for start, term in enumerate(seq):
            # Only positions holding the phrase's first term can start a match.
            if (
                term == phrase[0]
                and start + phrase_len <= len(seq)
                and seq[start:start + phrase_len] == phrase
            ):
                return start
        return float('inf')

    pos_in_d2 = first_phrase_position(d2.termseq)
    pos_in_d1 = first_phrase_position(d1.termseq)
    return prefs.strictlygreater(pos_in_d2, pos_in_d1)
def preference(self, ctx: RerankingContext, query: Query, d1: Document, d2: Document):
    """Compare two documents by how they cover pairs of query terms.

    Only pairs of query terms whose ``ctx.f.td`` values are approximately
    equal (per ``prefs.approximatelyEqual``) are examined. For such a pair,
    a document earns a point when it contains both terms while the other
    document contains only the first term of the pair, with a frequency
    equal to the combined frequency of both terms in the first document.

    Returns:
        The value of ``prefs.strictlygreater(points_d1, points_d2)``.
    """

    def covers_both(tf_a_first, tf_a_second, tf_b_first, tf_b_second):
        # Document A has both terms; document B has the same total number of
        # occurrences, all on the first term.
        return (
            (tf_b_first == tf_a_first + tf_a_second)
            and (tf_b_second == 0)
            and (tf_a_first != 0)
            and (tf_a_second != 0)
        )

    points_d1 = 0
    points_d2 = 0
    for first, second in itertools.combinations(query.tf.keys(), 2):
        if not prefs.approximatelyEqual(ctx.f.td(first), ctx.f.td(second)):
            continue

        d1_first = d1.tf[first]
        d2_first = d2.tf[first]
        d1_second = d1.tf[second]
        d2_second = d2.tf[second]

        # Booleans are added as 0/1.
        points_d1 += covers_both(d1_first, d1_second, d2_first, d2_second)
        points_d2 += covers_both(d2_first, d2_second, d1_first, d1_second)

    return prefs.strictlygreater(points_d1, points_d2)
def preference(self, ctx: RerankingContext, query: Query, d1: Document, d2: Document):
    """Compare two documents by query-term gains that explain their length gap.

    For each query term, a document earns a point when it has strictly more
    occurrences of the term than the other document *and* its length equals
    the other document's length plus exactly that surplus. At most one of
    the two documents can earn the point for a given term.

    Returns:
        The value of ``prefs.strictlygreater(points_d1, points_d2)``.
    """

    def surplus_explains_length(term, richer, poorer):
        tf_richer = richer.tf[term]
        tf_poorer = poorer.tf[term]
        if not tf_richer > tf_poorer:
            return False
        return len(richer) == (len(poorer) + tf_richer - tf_poorer)

    points_d1 = 0
    points_d2 = 0
    for term in query.tf.keys():
        if surplus_explains_length(term, d1, d2):
            points_d1 += 1
        elif surplus_explains_length(term, d2, d1):
            points_d2 += 1

    return prefs.strictlygreater(points_d1, points_d2)
def preference(self, ctx: RerankingContext, query: Query, d1: Document, d2: Document):
    """Compare two documents by their frequency of the rarer term of each pair.

    Every unordered pair of query terms is ordered so that the first term
    has the higher (or equal) ``ctx.f.idf``. A pair counts only if the two
    documents have mirrored frequencies for the two terms, or if the first
    term occurs more often in the query than the second. For each counted
    pair the running score moves by +1 when d1 has more occurrences of the
    first term, by -1 when d2 has more, and stays put otherwise.

    Returns:
        The value of ``prefs.strictlygreater(score, 0)``.
    """
    score = 0
    for rarer, commoner in itertools.combinations(query.tf.keys(), 2):
        # qt1 is rarer: swap so the higher-idf term comes first.
        if not ctx.f.idf(rarer) >= ctx.f.idf(commoner):
            rarer, commoner = commoner, rarer

        # term pair is valid
        mirrored = (
            d1.tf[rarer] == d2.tf[commoner] and d1.tf[commoner] == d2.tf[rarer]
        )
        if mirrored or (query.tf[rarer] > query.tf[commoner]):
            # document with more occurrences of the rarer term gets a point
            diff = d1.tf[rarer] - d2.tf[rarer]
            if diff > 0:
                score += 1
            elif diff < 0:
                score += -1
            else:
                score += 0

    return prefs.strictlygreater(score, 0)
def preference(self, ctx: RerankingContext, query: Query, d1: Document, d2: Document):
    """Compare two documents by their length-normalized similarity to the query.

    For each document, ``self._similarity`` is evaluated between the
    document's distinct terms and ``query.tf``; the results are summed with
    ``np.sum`` and divided by ``ctx.f.wordcount`` of the document. The
    terms shared by both documents contribute the same amount to each side,
    so that part is computed only once and reused.

    Returns:
        The value of ``prefs.strictlygreater(sim_d1, sim_d2)``.
    """
    d1_terms = set(d1.tf)
    d2_terms = set(d2.tf)

    def sum_sim(ts):
        return np.sum(self._similarity(ctx, ts, query.tf))

    # The shared-term contribution is identical for both documents; the
    # similarity computation is evaluated once instead of twice.
    shared_sim = sum_sim(d1_terms.intersection(d2_terms))

    sim1 = 0
    sim2 = 0
    sim1 += shared_sim
    sim2 += shared_sim
    sim1 += sum_sim(d1_terms.difference(d2_terms))
    sim2 += sum_sim(d2_terms.difference(d1_terms))

    # Normalize by document length so longer documents are not favoured
    # merely for containing more terms.
    sim1 /= ctx.f.wordcount(d1)
    sim2 /= ctx.f.wordcount(d2)

    return prefs.strictlygreater(sim1, sim2)
def preference(self, ctx: RerankingContext, query: Query, d1: Document, d2: Document):
    """Compare two documents by ``ctx.f.average_smallest_span``.

    The helper is evaluated with the set of query terms and each document's
    term sequence. The arguments to ``prefs.strictlygreater`` are swapped
    (d2 first), so the document with the smaller value is the favoured one.

    Returns:
        The value of ``prefs.strictlygreater(span_d2, span_d1)``.
    """
    query_terms = set(query.tf.keys())
    span_d1, span_d2 = (
        ctx.f.average_smallest_span(query_terms, doc.termseq)
        for doc in (d1, d2)
    )
    return prefs.strictlygreater(span_d2, span_d1)
def preference(self, ctx: RerankingContext, query: Query, d1: Document, d2: Document):
    """Compare two documents by how many distinct query terms they contain.

    A query term counts for a document when it is among the keys of the
    document's term-frequency mapping.

    Returns:
        The value of ``prefs.strictlygreater(matched_d1, matched_d2)``.
    """
    query_terms = set(query.tf.keys())
    matched_d1 = len(query_terms.intersection(d1.tf.keys()))
    matched_d2 = len(query_terms.intersection(d2.tf.keys()))
    return prefs.strictlygreater(matched_d1, matched_d2)
def preference(self, ctx: RerankingContext, query: Query, d1: Document, d2: Document):
    """Prefer the shorter of the two documents.

    Lengths are taken with ``len``; the arguments to
    ``prefs.strictlygreater`` are swapped (d2 first) so that a smaller
    length is the favoured one. ``ctx`` and ``query`` are not used.

    Returns:
        The value of ``prefs.strictlygreater(len(d2), len(d1))``.
    """
    length_d2 = len(d2)
    length_d1 = len(d1)
    return prefs.strictlygreater(length_d2, length_d1)