def characterize(bd, bj,
                 boost=param_dict(default=batch.BOOST),
                 k1=batch.K1,
                 b=param_dict(default=batch.B)):
    """Score every word of one document with a single-word bm25f query.

    Args:
        bd: The document being characterized. Its ``values()`` are
            iterated and each value's ``keys()`` are taken as words.
        bj: The document collection that ``bd`` is scored against.
        boost: Passed through unchanged to ``bm25f``.
        k1: Passed through unchanged to ``bm25f``.
        b: Passed through unchanged to ``bm25f``.

    Returns:
        A ``prf_result`` holding, for each distinct word found in ``bd``,
        the value of ``bm25f({word: 1}, bd, bj, boost, k1, b)``.

    Note:
        The ``boost`` and ``b`` defaults are created once, when the
        function is defined, and are shared by every call that omits them.
    """
    # Distinct words across all of the document's word bags.
    vocabulary = {word for bag in bd.values() for word in bag.keys()}

    scores = prf_result()
    for word in vocabulary:
        # Each word is scored on its own, as a query with weight 1.
        scores[word] = bm25f({word: 1}, bd, bj, boost, k1, b)
    return scores
def test_k1(self):
    """Check bm25f against a hand-computed score when k1 is set to 2.0."""
    # The same k1 is used in the expected-value denominators and in the call.
    k1 = 2.0
    # Hand-computed constants; presumably a term weight and two weighted
    # term frequencies for the fixture data -- confirm against the fixtures.
    weight = 0.84729786038720
    first = 1.37142857142857
    second = 0.30769230769230

    # Two saturated terms x / (k1 + x), with opposite-signed weights.
    expected = (first / (k1 + first) * -weight
                + second / (k1 + second) * weight)
    actual = bm25f(self.query, self.bd0, self.bj, k1=k1)

    self.assertAlmostEqual(expected, actual)