def characterize(bd,  # this document
                 bj,  # in this document collection
                 boost=param_dict(default=batch.BOOST),
                 k1=batch.K1,
                 b=param_dict(default=batch.B)):
    """Score every distinct word of document ``bd`` against collection ``bj``.

    The vocabulary is the union of the keys of every value in ``bd``
    (each value is iterated through ``.keys()``; presumably a bag-of-words
    mapping per field -- confirm against callers).  Each word is then
    scored on its own by calling ``bm25f`` with the single-term query
    ``{word: 1}``.

    Args:
        bd: the document being characterized.
        bj: the document collection the document belongs to.
        boost: forwarded unchanged to ``bm25f``.
        k1: forwarded unchanged to ``bm25f``.
        b: forwarded unchanged to ``bm25f``.

    Returns:
        A ``prf_result`` mapping each word to the value ``bm25f`` returned.
    """
    # Collect each word once, no matter how many values of bd contain it.
    vocabulary = {term for bag in bd.values() for term in bag.keys()}

    scores = prf_result()
    for term in vocabulary:
        # Single-term query: the word itself with a count of 1.
        scores[term] = bm25f({term: 1}, bd, bj, boost, k1, b)
    return scores
def test_b(self):
    """weight() applies per-field ``b`` values, with a default for the rest.

    ``b`` is overridden for 'title' (0.50) and 'body' (1.00); every other
    key of the param_dict resolves to the default 0.75.  The expected
    value is the sum of one hand-computed term per field of the fixture
    document ``self.bd0`` within the collection ``self.bj``.
    """
    b = param_dict(default=0.75)
    b['title'] = 0.50
    b['body'] = 1.00

    # Each term has the shape (x * 1.0) / ((1 - b) + b * y / z), where b is
    # the value configured above for that field.
    title_term = (1 * 1.0) / ((1 - 0.50) + 0.50 * 2 / (4 / 4))
    body_term = (1 * 1.0) / ((1 - 1.00) + 1.00 * 1 / (3 / 4))
    # Neither 'title' nor 'body', so the default b of 0.75 applies; the
    # zero numerator makes this term contribute nothing to the sum.
    other_term = (0 * 1.0) / ((1 - 0.75) + 0.75 * 1 / (1 / 4))
    expected = title_term + body_term + other_term

    # The expected value is built from float divisions, so compare with a
    # tolerance: exact equality would break on any harmless reordering of
    # the arithmetic inside weight().
    self.assertAlmostEqual(
        expected, weight('テスト', self.bd0, self.bj, b=b))
def test_boost(self):
    """weight() applies per-field ``boost`` values, with a default for the rest.

    ``boost`` is overridden for 'title' (100) and 'body' (0.1); every
    other key of the param_dict resolves to the default 1.0.  ``b`` is not
    passed, and the expected terms all use 0.75 (presumably the default
    ``b`` of weight() -- confirm against its signature).
    """
    boost = param_dict(default=1.0)
    boost['title'] = 100
    boost['body'] = 0.1

    # Each term has the shape (x * boost) / ((1 - 0.75) + 0.75 * y / z),
    # where boost is the value configured above for that field.
    title_term = (1 * 100) / ((1 - 0.75) + 0.75 * 2 / (4 / 4))
    body_term = (1 * 0.1) / ((1 - 0.75) + 0.75 * 1 / (3 / 4))
    # Neither 'title' nor 'body', so the default boost of 1.0 applies; the
    # zero numerator makes this term contribute nothing to the sum.
    other_term = (0 * 1.0) / ((1 - 0.75) + 0.75 * 1 / (1 / 4))
    expected = title_term + body_term + other_term

    # The expected value is built from float divisions, so compare with a
    # tolerance: exact equality would break on any harmless reordering of
    # the arithmetic inside weight().
    self.assertAlmostEqual(
        expected, weight('テスト', self.bd0, self.bj, boost=boost))
def test_param_dict_omit_default(self):
    """With no ``default`` given, a key absent from ``d`` looks up as None."""
    params = param_dict(d={'title': 10})

    # A key supplied through d returns its stored value.
    self.assertEqual(10, params['title'])
    # A key that was never supplied yields None.
    self.assertEqual(None, params['body'])
def test_param_dict_omit_d(self):
    """With only ``default`` given, every key looks up as that default."""
    params = param_dict(default=1)

    # No initial mapping was supplied, so both lookups return the default.
    for field in ('title', 'body'):
        self.assertEqual(1, params[field])
def test_param_dict(self):
    """Keys present in ``d`` keep their value; other keys use ``default``."""
    params = param_dict(default=1, d={'title': 10})

    # A key supplied through d takes precedence over the default.
    self.assertEqual(10, params['title'])
    # A key that was never supplied resolves to the default.
    self.assertEqual(1, params['body'])