Exemple #1
0
def characterize(bd,  # this document
                 bj,  # in this document collection
                 boost=param_dict(default=batch.BOOST),
                 k1=batch.K1,
                 b=param_dict(default=batch.B)):
    """Score every distinct word of document ``bd`` with ``bm25f``.

    Each word found in any of ``bd``'s values is turned into a one-word
    query ``{word: 1}`` and evaluated against ``bd`` within the
    collection ``bj``; ``boost``, ``k1`` and ``b`` are forwarded to
    ``bm25f`` unchanged.

    Returns a ``prf_result`` keyed by word, holding the ``bm25f`` value
    computed for that word.
    """
    # NOTE(review): bd presumably maps field name -> bag of words; this
    # function only relies on bd.values() and on .keys() of each value.
    vocabulary = {term for bag in bd.values() for term in bag.keys()}

    scores = prf_result()
    for term in vocabulary:
        single_term_query = {term: 1}
        scores[term] = bm25f(single_term_query, bd, bj, boost, k1, b)
    return scores
Exemple #2
0
 def test_b(self):
     # Override `b` for 'title' and 'body'; every other key falls back
     # to the 0.75 default given to param_dict.
     b_params = param_dict(default=0.75)
     b_params['title'] = 0.50
     b_params['body'] = 1.00

     # Three per-field terms, using b = 0.50, 1.00 and the 0.75 default
     # respectively. The last term has a zero numerator.
     title_term = (1 * 1.0) / ((1 - 0.50) + 0.50 * 2 / (4 / 4))
     body_term = (1 * 1.0) / ((1 - 1.00) + 1.00 * 1 / (3 / 4))
     default_term = (0 * 1.0) / ((1 - 0.75) + 0.75 * 1 / (1 / 4))
     expected = title_term + body_term + default_term

     actual = weight('テスト', self.bd0, self.bj, b=b_params)
     self.assertEqual(expected, actual)
Exemple #3
0
 def test_boost(self):
     # Override `boost` for 'title' and 'body'; every other key falls
     # back to the 1.0 default given to param_dict.
     boost_params = param_dict(default=1.0)
     boost_params['title'] = 100
     boost_params['body'] = 0.1

     # Three per-field terms whose numerators carry boosts of 100, 0.1
     # and the 1.0 default. All denominators use 0.75
     # (presumably weight()'s default b -- confirm against its signature).
     title_term = (1 * 100) / ((1 - 0.75) + 0.75 * 2 / (4 / 4))
     body_term = (1 * 0.1) / ((1 - 0.75) + 0.75 * 1 / (3 / 4))
     default_term = (0 * 1.0) / ((1 - 0.75) + 0.75 * 1 / (1 / 4))
     expected = title_term + body_term + default_term

     actual = weight('テスト', self.bd0, self.bj, boost=boost_params)
     self.assertEqual(expected, actual)
Exemple #4
0
 def test_param_dict_omit_default(self):
     # Without an explicit `default`, a key present in `d` returns its
     # stored value and a missing key is expected to return None.
     params = param_dict(d={'title': 10})
     checks = (('title', 10), ('body', None))
     for key, expected in checks:
         self.assertEqual(expected, params[key])
Exemple #5
0
 def test_param_dict_omit_d(self):
     # With only `default` supplied, every key is expected to return
     # that default value.
     params = param_dict(default=1)
     for key in ('title', 'body'):
         self.assertEqual(1, params[key])
Exemple #6
0
 def test_param_dict(self):
     # With both `d` and `default` supplied, a key present in `d`
     # returns its stored value and a missing key returns `default`.
     params = param_dict(d={'title': 10}, default=1)
     checks = (('title', 10), ('body', 1))
     for key, expected in checks:
         self.assertEqual(expected, params[key])