Beispiel #1
0
 def test_tf_idf_5(self):
     """Check tf_idf on two documents that share exactly one term ("b")."""
     documents = [["a", "b", "a", "c"], ["b", "d"]]
     actual = tf_idf(documents)

     # idf factors used below: log(number of documents / documents with term).
     idf_one_doc = log(2/1.0)    # term occurs in one of the two documents
     idf_both_docs = log(2/2.0)  # term occurs in both documents

     # Each weight is (occurrences in document / document length) * idf.
     first_doc_weights = [
         2/4.0 * idf_one_doc,    # a
         1/4.0 * idf_both_docs,  # b
         1/4.0 * idf_one_doc,    # c
         0/4.0 * idf_one_doc,    # d
     ]
     second_doc_weights = [
         0/2.0 * idf_one_doc,    # a
         1/2.0 * idf_both_docs,  # b
         0/2.0 * idf_one_doc,    # c
         1/2.0 * idf_one_doc,    # d
     ]
     expected = (["a", "b", "c", "d"],
                 [first_doc_weights, second_doc_weights])
     self.assertEqual(actual, expected)
Beispiel #2
0
 def test_tf_idf_5(self):
     """Verify the term list and per-document weights for a two-document input."""
     corpus = [["a", "b", "a", "c"], ["b", "d"]]
     result = tf_idf(corpus)

     vocabulary = ["a", "b", "c", "d"]
     # Rows follow the document order; columns follow the vocabulary order.
     weights_doc_0 = [
         2 / 4.0 * log(2 / 1.0),  # a: twice in a four-term document
         1 / 4.0 * log(2 / 2.0),  # b: present in both documents
         1 / 4.0 * log(2 / 1.0),  # c
         0 / 4.0 * log(2 / 1.0),  # d: absent from this document
     ]
     weights_doc_1 = [
         0 / 2.0 * log(2 / 1.0),  # a: absent from this document
         1 / 2.0 * log(2 / 2.0),  # b: present in both documents
         0 / 2.0 * log(2 / 1.0),  # c: absent from this document
         1 / 2.0 * log(2 / 1.0),  # d
     ]
     self.assertEqual(result, (vocabulary, [weights_doc_0, weights_doc_1]))
Beispiel #3
0
def tf_idf_indicator_weight(terms_per_elem):
    """Return the weights of a normalized, indicator-based tf_idf call.

    The pair of documents is considered as one combined collection of N
    single-sentence documents. The term frequency is either 1 or 0,
    depending on whether the term occurs in the sentence.

    Only the second element of the (terms, weights) pair returned by
    tf_idf is handed back; the term list is discarded.
    """
    _terms, weights = tf_idf(terms_per_elem, normalized=True, indicator=True)
    return weights
Beispiel #4
0
 def test_tf_idf_empty_3(self):
     """An empty second document is expected to yield weight 0.0 for "a"."""
     documents = [["a"], []]
     expected_terms = ["a"]
     expected_weights = [
         [1 / 1 * log(2 / 1)],  # the document containing "a"
         [0.0],                 # the empty document
     ]
     self.assertEqual(tf_idf(documents), (expected_terms, expected_weights))
Beispiel #5
0
 def test_tf_idf_1(self):
     """Check tf_idf for a single document holding a single term."""
     documents = [["a"]]
     actual = tf_idf(documents)
     # One document and one term: the expected weight is 1/1 * log(1/1).
     only_weight = 1 / 1.0 * log(1 / 1.0)
     expected = (["a"], [[only_weight]])
     self.assertEqual(actual, expected)
Beispiel #6
0
 def test_tf_idf_empty_2(self):
     """One empty document: expect no terms and a single empty weight row."""
     single_empty_document = [[]]
     expected = ([], [[]])
     self.assertEqual(tf_idf(single_empty_document), expected)
Beispiel #7
0
 def test_tf_idf_empty_1(self):
     """No documents at all: expect an empty term list and no weight rows."""
     no_documents = []
     expected = ([], [])
     self.assertEqual(tf_idf(no_documents), expected)
Beispiel #8
0
 def test_tf_idf_4(self):
     """Two documents with disjoint single terms give a diagonal weight table."""
     documents = [["a"], ["b"]]
     actual = tf_idf(documents)
     expected_weights = [
         [1 / 1 * log(2 / 1.0), 0.0],  # document 0 contains only "a"
         [0.0, 1 / 1 * log(2 / 1.0)],  # document 1 contains only "b"
     ]
     self.assertEqual(actual, (["a", "b"], expected_weights))
Beispiel #9
0
 def test_tf_idf_1(self):
     """Verify the result of tf_idf for the one-document, one-term input."""
     corpus = [["a"]]
     result = tf_idf(corpus)
     expected_terms = ["a"]
     # A single row with a single column, for the only document and term.
     expected_rows = [[1/1.0 * log(1/1.0)]]
     self.assertEqual(result, (expected_terms, expected_rows))
Beispiel #10
0
 def test_tf_idf_empty_3(self):
     """Check tf_idf when the second of two documents has no terms."""
     corpus = [["a"], []]
     row_with_term = [1/1 * log(2/1)]
     row_for_empty_document = [0.0]
     expected = (["a"], [row_with_term, row_for_empty_document])
     self.assertEqual(tf_idf(corpus), expected)
Beispiel #11
0
 def test_tf_idf_empty_2(self):
     """Check tf_idf for a collection whose only document is empty."""
     corpus = [[]]
     expected_terms = []
     expected_rows = [[]]  # one row for the one (empty) document
     self.assertEqual(tf_idf(corpus), (expected_terms, expected_rows))
Beispiel #12
0
 def test_tf_idf_empty_1(self):
     """Check tf_idf for an empty collection of documents."""
     corpus = []
     expected_terms = []
     expected_rows = []
     self.assertEqual(tf_idf(corpus), (expected_terms, expected_rows))
Beispiel #13
0
 def test_tf_idf_4(self):
     """Check tf_idf for two documents that have no term in common."""
     corpus = [["a"], ["b"]]
     result = tf_idf(corpus)
     row_a = [1/1 * log(2/1.0), 0.0]  # weights for the document ["a"]
     row_b = [0.0, 1/1 * log(2/1.0)]  # weights for the document ["b"]
     expected = (["a", "b"], [row_a, row_b])
     self.assertEqual(result, expected)