Esempio n. 1
0
    def test_normalized_words_frequencies_with_smoothing_term(self):
        """Check smoothed normalized term frequencies and the frequency ranking.

        The document holds the terms "a".."e" with 1..5 occurrences
        respectively, and every lookup passes 0.5 as the second argument.
        Each present term is expected to score ``0.5 + count/10``; the absent
        term "z" is expected to score exactly 0.5.
        """
        smoothing = 0.5
        document = tuple("a b c d e c b d c e e d e d e".split())
        model = TfDocumentModel(document)

        # (term, number of occurrences in the document), checked in this order.
        occurrences = (("a", 1), ("b", 2), ("c", 3), ("d", 4), ("e", 5))
        for term, count in occurrences:
            actual = model.normalized_term_frequency(term, smoothing)
            self.assertAlmostEqual(actual, smoothing + count/10)

        # "z" never occurs in the document.
        missing = model.normalized_term_frequency("z", smoothing)
        self.assertAlmostEqual(missing, 0.5)

        ranking = model.most_frequent_terms()
        self.assertEqual(ranking, ("e", "d", "c", "b", "a"))
Esempio n. 2
0
    def test_normalized_words_frequencies(self):
        """Check normalized term frequencies when no second argument is given.

        The document holds the terms "a".."e" with 1..5 occurrences
        respectively.  Each present term is expected to score ``count/5``
        (so the most frequent term "e" scores 1), and the absent term "z"
        is expected to score 0.0.
        """
        model = TfDocumentModel(tuple("a b c d e c b d c e e d e d e".split()))

        # Expected score per term, verified in the listed order.
        expected_scores = [
            ("a", 1/5),
            ("b", 2/5),
            ("c", 3/5),
            ("d", 4/5),
            ("e", 5/5),
            ("z", 0.0),
        ]
        for term, expected in expected_scores:
            self.assertAlmostEqual(model.normalized_term_frequency(term), expected)

        # Terms are expected from the most to the least frequent one.
        ranking = model.most_frequent_terms()
        self.assertEqual(ranking, ("e", "d", "c", "b", "a"))
Esempio n. 3
0
def test_normalized_words_frequencies():
    """Check normalized term frequencies when no second argument is given.

    In the sample document the n-th letter of "abcde" occurs exactly n times,
    so each of those terms is expected to score ``n/5``.  The absent term "z"
    is expected to score 0.0.
    """
    document = tuple("a b c d e c b d c e e d e d e".split())
    model = TfDocumentModel(document)

    # enumerate(..., 1) pairs every term with its occurrence count.
    for count, term in enumerate("abcde", 1):
        score = model.normalized_term_frequency(term)
        assert score == pytest.approx(count/5)

    # "z" never occurs in the document.
    assert model.normalized_term_frequency("z") == pytest.approx(0.0)

    ranking = model.most_frequent_terms()
    assert ranking == ("e", "d", "c", "b", "a")
Esempio n. 4
0
def test_normalized_words_frequencies_with_smoothing_term():
    """Check smoothed normalized term frequencies and the frequency ranking.

    Every lookup passes 0.5 as the second argument.  In the sample document
    the n-th letter of "abcde" occurs exactly n times and is expected to score
    ``0.5 + n/10``; the absent term "z" is expected to score exactly 0.5.
    """
    smoothing = 0.5
    model = TfDocumentModel(tuple("a b c d e c b d c e e d e d e".split()))

    # enumerate(..., 1) pairs every term with its occurrence count.
    for count, term in enumerate("abcde", 1):
        score = model.normalized_term_frequency(term, smoothing)
        assert score == pytest.approx(smoothing + count/10)

    # "z" never occurs in the document.
    missing = model.normalized_term_frequency("z", smoothing)
    assert missing == pytest.approx(0.5)

    assert model.most_frequent_terms() == ("e", "d", "c", "b", "a")
Esempio n. 5
0
def test_normalized_words_frequencies():
    """Verify normalized term frequencies computed with the default arguments.

    The sample document contains "a" once, "b" twice, "c" three times, "d"
    four times and "e" five times.  Each of them is expected to score its
    count divided by 5, and the unseen term "z" is expected to score 0.0.
    """
    tokens = tuple("a b c d e c b d c e e d e d e".split())
    model = TfDocumentModel(tokens)

    terms = ("a", "b", "c", "d", "e")
    counts = (1, 2, 3, 4, 5)
    for term, count in zip(terms, counts):
        assert model.normalized_term_frequency(term) == pytest.approx(count / 5)

    # A term that is not part of the document.
    assert model.normalized_term_frequency("z") == pytest.approx(0.0)

    # Expected ranking: descending by number of occurrences.
    expected_ranking = ("e", "d", "c", "b", "a")
    assert model.most_frequent_terms() == expected_ranking
Esempio n. 6
0
    def test_normalized_words_frequencies(self):
        """Verify normalized term frequencies computed with default arguments.

        The sample document contains "a" once, "b" twice, "c" three times,
        "d" four times and "e" five times.  Each of them is expected to score
        its count divided by 5; the unseen term "z" is expected to score 0.0.
        """
        tokens = tuple("a b c d e c b d c e e d e d e".split())
        model = TfDocumentModel(tokens)

        # (term, occurrences) pairs, verified in the listed order.
        for term, count in (("a", 1), ("b", 2), ("c", 3), ("d", 4), ("e", 5)):
            frequency = model.normalized_term_frequency(term)
            self.assertAlmostEqual(frequency, count / 5)

        # A term that is not part of the document.
        self.assertAlmostEqual(model.normalized_term_frequency("z"), 0.0)

        # Expected ranking: descending by number of occurrences.
        expected_ranking = ("e", "d", "c", "b", "a")
        self.assertEqual(model.most_frequent_terms(), expected_ranking)
Esempio n. 7
0
def test_normalized_words_frequencies_with_smoothing_term():
    """Verify normalized term frequencies when 0.5 is passed as smoothing.

    The sample document contains "a" once, "b" twice, "c" three times, "d"
    four times and "e" five times.  With 0.5 as the second argument each of
    them is expected to score ``0.5 + count / 10``; the unseen term "z" is
    expected to score exactly 0.5.
    """
    smooth = 0.5
    tokens = tuple("a b c d e c b d c e e d e d e".split())
    model = TfDocumentModel(tokens)

    # (term, occurrences) pairs, verified in the listed order.
    occurrences = (("a", 1), ("b", 2), ("c", 3), ("d", 4), ("e", 5))
    for term, count in occurrences:
        frequency = model.normalized_term_frequency(term, smooth)
        assert frequency == pytest.approx(smooth + count / 10)

    # A term that is not part of the document.
    assert model.normalized_term_frequency("z", smooth) == pytest.approx(0.5)

    # Expected ranking: descending by number of occurrences.
    assert model.most_frequent_terms() == ("e", "d", "c", "b", "a")
Esempio n. 8
0
    def test_normalized_words_frequencies_with_smoothing_term(self):
        """Verify normalized term frequencies when 0.5 is passed as smoothing.

        The sample document contains "a" once, "b" twice, "c" three times,
        "d" four times and "e" five times.  With 0.5 as the second argument
        each of them is expected to score ``0.5 + count / 10``; the unseen
        term "z" is expected to score exactly 0.5.
        """
        smooth = 0.5
        model = TfDocumentModel(tuple("a b c d e c b d c e e d e d e".split()))

        terms = ("a", "b", "c", "d", "e")
        counts = (1, 2, 3, 4, 5)
        for term, count in zip(terms, counts):
            frequency = model.normalized_term_frequency(term, smooth)
            self.assertAlmostEqual(frequency, smooth + count / 10)

        # A term that is not part of the document.
        unseen = model.normalized_term_frequency("z", smooth)
        self.assertAlmostEqual(unseen, 0.5)

        # Expected ranking: descending by number of occurrences.
        expected_ranking = ("e", "d", "c", "b", "a")
        self.assertEqual(model.most_frequent_terms(), expected_ranking)