Beispiel #1
0
    def test_term_frequency_for(self):
        """Check per-document term counts from ``term_frequency_for``.

        The sample token list holds three distinct terms, with "cisco"
        appearing twice and "router" once.
        """
        tokens = ["cisco", "router", "cisco", "switch"]
        counts = WeighingMeasure.term_frequency_for(tokens)

        # Three distinct terms: cisco, router, switch.
        distinct_terms = counts.keys()
        self.assertEqual(len(distinct_terms), 3)

        # Spot-check individual counts, in the same order as before.
        for term, expected in (("cisco", 2), ("router", 1)):
            self.assertEqual(counts[term], expected)
Beispiel #2
0
    def test_document_frequncy_measure_for(self):
        """Check document-frequency figures from ``document_frequency_for``.

        Two tokenized documents are supplied; the result is expected to
        hold six distinct terms, with "cisco" (present in both documents)
        mapped to 2 and "switch" / "voip" (present in one each) mapped to 1.
        """
        corpus = {
            "doc1": ["cisco", "router", "cisco", "switch"],
            "doc2": ["network", "router", "cisco", "bridge", "voip"],
        }

        doc_freqs = WeighingMeasure.document_frequency_for(corpus)

        # cisco, router, switch, network, bridge, voip
        distinct_terms = doc_freqs.keys()
        self.assertEqual(len(distinct_terms), 6)

        for term, expected in (("cisco", 2), ("switch", 1), ("voip", 1)):
            self.assertEqual(doc_freqs[term], expected)
Beispiel #3
0
    def test_term_frequency_across(self):
        """Check corpus-wide term counts from ``term_frequencies_across``.

        Two tokenized documents are supplied; the result is expected to
        hold six distinct terms, with totals of 3 for "cisco", 2 for
        "router" and 1 for "switch".
        """
        corpus = {
            "doc1": ["cisco", "router", "cisco", "switch"],
            "doc2": ["network", "router", "cisco", "bridge", "voip"],
        }

        totals = WeighingMeasure.term_frequencies_across(corpus)

        # cisco, router, switch, network, bridge, voip
        distinct_terms = totals.keys()
        self.assertEqual(len(distinct_terms), 6)

        for term, expected in (("cisco", 3), ("router", 2), ("switch", 1)):
            self.assertEqual(totals[term], expected)
Beispiel #4
0
 def term_frequencies(self):
     """Return ``self.term_frequency``, filling it in on first use.

     If the stored value is non-empty it is returned unchanged. Otherwise
     it is computed by ``WeighingMeasure.term_frequency_for`` over
     ``self.all_tokens()``, stored on the instance, and returned.
     """
     # Fast path: a previous call already populated the cache.
     if len(self.term_frequency) != 0:
         return self.term_frequency

     # An empty result is indistinguishable from "not yet computed", so it
     # is recomputed on every call.
     self.term_frequency = WeighingMeasure.term_frequency_for(self.all_tokens())
     return self.term_frequency