Beispiel #1
0
    def test_at_least_one_of_the_tokens(self):
        """Check the ranking when each document holds only part of the query.

        Neither document text contains all three words of the query
        "New York Britain"; the test expects both ids back, with ``doc1``
        ahead of ``doc2``.
        """
        documents = [
            TextDocument("there is a city named New York", "doc1"),
            TextDocument("In Britain there is a city named York", "doc2"),
        ]
        engine = SearchEngine(DocumentCollection.from_document_list(documents))

        ranked = engine.ranked_documents("New York Britain")
        # Only the ids are compared; the paired similarity value is dropped.
        ranked_ids = [doc.id for doc, _score in ranked]

        self.assertEqual(ranked_ids, ['doc1', 'doc2'])
Beispiel #2
0
 def setUp(self):
     """Build a two-document collection and store it on the test case."""
     texts_with_ids = (
         ("the cat sat on a mat", "doc1"),
         ("a rose is a rose", "doc2"),
     )
     documents = []
     for text, doc_id in texts_with_ids:
         documents.append(TextDocument(text, doc_id))
     self.small_collection = DocumentCollection.from_document_list(documents)
Beispiel #3
0
 def test_unknown_word_cosine(self):
     """Cosine similarity for a query made only of unknown words must be 0."""
     # Query text that is meant to share no vocabulary with the collection.
     unseen_doc = TextDocument(text="unknownwords", id=None)
     # Any stored document works as the comparison partner; "doc1" is used.
     known_doc = self.small_collection.docid_to_doc["doc1"]

     similarity = self.small_collection.cosine_similarity(unseen_doc, known_doc)
     # The expected result is exactly zero rather than an undefined value.
     self.assertEqual(similarity, 0.)
Beispiel #4
0
    def setUp(self):
        """Create a one-document collection and a search engine over it."""
        # The document text contains an embedded newline.
        only_doc = TextDocument("the cat sat\non a mat", "doc1")
        collection = DocumentCollection.from_document_list([only_doc])

        self.small_collection = collection
        self.searcher = SearchEngine(collection)
Beispiel #5
0
 def setUp(self):
     """Store a document whose text begins with a run of '>' characters."""
     quoted_text = ">>>>forword"
     self.test_reply_email = TextDocument(quoted_text, "doc1")