def testMakeobjectsfromxml(self):
    """Print the text of every document built from the parsed input.

    Runs ``Reader.readfile`` and feeds its result to
    ``Reader.makeobjectsfromxml``, then walks each returned thread's
    ``_documents`` and prints ``document.text``. The test makes no
    assertions; it only fails if one of these calls raises.
    """
    xml_reader = Reader()
    parsed = xml_reader.readfile()
    for current_thread in xml_reader.makeobjectsfromxml(parsed):
        # NOTE(review): this reads `text`, whereas testtokenizerfromfile
        # reads `_text` on documents -- confirm both attributes exist.
        for doc in current_thread._documents:
            print(doc.text)
def testtokenizerfromfile(self):
    """Tokenize the threads read by ``Reader`` and print the results.

    For each thread returned by ``Tokenizer.tokenize`` this prints the
    query's ``_body`` followed by the ``_text`` of each of its documents.
    The test makes no assertions; it only fails if a call raises.
    """
    source = Reader()
    raw_threads = source.makeobjectsfromxml(source.readfile())
    for tokenized_thread in Tokenizer(raw_threads).tokenize():
        print(tokenized_thread._query._body)
        for doc in tokenized_thread._documents:
            print(doc._text)
def test_final(self):
    """Run the full pipeline from reading input to relevance calculation.

    Reads and tokenizes the threads, builds a ``CollectionModel`` and a
    ``DocumentModel`` over the tokenized collection, hands both to a
    ``RetrievalModel`` and calls ``calculate_relevance`` on it. The return
    value is discarded and nothing is asserted; the test only fails if a
    call raises.
    """
    source = Reader()
    raw_threads = source.makeobjectsfromxml(source.readfile())
    tokens = Tokenizer(raw_threads).tokenize()

    # The collection model is constructed before the document model,
    # matching the original construction order.
    collection_stats = CollectionModel(tokens)
    document_stats = DocumentModel(tokens)

    retrieval = RetrievalModel(tokens, document_stats, collection_stats)
    retrieval.calculate_relevance()
def testdocumentmodel(self):
    """Print the frequencies computed by the collection and document models.

    Reads and tokenizes the threads, then for ``CollectionModel`` followed
    by ``DocumentModel`` constructs the model over the tokenized threads
    and prints the result of its ``calculate_frequency``. The test makes
    no assertions; it only fails if a call raises.
    """
    source = Reader()
    raw_threads = source.makeobjectsfromxml(source.readfile())
    tokens = Tokenizer(raw_threads).tokenize()

    # Each model is built, evaluated and printed before the next one is
    # constructed, matching the original statement order.
    print(CollectionModel(tokens).calculate_frequency())
    print(DocumentModel(tokens).calculate_frequency())