def test_searchengine_calculate_idf():
    """
    Tests SearchEngine._calculate_idf on the corpus located at DIRECTORY.

    Expected values: 0 for 'croissant' and for 'i', log(1.5) for
    'chocolate', and log(3) for 'apple'.
    """
    engine = SearchEngine(DIRECTORY)

    # Terms whose expected IDF is exactly 0.
    assert_equals(0, engine._calculate_idf('croissant'))
    assert_equals(0, engine._calculate_idf('i'))

    # Terms whose expected IDF is a non-zero natural logarithm.
    assert_equals(math.log(1.5), engine._calculate_idf('chocolate'))
    assert_equals(math.log(3), engine._calculate_idf('apple'))
def test_search_engine_1():
    """
    Checks SearchEngine against 'test_dir1' (small number of words):
    stored directory name, document count, IDF values and search results.
    """
    engine = SearchEngine('test_dir1')

    assert_equals('test_dir1', engine._dir)
    assert_equals(5, engine._num_docs)

    # (expected IDF, term) pairs, checked in order.
    idf_cases = [
        (0, 'Cat'),
        (.223, 'I'),
        (1.609, 'dogs'),
    ]
    for expected_idf, term in idf_cases:
        assert_equals(expected_idf, engine._calculate_idf(term))

    # (expected result, query) pairs, checked in order. The 'dogs' query
    # is issued twice and must give the same result both times.
    search_cases = [
        (['doc3.txt'], 'dogs'),
        (['doc3.txt'], 'dogs'),
        (['doc4.txt', 'doc1.txt'], 'eat'),
        (['doc4.txt', 'doc3.txt', 'doc1.txt'], 'eat dogs'),
    ]
    for expected_docs, query in search_cases:
        assert_equals(expected_docs, engine.search(query))
def test_search_engine_3():
    """
    Checks SearchEngine against 'test_dir3' (the empty-directory case):
    document count, stored documents, one IDF value and one search result.
    """
    engine = SearchEngine('test_dir3')
    term = 'Samsung'

    # NOTE(review): a document count of 1 next to an empty _docs mapping
    # looks surprising for an empty directory -- confirm against the fixture.
    assert_equals(1, engine._num_docs)
    assert_equals({}, engine._docs)

    # Expected IDF is 0 and the search is expected to produce None.
    assert_equals(0, engine._calculate_idf(term))
    assert_equals(None, engine.search(term))
def test_search_engine_2():
    """
    Checks SearchEngine against 'test_dir2' (large number of words):
    document count, one IDF value and three search results.
    """
    engine = SearchEngine('test_dir2')

    assert_equals(4, engine._num_docs)
    assert_equals(.287, engine._calculate_idf('Samsung'))

    # (expected result, query) pairs, checked in order; the last query is
    # expected to produce None.
    search_cases = [
        (['ChromeBook.html', 'att.html', 'facebook.html'], 'Samsung'),
        (['ChromeBook.html', 'att.html', 'facebook.html'],
         'Samsung companies!'),
        (None, 'adksamfk'),
    ]
    for expected_docs, query in search_cases:
        assert_equals(expected_docs, engine.search(query))
def test_search_engine_class():
    """
    Exercises the SearchEngine class on the 'test-files' directory:
    one _calculate_idf value followed by three search() results.
    """
    engine = SearchEngine('test-files')

    assert_equals(math.log(3/2), engine._calculate_idf('dogs'))

    # Single-term query.
    assert_equals(
        ['test-files/doc3.txt', 'test-files/doc1.txt'],
        engine.search('dogs'),
    )

    # Multi-term query.
    assert_equals(['test-files/doc2.txt'], engine.search('Cats very cute'))

    # This query is expected to produce None.
    assert_equals(None, engine.search('happy'))