def dotest(terms, expected, which):
    """Run one search implementation and compare its hits with ``expected``.

    Args:
        terms: Raw query string; it is normalized with ``words`` before use.
        expected: File names the search is expected to return. Order does not
            matter, and the caller's list is left untouched.
        which: Selects the implementation under test: ``0`` uses
            ``linear_search``, ``1`` uses ``create_index``/``index_search``,
            and any other value uses the ``myhtable_*`` pair.

    Raises:
        AssertionError: If the sorted file names found differ from the sorted
            expected names.
    """
    files = filelist(rootdir)
    terms = words(terms)

    # Only the way the documents are found differs between implementations;
    # the comparison below is shared by all three.
    if which == 0:
        docs = linear_search(files, terms)
    elif which == 1:
        index = create_index(files)
        docs = index_search(files, index, terms)
    else:
        index = myhtable_create_index(files)
        docs = myhtable_index_search(files, index, terms)

    # Compare order-insensitively. sorted() builds new lists, so neither the
    # caller's ``expected`` nor the list returned by filenames() is mutated.
    names = sorted(filenames(docs))
    expected = sorted(expected)
    assert names == expected, "found "+str(names)+" != expected "+str(expected)
def test_index_berlitz():
    """Index search for "hawaii travel" must return exactly HistoryHawaii.txt."""
    files = filelist(rootdir)
    query = words("hawaii travel")
    index = create_index(files)
    hits = index_search(files, index, query)
    assert filenames(hits) == ['HistoryHawaii.txt']
def test_index_berlitz_none():
    """Index search for the term "missspellinnng" must return no file names."""
    files = filelist(rootdir)
    query = words("missspellinnng")
    index = create_index(files)
    hits = index_search(files, index, query)
    assert filenames(hits) == []
# Uncomment the next line to test just the first 100 files instead of all files
# files = files[:100]

N = len(files)
print(N, "files")

# Interactive search loop: read a query, run the implementation selected by
# `impl`, write the results page to /tmp and open it in a browser tab.
# Written for Python 3 (print function, input) like the other driver loops.
index = None  # built on the first indexed query, then reused
while True:
    terms = input("Search terms: ")
    terms = words(terms)

    if impl == 'linear':
        docs = linear_search(files, terms)
    elif impl == 'index':
        if index is None:
            index = create_index(files)
            print("Index complete")
        docs = index_search(files, index, terms)
    elif impl == 'myhtable':
        if index is None:
            index = myhtable_create_index(files)
            print("Index complete")
        docs = myhtable_index_search(files, index, terms)
    else:
        # Unknown implementation name: report it and leave the loop.
        print("Invalid search type:", impl)
        break

    page = results(docs, terms)
    # The context manager closes the file even if write() raises, and the
    # file is closed (hence flushed) before the browser is asked to load it.
    with open("/tmp/results.html", "w", encoding='UTF-8') as f:
        f.write(page)
    webbrowser.open_new_tab("file:///tmp/results.html")
# Uncomment the next line to test just the first 100 files instead of all files
# files = files[:100]

N = len(files)
print(N, "files")

# Query loop: each pass reads search terms, runs the implementation named by
# `impl`, and shows the rendered results page in a new browser tab.
index = None  # created on the first indexed query and kept for later ones
while True:
    terms = words(input("Search terms: "))

    # Guard clause: an unrecognized implementation name ends the session.
    if impl not in ('linear', 'index', 'myhtable'):
        print("Invalid search type:", impl)
        break

    if impl == 'linear':
        docs = linear_search(files, terms)
    else:
        # Both indexed variants follow the same build-once/search pattern and
        # differ only in which pair of functions they call.
        if index is None:
            build = create_index if impl == 'index' else myhtable_create_index
            index = build(files)
            print("Index complete")
        search = index_search if impl == 'index' else myhtable_index_search
        docs = search(files, index, terms)

    page = results(docs, terms)
    with open("/tmp/results.html", "w", encoding='UTF-8') as f:
        f.write(page)
    webbrowser.open_new_tab("file:///tmp/results.html")
# Prompt for queries until an unknown `impl` is seen; every successful query
# overwrites /tmp/results.html and opens it in a new browser tab.
index = None
while True:
    # EX: "Ronald Reagan" is normalized to ['reagan', 'ronald']
    terms = words(input("Search terms: "))

    if impl == 'linear':
        docs = linear_search(files, terms)
    elif impl in ('index', 'myhtable'):
        uses_htable = impl == 'myhtable'
        if index is None:
            # files is a list of fully-qualified filenames
            if uses_htable:
                index = myhtable_create_index(files)
            else:
                index = create_index(files)
            print("Index complete")
        # terms is a list of normalized words
        if uses_htable:
            docs = myhtable_index_search(files, index, terms)
        else:
            docs = index_search(files, index, terms)
    else:
        print("Invalid search type:", impl)
        break

    page = results(docs, terms)
    with open("/tmp/results.html", "w", encoding='UTF-8') as f:
        f.write(page)
    webbrowser.open_new_tab("file:///tmp/results.html")