def dotest(terms, expected, which):
    """Run one search implementation over the corpus and check what it finds.

    Args:
        terms: Query string; it is passed through ``words`` before searching.
        expected: File names the search should report. Order is irrelevant
            because both sides are sorted before comparison. The list is
            not modified.
        which: Selects the implementation: ``0`` uses ``linear_search``,
            ``1`` uses ``create_index`` + ``index_search``, and any other
            value uses ``myhtable_create_index`` + ``myhtable_index_search``.

    Raises:
        AssertionError: If the sorted file names found differ from the
            sorted expected names.
    """
    files = filelist(rootdir)
    terms = words(terms)

    if which == 0:
        docs = linear_search(files, terms)
    elif which == 1:
        index = create_index(files)
        docs = index_search(files, index, terms)
    else:
        index = myhtable_create_index(files)
        docs = myhtable_index_search(files, index, terms)

    # Compare sorted copies so the caller's ``expected`` list is left untouched.
    names = sorted(filenames(docs))
    wanted = sorted(expected)
    assert names == wanted, "found "+str(names)+" != expected "+str(wanted)
def _record_hit_time(search, data, value, sink):
    """Time one ``search(data, value)`` call; append the duration to *sink* on a hit."""
    # Only the search call itself sits between the two clock reads.
    start = time.perf_counter()
    found = search(data, value)
    end = time.perf_counter()
    # ``== True`` is kept on purpose: a truthy non-bool result other than 1
    # (for example a position such as 5) is not counted as a hit, as before.
    if found == True:
        sink.append(end - start)


def time_check(choice: str, ite: int, lim: int = 10000000):
    """Time the search routines on random lookups and plot the successful ones.

    A list of ``lim`` integers is built, then ``ite`` random values in
    ``[0, lim]`` are looked up with each search routine. The duration of a
    lookup is recorded only when the routine's result compares equal to
    ``True``. The recorded durations are drawn as one line per routine.

    Args:
        choice: ``'sorted'`` builds a sorted list of random integers;
            ``'normalized'`` builds ``0 .. lim-1`` and additionally times
            ``interpolation_search``. Any other value leaves the list empty.
        ite: Number of random lookups to perform.
        lim: Size of the list and upper bound of the random values.
            Defaults to the previously hard-coded 10000000.
    """
    if choice == 'sorted':
        l = sorted(random.randint(0, lim) for _ in range(lim))
    elif choice == 'normalized':
        l = list(range(lim))
    else:
        l = []

    inter = []
    seq = []
    bnry = []
    indexed = []

    for _ in range(ite):
        value = random.randint(0, lim)
        # Interpolation search is only timed on the evenly spaced list.
        if choice == 'normalized':
            _record_hit_time(interpolation_search, l, value, inter)
        _record_hit_time(linear_search, l, value, seq)
        _record_hit_time(binary_search, l, value, bnry)
        _record_hit_time(index_search, l, value, indexed)

    plt.xlabel('Iterations')
    plt.ylabel('Time')
    # Skip the interpolation line when nothing was recorded for it.
    if len(inter) != 0:
        plt.plot(inter, color='red', label='interpolation')
    plt.plot(seq, color='green', label='sequential')
    plt.plot(bnry, color='blue', label='binary')
    plt.plot(indexed, color='purple', label='indexed')
    plt.legend()
    plt.grid(True)
    plt.show()
def test_index_berlitz():
    """Index search for "hawaii travel" must report exactly HistoryHawaii.txt."""
    corpus = filelist(rootdir)
    query = words("hawaii travel")
    word_index = create_index(corpus)
    hits = index_search(corpus, word_index, query)
    assert filenames(hits) == ['HistoryHawaii.txt']
def test_index_berlitz_none():
    """Index search for the term "missspellinnng" must report no files."""
    corpus = filelist(rootdir)
    query = words("missspellinnng")
    word_index = create_index(corpus)
    hits = index_search(corpus, word_index, query)
    assert filenames(hits) == []
# Interactive search loop: prompt for terms, run the search selected by
# `impl`, write the rendered results to /tmp/results.html and open that file
# in a browser tab. An unrecognised `impl` ends the loop.
#
# Ported from Python 2 (print statement, raw_input) to Python 3.

# files = files[:100]
N = len(files)
print(N, "files")

index = None  # built on the first indexed query, then reused
while True:
    terms = input("Search terms: ")
    terms = words(terms)

    if impl == 'linear':
        docs = linear_search(files, terms)
    elif impl == 'index':
        if index is None:
            index = create_index(files)
            print("Index complete")
        docs = index_search(files, index, terms)
    elif impl == 'myhtable':
        if index is None:
            index = myhtable_create_index(files)
            print("Index complete")
        docs = myhtable_index_search(files, index, terms)
    else:
        print("Invalid search type:", impl)
        break

    page = results(docs, terms)
    # `with` guarantees the file is closed even if the write fails.
    with open("/tmp/results.html", "w", encoding='UTF-8') as f:
        f.write(page)
    webbrowser.open_new_tab("file:///tmp/results.html")
# Interactive search loop: read terms, run the search selected by `impl`,
# save the rendered page to /tmp/results.html and show it in a browser tab.

# Uncomment the next line to test just the first 100 files instead of all files
# files = files[:100]
N = len(files)
print(f"{N} files")

# The index is created on the first query that needs one and then kept.
index = None
while True:
    terms = words(input("Search terms: "))

    if impl == 'linear':
        docs = linear_search(files, terms)
    elif impl == 'index':
        if index is None:
            index = create_index(files)
            print("Index complete")
        docs = index_search(files, index, terms)
    elif impl == 'myhtable':
        if index is None:
            index = myhtable_create_index(files)
            print("Index complete")
        docs = myhtable_index_search(files, index, terms)
    else:
        # Unknown implementation name: report it and leave the loop.
        print(f"Invalid search type: {impl}")
        break

    html = results(docs, terms)
    with open("/tmp/results.html", "w", encoding='UTF-8') as out:
        out.write(html)
    webbrowser.open_new_tab("file:///tmp/results.html")
def _timed_steps(search, data, value):
    """Call ``search(data, value)`` and return ``(result, elapsed_seconds)``."""
    # Only the search call itself sits between the two clock reads.
    start = time.perf_counter()
    result = search(data, value)
    end = time.perf_counter()
    return result, end - start


def _steps_histogram(position, counts, color, label):
    """Draw one histogram of *counts* in cell *position* of a 2x2 subplot grid."""
    plt.subplot(2, 2, position)
    plt.hist(counts, color=color, label=label)
    plt.xlabel('Steps')
    plt.ylabel('Number of searchs')
    plt.legend()
    plt.grid(True)


def time_check(choice: str, ite: int, lim: int = 1000000):
    """Benchmark the search routines and plot their step counts and timings.

    A list of ``lim`` integers is built, then ``ite`` random values in
    ``[0, lim]`` are looked up with each search routine. For every lookup
    the value returned by the routine and the elapsed time are recorded.
    Two figures are shown: a 2x2 grid of histograms of the returned values,
    and a line plot of the elapsed times.

    Args:
        choice: ``'sorted'`` builds a sorted list of random integers;
            ``'normalized'`` builds ``0 .. lim-1`` and additionally runs
            ``interpolation_search``. Any other value leaves the list empty.
        ite: Number of random lookups to perform.
        lim: Size of the list and upper bound of the random values.
            Defaults to the previously hard-coded 1000000.
    """
    if choice == 'sorted':
        l = sorted(random.randint(0, lim) for _ in range(lim))
    elif choice == 'normalized':
        l = list(range(lim))
    else:
        l = []

    inter_time = []
    seq_time = []
    bnry_time = []
    indexed_time = []
    inter_counter = []
    seq_counter = []
    bnry_counter = []
    indexed_counter = []

    for _ in range(ite):
        value = random.randint(0, lim)

        # Interpolation search is only run on the evenly spaced list.
        if choice == 'normalized':
            steps, elapsed = _timed_steps(interpolation_search, l, value)
            inter_time.append(elapsed)
            inter_counter.append(steps)

        steps, elapsed = _timed_steps(linear_search, l, value)
        seq_time.append(elapsed)
        seq_counter.append(steps)

        steps, elapsed = _timed_steps(binary_search, l, value)
        bnry_time.append(elapsed)
        bnry_counter.append(steps)

        steps, elapsed = _timed_steps(index_search, l, value)
        indexed_time.append(elapsed)
        indexed_counter.append(steps)

    # Figure 1: histograms of the values returned by each routine.
    # NOTE(review): the title and labels below are set before any subplot
    # exists, so they land on an implicit axes rather than on the grid;
    # plt.suptitle may have been intended - confirm before changing.
    plt.title("Counter x Number of searchs")
    plt.xlabel('Steps')
    plt.ylabel('Number of searchs')
    if len(inter_time) != 0:
        _steps_histogram(1, inter_counter, 'red', 'interpolation')
    _steps_histogram(2, seq_counter, 'green', 'sequential')
    _steps_histogram(3, bnry_counter, 'blue', 'binary')
    _steps_histogram(4, indexed_counter, 'purple', 'indexed')
    plt.show()

    # Figure 2: elapsed time of every lookup, one line per routine.
    plt.title("Number of searchs x Time")
    plt.xlabel('Number of searchs')
    plt.ylabel('Time')
    if len(inter_time) != 0:
        plt.plot(inter_time, color='red', label='interpolation')
    plt.plot(seq_time, color='green', label='sequential')
    plt.plot(bnry_time, color='blue', label='binary')
    plt.plot(indexed_time, color='purple', label='indexed')
    plt.legend()
    plt.grid(True)
    plt.show()