def time_bothcomplete_new(mylist, mytrie, prefix, k,
                          repeats=10, runs_per_repeat=1000):
    """Benchmark slowcomplete against autocomplete on fixed inputs.

    Each function is timed in ``repeats`` batches. One batch is the elapsed
    time of ``runs_per_repeat`` consecutive calls with identical arguments,
    so every recorded sample is the cost of a whole batch, not of one call.

    Args:
        mylist: Data passed as first argument to
            ``autocomplete_me.slowcomplete``.
        mytrie: Data passed as first argument to
            ``autocomplete_me.autocomplete``.
        prefix: Prefix handed to both functions.
        k: Result-count argument handed to both functions.
        repeats: Number of timed batches per function. A standard deviation
            needs at least two samples (assuming ``stat`` is the standard
            ``statistics`` module -- confirm against the file's imports).
        runs_per_repeat: Number of calls inside each timed batch.

    Returns:
        A 4-tuple ``(mean_slow, stdev_slow, mean_auto, stdev_auto)`` of batch
        times in seconds.
    """

    def _batch_times(func, data):
        # Collect one elapsed-time sample per batch for ``func``.
        samples = []
        for _batch in range(repeats):
            # perf_counter is monotonic and high resolution, unlike time.time.
            start = time.perf_counter()
            for _call in range(runs_per_repeat):
                func(data, prefix, k)
            samples.append(time.perf_counter() - start)
        return samples

    record_slow = _batch_times(autocomplete_me.slowcomplete, mylist)
    record_auto = _batch_times(autocomplete_me.autocomplete, mytrie)

    return (stat.mean(record_slow), stat.stdev(record_slow),
            stat.mean(record_auto), stat.stdev(record_auto))
def test_prune_and_rescale():
    """Exercise prune_trie followed by rescale_weight on three shared tries.

    The test works on the module-level fixtures ``test2_auto``,
    ``babies_auto`` and ``mandarin_auto``. The return values of
    ``prune_trie`` and ``rescale_weight`` are ignored, so the fixtures are
    presumably modified in place -- statement order therefore matters.
    """

    def tenth(weight):
        return weight // 10

    def minus_one(weight):
        return weight - 1

    def halved(weight):
        return weight // 2

    complete = autocomplete_me.autocomplete
    prune = autocomplete_me.prune_trie
    rescale = autocomplete_me.rescale_weight

    # --- test2_auto ---
    prune(test2_auto, 20)
    assert complete(test2_auto, "", 5) == [(920, "hermione"), (67, "her")]
    rescale(test2_auto, tenth)
    assert complete(test2_auto, "", 5) == [(92, "hermione"), (6, "her")]
    her_node = autocomplete_me.Trie(test2_auto).searchTrie("her")
    assert her_node.maxweight == 92

    # --- babies_auto ---
    prune(babies_auto, 300)
    assert complete(babies_auto, "Gio", 5) == [
        (3086, "Giovanni"),
        (372, "Giovani"),
    ]
    assert complete(babies_auto, "Ayl", 5) == [
        (1284, "Ayla"),
        (673, "Aylin"),
        (379, "Ayleen"),
    ]
    rescale(babies_auto, minus_one)
    assert complete(babies_auto, "Sab", 5) == [(1174, "Sabrina")]
    assert complete(babies_auto, "Sel", 5) == [
        (1043, "Selena"),
        (570, "Selah"),
    ]

    # --- mandarin_auto ---
    prune(mandarin_auto, 4000)
    assert complete(mandarin_auto, "不", 5) == [
        (15666, "不是"),
        (7167, "不要"),
        (6942, "不能"),
        (6892, "不知道"),
        (4256, "不起"),
    ]
    assert complete(mandarin_auto, "好", 5) == [
        (6171, "好了"),
        (5391, "好吧"),
        (5170, "好的"),
    ]
    rescale(mandarin_auto, halved)
    # With no match, autocomplete is expected to return this message string.
    no_match = "No words match with the given prefix."
    assert complete(mandarin_auto, "意", 5) == no_match
    assert complete(mandarin_auto, "我", 5) == [(20865, "我的")]
def time_bothcomplete(mylist, mytrie, prefix, k, iterations=1000):
    """Time individual slowcomplete and autocomplete calls on fixed inputs.

    Each function is called ``iterations`` times with the same arguments and
    the elapsed time of every single call is recorded. The quartiles of the
    two samples are returned so the two implementations can be compared.

    Args:
        mylist: Data passed as first argument to
            ``autocomplete_me.slowcomplete``.
        mytrie: Data passed as first argument to
            ``autocomplete_me.autocomplete``.
        prefix: Prefix handed to both functions.
        k: Result-count argument handed to both functions.
        iterations: Number of timed calls per function.

    Returns:
        A 6-tuple of per-call times in seconds:
        ``(slow_q25, slow_q50, slow_q75, auto_q25, auto_q50, auto_q75)``.
    """

    def _call_times(func, data):
        # Collect one elapsed-time sample per call of ``func``.
        samples = []
        for _ in range(iterations):
            # perf_counter is monotonic and high resolution, unlike time.time.
            start = time.perf_counter()
            func(data, prefix, k)
            samples.append(time.perf_counter() - start)
        return samples

    record_slow = _call_times(autocomplete_me.slowcomplete, mylist)
    record_auto = _call_times(autocomplete_me.autocomplete, mytrie)

    quartiles = (0.25, 0.50, 0.75)
    slow_q = tuple(numpy.quantile(record_slow, q) for q in quartiles)
    auto_q = tuple(numpy.quantile(record_auto, q) for q in quartiles)
    return slow_q + auto_q
def test_random(self):
    """autocomplete must agree with slow_autocomplete on the randomized data."""
    for prefix in ("a", "ac", "act"):
        self.assertEqual(
            autocomplete_me.autocomplete(prefix, randTrie, 5),
            slow_autocomplete(prefix, randFile, 5),
        )
def test_add_term():
    """Check add_term on the shared test2_auto and babies_auto fixtures.

    After each insertion the test inspects both the autocomplete output and
    the ``weight`` / ``maxweight`` / ``fullword`` attributes of trie nodes.
    """

    def node_at(trie, prefix):
        # Wrap the trie anew for every lookup and return the node for prefix.
        return autocomplete_me.Trie(trie).searchTrie(prefix)

    # --- test2_auto ---
    autocomplete_me.add_term(test2_auto, "hermione", 919)
    assert node_at(test2_auto, "hermion").children["e"].fullword == "hermione"
    assert node_at(test2_auto, "hermione").weight == 919
    assert node_at(test2_auto, "her").maxweight == 919
    assert node_at(test2_auto, "her").weight == 67
    assert autocomplete_me.autocomplete(test2_auto, "he", 3) == [
        (919, 'hermione'),
        (67, 'her'),
    ]

    # --- babies_auto ---
    autocomplete_me.add_term(babies_auto, "Ron", 19800301)
    assert autocomplete_me.autocomplete(babies_auto, "Ro", 3) == [
        (19800301, 'Ron'),
        (6882, 'Robert'),
        (2562, 'Roman'),
    ]
    assert node_at(babies_auto, "Ron").weight == 19800301
    assert node_at(babies_auto, "Ro").maxweight == 19800301
    # "Ro" is only a prefix here; its node is expected to carry weight -1.
    assert node_at(babies_auto, "Ro").weight == -1
def test_autocomplete_for_wiki(self):
    """Top five completions of "the" in wiktionary.txt."""
    expected = [
        (5627187200, 'the'),
        (334039800, 'they'),
        (282026500, 'their'),
        (250991700, 'them'),
        (196120000, 'there'),
    ]
    wikiTrie = ac.read_terms("wiktionary.txt")
    self.assertEqual(expected, ac.autocomplete("the", wikiTrie, 5))
def test_for_random_file(self):
    """Compare ac.autocomplete with a brute-force top five on random terms.

    Between 600 and 2500 lowercase words of length 2-9 are generated with
    random weights; duplicates are dropped. The expected answer is the five
    heaviest words starting with 't', taken from the list sorted in
    descending (weight, word) order.
    """
    trie = Trie()
    entries = []
    seen = set()

    total = random.randint(600, 2500)
    for _ in range(total):
        length = random.randint(2, 9)
        word = ''.join(
            random.choice(string.ascii_lowercase) for _ in range(length)
        )
        # The weight is drawn even for a duplicate word that is then skipped.
        weight = random.randint(1, 100000)
        if word in seen:
            continue
        seen.add(word)
        entries.append((weight, word))
        trie.addWord(wordStr=word, weight=weight)

    # Heaviest first.
    entries.sort(reverse=True)

    search_str = 't'
    expected = [entry for entry in entries if entry[1][0] == search_str][:5]

    # Both approaches must produce the same list.
    self.assertEqual(expected, ac.autocomplete('t', trie, 5))
def test_delete_term():
    """Check delete_term on the shared test2_auto and pokemon_auto fixtures."""

    def node_at(trie, prefix):
        # Wrap the trie anew for every lookup and return the search result.
        return autocomplete_me.Trie(trie).searchTrie(prefix)

    complete = autocomplete_me.autocomplete

    # --- test2_auto, case 1: the deleted node has no children ---
    assert node_at(test2_auto, "in").maxweight == 20
    autocomplete_me.delete_term(test2_auto, "inn")
    assert complete(test2_auto, "i", 50) == [(5, 'in')]
    # searchTrie is compared with False by equality, not identity.
    assert node_at(test2_auto, "inn") == False
    assert node_at(test2_auto, "in").maxweight == 5

    # --- pokemon_auto, case 2: the deleted node has children ---
    autocomplete_me.delete_term(pokemon_auto, "Porygon2")
    assert complete(pokemon_auto, "Pory", 2) == [
        (83878, "Porygon-Z"),
        (533, "Porygon"),
    ]
    assert node_at(pokemon_auto, "Porygon2") == False
    assert node_at(pokemon_auto, "Porygon").maxweight == 83878

    autocomplete_me.delete_term(pokemon_auto, "Porygon-Z")
    assert complete(pokemon_auto, "Pory", 2) == [(533, "Porygon")]
def test_autocomplete_for_movies(self):
    """Top five completions of "The" in movies.txt."""
    expected = [
        (623357910, 'The Avengers (2012)'),
        (534858444, 'The Dark Knight (2008)'),
        (448139099, 'The Dark Knight Rises (2012)'),
        (422783777, 'The Lion King (1994)'),
        (408010692, 'The Hunger Games (2012)'),
    ]
    movTrie = ac.read_terms('movies.txt')
    self.assertEqual(expected, ac.autocomplete("The", movTrie, 5))
def test_wordList(self):
    """All matches are returned when fewer than k terms share the prefix."""
    trie = autocomplete_me.Trie()
    terms = (
        (123, "apple"),
        (234, "apples"),
        (67, "applet"),
        (88, "appletree"),
    )
    for weight, word in terms:
        trie.insert(Node(weight=weight, word=word))

    # Four terms start with "apple" although six were requested.
    matches = autocomplete_me.autocomplete("apple", trie, 6)
    self.assertEqual(len(matches), 4)
def test_insert_or_update():
    """Check insert_or_update on the shared test2_auto and pokemon_auto tries.

    NOTE(review): the expected weights (16, 17, 920) depend on the state the
    fixtures are in when this test starts -- confirm the intended test order.
    """

    def node_at(trie, prefix):
        # Wrap the trie anew for every lookup and return the node for prefix.
        return autocomplete_me.Trie(trie).searchTrie(prefix)

    upsert = autocomplete_me.insert_or_update
    complete = autocomplete_me.autocomplete

    # --- updating words that already exist ---
    upsert(test2_auto, "can", 23)
    assert node_at(test2_auto, "can").weight == 16
    assert node_at(test2_auto, "ca").maxweight == 16

    upsert(test2_auto, "cat", 100)
    upsert(test2_auto, "cat", 1000)
    # The prefix node's maxweight has to follow the update as well.
    assert node_at(test2_auto, "cat").weight == 17
    assert node_at(test2_auto, "ca").maxweight == 17

    # --- inserting new words: test2_auto ---
    upsert(test2_auto, "hermione", 919)
    assert complete(test2_auto, "her", 1) == [(920, 'hermione')]
    assert complete(test2_auto, "he", 3) == [(920, 'hermione'), (67, 'her')]
    assert node_at(test2_auto, "her").maxweight == 920
    assert node_at(test2_auto, "her").weight == 67

    # --- inserting new words: pokemon_auto ---
    upsert(pokemon_auto, "jinhowchong", 19960308)
    assert complete(pokemon_auto, "jin", 5) == [(19960308, 'jinhowchong')]
    assert node_at(pokemon_auto, "jinh").maxweight == 19960308
    assert node_at(pokemon_auto, "jinhowchong").weight == 19960308
class graph:
    """Plot trie-building and matcher times against input size.

    Everything below runs in the class body, i.e. once, when the ``class``
    statement is executed: 30 random subsets of movies.txt are written to
    disk, loaded with ``ac.read_terms`` and queried with ``ac.autocomplete``,
    and both timings are shown in a scatter plot.
    """

    def random_file(newfile, filename="movies.txt", encoding='UTF-8'):
        """Write a random subset of the lines of ``filename`` to ``newfile``.

        Called as a plain function from the class body, hence no ``self``.

        Args:
            newfile: Name of the subset file to create (overwritten).
            filename: Name of the file the lines are sampled from.
            encoding: Text encoding used for reading and writing.

        Returns:
            ``[newfile, fileSize]`` where ``fileSize`` is the number of
            sampled lines, drawn from ``range(10000, 100000)``.

        Raises:
            ValueError: From ``random.sample`` if the source file has fewer
                than ``fileSize`` lines after the first one.
        """
        with open(filename, "r", encoding=encoding) as file:
            allLines = file.readlines()
        fileSize = random.randrange(10000, 100000)
        # The first line is excluded from sampling (presumably a header or
        # count line -- confirm against the data files).
        lines = random.sample(allLines[1:], fileSize)
        with open(newfile, "w", encoding=encoding) as file:
            file.writelines(lines)
        return [newfile, fileSize]

    time1, time2, size = [], [], []
    for i in range(30):
        # Every iteration overwrites the same file, literally named "rand_i".
        randFile, fileSize = random_file("rand_i", "movies.txt")

        # Time building the trie. time.clock() was removed in Python 3.8;
        # perf_counter() is the documented replacement for benchmarking.
        start_i = time.perf_counter()
        trie = ac.read_terms(randFile)
        running1 = time.perf_counter() - start_i
        time1.append(running1)

        # Record the input size for the x axis (kept out of the timed region).
        size_i = fileSize
        size.append(size_i)

        # Time the matcher.
        newStart_i = time.perf_counter()
        ac.autocomplete('T', trie, 5)
        running2 = time.perf_counter() - newStart_i
        time2.append(running2)

    plt.title('Performance by Input Size')
    plt.xlabel('Input Size')
    plt.ylabel('Execution Time (s)')
    plt.grid(True)
    plt.xlim(10000, 100000)
    plt.ylim(0, 10)
    # Plot input size against both timing series, with a legend.
    plt.scatter(size, time1, color="red", label="Loading data")
    plt.scatter(size, time2, color="blue", label="Matcher")
    plt.legend()
    plt.show()
def test_auto(self):
    """Check autocomplete against known top-five results on four data sets.

    For every trie the listed prefixes must yield exactly the expected
    ``(weight, term)`` lists; afterwards the prefix "xxx" must raise
    LookupError and the prefix " " must raise ValueError.
    """
    cases = [
        (wikTrie, [
            ("t", [(5627187200, 'the'), (2595609600, 'to'),
                   (1107331800, 'that'), (401542500, 'this'),
                   (334039800, 'they')]),
            ("th", [(5627187200, 'the'), (1107331800, 'that'),
                    (401542500, 'this'), (334039800, 'they'),
                    (282026500, 'their')]),
        ]),
        (pokTrie, [
            ("S", [(2194440, 'Scizor'), (1211390, 'Starmie'),
                   (993018, 'Skarmory'), (981131, 'Salamence'),
                   (232622, 'Sableye')]),
            ("Sh", [(81075, 'Sharpedo'), (55024, 'Shedinja'),
                    (43597, 'Shaymin'), (42367, 'Shuckle'),
                    (31091, 'Shiftry')]),
        ]),
        (babTrie, [
            ("L", [(16709, 'Liam'), (13066, 'Logan'), (10623, 'Lucas'),
                   (9319, 'Landon'), (8930, 'Luke')]),
            ("Li", [(16709, 'Liam'), (7899, 'Lily'), (7105, 'Lillian'),
                    (2915, 'Lincoln'), (2759, 'Lilly')]),
        ]),
        (movTrie, [
            ("T", [(658672302, 'Titanic (1997)'),
                   (623357910, 'The Avengers (2012)'),
                   (534858444, 'The Dark Knight (2008)'),
                   (448139099, 'The Dark Knight Rises (2012)'),
                   (422783777, 'The Lion King (1994)')]),
            ("The", [(623357910, 'The Avengers (2012)'),
                     (534858444, 'The Dark Knight (2008)'),
                     (448139099, 'The Dark Knight Rises (2012)'),
                     (422783777, 'The Lion King (1994)'),
                     (408010692, 'The Hunger Games (2012)')]),
            ("Star Wars", [
                (460935665, 'Star Wars (1977)'),
                (380262555,
                 'Star Wars: Episode III - Revenge of the Sith (2005)'),
                (310675583,
                 'Star Wars: Episode II - Attack of the Clones (2002)'),
                (309125409,
                 'Star Wars: Episode VI - Return of the Jedi (1983)'),
                (290475067,
                 'Star Wars: Episode V - The Empire Strikes Back (1980)'),
            ]),
        ]),
    ]

    for trie, lookups in cases:
        for prefix, expected in lookups:
            self.assertEqual(
                autocomplete_me.autocomplete(prefix, trie, 5), expected)
        # A prefix without matches and a blank prefix are both rejected.
        self.assertRaises(LookupError,
                          autocomplete_me.autocomplete, "xxx", trie, 5)
        self.assertRaises(ValueError,
                          autocomplete_me.autocomplete, " ", trie, 5)
def test_autocomplete_for_babynames(self):
    """Top five completions of "L" in baby-names.txt."""
    expected = [
        (16709, 'Liam'),
        (13066, 'Logan'),
        (10623, 'Lucas'),
        (9319, 'Landon'),
        (8930, 'Luke'),
    ]
    babyTrie = ac.read_terms('baby-names.txt')
    self.assertEqual(expected, ac.autocomplete("L", babyTrie, 5))
def test_autocomplete_for_input_not_in_trie(self):
    """A prefix with no matching term yields an empty list."""
    babyTrie = ac.read_terms('baby-names.txt')
    self.assertEqual([], ac.autocomplete("XXX", babyTrie, 5))
def test_autocomplete_for_pokemon(self):
    """Top five completions of "Sh" in pokemon.txt."""
    expected = [
        (81075, 'Sharpedo'),
        (55024, 'Shedinja'),
        (43597, 'Shaymin'),
        (42367, 'Shuckle'),
        (31091, 'Shiftry'),
    ]
    pokTrie = ac.read_terms("pokemon.txt")
    self.assertEqual(expected, ac.autocomplete("Sh", pokTrie, 5))
def test_autocomplete():
    """slowcomplete and autocomplete must agree on every shared data set.

    Each case calls slowcomplete first and autocomplete second with the same
    prefix and k. One extra check expects differing results when the two
    functions receive prefixes that differ only in letter case.
    """

    def both(slow_data, trie, prefix, k):
        # Tuple elements are evaluated left to right: slowcomplete first.
        return (autocomplete_me.slowcomplete(slow_data, prefix, k),
                autocomplete_me.autocomplete(trie, prefix, k))

    matching_cases = [
        (test_slow, test_auto, "sab", 1),
        (test_slow, test_auto, "c", 3),
        (test_slow, test_auto, "invalidprefix", 5),
        (pokemon_slow, pokemon_auto, "Z", 1),
        (pokemon_slow, pokemon_auto, "Po", 3),
        (pokemon_slow, pokemon_auto, " BLAH", 5),
        (babies_slow, babies_auto, "H", 1),
        (babies_slow, babies_auto, "Her", 3),
        (babies_slow, babies_auto, "Sabrin", 5),
        (mandarin_slow, mandarin_auto, "", 1),
        (mandarin_slow, mandarin_auto, "永久", 3),
        (mandarin_slow, mandarin_auto, "Hermione", 5),
    ]
    for slow_data, trie, prefix, k in matching_cases:
        slow_result, auto_result = both(slow_data, trie, prefix, k)
        assert slow_result == auto_result

    # Prefixes that differ in case ("HOME" vs "Home") must not agree.
    upper_result = autocomplete_me.slowcomplete(trademarks_slow, "HOME", 1)
    mixed_result = autocomplete_me.autocomplete(trademarks_auto, "Home", 1)
    assert upper_result != mixed_result

    for prefix, k in (("", 3), ("invalidprefix", 5)):
        slow_result, auto_result = both(
            trademarks_slow, trademarks_auto, prefix, k)
        assert slow_result == auto_result
# Timing script: measures trie construction and a single autocomplete query
# on movies.txt, then profiles both steps together with cProfile.
import cProfile
import time

import autocomplete_me as ac

# Time the read_terms() step. time.clock() was removed in Python 3.8;
# perf_counter() is the documented replacement for benchmarking.
begin1 = time.perf_counter()
words = ac.read_terms("movies.txt")
stop1 = time.perf_counter()
diff1 = stop1 - begin1
print("Building the Trie takes: {}".format(diff1))
print("\n")

# Time the autocomplete() step on the trie built above.
begin2 = time.perf_counter()
ac.autocomplete("The", words, 5)
stop2 = time.perf_counter()
diff2 = stop2 - begin2
print("Finishing autocomplete for 'The' takes: {}".format(diff2))
print("\n")

# Profile all functions separately, with "The" as the search string and
# "movies.txt" as the text input. The trie is rebuilt inside the profiled
# statement so that read_terms shows up in the report as well.
cProfile.run('ac.autocomplete("The", ac.read_terms("movies.txt"), 5)')
def test_autocomplete_for_empty_input(self):
    """An empty prefix yields an empty list."""
    pokTrie = ac.read_terms("pokemon.txt")
    self.assertEqual([], ac.autocomplete("", pokTrie, 5))