def test_read(self):
    # Checks the root node of the trie built by read_terms, and that a blank
    # file name is rejected.
    # Build the trie once: parsing wiktionary.txt separately for every
    # assertion repeats the same file I/O without adding coverage.
    root = autocomplete_me.read_terms("wiktionary.txt").root
    self.assertEqual(root.weight, -1)
    self.assertIsNone(root.word)
    self.assertEqual(root.maxWeight, 5627187200)
    # A file name consisting of a single space must raise ValueError.
    self.assertRaises(ValueError, autocomplete_me.read_terms, " ")
def test_autocomplete_for_wiki(self):
    # The five (weight, word) pairs expected for the prefix "the" in
    # wiktionary.txt, listed heaviest first.
    expected = [
        (5627187200, 'the'),
        (334039800, 'they'),
        (282026500, 'their'),
        (250991700, 'them'),
        (196120000, 'there'),
    ]
    trie = ac.read_terms("wiktionary.txt")
    result = ac.autocomplete("the", trie, 5)
    self.assertEqual(expected, result)
def test_autocomplete_for_movies(self):
    # The five (weight, title) pairs expected for the prefix "The" in
    # movies.txt, listed heaviest first.
    expected = [
        (623357910, 'The Avengers (2012)'),
        (534858444, 'The Dark Knight (2008)'),
        (448139099, 'The Dark Knight Rises (2012)'),
        (422783777, 'The Lion King (1994)'),
        (408010692, 'The Hunger Games (2012)'),
    ]
    trie = ac.read_terms('movies.txt')
    result = ac.autocomplete("The", trie, 5)
    self.assertEqual(expected, result)
class graph:
    '''
    benchmark that runs when the class body is executed: builds a trie
    from 30 random subsets of movies.txt, times the build and one
    autocomplete query per subset, and scatter-plots both against the
    subset size
    '''

    # NOTE: random_file is called as a plain function from the class body
    # below, so it deliberately takes no self and is not a staticmethod.
    def random_file(newfile, filename="movies.txt", encoding='UTF-8'):
        '''
        creates a text file that is a random subset of a given file
        filename is the original file's name
        newfile is the name of the random subset
        encoding is used for both reading and writing
        returns [newfile, number of lines written]
        '''
        with open(filename, "r", encoding=encoding) as file:
            allLines = file.readlines()
        # the first line of the original file is never sampled
        candidates = allLines[1:]
        # random.sample raises ValueError when asked for more items than
        # the population holds, so cap the size for small source files
        fileSize = min(random.randrange(10000, 100000), len(candidates))
        lines = random.sample(candidates, fileSize)
        with open(newfile, "w", encoding=encoding) as file:
            file.writelines(lines)
        return [newfile, fileSize]

    time1, time2, size = [], [], []
    for i in range(30):
        # the same scratch file name is reused (overwritten) on every pass
        [randFile, fileSize] = random_file("rand_i", "movies.txt")
        #times building trie and records time to list
        # time.clock() was removed in Python 3.8; perf_counter() is the
        # supported high-resolution timer
        start_i = time.perf_counter()
        trie = ac.read_terms(randFile)
        running1 = time.perf_counter() - start_i
        time1.append(running1)
        #adds filesize to size list
        size_i = fileSize
        size.append(size_i)
        #times matcher and records time to list
        newStart_i = time.perf_counter()
        ac.autocomplete('T', trie, 5)
        running2 = time.perf_counter() - newStart_i
        time2.append(running2)

    plt.title('Performance by Input Size')
    plt.xlabel('Input Size')
    plt.ylabel('Execution Time (s)')
    plt.grid(True)
    plt.xlim(10000, 100000)
    plt.ylim(0, 10)
    #plot filesize against two time lists with legends
    plt.scatter(size, time1, color="red", label="Loading data")
    plt.scatter(size, time2, color="blue", label="Matcher")
    plt.legend()
    plt.show()
# NOTE(review): the statements down to `return wordList` are the tail of a
# function whose `def` line lies outside this view; `file`, `prefix` and `k`
# are presumably its parameters -- confirm against the full file.
    words, wordList = [], []
    with open(file, 'r') as txt:
        # the first line of the file is skipped
        next(txt)
        for line in txt:
            # lines consisting of a bare newline are ignored
            if line != '\n':
                # each line is split on tabs: an integer first field and a
                # second field, stored together as an (int, str) tuple
                item = line.strip().split('\t')
                words.append((int(item[0]), item[1]))
    # descending tuple order, i.e. largest integer first
    words = sorted(words, reverse=True)
    for item in words:
        # keep entries whose second field begins with prefix, up to k of them
        if prefix == item[1][0:len(prefix)] and len(wordList) < k:
            wordList.append(item)
    return wordList

# four tries
wikTrie = autocomplete_me.read_terms('wiktionary.txt')
pokTrie = autocomplete_me.read_terms('pokemon.txt')
babTrie = autocomplete_me.read_terms('baby-names.txt')
movTrie = autocomplete_me.read_terms('movies.txt')
# random tries
# NOTE(review): create_random_terms is not defined in this view; its return
# value is passed to read_terms as a file name -- confirm.
randFile = create_random_terms('random.txt', 2000)
randTrie = autocomplete_me.read_terms(randFile)

# run the unittest runner when this file is executed as a script
if __name__ == '__main__':
    unittest.main()
import cProfile
import time

import autocomplete_me as ac

# Script that reports how long building the trie and running one
# autocomplete query take on movies.txt, then profiles both together.
# time.clock() was removed in Python 3.8; time.perf_counter() is the
# supported high-resolution timer for measuring short durations.

#times read_terms() process
begin1 = time.perf_counter()
words = ac.read_terms("movies.txt")
stop1 = time.perf_counter()
diff1 = stop1 - begin1
print("Building the Trie takes: {}".format(diff1))
print("\n")

#times autocomplete() process
begin2 = time.perf_counter()
ac.autocomplete("The", words, 5)
stop2 = time.perf_counter()
diff2 = stop2 - begin2
print("Finishing autocomplete for 'The' takes: {}".format(diff2))
print("\n")

#times all functions separately with "The" as search string and "movies.txt"
# as text input
cProfile.run('ac.autocomplete("The", ac.read_terms("movies.txt"), 5)')
def test_autocomplete_for_empty_input(self):
    # An empty prefix against the pokemon trie must yield no matches.
    trie = ac.read_terms("pokemon.txt")
    self.assertEqual([], ac.autocomplete("", trie, 5))
def test_autocomplete_for_input_not_in_trie(self):
    # The prefix "XXX" is expected to match nothing in baby-names.txt,
    # so the result must be an empty list.
    trie = ac.read_terms('baby-names.txt')
    self.assertEqual([], ac.autocomplete("XXX", trie, 5))
def test_autocomplete_for_babynames(self):
    # The five (weight, name) pairs expected for the prefix "L" in
    # baby-names.txt, listed heaviest first.
    expected = [
        (16709, 'Liam'),
        (13066, 'Logan'),
        (10623, 'Lucas'),
        (9319, 'Landon'),
        (8930, 'Luke'),
    ]
    trie = ac.read_terms('baby-names.txt')
    result = ac.autocomplete("L", trie, 5)
    self.assertEqual(expected, result)
def test_autocomplete_for_pokemon(self):
    # The five (weight, name) pairs expected for the prefix "Sh" in
    # pokemon.txt, listed heaviest first.
    expected = [
        (81075, 'Sharpedo'),
        (55024, 'Shedinja'),
        (43597, 'Shaymin'),
        (42367, 'Shuckle'),
        (31091, 'Shiftry'),
    ]
    trie = ac.read_terms("pokemon.txt")
    result = ac.autocomplete("Sh", trie, 5)
    self.assertEqual(expected, result)