def test_read(self):
    # Check the root node built by read_terms and its handling of a bad
    # file name.
    # The word list is parsed once and the root node reused for every
    # check, instead of re-reading the file for each assertion.
    root = autocomplete_me.read_terms("wiktionary.txt").root
    self.assertEqual(root.weight, -1)
    self.assertIsNone(root.word)
    self.assertEqual(root.maxWeight, 5627187200)
    # A whitespace-only file name is expected to raise ValueError.
    self.assertRaises(ValueError, autocomplete_me.read_terms, "  ")
Ejemplo n.º 2
0
 def test_autocomplete_for_wiki(self):
     # The five matches for "the" in wiktionary.txt, as (weight, word)
     # pairs listed from the largest weight down.
     matches = ac.autocomplete("the", ac.read_terms("wiktionary.txt"), 5)
     self.assertEqual(
         [
             (5627187200, 'the'),
             (334039800, 'they'),
             (282026500, 'their'),
             (250991700, 'them'),
             (196120000, 'there'),
         ],
         matches)
Ejemplo n.º 3
0
 def test_autocomplete_for_movies(self):
     # The five matches for "The" in movies.txt, as (weight, title)
     # pairs listed from the largest weight down.
     matches = ac.autocomplete("The", ac.read_terms('movies.txt'), 5)
     self.assertEqual(
         [
             (623357910, 'The Avengers (2012)'),
             (534858444, 'The Dark Knight (2008)'),
             (448139099, 'The Dark Knight Rises (2012)'),
             (422783777, 'The Lion King (1994)'),
             (408010692, 'The Hunger Games (2012)'),
         ],
         matches)
class graph:
    """Benchmark trie loading and prefix matching, then plot the timings.

    Everything after ``random_file`` sits directly in the class body, so it
    runs once, when the class statement itself is executed: 30 random
    subsets of movies.txt are written, loaded with ``ac.read_terms`` and
    queried with ``ac.autocomplete``, and both timings are scatter-plotted
    against the subset size.
    """

    def random_file(newfile, filename="movies.txt", encoding='UTF-8'):
        """Write a random subset of the lines of ``filename`` to ``newfile``.

        filename is the original file's name
        newfile is the name of the random subset
        encoding is used for both reading and writing

        The first line of the original file is never sampled. The subset
        size is drawn from [10000, 100000) and capped at the number of
        available lines, because random.sample raises ValueError when
        asked for more items than the population holds.

        Returns [newfile, fileSize], where fileSize is the number of lines
        actually written.
        """
        with open(filename, "r", encoding=encoding) as file:
            allLines = file.readlines()

        candidates = allLines[1:]
        fileSize = min(random.randrange(10000, 100000), len(candidates))
        lines = random.sample(candidates, fileSize)

        with open(newfile, "w", encoding=encoding) as file:
            file.writelines(lines)
        return [newfile, fileSize]

    time1, time2, size = [], [], []
    for i in range(30):
        # every round overwrites the same "rand_i" file
        randFile, fileSize = random_file("rand_i", "movies.txt")

        # time.clock() was removed in Python 3.8; perf_counter() is the
        # documented replacement for measuring elapsed time.
        # times building trie and records time to list
        start_i = time.perf_counter()
        trie = ac.read_terms(randFile)
        running1 = time.perf_counter() - start_i
        time1.append(running1)

        # adds filesize to size list
        size_i = fileSize
        size.append(size_i)

        # times matcher and records time to list
        newStart_i = time.perf_counter()
        ac.autocomplete('T', trie, 5)
        running2 = time.perf_counter() - newStart_i
        time2.append(running2)

    plt.title('Performance by Input Size')
    plt.xlabel('Input Size')
    plt.ylabel('Execution Time (s)')
    plt.grid(True)
    plt.xlim(10000, 100000)
    plt.ylim(0, 10)

    # plot filesize against two time lists with legends
    plt.scatter(size, time1, color="red", label="Loading data")
    plt.scatter(size, time2, color="blue", label="Matcher")
    plt.legend()

    plt.show()
    # NOTE(review): this is the body of a function whose `def` line is not
    # visible here; `file`, `prefix` and `k` are presumably its parameters
    # (path of the term file, prefix to match, maximum number of results)
    # -- confirm against the original definition.
    words, wordList = [], []

    # Read the term file: the first line is discarded, then every line that
    # is not just a newline is split on tabs into an integer first field
    # and a text second field.
    with open(file, 'r') as txt:
        next(txt)
        for line in txt:
            if line != '\n':
                item = line.strip().split('\t')
                words.append((int(item[0]), item[1]))

    # Descending tuple order: largest integer first, ties broken by text.
    words = sorted(words, reverse=True)

    # Walk the sorted entries and keep those whose text starts with
    # `prefix`, stopping the collection once `k` entries have been kept.
    for item in words:
        if prefix == item[1][0:len(prefix)] and len(wordList) < k:
            wordList.append(item)

    return wordList


# Tries for the four bundled data sets, loaded in the same order as the
# names on the left-hand side.
wikTrie, pokTrie, babTrie, movTrie = map(
    autocomplete_me.read_terms,
    ('wiktionary.txt', 'pokemon.txt', 'baby-names.txt', 'movies.txt'))

# Trie built from a freshly generated term file (arguments: target file
# name 'random.txt' and the number 2000).
randFile = create_random_terms('random.txt', 2000)
randTrie = autocomplete_me.read_terms(randFile)

# Run the test suite only when this file is executed as a script.
if __name__ == '__main__':
    unittest.main()
Ejemplo n.º 6
0
import autocomplete_me as ac
import time
import cProfile

# NOTE: time.clock() was removed in Python 3.8, so both measurements below
# use time.perf_counter(), its documented replacement for elapsed time.

# times read_terms() process
begin1 = time.perf_counter()
words = ac.read_terms("movies.txt")
stop1 = time.perf_counter()
diff1 = stop1 - begin1
print("Building the Trie takes: {}".format(diff1))
print("\n")

# times autocomplete() process on the trie built above
begin2 = time.perf_counter()
ac.autocomplete("The", words, 5)
stop2 = time.perf_counter()
diff2 = stop2 - begin2
print("Finishing autocomplete for 'The' takes: {}".format(diff2))
print("\n")

# profiles every function call made while loading "movies.txt" and
# searching it for "The" (the trie is rebuilt inside the profiled statement)
cProfile.run('ac.autocomplete("The", ac.read_terms("movies.txt"), 5)')
Ejemplo n.º 7
0
 def test_autocomplete_for_empty_input(self):
     # An empty prefix is expected to produce an empty result list.
     matches = ac.autocomplete("", ac.read_terms("pokemon.txt"), 5)
     self.assertEqual([], matches)
Ejemplo n.º 8
0
 def test_autocomplete_for_input_not_in_trie(self):
     # The prefix "XXX" is expected to produce an empty result list for
     # baby-names.txt.
     matches = ac.autocomplete("XXX", ac.read_terms('baby-names.txt'), 5)
     self.assertEqual([], matches)
Ejemplo n.º 9
0
 def test_autocomplete_for_babynames(self):
     # The five matches for "L" in baby-names.txt, as (weight, name)
     # pairs listed from the largest weight down.
     matches = ac.autocomplete("L", ac.read_terms('baby-names.txt'), 5)
     self.assertEqual(
         [
             (16709, 'Liam'),
             (13066, 'Logan'),
             (10623, 'Lucas'),
             (9319, 'Landon'),
             (8930, 'Luke'),
         ],
         matches)
Ejemplo n.º 10
0
 def test_autocomplete_for_pokemon(self):
     # The five matches for "Sh" in pokemon.txt, as (weight, name)
     # pairs listed from the largest weight down.
     matches = ac.autocomplete("Sh", ac.read_terms("pokemon.txt"), 5)
     self.assertEqual(
         [
             (81075, 'Sharpedo'),
             (55024, 'Shedinja'),
             (43597, 'Shaymin'),
             (42367, 'Shuckle'),
             (31091, 'Shiftry'),
         ],
         matches)