Ejemplo n.º 1
0
def testRun():
    """Interactive demo: index one text in both structures and look up a word.

    Reads the words of "Grimm's Fairy Tales" through WordReader, adds every
    entry to a Trie and to a RedBlack tree while printing progress, then
    prompts for a word (or the beginning of one) and prints the result of
    find() from each structure so the two can be compared by eye.
    """
    lukija = WordReader(["../Material/Grimm's Fairy Tales.txt"])
    lukija.readWords()

    punamusta = RedBlack(lukija)
    trie = Trie(lukija)

    print("Adding words from selected material...")
    total = lukija.wordcount
    done = 0       # entries added so far
    reported = 0   # last whole percentage that was printed
    for word in lukija.words:
        # Each entry is split into its first element and the remainder,
        # which is how both add() methods are called.
        trie.add(word[0], word[1:])
        punamusta.add(word[0], word[1:])
        done += 1
        # Compute the percentage from the running total instead of resetting
        # a per-percent counter: the old reset-and-compare scheme drifted and
        # never reached 100 % (and was badly off for fewer than 100 words).
        if total > 0:
            percent = min(100, int(done * 100 // total))
            if percent > reported:
                reported = percent
                print("%d %% of words added" % reported)

    # NOTE(review): only Grimm's Fairy Tales is loaded above, yet the message
    # also mentions Tom Sawyer -- confirm which texts are intended.
    print("Searching for words in Grimm's Fairy tales and The Adventures of Tom Sawyer")

    word = raw_input("Find a word (or its beginning) in the text: ").rstrip('\n')

    # Same query against both structures; the two output lines should agree.
    positions, count, linecount = trie.find(word)
    print("Found %s instances ( %s lines) @ %s" % (count, linecount, positions))
    RBpositions, RBcount, RBlinecount = punamusta.find(word)
    print("Found %s instances ( %s lines) @ %s" % (RBcount, RBlinecount, RBpositions))
Ejemplo n.º 2
0
        print string + '%20.3f ms' %  (sum(runtimes) / repeats)


# Benchmark driver: opens the result files, builds the structures, loads the
# query word list, then times trie insertions and lookups for growing sizes.
if __name__ == "__main__":
    print "Hello World"

    # One result file per (structure, measurement) pair. In the statements
    # visible here only trieAddFile and trieFindWordCountFile are written to.
    trieAddFile = openFile('trieAddToEmpty', 'w')
    punamustaAddFile = openFile('punamustaAddToEmpty', 'w')
    trieFindLengthFile = openFile('trieFindWordLength', 'w')
    punamustaFindLengthFile = openFile('punamustaFindWordLength', 'w')
    trieFindWordCountFile = openFile('trieFindWordCount', 'w')
    # NOTE(review): 'punamustaFindWordCoun' looks like a typo for
    # 'punamustaFindWordCount' (compare the trie file name above) -- confirm
    # before renaming, since other tooling may read the current name.
    punamustaFindWordCountFile = openFile('punamustaFindWordCoun', 'w')

    lukija = WordReader(["../Material/Grimm's Fairy Tales.txt"])
    lukija.readWords()
    punamusta = RedBlack(lukija)
    trie = Trie(lukija)

    # NOTE(review): the file object passed to pickle.load is never closed
    # explicitly; consider a `with` block.
    words = pickle.load( open( "randomWordList", "rb" ) ) # indexed by word length (per original note); words[7] is queried below

    repeats = 100;
    runtimes = []
    # Sizes 2**2 .. 2**16. For each size: time the add benchmark, print and
    # log the result, then time a lookup run and keep it for the second loop.
    for i in range(2,17):
        runtime = addWordsToEmptyList(trie, 2**i, repeats, '%25s\t%10d\t' % ('trie:add', 2**i), False)
        print '%25s\t%10d\t\t%14.3f ms' % ('trie:addToEmpty', 2**i, runtime)
        trieAddFile.write('%10d\t%8.3f\n' % (2**i, runtime))
        # Presumably the trie now holds the 2**i words just added -- TODO confirm
        # against addWordsToEmptyList.
        runtimes.append(findWords(trie, words[7], repeats, printout=False))
    # runtimes[index] was measured at size 2**(index+2), because the loop
    # above started at i == 2.
    for index, runtime in enumerate(runtimes):
        print '%25s\t%10d\t\t%14.3f ms' % ('trie:findWordCount', 2**(index+2), runtime)
        trieFindWordCountFile.write('%10d\t%14.3f\n' % (2**(index+2), runtime))