Esempio n. 1
0
    def generate(self):
        '''
        Plot the frequencies of the top words of the entered file.

        Reads self.entered_filename and self.entered_number, loads the file
        through Document, builds a frequency map with
        BasicStats.createFreqMap, and keeps the entries returned by
        BasicStats.topN. The words and their frequencies are printed as a
        two-element list, and the frequencies are drawn against their
        position with MatPlotPloter.scatterPlot.
        '''
        print(self.entered_filename)
        document = Document(self.entered_filename)
        document.generateWhole()

        frequency_map = BasicStats.createFreqMap(document.wordlist)
        top_entries = BasicStats.topN(frequency_map, self.entered_number)

        # Two parallel lists: words[i] occurs counts[i] times.
        words = list(top_entries)
        counts = [top_entries[word] for word in top_entries]
        print([words, counts])

        # Only the frequencies are plotted; the x axis is just the position.
        plotter = MatPlotPloter()
        plotter.scatterPlot(range(len(counts)), counts)
Esempio n. 2
0
def main():
    '''
    Time two top-N implementations on a document and save a report.

    Prompts for a filename, loads it through Document, and builds a word
    frequency map with BasicStats.createFreqMap. BasicStats.topN and
    BasicStats.HTopNBottomN are then each timed once for n = 50. The two
    timings, followed by the "Max 50" and "Min 50" entries read from the
    HTopNBottomN result, are written to 'Top50TIMEFILE-<filename>' in the
    current working directory. The frequencies returned by topN are passed
    to CommandLinePlotter.Scatter2D.
    '''
    filename = input('Please input a filename:    ')
    fileA = Document(filename)
    fileA.generateWhole()
    worddict = BasicStats.createFreqMap(fileA.wordlist)
    n = 50

    # perf_counter is a monotonic, high-resolution clock meant for measuring
    # short durations; time.time() can be coarse and may jump.
    start = time.perf_counter()
    topdict = BasicStats.topN(worddict, n)
    dict_elapsed = time.perf_counter() - start

    start = time.perf_counter()
    k = BasicStats.HTopNBottomN(worddict, n)
    heap_elapsed = time.perf_counter() - start

    # Collect the report pieces and join once instead of repeated `+=`.
    report = [
        'The time required to do top 50 using dictionary: \n',
        str(dict_elapsed) + '\n',
        'The time required to do top 50 using heap: \n',
        str(heap_elapsed) + '\n',
        '\nMax 50\n',
    ]
    # Entries are read at indices 1..n; presumably k holds four parallel
    # 1-based sequences (k[0]/k[1] for the max side, k[2]/k[3] for the min
    # side) -- TODO confirm against BasicStats.HTopNBottomN.
    for i in range(1, n + 1):
        report.append(str(k[1][i]) + ' ' + str(k[0][i]) + '\n')

    report.append('\nMin 50\n')
    for i in range(1, n + 1):
        report.append(str(k[3][i]) + ' ' + str(k[2][i]) + '\n')

    # Only the frequencies from the topN result are plotted.
    frequencies = [topdict[word] for word in topdict]
    CommandLinePlotter.Scatter2D(frequencies)

    # The context manager closes the file even if the write fails, and the
    # whole report goes out in a single write rather than one per character.
    with open('Top50TIMEFILE' + '-' + filename, 'wt',
              encoding='UTF-8') as timefile:
        timefile.write(''.join(report))