-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
54 lines (42 loc) · 1.41 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
from Document import Document
from DocumentStream import DocumentStream
from Sentence import Sentence
from DocumentStreamError import DocumentStreamError
from CommandLinePlotter import CommandLinePlotter
from BasicStats import BasicStats
import time
from Heap import *
def main():
    """Time two top-N word-frequency strategies and write a report file.

    Prompts on stdin for a filename, builds a ``Document`` from it, and
    turns the document's ``wordlist`` into a frequency map with
    ``BasicStats.createFreqMap``. ``BasicStats.topN`` and
    ``BasicStats.HTopNBottomN`` are then each timed once for N = 50.

    The two elapsed times (in seconds), followed by the "Max" and "Min"
    listings taken from the ``HTopNBottomN`` result, are written as UTF-8
    text to ``Top50TIMEFILE-<filename>``. The values of the ``topN``
    result are also passed to ``CommandLinePlotter.Scatter2D``.
    """
    filename = input('Please input a filename: ')
    fileA = Document(filename)
    # The return value is not used; the call is kept because ``wordlist``
    # is read immediately afterwards (presumably generateWhole() fills it
    # in - confirm against Document).
    fileA.generateWhole()
    worddict = BasicStats.createFreqMap(fileA.wordlist)
    n = 50

    # perf_counter() is monotonic and high-resolution, so short intervals
    # are not distorted by system clock adjustments.
    start = time.perf_counter()
    topdict = BasicStats.topN(worddict, n)
    dict_elapsed = time.perf_counter() - start

    start = time.perf_counter()
    k = BasicStats.HTopNBottomN(worddict, n)
    heap_elapsed = time.perf_counter() - start

    # Collect the report as pieces and join once at the end.
    report = [
        'The time required to do top {} using dictionary: \n'.format(n),
        str(dict_elapsed) + '\n',
        'The time required to do top {} using heap: \n'.format(n),
        str(heap_elapsed) + '\n',
        '\nMax {}\n'.format(n),
    ]
    # Entries are read from index 1 through n, so index 0 of each sequence
    # is skipped (presumably the heap arrays are 1-based - confirm against
    # Heap / BasicStats.HTopNBottomN).
    for i in range(1, n + 1):
        report.append(str(k[1][i]) + ' ' + str(k[0][i]) + '\n')
    report.append('\nMin {}\n'.format(n))
    for i in range(1, n + 1):
        report.append(str(k[3][i]) + ' ' + str(k[2][i]) + '\n')

    # Only the values of the topN result are plotted.
    frequencies = [topdict[word] for word in topdict]
    CommandLinePlotter.Scatter2D(frequencies)

    # The context manager closes the file even if the write fails.
    report_name = 'Top{}TIMEFILE'.format(n) + '-' + filename
    with open(report_name, 'wt', encoding='UTF-8') as timefile:
        timefile.write(''.join(report))
# Run the benchmark only when this file is executed as a script, so that
# importing the module does not prompt for input.
if __name__ == '__main__':
    main()