# Build a stemmed term-frequency table over the Cranfield documents,
# dropping stop words before stemming, then report the vocabulary size and
# produce the terms ordered by descending frequency.
import operator  # required by the final sort; previously missing here

from classes.PorterStemmer import PorterStemmer
from classes.StopWordEliminater import StopWordEliminater
from classes.Main import Main

main = Main()
p = PorterStemmer()
e = StopWordEliminater()

freq = {}  # stemmed word -> number of occurrences over all documents read
size_of_corpus = 0  # total number of stemmed tokens counted

# NOTE(review): range(1, 1400) visits documents 1..1399 only; if the
# collection contains a document numbered 1400 it is skipped -- confirm.
for count in range(1, 1400):
    # main.get_number presumably maps the counter to the file-name suffix
    # (TODO confirm against classes.Main).
    # The context manager closes the file even if read() raises.
    with open('cranfieldDocs/cranfield' + main.get_number(count), 'r') as f:
        message = f.read()

    text = main.tag_remove(message)
    items = main.tokenize(text)
    items = e.eliminate(items)

    for item in items:
        # Stem the whole token: the stemmer is given the first and last index.
        word = p.stem(item, 0, len(item) - 1)
        size_of_corpus += 1
        freq[word] = freq.get(word, 0) + 1

vocab_size = len(freq)
print("size of vocab : ", vocab_size)

# List of (word, count) pairs, most frequent first.
sorted_freq = sorted(freq.items(), key=operator.itemgetter(1), reverse=True)
# Build a stemmed term-frequency table over the Cranfield documents (no
# stop-word removal in this variant), then report the vocabulary size and
# produce the terms ordered by descending frequency.
import operator

from classes.Main import Main
from classes.PorterStemmer import PorterStemmer

main = Main()
p = PorterStemmer()

freq = {}  # stemmed word -> number of occurrences over all documents read
size_of_corpus = 0  # total number of stemmed tokens counted

# NOTE(review): range(1, 1400) visits documents 1..1399 only; if the
# collection contains a document numbered 1400 it is skipped -- confirm.
for count in range(1, 1400):
    # get_number is a method of Main, not a free function; calling it
    # unqualified raised NameError.
    # The context manager closes the file even if read() raises.
    with open('cranfieldDocs/cranfield' + main.get_number(count), 'r') as f:
        message = f.read()

    text = main.tag_remove(message)
    # Was assigned to the misspelled name "itmes", leaving "items" undefined
    # (or stale) for the loop below.
    items = main.tokenize(text)

    for item in items:
        # Stem the whole token: the stemmer is given the first and last index.
        word = p.stem(item, 0, len(item) - 1)
        size_of_corpus += 1
        freq[word] = freq.get(word, 0) + 1

vocab_size = len(freq)
print("size of vocab : ", vocab_size)

# List of (word, count) pairs, most frequent first.
sorted_freq = sorted(freq.items(), key=operator.itemgetter(1), reverse=True)
# print(sorted_freq)