def get_tf(self, text, ones=False):
    """Return per-term counts and relative frequencies for ``text``.

    ``text`` is tokenized with ``word_tokenize``; only tokens that are
    members of ``self.df_dict`` are kept and tallied with ``FreqDist``.

    Args:
        text: Text to tokenize and count.
        ones: Accepted for interface compatibility but never read by this
            method; passing ``True`` or ``False`` gives the same result.

    Returns:
        A list of ``(term, count, share)`` tuples ordered by descending
        ``count``. ``share`` is ``count`` divided by the sum of all counts,
        rounded to 5 decimal places. Terms with equal counts appear in the
        reverse of the order in which the frequency table yields them.
        The list is empty when no token of ``text`` is in ``self.df_dict``.
    """
    known_tokens = [tok for tok in word_tokenize(text) if tok in self.df_dict]
    counts = FreqDist(known_tokens)

    # Denominator shared by every share value: the sum of all term counts.
    total = np.array(list(counts.values())).sum()

    # Stable ascending sort, walked backwards below: highest counts come
    # first and ties keep the reversed relative order callers already see.
    ascending = sorted(counts.items(), key=operator.itemgetter(1))

    term_freq = []
    for term, count in reversed(ascending):
        term_freq.append((term, count, round(count / total, 5)))
    return term_freq