Example #1
0
    def get_tf(self, text, ones=False):
        """Return per-term frequencies for the known terms found in *text*.

        The text is split with ``word_tokenize`` and only tokens that are
        present in ``self.df_dict`` are counted.

        Args:
            text: The raw text to analyse.
            ones: Currently unused. It is kept so existing callers that
                pass it keep working; no filtering by count is applied.

        Returns:
            A list of ``(term, count, share)`` tuples ordered from the most
            to the least frequent term, where ``share`` is ``count`` divided
            by the total number of counted tokens, rounded to 5 decimals.
            The list is empty when no token of *text* is in ``self.df_dict``.
        """
        # Keep only the tokens that the document-frequency table knows about.
        known_tokens = []
        for token in word_tokenize(text):
            if token in self.df_dict:
                known_tokens.append(token)

        counts = FreqDist(known_tokens)

        # Total number of counted tokens; the denominator for every share.
        total = np.array(list(counts.values())).sum()

        # Stable ascending sort followed by a reversal: highest counts come
        # first, and terms with equal counts appear in the reverse of their
        # ascending-sort order.
        ranked = sorted(counts.items(), key=operator.itemgetter(1))[::-1]

        return [
            (term, count, round(count / total, 5))
            for term, count in ranked
        ]