Example #1
0
    def tfidf_basic(self):
        """Compute and print the term-count and TF-IDF matrices for ``self.corpus``.

        The corpus is first turned into a raw term-count matrix with
        ``CountVectorizer``; that matrix is then re-weighted with
        ``TfidfTransformer``. Intermediate results are printed to stdout.

        NOTE(review): ``self.corpus`` is presumably an iterable of text
        documents (one string per document) -- confirm against the class
        constructor.

        Returns:
            A two-element list ``[words, tfidf_weight]`` where ``words`` is
            the list of vocabulary terms (one per matrix column) and
            ``tfidf_weight`` is the dense TF-IDF array with one row per
            document and one column per term.
        """
        vectorizer = CountVectorizer()
        transformer = TfidfTransformer()

        # Sparse matrix of raw term counts: one row per document, one column
        # per vocabulary term.
        X = vectorizer.fit_transform(self.corpus)
        print("\nTransform Matric: ")
        print(X.toarray())
        print("\nTransform Matric shape: ")
        print(X.shape)

        # Vocabulary across all documents. CountVectorizer has no
        # ``get_features`` method: scikit-learn >= 1.0 exposes
        # ``get_feature_names_out`` and older releases expose
        # ``get_feature_names`` (removed in 1.2), so support both.
        get_names = getattr(vectorizer, "get_feature_names_out", None)
        if get_names is None:
            get_names = vectorizer.get_feature_names
        # Normalise to a plain list of str so the return type does not
        # depend on the installed scikit-learn version.
        words = [str(name) for name in get_names()]
        print("\nAll feature(keywords)所有文章的字: ")
        print(words)

        # Matrix with one row per document and one column per token
        # (e.g. word) occurring in the corpus.
        # tfidf_matrix: [n_samples, n_features_new]
        tfidf_matrix = transformer.fit_transform(X)

        # Dense TF-IDF matrix whose columns line up with ``words``.
        tfidf_weight = tfidf_matrix.toarray()
        print("\ntf-idf Matric: ")
        print(tfidf_weight)
        print(tfidf_weight.shape)  # (n_documents, n_terms)
        return [words, tfidf_weight]