def TFIDF(df, word_list):
    """Build the TF-IDF matrix for a collection of documents.

    :param df: table-like object accepted by ``np.array`` (e.g. a DataFrame)
        whose first column holds whitespace-separated document text. When
        ``None``, the default file ``classification_simple_test.csv`` is
        loaded through ``read_file``.
    :param word_list: vocabulary; column ``j`` of the result corresponds to
        ``word_list[j]``.
    :return: array of shape ``(number of documents, len(word_list))`` whose
        entry ``[i][j]`` is the TF of ``word_list[j]`` in document ``i``
        multiplied by the IDF of ``word_list[j]``.
    """
    # Honor the caller's data; only fall back to the bundled CSV when no
    # data is supplied, instead of always discarding the argument.
    if df is None:
        df = read_file("classification_simple_test.csv", [0])

    # Each row's first cell is one document; split it into tokens.
    data = [row[0].split() for row in np.array(df).tolist()]

    # IDF is computed once over the whole corpus and indexed in the same
    # order as word_list.
    IDF_list = T.IDF(data, word_list)

    # np.empty is safe here: every cell is assigned in the loops below.
    TFIDF_met = np.empty(shape=(len(data), len(word_list)))
    for i, record in enumerate(data):
        # Per-document term frequencies; words missing from the document
        # contribute 0 through .get().
        TF_dic = T.TF(record)
        for index, word in enumerate(word_list):
            TFIDF_met[i][index] = TF_dic.get(word, 0) * IDF_list[index]
    return TFIDF_met
예제 #2
0
    def __init__(self, span, d, iterations, index):
        """Store the hyperparameters and extract TF-IDF keywords for one document.

        :param span: stored as ``self.kspan``; length of the co-occurrence window.
        :param d: stored as ``self.d`` (presumably a damping factor -- TODO confirm
            against the code that uses it).
        :param iterations: stored as ``self.iteration``.
        :param index: position in the loaded corpus of the document whose
            TF-IDF vector and keywords are computed.
        """
        self.kspan = span  # length of the co-occurrence window
        self.d = d
        self.iteration = iterations

        # ---- TF-IDF initialization ---- #
        docs = dataset.read_sogou()
        # Replace each raw document in place with its word-segmented form.
        for i, doc in enumerate(docs):
            docs[i] = TFIDF.cut_by_words(doc)
        data, self.VSM = TFIDF.create_VSM(docs)
        print("Create VSM Over !")
        # Build the IDF over the whole corpus
        self.idf = TFIDF.IDF(data, self.VSM)
        # TF-IDF is computed only for the document selected by `index`.
        self.tfidf = TFIDF.TF_IDF(data[index], self.VSM, self.idf)
        self.keywords_doc = TFIDF.extract_keywords_tfidf(self.tfidf, self.VSM)

        print("Got the keywors from the docs !")