def cluster_word():
    """Cluster the most common words by their word vectors and save a CSV.

    Reads the requested number of clusters from ``Entry_N_Clusters``, takes
    the most common words of ``df_rank_i['content_cut']`` (top 5000, minimum
    frequency 1), fetches their vectors from the saved ``word2vec_model.pkl``,
    clusters the vectors with ``modeling.get_cluster`` (KMeans,
    ``random_state=9``) and writes the word / vector / label columns to
    ``word_df.csv`` under ``results_path``.

    Feedback is given through message boxes. The function returns ``None``
    in every case; it returns early without clustering when the entry is
    empty, is not an integer, or is smaller than 1.
    """
    prompt = '请输入词汇聚类的类别数!'

    raw_n_clusters = Entry_N_Clusters.get()
    if raw_n_clusters == '':
        messagebox.showinfo('Message', prompt)
        return

    # A non-numeric entry would otherwise raise ValueError out of the GUI
    # callback without any feedback to the user, so re-show the prompt.
    try:
        n_clusters = int(raw_n_clusters)
    except ValueError:
        messagebox.showinfo('Message', prompt)
        return

    # Zero or negative cluster counts are meaningless for clustering.
    if n_clusters < 1:
        messagebox.showinfo('Message', prompt)
        return

    top_words_list = counter.get_most_common_words(
        df_rank_i['content_cut'], top_n=5000, min_frequency=1)
    model = news_pandas.load_element(
        os.path.join(models_path, 'word2vec_model.pkl'))
    word_list, wordvec_list = modeling.get_word_and_wordvec(
        model, top_words_list)

    kmeans = modeling.get_cluster(
        wordvec_list,
        cluster='KMeans',
        cluster_args={'n_clusters': n_clusters, 'random_state': 9},
    )

    # Build the frame column by column, in the same order as before, so the
    # saved CSV keeps its layout.
    word_df = pd.DataFrame()
    word_df['word'] = word_list
    word_df['wordvec'] = wordvec_list
    word_df['word_label'] = kmeans.labels_
    news_pandas.save_news(word_df, os.path.join(results_path, 'word_df.csv'))

    messagebox.showinfo('Message', '词汇聚类完成!')
def wordsimilar():
    """Look up the 100 words most similar to the entered word and show them.

    Reads the query word from ``Entry_Word``, loads the saved
    ``word2vec_model.pkl``, asks it for the 100 most similar words, writes
    them (one ``(word, score)`` tuple per line) to ``similar_words.txt``
    under ``texts_path`` and opens that file with ``editor``.

    Feedback is given through message boxes. The function returns ``None``
    in every case; it returns early when the entry is empty or the word is
    not in the model's vocabulary.
    """
    word = Entry_Word.get()
    # Validate the input before loading the model so an empty entry does not
    # pay for reading the model from disk.
    if word == '':
        messagebox.showinfo('Message', '请输入词语!')
        return

    model = news_pandas.load_element(
        os.path.join(models_path, 'word2vec_model.pkl'))

    # Probe the vocabulary: an unknown word is signalled with KeyError.
    # Catching only KeyError keeps unrelated failures (and KeyboardInterrupt /
    # SystemExit) from being misreported as "word not in vocabulary".
    try:
        model.wv.get_vector(word)
    except KeyError:
        messagebox.showinfo('Message', '词库中没有此词语!')
        return

    similar_words = model.wv.most_similar(word, topn=100)
    # Drop the enclosing list brackets from the repr, then break the line
    # after every tuple so each pair lands on its own line.
    similar_words = str(similar_words)[1:-1]
    similar_words = re.sub(r'\), \(', '),\n(', similar_words)

    filename = os.path.join(texts_path, 'similar_words.txt')
    news_pandas.save_text(similar_words, filename)
    editor(filename)