def show_hot_titles():
    """Save key sentences drawn from the ranked news titles and open the file.

    Joins every entry of ``df_rank_i['title_']`` with newlines, passes the
    joined text to ``modeling.get_key_sentences`` with ``num=200``, writes the
    result to ``hot_titles.txt`` under ``texts_path`` and hands that path to
    ``editor``.
    """
    titles = df_rank_i['title_'].tolist()
    key_sentences = modeling.get_key_sentences('\n'.join(titles), num=200)

    # One path serves both the save and the viewer call.
    output_path = os.path.join(texts_path, 'hot_titles.txt')
    news_pandas.save_text(key_sentences, output_path)
    editor(output_path)
def show_hot_words_details():
    """Save the most common words of the ranked news and open the file.

    Calls ``counter.get_most_common_words`` on ``df_rank_i['content_cut']``
    with ``top_n=5000`` and ``min_frequency=1``, writes the returned words one
    per line to ``top_words.txt`` under ``texts_path`` and opens that file
    through ``editor``.
    """
    top_words_list = counter.get_most_common_words(
        df_rank_i['content_cut'], top_n=5000, min_frequency=1)

    filename = os.path.join(texts_path, 'top_words.txt')
    news_pandas.save_text('\n'.join(top_words_list), filename)

    # The sibling callbacks (e.g. show_hot_titles, show_wordlib) open their
    # output with ``editor(filename)``. This function used to build a shell
    # command by concatenating ``editor`` with the path, which raises
    # TypeError when ``editor`` is a callable and also passed an unquoted
    # path to the shell. Use the same call as the siblings.
    # NOTE(review): assumes ``editor`` is the callable the siblings use --
    # confirm against its definition.
    editor(filename)
def show_wordlib():
    """Build a word library from the segmented document and open it.

    Loads ``document_segment.txt`` from ``texts_path``; when the file does not
    exist an info dialog is shown and nothing else happens.  Otherwise the
    text is split on whitespace, passed through
    ``counter.get_most_common_words``, reduced to entries made up only of
    ASCII digits, ASCII letters and characters in U+4E00..U+9FFF, written one
    per line to ``word_library.txt`` and opened through ``editor``.
    """
    try:
        segmented_text = news_pandas.load_text(
            os.path.join(texts_path, 'document_segment.txt'))
    except FileNotFoundError:
        messagebox.showinfo('Message', '没有分词后的文件!')
        return

    tokens = segmented_text.split()
    is_plain_word = re.compile(r'^[0-9A-Za-z\u4E00-\u9FFF]+$').match

    kept_words = []
    for candidate in counter.get_most_common_words(tokens):
        if is_plain_word(candidate):
            kept_words.append(candidate)

    library_path = os.path.join(texts_path, 'word_library.txt')
    news_pandas.save_text('\n'.join(kept_words), library_path)
    editor(library_path)
def wordsimilar():
    """Save the 100 words most similar to the entered word and open the file.

    Reads the query from ``Entry_Word``.  An empty query, or a query the
    model has no vector for, produces an info dialog and the function
    returns without writing anything.  Otherwise the result of
    ``model.wv.most_similar(word, topn=100)`` is written to
    ``similar_words.txt`` under ``texts_path``, one entry per line, and the
    file is opened through ``editor``.
    """
    word = Entry_Word.get()
    if word == '':
        messagebox.showinfo('Message', '请输入词语!')
        return

    # Load the pickled model only after the input has been validated, so an
    # empty query does not pay for deserialising it.
    model = news_pandas.load_element(
        os.path.join(models_path, 'word2vec_model.pkl'))

    try:
        model.wv.get_vector(word)
    except KeyError:
        # A missing key is the only failure that means "unknown word"; any
        # other exception is a real error and must not be reported as such.
        messagebox.showinfo('Message', '词库中没有此词语!')
        return

    similar_words = model.wv.most_similar(word, topn=100)
    # One entry per line, each followed by a comma except the last.  Joining
    # the reprs yields the same text as slicing str(list) and regex-splitting
    # on "), (", without being confused by a word that contains that sequence.
    similar_text = ',\n'.join(repr(pair) for pair in similar_words)

    filename = os.path.join(texts_path, 'similar_words.txt')
    news_pandas.save_text(similar_text, filename)
    editor(filename)
def show_cluster_n_words():
    """Save the words of the requested cluster and open the file.

    Reads a cluster number from ``Entry_Cluster_N``.  If the entry is empty
    or is not an integer, an info dialog is shown and the function returns.
    If ``word_df.csv`` is missing from ``results_path`` a different info
    dialog is shown and the function returns.  Otherwise the rows whose
    ``word_label`` equals ``n - 1`` are selected, their ``word`` values are
    written one per line to ``cluster_i_words.txt`` under ``texts_path`` and
    the file is opened through ``editor``.
    """
    try:
        # int('') raises ValueError too, so this covers both the empty entry
        # and non-numeric text, which previously raised an uncaught exception.
        n = int(Entry_Cluster_N.get())
    except ValueError:
        messagebox.showinfo('Message', '请先输入想要查看的词汇属于第几类!')
        return

    try:
        word_df = news_pandas.load_news(os.path.join(results_path, 'word_df.csv'))
    except FileNotFoundError:
        messagebox.showinfo('Message', '请先对新闻内容文本进行聚类!')
        return

    # The 'wordvec' column is not read here, so it is no longer parsed; the
    # old code ran eval() on every cell of the CSV only to discard the result.
    # Presumably the entry is 1-based while the stored labels are 0-based,
    # hence the n - 1 -- TODO confirm against the clustering code.
    in_cluster = word_df['word_label'] == n - 1
    cluster_i_words = '\n'.join(word_df.loc[in_cluster, 'word'].tolist())

    filename = os.path.join(texts_path, 'cluster_i_words.txt')
    news_pandas.save_text(cluster_i_words, filename)
    editor(filename)