コード例 #1
0
def show_content_cut():
    """Pass the path of ``document_segment.txt`` (under ``texts_path``) to ``editor``.

    If ``editor`` raises ``FileNotFoundError``, an info message box is shown
    instead and the function returns.
    """
    segment_path = os.path.join(texts_path, 'document_segment.txt')
    try:
        editor(segment_path)
    except FileNotFoundError:
        messagebox.showinfo('Message', '没有分词后的文件!')
コード例 #2
0
def show_hot_titles():
    """Extract key sentences from the titles, save them, and pass the file to ``editor``.

    The ``title_`` column of ``df_rank_i`` is joined with newlines and handed
    to ``modeling.get_key_sentences`` with ``num=200``; the result is written
    to ``hot_titles.txt`` under ``texts_path``.
    """
    titles = df_rank_i['title_'].tolist()
    key_sentences = modeling.get_key_sentences('\n'.join(titles), num=200)
    target_path = os.path.join(texts_path, 'hot_titles.txt')
    news_pandas.save_text(key_sentences, target_path)
    editor(target_path)
コード例 #3
0
def show_hot_words_details():
    """Save the most common words of ``content_cut`` and pass the file to ``editor``.

    ``counter.get_most_common_words`` is called on the ``content_cut`` column
    of ``df_rank_i`` with ``top_n=5000`` and ``min_frequency=1``; the returned
    words are written one per line to ``top_words.txt`` under ``texts_path``.
    """
    common_words = counter.get_most_common_words(
        df_rank_i['content_cut'],
        top_n=5000,
        min_frequency=1,
    )
    text = '\n'.join(common_words)
    target_path = os.path.join(texts_path, 'top_words.txt')
    news_pandas.save_text(text, target_path)
    editor(target_path)
コード例 #4
0
def show_wordlib():
    """Build a word library from the segmented document and pass it to ``editor``.

    Loads ``document_segment.txt`` from ``texts_path``; if the file is missing
    an info message box is shown and nothing else happens. Otherwise the text
    is split on whitespace, passed through ``counter.get_most_common_words``,
    filtered down to entries made only of digits, ASCII letters and characters
    in the U+4E00-U+9FFF range, and written one per line to
    ``word_library.txt``.
    """
    source_path = os.path.join(texts_path, 'document_segment.txt')
    try:
        segmented_text = news_pandas.load_text(source_path)
    except FileNotFoundError:
        messagebox.showinfo('Message', '没有分词后的文件!')
        return

    candidates = counter.get_most_common_words(segmented_text.split())
    allowed = re.compile(r'^[0-9A-Za-z\u4E00-\u9FFF]+$')
    kept = [entry for entry in candidates if allowed.match(entry)]
    library_text = '\n'.join(kept)

    target_path = os.path.join(texts_path, 'word_library.txt')
    news_pandas.save_text(library_text, target_path)
    editor(target_path)
コード例 #5
0
def wordsimilar():
    """Save the 100 words most similar to the entered word and pass the file to ``editor``.

    The word is read from ``Entry_Word``. An info message box is shown, and
    nothing is written, when the entry is empty or when the model's
    ``wv.get_vector`` raises ``KeyError`` for the word. Otherwise the result of
    ``wv.most_similar(word, topn=100)`` is written to ``similar_words.txt``
    under ``texts_path``, one tuple per line.
    """
    word = Entry_Word.get()
    # Validate the input first so the model is not loaded when there is
    # nothing to look up.
    if not word:
        messagebox.showinfo('Message', '请输入词语!')
        return

    model = news_pandas.load_element(os.path.join(models_path, 'word2vec_model.pkl'))
    try:
        model.wv.get_vector(word)
    except KeyError:
        # Only a missing key means "unknown word"; any other exception is a
        # real error and must not be reported as a vocabulary miss.
        messagebox.showinfo('Message', '词库中没有此词语!')
        return

    neighbours = model.wv.most_similar(word, topn=100)
    # Join the tuples' reprs directly instead of reformatting str(list) with a
    # regex, which would split wrongly if a word itself contained "), (".
    similar_text = ',\n'.join(repr(pair) for pair in neighbours)

    target_path = os.path.join(texts_path, 'similar_words.txt')
    news_pandas.save_text(similar_text, target_path)
    editor(target_path)
コード例 #6
0
 def show_cluster_n_words():
     """Save the words of the requested cluster and pass the file to ``editor``.

     The cluster number is read from ``Entry_Cluster_N`` and is 1-based: rows
     of ``word_df.csv`` whose ``word_label`` equals ``n - 1`` are selected.
     An info message box is shown, and nothing is written, when the entry is
     empty or not an integer, or when ``word_df.csv`` does not exist. The
     selected ``word`` values are written one per line to
     ``cluster_i_words.txt`` under ``texts_path``.
     """
     raw_n = Entry_Cluster_N.get()
     try:
         # int('') raises ValueError too, so empty and non-numeric input are
         # both answered with the same prompt instead of an uncaught error.
         n = int(raw_n)
     except ValueError:
         messagebox.showinfo('Message', '请先输入想要查看的词汇属于第几类!')
         return

     try:
         word_df = news_pandas.load_news(os.path.join(results_path, 'word_df.csv'))
     except FileNotFoundError:
         messagebox.showinfo('Message', '请先对新闻内容文本进行聚类!')
         return

     # The 'wordvec' column is not used below, so it is deliberately not
     # parsed here (the previous code ran eval() on every cell of it).
     cluster_words = word_df.loc[word_df['word_label'] == n - 1, 'word'].tolist()

     target_path = os.path.join(texts_path, 'cluster_i_words.txt')
     news_pandas.save_text('\n'.join(cluster_words), target_path)
     editor(target_path)
コード例 #7
0
def show_singlewords():
    """Pass the path of ``self_individual_character_dict.txt`` to ``editor``."""
    editor(os.path.join(extra_dict_path, 'self_individual_character_dict.txt'))
コード例 #8
0
def show_disambiguation_dict():
    """Pass the path of ``self_disambiguation_dict.json`` to ``editor``."""
    editor(os.path.join(extra_dict_path, 'self_disambiguation_dict.json'))
コード例 #9
0
def show_stopwords():
    """Pass the path of ``self_stop_words.txt`` to ``editor``."""
    editor(os.path.join(extra_dict_path, 'self_stop_words.txt'))
コード例 #10
0
def show_userdict():
    """Pass the path of ``self_userdict.txt`` to ``editor``."""
    editor(os.path.join(extra_dict_path, 'self_userdict.txt'))