Ejemplo n.º 1
0
def __gen_words_dict_nyt():
    """Generate the proper-words dictionary for the NYT data directory.

    Builds the word-count input path and the dictionary output path inside
    the hard-coded ``nyt-less-docs/business`` directory, then passes them,
    along with the shared stop-word list, to
    ``textutils.gen_proper_words_dict_with_cnts``.
    """
    # Alternative data set location: 'e:/data/emadr/nyt-world-full/processed'
    nyt_dir = 'e:/data/emadr/nyt-less-docs/business'
    stopwords_path = 'e:/data/common-res/stopwords.txt'

    counts_path = os.path.join(nyt_dir, 'word-cnts-lc.txt')
    output_path = os.path.join(nyt_dir, 'words-dict-proper.txt')

    # NOTE(review): 2 and 20 are passed through unchanged; their meaning is
    # defined by textutils.gen_proper_words_dict_with_cnts -- confirm there.
    textutils.gen_proper_words_dict_with_cnts(
        counts_path, stopwords_path, 2, 20, output_path)
Ejemplo n.º 2
0
def gen_words_dict_nyt():
    """Generate the proper-words dictionary for the NYT world corpus.

    Passes the hard-coded word-count file, stop-word file and destination
    file to ``textutils.gen_proper_words_dict_with_cnts``.
    """
    stopwords_path = 'e:/common_res/stopwords.txt'
    counts_path = 'e:/dc/nyt-world-full/processed/word_cnts_lc.txt'
    output_path = 'e:/dc/nyt-world-full/processed/words_dict_proper.txt'

    # NOTE(review): 2 and 20 are passed through unchanged; their meaning is
    # defined by textutils.gen_proper_words_dict_with_cnts -- confirm there.
    textutils.gen_proper_words_dict_with_cnts(
        counts_path, stopwords_path, 2, 20, output_path)
Ejemplo n.º 3
0
def gen_words_dict_wiki():
    """Generate the proper-words dictionary for the wiki corpus.

    Passes the hard-coded wiki word-count file, stop-word file and
    destination file to ``textutils.gen_proper_words_dict_with_cnts``.
    """
    stopwords_path = 'e:/common_res/stopwords.txt'
    counts_path = 'e:/dc/el/wiki/wiki_word_cnts_lc.txt'
    output_path = 'e:/dc/el/wiki/words_dict_proper.txt'

    # NOTE(review): 4 and 20 are passed through unchanged; their meaning is
    # defined by textutils.gen_proper_words_dict_with_cnts -- confirm there.
    textutils.gen_proper_words_dict_with_cnts(
        counts_path, stopwords_path, 4, 20, output_path)
Ejemplo n.º 4
0
def gen_words_dict_wiki():
    """Generate the proper-words dictionary for the wiki corpus.

    Passes the hard-coded wiki word-count file, stop-word file and
    destination file to ``textutils.gen_proper_words_dict_with_cnts``.
    """
    stopwords_path = 'e:/common_res/stopwords.txt'
    counts_path = 'e:/dc/el/wiki/wiki_word_cnts_lc.txt'
    output_path = 'e:/dc/el/wiki/words_dict_proper.txt'

    # NOTE(review): 4 and 20 are passed through unchanged; their meaning is
    # defined by textutils.gen_proper_words_dict_with_cnts -- confirm there.
    textutils.gen_proper_words_dict_with_cnts(
        counts_path, stopwords_path, 4, 20, output_path)