def __init__(self):
    """Load every resource through ``preload`` and initialise NLPIR.

    Each ``preload.load_*`` call is given a module-level path constant and
    its result is stored on the instance. Afterwards ``pynlpir.open()`` is
    called and the user dictionary named by ``USER_DICT`` is imported.
    """
    # Sign / number / character tables.
    self.sign_dict = preload.load_sign_dict(SIGN_DICT_PATH, LOCATION_PATH)
    num_tables = preload.load_extend_dict(NUM_WORD_PATH)
    self.num_dict, self.num_syn_dict = num_tables
    self.char_dict = preload.load_sign_dict(CHAR_WORD_PATH)

    # Synonym, segmentation and extension tables.
    self.syn_dict = preload.load_syn_dict(SYNONYM_DICT_PATH)
    self.seg_dict = preload.load_seg_dict(SEG_DICT_PATH)
    extend_tables = preload.load_extend_dict(EXTEND_DICT_PATH)
    self.extend_dict, self.extend_syn_dict = extend_tables
    # Must come after seg_dict: the loader receives it as an argument.
    self.seg_syn_dict = preload.load_seg_syn_dict(SEG_SYN_PATH,
                                                  self.seg_dict)

    # Word filters.
    self.stop_words = preload.load_stop_words(STOP_WORDS_PATH)
    self.unfilter_words = preload.load_unfilter_words(UNFILTER_WORDS_PATH)

    # Remaining lookup resources.
    self.emotion_dict = preload.load_emotion_dict(EMOTION_DICT)
    self.visual_info = preload.load_visualization_info(VISUALIZATION_INFO)
    self.polysemy_info = preload.load_polysemy_info(POLYSEMY_INFO)
    self.poetry_info = preload.load_poetry_info(POETRY_DICT)

    # Left disabled, as in the original source:
    # self.wsd_crf = crf()
    self.sentence = ''

    pynlpir.open()
    # Import the user-defined dictionary. The path is passed as UTF-8 bytes;
    # the second argument (1) is presumably the overwrite flag -- confirm
    # against the installed pynlpir version.
    user_dict_path = USER_DICT.encode('utf-8')
    nlpir.ImportUserDict(user_dict_path, 1)
jieba.load_userdict(dict_file) FILE_NAME_JIEBA = FILE_NAME + "_JIEBA" FILE_NAME_JIEBA_CNT = FILE_NAME_JIEBA + "_CNT" FILE_NAME_JIEBA_LM = FILE_NAME_JIEBA + "_LM" FILE_NAME_JIEBA_PK = FILE_NAME_JIEBA + "_PK" FILE_NAME_JIEBA_PINYIN = FILE_NAME_JIEBA + "_PINYIN" elif USE_SEGMENT == "ICTCLAS": import pynlpir from pynlpir import nlpir pynlpir.open() dict_file = b'./data/MY_DICT.dat' nlpir.ImportUserDict(dict_file) dict_file = b'./data/TYCCL.dat_DICT' nlpir.ImportUserDict(dict_file) FILE_NAME_JIEBA = FILE_NAME + "_ICTCLAS" FILE_NAME_JIEBA_CNT = FILE_NAME_JIEBA + "_CNT" FILE_NAME_JIEBA_LM = FILE_NAME_JIEBA + "_LM" FILE_NAME_JIEBA_PK = FILE_NAME_JIEBA + "_PK" FILE_NAME_JIEBA_PINYIN = FILE_NAME_JIEBA + "_PINYIN" tyccl_list_name = './data/TYCCL.dat_LIST' tyccl_mean_name = './data/TYCCL.dat_MEAN' JIEBA_HZ = {} if not os.path.exists(FILE_NAME_JIEBA_PK): print("请计算产生词频数据!")
def import_userdict(self, dict_path):
    """Import the user dictionary at ``dict_path`` into NLPIR.

    ``dict_path`` is forwarded unchanged to ``nlpir.ImportUserDict``.
    Whatever that call returns is discarded; this method returns ``None``.
    """
    load_user_dict = nlpir.ImportUserDict
    load_user_dict(dict_path)
def import_user_dict(path):
    """Import a user dictionary into NLPIR, save it, and print progress.

    ``path`` is joined onto ``TEXT_RESOURCE`` and handed to
    ``nlpir.ImportUserDict``; ``nlpir.SaveTheUsrDic`` is then called.
    A timestamp is printed before and after, together with the value
    returned by the import call. Returns ``None``.
    """
    timestamp_format = '%Y-%m-%d %H:%M:%S'

    # Single-argument print(...) calls parse on both Python 2 and Python 3.
    # The two spaces after each colon reproduce what the old
    # ``print "label: ", value`` statements emitted (trailing space in the
    # label plus the separator added by the comma).
    print("Before Import User Dict:  %s" % time.strftime(timestamp_format))

    n = nlpir.ImportUserDict(os.path.join(TEXT_RESOURCE, path))
    nlpir.SaveTheUsrDic()

    # Wrap in a 1-tuple so the formatting is safe whatever type n has.
    print("Success Import:  %s" % (n,))
    print("Done:  %s" % time.strftime(timestamp_format))