Beispiel #1
0
 def __init__(self):
     """Load all dictionary resources and start the NLPIR segmenter.

     Every table is read through the ``preload`` helpers from a path
     constant defined elsewhere in the module.  Once the tables are in
     place, ``self.sentence`` is reset to an empty string, pynlpir is
     opened and the user dictionary is imported.
     """
     # --- basic lookup tables -------------------------------------------
     self.sign_dict = preload.load_sign_dict(SIGN_DICT_PATH, LOCATION_PATH)

     # load_extend_dict hands back two values: the table and its synonyms.
     num_tables = preload.load_extend_dict(NUM_WORD_PATH)
     self.num_dict, self.num_syn_dict = num_tables

     self.char_dict = preload.load_sign_dict(CHAR_WORD_PATH)
     self.syn_dict = preload.load_syn_dict(SYNONYM_DICT_PATH)
     self.seg_dict = preload.load_seg_dict(SEG_DICT_PATH)

     extend_tables = preload.load_extend_dict(EXTEND_DICT_PATH)
     self.extend_dict, self.extend_syn_dict = extend_tables

     # Needs self.seg_dict, so it has to come after the seg_dict load above.
     self.seg_syn_dict = preload.load_seg_syn_dict(
         SEG_SYN_PATH, self.seg_dict)

     # --- word lists and auxiliary info ---------------------------------
     self.stop_words = preload.load_stop_words(STOP_WORDS_PATH)
     self.unfilter_words = preload.load_unfilter_words(UNFILTER_WORDS_PATH)
     self.emotion_dict = preload.load_emotion_dict(EMOTION_DICT)
     self.visual_info = preload.load_visualization_info(VISUALIZATION_INFO)
     self.polysemy_info = preload.load_polysemy_info(POLYSEMY_INFO)
     self.poetry_info = preload.load_poetry_info(POETRY_DICT)

     # NOTE: the `wsd_crf` attribute is not initialised here; its `crf()`
     # constructor call was disabled in the original code.
     self.sentence = ''

     # --- segmenter -----------------------------------------------------
     pynlpir.open()
     # Import the user-defined dictionary; the path is passed as UTF-8 bytes.
     user_dict_bytes = USER_DICT.encode('utf-8')
     nlpir.ImportUserDict(user_dict_bytes, 1)
    # Tail of the preceding segmenter branch (its `if` header lies outside
    # this excerpt): register the user dictionary file with jieba.
    jieba.load_userdict(dict_file)

    # Output file names for this branch, all derived from FILE_NAME.
    FILE_NAME_JIEBA = FILE_NAME + "_JIEBA"
    FILE_NAME_JIEBA_CNT = FILE_NAME_JIEBA + "_CNT"
    FILE_NAME_JIEBA_LM = FILE_NAME_JIEBA + "_LM"
    FILE_NAME_JIEBA_PK = FILE_NAME_JIEBA + "_PK"
    FILE_NAME_JIEBA_PINYIN = FILE_NAME_JIEBA + "_PINYIN"
elif USE_SEGMENT == "ICTCLAS":
    # ICTCLAS branch: pynlpir is imported only when this backend is selected.

    import pynlpir
    from pynlpir import nlpir

    pynlpir.open()

    # Import two user dictionaries in turn; paths are given as bytes literals.
    dict_file = b'./data/MY_DICT.dat'
    nlpir.ImportUserDict(dict_file)
    dict_file = b'./data/TYCCL.dat_DICT'
    nlpir.ImportUserDict(dict_file)

    # The same FILE_NAME_JIEBA* variable names are reused in this branch,
    # but the values carry an "_ICTCLAS" suffix instead of "_JIEBA".
    FILE_NAME_JIEBA = FILE_NAME + "_ICTCLAS"
    FILE_NAME_JIEBA_CNT = FILE_NAME_JIEBA + "_CNT"
    FILE_NAME_JIEBA_LM = FILE_NAME_JIEBA + "_LM"
    FILE_NAME_JIEBA_PK = FILE_NAME_JIEBA + "_PK"
    FILE_NAME_JIEBA_PINYIN = FILE_NAME_JIEBA + "_PINYIN"

# Paths to two TYCCL data files.
tyccl_list_name = './data/TYCCL.dat_LIST'
tyccl_mean_name = './data/TYCCL.dat_MEAN'

# Starts out empty. NOTE(review): presumably populated further down the
# file, which is not visible in this excerpt -- confirm.
JIEBA_HZ = {}
# If the "_PK" file for the selected segmenter does not exist, print a
# notice. The message reads: "Please compute and generate the
# word-frequency data!"
if not os.path.exists(FILE_NAME_JIEBA_PK):
    print("请计算产生词频数据!")
Beispiel #3
0
 def import_userdict(self, dict_path):
     """Import the user dictionary at ``dict_path`` into NLPIR.

     Thin wrapper: ``dict_path`` is forwarded unchanged to
     ``nlpir.ImportUserDict`` and that call's return value is discarded,
     so this method always returns ``None``.
     """
     nlpir.ImportUserDict(dict_path)
 def import_user_dict(path):
     """Import a user dictionary into NLPIR and save it.

     ``path`` is joined onto ``TEXT_RESOURCE``, the resulting file is handed
     to ``nlpir.ImportUserDict`` and the user dictionary is then saved with
     ``nlpir.SaveTheUsrDic``.  Timestamped progress lines are printed before
     and after the import.

     Args:
         path: Dictionary file path, relative to ``TEXT_RESOURCE``.

     Returns:
         None.  The value returned by ``nlpir.ImportUserDict`` is only
         printed.
     """
     stamp_format = '%Y-%m-%d %H:%M:%S'

     # Single-argument print(...) calls parse on both Python 2 and Python 3
     # and emit exactly the text the old `print "label: ", value` statements
     # produced (label, the separating space, then the value).
     print("Before Import User Dict:  %s" % time.strftime(stamp_format))

     dict_file = os.path.join(TEXT_RESOURCE, path)
     if not isinstance(dict_file, bytes):
         # The ctypes-wrapped NLPIR call takes a C `char *`; on Python 3 that
         # requires bytes, so text paths are encoded as UTF-8 first.
         dict_file = dict_file.encode('utf-8')

     n = nlpir.ImportUserDict(dict_file)
     nlpir.SaveTheUsrDic()

     print("Success Import:  %s" % (n,))
     print("Done:  %s" % time.strftime(stamp_format))