def on_modified(self, event):
    """Reload dictionaries when a watched file is reported as modified.

    ``event.key`` is indexed as ``(event type, path, flag)``. Nothing
    happens unless the event type is ``'modified'`` and the flag is the
    literal ``False``. When both hold, two independent checks run:

    * if the last ``/``-separated component of the path is ``phrase`` or
      ``phrase_local``, ``collector_service.reload_dict()`` is called while
      holding the module-level ``lock``;
    * if the path contains the substring ``hanlp``, the HanLP segmentor's
      custom dictionary is reloaded.

    NOTE(review): ``event`` looks like a watchdog file-system event whose
    key is ``(event_type, src_path, is_directory)`` -- confirm against the
    observer that dispatches to this handler.

    Args:
        event: Object exposing a ``key`` sequence as described above.
    """
    key = event.key
    if key[0] != 'modified':
        return

    path = key[1]
    file_name = path.split('/')[-1]
    if key[2] is not False:
        return

    # /dict/phrase (or phrase_local) was modified: reload the dictionary.
    if file_name in ('phrase', 'phrase_local'):
        lock.acquire()
        try:
            collector_service.reload_dict()
        finally:
            # Always release, otherwise a failed reload would leave the
            # lock held and block every subsequent reload.
            lock.release()

    # hanlp.properties was modified: reload the HanLP segmentation
    # dictionary.
    if 'hanlp' in path:
        hanlp_segmentor.HanlpSegmentor().reload_custom_dictionry()
def __init__(self):
    """Load the IDF table from ``./idf_clean.txt`` and create the segmentor.

    Each non-blank line of the file is split on ``,``; the first field is
    used as the key and the second is parsed as a float value. When a key
    appears more than once, the first occurrence is kept.

    Raises:
        OSError: If ``./idf_clean.txt`` cannot be opened (the path is
            relative to the current working directory).
        IndexError: If a non-blank line has no ``,`` separator.
        ValueError: If the second field is not a valid float.
    """
    # get idf dict: key -> float value
    self.dit = {}
    with open("./idf_clean.txt", "r") as f:
        for raw_line in f:
            # Blank lines carry no entry; skipping them avoids an
            # IndexError on the missing second field.
            if not raw_line.strip():
                continue
            fields = raw_line.strip("\n").split(",")
            # setdefault keeps the first value seen for a repeated key.
            self.dit.setdefault(fields[0], float(fields[1]))
    # The attribute name (including its spelling) is kept as-is because
    # other code may read it.
    self.sgementor = hanlp_segmentor.HanlpSegmentor()
def __init__(self):
    """Resolve the CRF file paths from the config file and create the segmentor.

    Every path is the directory containing this source file concatenated
    with the corresponding option of the ``crf`` section in
    ``CONFIG_FILE``. The prediction output path is the test file path with
    ``_predict`` appended.
    """
    this_file = inspect.getsourcefile(lambda: 0)
    base_dir = os.path.dirname(os.path.abspath(this_file))

    parser = configparser.ConfigParser()
    parser.read(CONFIG_FILE)

    def crf_path(option):
        # Plain string concatenation: the configured value is appended
        # verbatim to the base directory.
        return base_dir + parser.get("crf", option)

    # Various paths
    self.model_file_path = crf_path("model")
    self.template_file_path = crf_path("template")
    self.train_file_path = crf_path("train_file")
    self.test_file_path = crf_path("test_file")
    self.predict_file_path = "{}_predict".format(self.test_file_path)

    self.segmentor = hanlp_segmentor.HanlpSegmentor()
def __init__(self):
    """Create the helper objects, read the configuration, and load dictionaries.

    The ``phrase`` option of the ``dictionary`` section is a
    ``;``-separated list; each entry is stripped of surrounding whitespace
    and appended to the directory containing this source file. The
    ``domain`` option of the ``domain`` section is stored as-is. Finally
    ``self.reload_dict()`` is called to populate ``self.phrase_dict``.
    """
    self.term_rank = term_ranking.TermRank()
    self.segmentor = hanlp_segmentor.HanlpSegmentor()
    self.ahocorasick = ac_search.ACSearch()

    this_file = inspect.getsourcefile(lambda: 0)
    base_dir = os.path.dirname(os.path.abspath(this_file))

    settings = configparser.ConfigParser()
    settings.read(CONFIG_FILE)

    self.te = abc_time.ABCYear()

    # Build the list of phrase dictionary paths from the config entries.
    phrase_paths = []
    for entry in settings.get("dictionary", "phrase").split(';'):
        phrase_paths.append(base_dir + entry.strip())
    self.phrase_dict_path = phrase_paths

    self.domain = settings.get("domain", "domain")

    self.phrase_dict = {}
    self.reload_dict()
def __init__(self):
    """Create the helper objects, load the configured word lists, and load dictionaries.

    Paths are built by appending options of the ``dictionary`` section in
    ``CONFIG_FILE`` to the directory containing this source file:

    * ``norm_useless`` is passed to ``self.ahocorasick.add_dict`` before
      ``self.ahocorasick.start()`` is called;
    * ``weight_drop`` is read line by line into the set
      ``self.weight_drop`` (only the newline character is stripped);
    * ``phrase`` is stored in ``self.phrase_dict_path``.

    Finally ``self.reload_dict()`` is called to populate
    ``self.phrase_dict``.
    """
    self.term_rank = term_ranking.TermRank()
    self.segmentor = hanlp_segmentor.HanlpSegmentor()
    self.ahocorasick = ac_search.ACSearch()

    this_file = inspect.getsourcefile(lambda: 0)
    base_dir = os.path.dirname(os.path.abspath(this_file))

    settings = configparser.ConfigParser()
    settings.read(CONFIG_FILE)

    self.ahocorasick.add_dict(base_dir + settings.get("dictionary", "norm_useless"))
    self.ahocorasick.start()

    self.te = abc_time.ABCYear()

    weight_drop_path = base_dir + settings.get("dictionary", "weight_drop")
    self.weight_drop = set()
    with open(weight_drop_path) as handle:
        self.weight_drop.update(entry.strip('\n') for entry in handle)

    self.phrase_dict_path = base_dir + settings.get("dictionary", "phrase")
    self.phrase_dict = {}
    self.reload_dict()
def __init__(self, sentences_path):
    """Create the HanLP segmentor and remember the sentences path.

    Args:
        sentences_path: Stored unchanged on ``self.sentences_path``.
    """
    segmentor = hanlp_segmentor.HanlpSegmentor()
    self.segmentor = segmentor
    self.sentences_path = sentences_path
def __init__(self):
    """Create the sequence tagger and the HanLP segmentor used by this object."""
    tagger = SequenceTag()
    self.get_tag_obj = tagger

    segmentor = hanlp_segmentor.HanlpSegmentor()
    self.segmentor = segmentor