def load_hmd_model(self, model_name):
    """
    Load a serialized HMD model from the configured model directory
    :param      model_name:     Model name; the file read is
                                '<brain-ta.hmd.model.dir>/<model_name>__0.hmdmodel'
    :return:                    Parsed hmd_pb2.HmdModel message
    :raises Exception:          If the model file does not exist, or if reading
                                or parsing it fails (the message then carries
                                the formatted traceback)
    """
    model_path = '{0}/{1}__0.hmdmodel'.format(
        self.conf.get('brain-ta.hmd.model.dir'), model_name)
    if not os.path.exists(model_path):
        raise Exception(
            '[ERROR] Not existed HMD model. [{0}]'.format(model_path))
    try:
        # The context manager closes the file even when read() fails, which
        # the previous explicit open()/close() pair did not guarantee.
        with open(model_path, 'rb') as in_file:
            serialized = in_file.read()
        hm = hmd_pb2.HmdModel()
        hm.ParseFromString(serialized)
        return hm
    except Exception:
        # Callers receive a plain Exception whose text is the full traceback.
        raise Exception(traceback.format_exc())
def load_model(model_path):
    """
    Load Model
    :param      model_path:     Model Path (must end with '.hmdmodel')
    :return:                    Model Value (parsed hmd_pb2.HmdModel message)
    :raises Exception:          If the extension is not '.hmdmodel', or if
                                opening, reading or parsing the file fails
                                (the message then carries the formatted
                                traceback)
    """
    if not model_path.endswith('.hmdmodel'):
        raise Exception(
            'model extension is not .hmdmodel : {0}'.format(model_path))
    try:
        # The context manager closes the file even when read() fails, which
        # the previous explicit open()/close() pair did not guarantee.
        with open(model_path, 'rb') as in_file:
            serialized = in_file.read()
        hm = hmd_pb2.HmdModel()
        hm.ParseFromString(serialized)
        return hm
    except Exception:
        # Callers receive a plain Exception whose text is the full traceback.
        raise Exception(traceback.format_exc())
def make_hmd_model(self, temp_list, model_name):
    """
    Build an HMD model from (categories, rule) entries and send it to the stub
    :param      temp_list:      Iterable of entries where entry[0] is the
                                category list and entry[1] is the rule text
    :param      model_name:     Model name
    """
    def _to_rule(entry):
        # entry[0] holds the categories, entry[1] the rule text.
        categories, rule_text = entry[0], entry[1]
        hmd_rule = hmd_pb2.HmdRule()
        hmd_rule.rule = rule_text
        hmd_rule.categories.extend(categories)
        return hmd_rule

    hmd_model = hmd_pb2.HmdModel()
    hmd_model.lang = lang_pb2.kor
    hmd_model.model = model_name
    hmd_model.rules.extend([_to_rule(entry) for entry in temp_list])
    self.stub.SetModel(hmd_model)

    # Request the model back under the same key; the response is not used.
    key = hmd_pb2.ModelKey()
    key.lang = lang_pb2.kor
    key.model = model_name
    self.stub.GetModel(key)
def set_model(self, model_name, target_list):
    """
    Build an HMD model from category/rule dictionaries and send it to the stub
    :param      model_name:     Model name
    :param      target_list:    Iterable of dicts with a 'category' key
                                (category levels joined by '!@#$') and a
                                'rule' key (rule text)
    """
    model = hmd_pb2.HmdModel()
    model.lang = lang_pb2.kor
    model.model = model_name
    rules_list = list()
    for item_dict in target_list:
        category = item_dict['category']
        rule = item_dict['rule']
        hmd_rule = hmd_pb2.HmdRule()
        hmd_rule.rule = rule
        # Category levels arrive as one string separated by '!@#$'.
        hmd_rule.categories.extend(category.split('!@#$'))
        rules_list.append(hmd_rule)
    model.rules.extend(rules_list)
    self.stub.SetModel(model)
def load_hmd_model(category_delimiter, model_name):
    """
    Load HMD model
    :param      category_delimiter:     HMD category delimiter, used to join
                                        each rule's categories into one string
    :param      model_name:             Model name; the file read is
                                        '<brain-ta.hmd.model.dir>/<model_name>__0.hmdmodel'
    :return:                            HMD dictionary mapping each detection
                                        keyword to a list of
                                        [category, hmd_rule] pairs
    :raises Exception:                  If the model file does not exist, or
                                        if loading or expanding it fails (the
                                        message then carries the formatted
                                        traceback)
    """
    conf = Config()
    conf.init('brain-ta.conf')
    model_path = '{0}/{1}__0.hmdmodel'.format(
        conf.get('brain-ta.hmd.model.dir'), model_name)
    if not os.path.exists(model_path):
        raise Exception(
            '[ERROR] Not existed HMD model [{0}]'.format(model_path))
    try:
        # The context manager closes the file even when read() fails, which
        # the previous explicit open()/close() pair did not guarantee.
        with open(model_path, 'rb') as in_file:
            serialized = in_file.read()
        hm = hmd_pb2.HmdModel()
        hm.ParseFromString(serialized)
        # Make HMD matrix list
        matrix_list = list()
        for rules in hm.rules:
            # Each piece returned by split_hmd_rule is split on '|' into its
            # alternative keywords.
            dtc_keyword_list = [
                rule_part.split('|')
                for rule_part in split_hmd_rule(rules.rule)]
            category = category_delimiter.join(rules.categories)
            # A fresh list is handed to vec_word_combine for every rule.
            # NOTE(review): presumably it is an output accumulator - confirm
            # against vec_word_combine.
            matrix_list.extend(vec_word_combine(
                list(), category, '', dtc_keyword_list, 0, rules.rule))
        # Make HMD matrix dictionary
        hmd_dict = dict()
        for category, dtc_keyword, hmd_rule in matrix_list:
            # Skip entries without a category or keyword, and categories
            # starting with '#'.
            if (len(category) < 1 or category.startswith('#')
                    or len(dtc_keyword) < 1):
                continue
            hmd_dict.setdefault(dtc_keyword, []).append([category, hmd_rule])
        return hmd_dict
    except Exception:
        # Callers receive a plain Exception whose text is the full traceback.
        raise Exception(traceback.format_exc())
def set_model(self, model_name, target_file_path):
    """
    Build an HMD model from a delimited text file and send it to the stub
    :param      model_name:             Model name
    :param      target_file_path:       Path of the rule file; each line is
                                        split on self.args.file_delimiter,
                                        the last field is the rule and the
                                        preceding fields are the categories
    """
    model = hmd_pb2.HmdModel()
    model.lang = lang_pb2.kor
    model.model = model_name
    rules_list = list()
    with open(target_file_path) as target_file:
        for line in target_file:
            line = line.strip()
            line_list = line.split(self.args.file_delimiter)
            # A usable line needs at least one category plus the rule.
            if len(line_list) < 2:
                print('[ERROR] Line field count at least two [{0}]'.format(
                    line))
                continue
            hmd_rule = hmd_pb2.HmdRule()
            hmd_rule.rule = line_list[-1]
            hmd_rule.categories.extend(line_list[:-1])
            rules_list.append(hmd_rule)
    model.rules.extend(rules_list)
    self.stub.SetModel(model)
def set_model(self):
    """
    Send a hard-coded two-rule sample model named 'news' to the stub, then
    fetch it back and print it
    """
    model_name = 'news'
    # Rules are written against the base-form words produced by
    # morphological analysis.
    # e.g. 안녕하세요. -> 안녕하/pa 시/ep 어/ec 요/jx ./s (ETRI tag set)
    #      so rules are built from 안녕하, 시어, 요
    sample_rules = (
        ('(안녕하)', ['level1', 'level2']),
        ('(자연)', ['level1', 'level3']),
    )

    hmd_model = hmd_pb2.HmdModel()
    hmd_model.lang = lang_pb2.kor
    hmd_model.model = model_name

    rule_messages = []
    for pattern, categories in sample_rules:
        hmd_rule = hmd_pb2.HmdRule()
        hmd_rule.rule = pattern
        hmd_rule.categories.extend(categories)
        rule_messages.append(hmd_rule)
    hmd_model.rules.extend(rule_messages)
    self.stub.SetModel(hmd_model)

    # Read the model back under the same key and print what the stub returns.
    key = hmd_pb2.ModelKey()
    key.lang = lang_pb2.kor
    key.model = model_name
    print(unicode(self.stub.GetModel(key)))
def set_model2(self, file_name, model_name):
    """
    Build an HMD model from a tab-separated file, send it to the stub, then
    fetch it back and print it
    :param      file_name:      Path of the rule file; on each line the last
                                tab-separated token is the rule and the
                                preceding tokens are the category levels
    :param      model_name:     Model name
    """
    model = hmd_pb2.HmdModel()
    model.lang = lang_pb2.kor
    model.model = model_name
    rules = list()
    # The context manager closes the file; the previous bare open() never did.
    with open(file_name) as rule_file:
        for line in rule_file:
            tokens = line.strip().split('\t')
            # level1, level2, level3...
            levels = tokens[:-1]
            # Last element of tokens
            keyword = tokens[-1]
            # Same output as the Python 2 statement `print levels, keyword`.
            print('{0} {1}'.format(levels, keyword))
            rule = hmd_pb2.HmdRule()
            rule.rule = keyword
            rule.categories.extend(levels)
            rules.append(rule)
    model.rules.extend(rules)
    self.stub.SetModel(model)
    model_key = hmd_pb2.ModelKey()
    model_key.lang = lang_pb2.kor
    model_key.model = model_name
    ret_model = self.stub.GetModel(model_key)
    print(unicode(ret_model))