def start_extract(options, prerequisite, requirement, model_dir):
    try:
        items1 = extract_approval_opinion(options, model_dir) if options else {}
        logger.debug('items1: {}'.format(items1))
    except Exception as e:
        logger.error('failed to extract approval opinions')
        logger.error(e)
        items1 = {}
    try:
        items2 = extract_preconditions(prerequisite, model_dir) if prerequisite else {}
        logger.debug('items2: {}'.format(items2))
    except Exception as e:
        logger.error('failed to extract preconditions')
        logger.error(str(e) + traceback.format_exc())
        items2 = {}
    try:
        items3 = extract_management_condition(requirement, model_dir) if requirement else {}
        logger.debug('items3: {}'.format(items3))
    except Exception as e:
        logger.error('failed to extract management requirements')
        logger.error(e)
        items3 = {}
    # NOTE: the original referenced an undefined `return_items`; merging the three
    # per-field dicts is assumed here and relies on their field ids being disjoint.
    return_items = {}
    for items in (items1, items2, items3):
        return_items.update(items)
    logger.info('start_extract::{}'.format(return_items))
    return return_items
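# A usage sketch for start_extract, assuming each section argument is a
# (text, mapper) pair in which mapper[i] is the offset of character i in the
# original document; the sample text and model path are hypothetical.
def _example_start_extract():
    prerequisite_text = '1、提供足值抵押。2、落实保证担保。'
    mapper = list(range(len(prerequisite_text)))  # identity mapping for standalone text
    return start_extract(options=None,
                         prerequisite=(prerequisite_text, mapper),
                         requirement=None,
                         model_dir='models/v1')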
def __init__(self, output_dir, tmp_dir, upload_dir, port, model_links_dir='model',
             config_links_dir='config'):
    # if not os.path.exists(output_dir):
    #     raise ValueError('output_dir does not exist')
    # if not os.path.exists(tmp_dir):
    #     raise ValueError('tmp_dir does not exist')
    # if not os.path.exists(upload_dir):
    #     raise ValueError('upload_dir does not exist')
    # if not isinstance(port, int) or port <= 0 or port > 65535:
    #     raise ValueError('port must be a valid port number (1~65535)')
    self._output_dir = output_dir
    self._tmp_dir = tmp_dir
    self._upload_dir = upload_dir
    self._port = port
    self._model_links_dir = model_links_dir
    self._config_links_dir = config_links_dir
    u_shape_framework.set_logger(logger)
    logger.info('initializing u_shape_framework engine ...')
    initialize_engine(u_shape_framework_conf)
    logger.info('engine initialized')
def predict(self, content, field_config):
    logger.info('start to run online workflow')
    workflow = get_current_engine().get_workflow('otonline')
    request_property = {'content': content, 'field_config': field_config}
    output = workflow.run(request_property)
    return output['results']
def predict(self, doctype, content, rich_content, fields=()):
    workflow_name = 'online'
    workflow = get_current_engine().get_workflow(workflow_name)
    logger.info('start to run {} workflow'.format(workflow_name))
    request_property = {
        # 'models_manager': self.models_manager,
        'doctype': doctype,
        'rich_content': rich_content,
        'content': content,
    }
    if fields:
        request_property['fields'] = fields
    output = workflow.run(request_property)
    # NOTE: the original built `output` and then returned a hard-coded empty
    # result; returning the workflow output is assumed to be the intent.
    return {'result': output.get('results', '')}
def cut_prerequisite(prerequisite, item_type):
    CNYD_REG = re.compile(r'[^0-9::]+[::]')
    return_items = {'result': [], 'type': item_type}
    if not prerequisite:
        return return_items
    text, mapper = prerequisite[0], prerequisite[1]
    logger.info('precondition text: {}'.format(text))
    reg_1 = re.compile(r'[ 1一、.()\s]+')
    result_reg_1 = reg_1.match(text)
    split_sign = result_reg_1.group() if result_reg_1 else 'xxx'
    split_sign = re.sub(r'[\s ]', r'', split_sign)
    if split_sign not in SPLIT_SIGN_DICT:
        cnyd_result = CNYD_REG.match(text)
        if not cnyd_result:
            return_items['result'].append(
                cut_prerequisite_nosign(text, mapper, item_type))
        else:
            # clauses with commitment keywords are re-tagged as type '承诺'
            if re.search(r'承诺|约定|访谈|书面记录|(协议|合同).*[增添]加', cnyd_result.group()):
                return_items['type'] = r'承诺'
            return_items['result'].append({
                'result': [[cnyd_result.group(), mapper[cnyd_result.start()]]],
                'type': return_items['type']
            })
            start, end = cnyd_result.span()
            return_items['result'].append(
                cut_prerequisite([text[end:], mapper[end:]], return_items['type']))
    else:
        split_sign_reg = SPLIT_SIGN_DICT[split_sign]
        split_sign_len = len(split_sign)
        split_parts = re.split(split_sign_reg, text)
        index = 0
        for each in split_parts:
            if re.sub(r'\s', r'', each):
                return_items['result'].append(
                    cut_prerequisite([each, mapper[index:index + len(each)]],
                                     return_items['type']))
            index += len(each) + split_sign_len
    return return_items
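# A minimal sketch of how cut_prerequisite recurses, assuming SPLIT_SIGN_DICT
# maps a leading list marker (e.g. '1、') to a regex that splits the text on
# that marker; the sample text and identity mapper are hypothetical.
def _example_cut_prerequisite():
    text = '1、提供足值抵押。2、承诺按期还款。'
    mapper = list(range(len(text)))  # char i -> offset i in the source document
    tree = cut_prerequisite([text, mapper], r'非承诺')
    # `tree` nests as {'result': [...sub-clauses...], 'type': ...}; clauses that
    # match the commitment keywords come back with type '承诺'.
    return tree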
def start(self):
    # models_manager = ClassifyModelsManager(self._output_dir, ClassifyPredictor, logger,
    #                                        self._model_links_dir, self._config_links_dir)
    logger.info('start server...')
    app = tornado.web.Application(handlers=[
        (
            conf.PREDICT_ROUTER,
            PredictHandler,
            # {
            #     # 'models_manager': models_manager,
            #     'tmp_dir': self._tmp_dir,
            #     'upload_dir': self._upload_dir
            # }
        ),
        (
            conf.PREDICT_PATH_ROUTER,
            PredictPathHandler,
            {
                # 'models_manager': models_manager,
                'tmp_dir': self._tmp_dir,
                'upload_dir': self._upload_dir
            }),
        (
            conf.RELOAD_ROUTER,
            ReloadHandler,
            {
                # 'models_manager': models_manager
            }),
        (
            conf.PREDICT_BY_FIELDS,
            PredictByFieldsHandler,
            {
                # 'models_manager': models_manager,
                'tmp_dir': self._tmp_dir,
                'upload_dir': self._upload_dir
            })
    ])
    app.listen(address='0.0.0.0', port=self._port)
    logger.info('server starts with address: 0.0.0.0, port: {}'.format(self._port))
    tornado.ioloop.IOLoop.current().start()
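# A minimal launch sketch, assuming the methods above live on a server class
# (named `Server` here purely for illustration; the real class name is not
# shown in this excerpt) and that `conf` defines the router constants used above.
def _example_start_server():
    server = Server(output_dir='output', tmp_dir='tmp', upload_dir='upload', port=8080)
    server.start()  # blocks inside tornado.ioloop.IOLoop.current().start()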
def extract_management_condition(sent_map, model_dir):
    text, all_mapper = sent_map[0], sent_map[1]
    # the original referenced `return_items` without ever initializing it
    return_items = {}
    each_index = 0
    each_sent_list_juhao = re.split(r'[;; 。]', text)
    for each_sent in each_sent_list_juhao:
        if not each_sent.replace(' ', ''):
            each_index += len(each_sent) + 1
            continue
        field_id, prob = extract(each_sent, model_dir)
        if field_id not in return_items:
            return_items[field_id] = []
        return_items[field_id].append(
            [each_sent, prob, all_mapper[each_index]])
        each_index += len(each_sent) + 1
    logger.info('extract_management_condition::{}'.format(return_items))
    return return_items
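# Sketch of the (text, mapper) contract assumed by these extractors: the mapper
# is index-aligned with the text, so all_mapper[each_index] recovers a clause's
# position in the original document even after re.split drops the delimiters.
# The offsets and model path below are hypothetical.
def _example_extract_management_condition():
    text = '落实抵押登记;按季检查经营情况。'
    mapper = [100 + i for i in range(len(text))]  # clause starts at offset 100
    return extract_management_condition((text, mapper), model_dir='models/v1')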
def extract_preconditions(sents_map, model_dir):
    cuted_sent_dict = cut_prerequisite(sents_map, r'非承诺')
    return_items = {}
    tk_list = solve(cuted_sent_dict)
    for each_tk in tk_list:
        tk_content, tk_start_idx = each_tk[0], each_tk[1]
        logger.debug('tk_content: {}'.format(tk_content))
        field_id, prob = extract(tk_content, model_dir)
        if field_id not in return_items:
            return_items[field_id] = []
        return_items[field_id].append([tk_content, prob, tk_start_idx])
    logger.info('extract_preconditions::{}'.format(return_items))
    return return_items
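# A sketch of the token list consumed above, assuming solve() flattens the
# cut_prerequisite tree into [clause_text, start_offset, clause_type] triples;
# solve's real contract is not shown in this excerpt, so the shape and values
# below are illustrative only.
def _example_tk_list():
    return [
        ['提供足值抵押', 102, r'非承诺'],
        ['承诺按期还款', 115, r'承诺'],
    ]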
def post(self):
    result = {'status': 'OK', 'msg': ''}
    init_time = time.time()
    try:
        data = json.loads(self.request.body)
        caller_request_id = data.get('caller_request_id', None)
        self_request_id = generate_request_id()
        logger.update_logger_extra({'caller_request_id': caller_request_id,
                                    'self_request_id': self_request_id})
        doctype, content, rich_content_path = \
            str(data['doctype']), data['content'], data['rich_content_path']
        logger.info('received data keys: {}, doctype: {}, content: {} ......'.format(
            list(data.keys()), doctype, content[:100]))
        with codecs.open('{}/{}'.format(self.upload_dir, rich_content_path),
                         encoding='utf-8') as f:
            rich_content = json.loads(f.read())
        result.update(self.predict(doctype, content, rich_content))
    except Exception as e:
        result['status'] = 'ERROR'
        result['msg'] = '{}'.format(e)
        logger.exception(e)
    result_str = json.dumps(result, ensure_ascii=False)
    self.write(result_str)
    logger.info('results: {}, cost time: {}s'.format(result_str, time.time() - init_time))
    logger.update_logger_extra()  # reset per-request log context
def post(self):
    result = {'status': 'OK', 'msg': ''}
    init_time = time.time()
    try:
        data = json.loads(self.request.body)
        caller_request_id = data.get('caller_request_id', None)
        self_request_id = generate_request_id()
        logger.update_logger_extra({'caller_request_id': caller_request_id,
                                    'self_request_id': self_request_id})
        doctype, content, rich_content, fields = \
            str(data['doctype']), data['content'], data['rich_content'], data['fields']
        logger.info('received data keys: {}, doctype: {}, fields: {}, content: {} ......'.format(
            list(data.keys()), doctype, fields, content[:100]))
        if not isinstance(fields, list):
            raise ValueError('args: fields must be a list')
        result.update(self.predict(doctype, content, rich_content, fields))
    except Exception as e:
        result['status'] = 'ERROR'
        result['msg'] = '{}'.format(e)
        logger.exception(e)
    result_str = json.dumps(result, ensure_ascii=False)
    self.write(result_str)
    logger.info('results: {}, cost time: {}s'.format(result_str, time.time() - init_time))
    logger.update_logger_extra()
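# A client-side sketch for the fields endpoint above, assuming the service runs
# locally on port 8080 and that conf.PREDICT_BY_FIELDS resolves to a path like
# '/predict_by_fields'; the route string, host, and payload values are all
# hypothetical.
def _example_predict_by_fields():
    import requests  # third-party; used only by this sketch
    payload = {
        'caller_request_id': 'req-001',   # optional; echoed into the log context
        'doctype': 'credit_approval',
        'content': '审批意见全文……',
        'rich_content': {},               # layout-enriched document content, if any
        'fields': ['approval_opinion'],   # must be a list (validated above)
    }
    resp = requests.post('http://127.0.0.1:8080/predict_by_fields',
                         data=json.dumps(payload))
    return resp.json()  # {'status': 'OK'|'ERROR', 'msg': ..., 'result': ...}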
def post(self):
    try:
        # get arguments
        model_version = self.get_argument('model_version')
        caller_request_id = self.get_argument('caller_request_id', default=None)
        self_request_id = generate_request_id()
        logger.update_logger_extra({'caller_request_id': caller_request_id,
                                    'self_request_id': self_request_id})
        logger.info('model_version: {}'.format(model_version))
        # update the model symlinks
        logger.info('links updating ...')
        # self.models_manager.update_links(model_version)
        # reload models
        logger.info('reloading ...')
        # self.models_manager.reload_models()
        logger.info('reloaded')
    except Exception as e:
        logger.exception(e)
        raise
    finally:
        logger.update_logger_extra()
def setUp(self):
    # init u_shape_engine
    u_shape_framework.set_logger(logger)
    logger.info('initializing u_shape_framework engine ...')
def _get_features(self, doctype, field, content):
    logger.info('get features for doctype: {}, field: {}'.format(doctype, field))
    return content