def demo(model, config, id_to_tag, tag_to_id):
    """Interactive demo: restore the latest checkpoint and tag sentences from stdin."""
    logger = get_logger(config.log_file)
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    with tf.Session(config=tf_config) as sess:
        ckpt = tf.train.get_checkpoint_state(config.ckpt_path)
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        saver = tf.train.Saver()
        if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
            logger.info("Reading model parameters from %s" % ckpt.model_checkpoint_path)
            # saver = tf.train.import_meta_graph('ckpt/ner.ckpt.meta')
            # saver.restore(session, tf.train.latest_checkpoint("ckpt/"))
            saver.restore(sess, ckpt.model_checkpoint_path)
        while True:
            line = input("input sentence, please:")
            inputs = input_from_line(line, config.max_seq_len, tag_to_id)
            trans = model.trans.eval(sess)  # learned CRF transition matrix
            feed_dict = get_feed_dict(model, False, inputs, config)
            lengths, scores = sess.run([model.lengths, model.logits], feed_dict)
            batch_paths = decode(scores, lengths, trans, config)
            tags = [id_to_tag[idx] for idx in batch_paths[0]]
            # tags[1:-1] drops the [CLS]/[SEP] positions before span extraction
            result = bio_to_json(inputs[0], tags[1:-1])
            print(result['entities'])

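# --- A hedged sketch of the decode() helper called by demo() above. It assumes
# the common ChineseNER-style layout: each sequence's logits get an extra pad
# column and a virtual start row, so the CRF transition matrix has shape
# [num_tags + 1, num_tags + 1]. config.num_tags is an assumed attribute; adapt
# this to the actual config object and model layout.
import numpy as np
from tensorflow.contrib.crf import viterbi_decode


def decode(logits, lengths, matrix, config):
    paths = []
    small = -1000.0
    # a virtual start step that can only be in the extra "start" state
    start = np.asarray([[small] * config.num_tags + [0]])
    for score, length in zip(logits, lengths):
        score = score[:length]
        pad = small * np.ones([length, 1])
        padded_logits = np.concatenate([score, pad], axis=1)
        padded_logits = np.concatenate([start, padded_logits], axis=0)
        path, _ = viterbi_decode(padded_logits, matrix)
        paths.append(path[1:])  # drop the virtual start step
    return paths
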
def bert_ner_infer():
    """Flask handler: tokenize the request text and query a TF Serving endpoint."""
    params = json.loads(request.get_data(as_text=True))
    text = params["text"]
    url = params["url"]
    x, len_list = create_infer_inputs(text, max_len, tokenizer)
    print("len_list: ", len_list)
    input_ids = x[0].tolist()
    token_type_ids = x[1].tolist()
    attention_mask = x[2].tolist()
    # TF Serving REST predict request body
    data = json.dumps({
        "signature_name": "serving_default",
        "inputs": {
            "input_ids": input_ids,
            "token_type_ids": token_type_ids,
            "attention_mask": attention_mask
        }
    })
    headers = {"content-type": "application/json"}
    result = requests.post(url, data=data, headers=headers)
    result = json.loads(result.text)
    pred_logits = result["outputs"][0]
    pred = np.argmax(pred_logits, axis=1).tolist()
    print("pred: ", pred)
    predict_label = []
    for j in range(min(len_list[0], max_len)):
        predict_label.append(id2tag[pred[j]])
    return_result = bio_to_json(text, predict_label)
    return jsonify(return_result)

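# --- A hedged sketch of the create_infer_inputs() helper assumed by
# bert_ner_infer(). The HuggingFace-style tokenizer call and the (x, len_list)
# return layout are assumptions inferred from how the caller unpacks them, not
# the repo's actual implementation.
import numpy as np


def create_infer_inputs(text, max_len, tokenizer):
    enc = tokenizer(text, max_length=max_len, padding="max_length",
                    truncation=True, return_tensors="np")
    # x[0]=input_ids, x[1]=token_type_ids, x[2]=attention_mask, as indexed above
    x = [enc["input_ids"], enc["token_type_ids"], enc["attention_mask"]]
    len_list = [len(text)]  # raw text length; the caller caps it at max_len
    return x, len_list
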
def evaluate_line(self, sess, inputs, id_to_tag):
    trans = self.trans.eval(sess)
    lengths, scores = self.run_step(sess, False, inputs)
    batch_paths = self.decode(scores, lengths, trans)
    tags = [id_to_tag[idx] for idx in batch_paths[0]]
    return bio_to_json(inputs[0], tags[1:-1])

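# --- Hedged usage sketch for evaluate_line(): inputs come from the repo's
# input_from_line() helper (signature assumed), and tags[1:-1] above strips
# the [CLS]/[SEP] positions that BERT adds around the sentence.
inputs = input_from_line("Steve Jobs founded Apple in 1976.",
                         config.max_seq_len, tag_to_id)
result = model.evaluate_line(sess, inputs, id_to_tag)
print(result['entities'])
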
def bert_ner_model_infer(sentence, tag_to_id, id_to_tag, max_length, docker_url):
    final_result = []
    return_result = {"code": 200, "message": "success"}
    # replace whitespace in the sentence with a placeholder character
    sentence_ = sentence.replace('\r\n', '✈').replace(' ', '✈')
    sentence_ = sentence_.replace('\u3000', '✈')
    # TODO: newly added handling
    sentence_ = sentence_.replace('\xa0', '✈')
    # record the positions of whitespace and Korean characters
    null_index_list, korean_index_list, null_korean_dict, m1 = count_korean_null_info(
        sentence_)
    sentence_ = ''.join(sentence_.split())
    # strip Korean characters from the sentence
    sentence_ = re.sub('[\uac00-\ud7ff]+', '', sentence_)
    try:
        # for sentence in text:
        # _, segment_ids, word_ids, word_mask, label_ids
        token_result = input_from_line(sentence_, max_length, tag_to_id)  # FLAGS.max_seq_len
        word_ids = token_result[2].tolist()
        word_mask = token_result[3].tolist()
        seg_ids = token_result[1].tolist()
        data = json.dumps({
            "signature_name": "serving_default",
            "inputs": {
                "input_ids": word_ids,
                "input_mask": word_mask,
                "segment_ids": seg_ids,
                "dropout": 1.0
            }
        })
        headers = {"content-type": "application/json"}
        json_response = requests.post(docker_url, data=data, headers=headers)
        if json_response.status_code == 200:
            result = json.loads(json_response.text)
            if result == '':
                temp = {'content': sentence, "Entity": []}
                logger.info("final NER result: " + str(temp))
                return temp
            else:
                pred = result["outputs"][0]
                pred = np.array(pred)
                # drop the [CLS]/[SEP] positions
                label_list = pred.argmax(axis=1).tolist()[1:-1]
                # restore the whitespace and Korean placeholders
                if len(null_korean_dict) > 0:
                    label_list = preprocessing_korean_null(
                        korean_index_list, null_korean_dict, m1, label_list,
                        tag_to_id)
                pred_label = []
                for i in range(min(len(sentence), max_length - 2)):  # FLAGS.max_seq_len
                    pred_label.append(id_to_tag[label_list[i]])
                pred_label = processing_general_format(pred_label)
                logger.info("model prediction: {}".format(pred_label))
                # pred_label = pred_label[1:-1]
                res = bio_to_json(sentence, pred_label)
                if len(res['entities']) != 0:
                    for i in range(len(res['entities'])):
                        if res['entities'][i]["name"] != " ":
                            final_result.append(res['entities'][i])
                else:
                    final_result = []
                temp = {'content': sentence, "Entity": final_result}
                logger.info("NER extraction result: " + str(temp))
                return temp
        else:
            temp = {'content': sentence, "Entity": []}
            logger.info("final NER extraction result: " + str(temp))
            return temp
    except Exception:
        logger.error(traceback.format_exc())
        return_result["code"] = 400
        return_result["message"] = traceback.format_exc()
        return_result['content'] = ''
        return_result["Entity"] = []
        return return_result

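# --- Hypothetical invocation of bert_ner_model_infer(); docker_url points at
# a TF Serving REST predict endpoint (the model name "bert_ner" and the port
# are assumptions for illustration).
result = bert_ner_model_infer(
    "Steve Jobs founded Apple in 1976.",
    tag_to_id, id_to_tag,
    max_length=128,
    docker_url="http://localhost:8501/v1/models/bert_ner:predict")
print(result["Entity"])
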
def evaluate_line(self, sess, inputs, id_to_tag):
    lengths, batch_paths = self.run_step(sess, False, inputs)
    print(batch_paths)
    # skip the leading special positions, keep tags up to the real length
    tags = [id_to_tag[idx] for idx in batch_paths[0][2:lengths[0]]]
    return bio_to_json(inputs[0], tags)

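# --- A hedged sketch of the bio_to_json() helper shared by the functions
# above: it groups contiguous B-/I- BIO tags into entity spans. The output
# keys ('string', 'entities', 'name', 'type', offsets) are assumptions
# inferred from how the callers index the result.
def bio_to_json(string, tags):
    item = {"string": string, "entities": []}
    entity_name, entity_type, entity_start = "", "", 0
    for idx, (char, tag) in enumerate(zip(string, tags)):
        if tag.startswith("B-"):
            if entity_name:  # close any entity still open
                item["entities"].append({"name": entity_name, "start": entity_start,
                                         "end": idx, "type": entity_type})
            entity_name, entity_type, entity_start = char, tag[2:], idx
        elif tag.startswith("I-") and entity_name:
            entity_name += char
        else:  # "O" or a dangling I- tag ends the current entity
            if entity_name:
                item["entities"].append({"name": entity_name, "start": entity_start,
                                         "end": idx, "type": entity_type})
            entity_name, entity_type = "", ""
    if entity_name:  # entity running to the end of the string
        item["entities"].append({"name": entity_name, "start": entity_start,
                                 "end": len(string), "type": entity_type})
    return item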