def ner():
    """Flask handler: run NER over the ``text`` query parameter and return
    an HTML snippet showing the tagged tokens.

    Each prediction line is "<token> <tag>"; tokens tagged "O" (outside)
    are emitted as-is, any other tag is appended directly to its token.
    Returns ``key:<input> <br> <h4>value:<tagged>}</h4>`` markup.
    """
    key = 'text'
    val = request.args.get(key)
    # Shared model resources (vocab maps, TF session, model) from the singleton.
    word_to_id, tag_to_id, id_to_tag, sess, model = Singleton.get_instance()
    batch = n_service.get_batch_manager(id_to_tag, tag_to_id, val, word_to_id)
    with sess.as_default():
        ner_results = model.predict2(sess, batch)
    # Single pass: the old code built two parallel lists (datas/labels) and
    # re-merged them with a range(len(...)) loop; construct directly instead.
    results = []
    for line in ner_results:
        parts = line.split(" ")
        token, tag = parts[0], parts[1]
        results.append(token if tag == "O" else token + tag)
    ret2 = " ".join(results)
    ret = "key:%s <br> <h4>value:%s</h4> " % (val, ret2)
    return ret
def ner_text(): key = 'text' val = request.args.get(key) type = request.args.get('type') word_to_id, tag_to_id, id_to_tag, sess, model = Singleton.get_instance() batch = n_service.get_batch_manager(id_to_tag, tag_to_id, val, word_to_id) with sess.as_default(): ner_results = model.predict2(sess, batch) datas = [] idx = 0 is_flag = False item = {} for i, ret in enumerate(ner_results): strs = ret.split(" ") label = strs[1] data = strs[0] # print "label", label if label.startswith("B"): item = Item() item.type = label[2:] item.start = idx is_flag = True idx += len(data) elif label.startswith("I"): idx += len(data) elif is_flag and label.startswith("O"): # 遇到O结束了 idx += len(data) item.end = idx is_flag = False text = val[item.start:idx - 1] item.text = text # print "start", item.start, "end", idx, "text", text datas.append(item.to_string()) else: idx += len(data) if is_flag: item.end = idx text = val[item.start:idx] # print "start2", item.start, "end", idx, "text", text item.text = text # print json.dumps(item) datas.append(item.to_string()) # d = {} # d["dict"] = datas # ret = json.dumps(datas) ret = ",".join(datas) ret = "{\"docs\":[%s]}" % (ret) print "ret", ret return ret
def process(self, req):
    """Handle an NER request: tag req.query and return a NerResponse whose
    docs are Item spans over the query text.

    Offsets (start_idx/end_idx) are in UTF-8 *bytes*, not characters,
    because the query is encoded before slicing (a CJK character counts
    as 3 bytes, ASCII as 1).
    """
    res = NerResponse()
    val = req.query
    word_to_id, tag_to_id, id_to_tag, sess, model = Singleton.get_instance()
    batch = n_service.get_batch_manager(id_to_tag, tag_to_id, val, word_to_id)
    with sess.as_default():
        ner_results = model.predict2(sess, batch)
    datas = []
    val = val.encode('utf-8')  # byte string: all offsets below are byte offsets
    idx = 0           # byte offset of the current token within val
    is_flag = False   # True while inside an entity span (after a B-* tag)
    item = {}
    for i, ret in enumerate(ner_results):
        strs = ret.split(" ")
        label = strs[1]
        data = strs[0].encode('utf-8')  # 3 bytes per CJK char, 1 per ASCII
        if label.startswith("B"):
            item = Item()
            # 'dict' is a module-level tag-name mapping (shadows the builtin).
            item.type = dict[label[2:]]
            item.start_idx = idx
            is_flag = True
            idx += len(data)
        elif label.startswith("I"):
            idx += len(data)
        elif is_flag and label.startswith("O"):
            # An O tag closes the open entity; record span before advancing.
            item.end_idx = idx
            is_flag = False
            text = val[item.start_idx:idx]
            # val is already a UTF-8 byte string; the old text.encode('utf-8')
            # implicitly ascii-decodes first in Python 2 and raises
            # UnicodeDecodeError on any non-ASCII (i.e. Chinese) input.
            item.text = text
            datas.append(item)
            idx += len(data)
        else:
            idx += len(data)
    if is_flag:
        # Input ended while an entity was still open -- flush it.
        item.end_idx = idx
        item.text = val[item.start_idx:idx]  # already UTF-8 bytes; no re-encode
        datas.append(item)
    res.docs = datas
    res.status = 200
    model.logger.info("req:{},res:{}".format(req.query, datas))
    return res
def ner_text_prob(): key = 'text' val = request.args.get(key) print "val-repr", repr(val) word_to_id, tag_to_id, id_to_tag, sess, model = Singleton.get_instance() batch = n_service.get_batch_manager(id_to_tag, tag_to_id, val, word_to_id) with sess.as_default(): ner_results = model.predict_probility(sess, batch) datas = [] val = val.encode('utf-8') # print "val-len", len(val) idx = 0 is_flag = False item = {} for i, ret in enumerate(ner_results): strs = ret.split(" ") label = strs[1] data = strs[0].encode('utf-8') #中文3个,英文1个 # print "label", label if label.startswith("B"): item = Item() item.type = dict[label[2:]] item.start = idx item.score = float(strs[2]) is_flag = True idx += len(data) elif label.startswith("I"): idx += len(data) item.score *= float(strs[2]) elif is_flag and label.startswith("O"): # 遇到O结束了 #idx += len(data) item.end = idx is_flag = False text = val[item.start:idx] item.text = text item.score = np.power(item.score, float(1.0 / len(text.decode('utf-8')))) # print "text-mid-size", len(text.decode('utf-8')) # print "start", item.start, "end", idx, "text", text datas.append(item.to_string()) idx += len(data) else: idx += len(data) if is_flag: # 每一个score都计算了的 item.end = idx text = val[item.start:idx] # print "start2", item.start, "end", idx, "text", text item.text = text item.score = np.power(item.score, float(1.0 / len(text.decode('utf-8')))) datas.append(item.to_string()) # d = {} # d["dict"] = datas # ret = json.dumps(datas) ret = ",".join(datas) ret = "{\"docs\":[%s]}" % (ret) print "ret", ret return ret