# Imports used by the snippets below; helpers such as process_one_example_p,
# postprocess, label_generation, bert_net_cfg and cfg are assumed to be defined
# elsewhere in the surrounding module.
import numpy as np
import mindspore.common.dtype as mstype
from mindspore import Tensor


def process(model=None, text="", tokenizer_=None, use_crf=False, tag_to_index=None, vocab=""):
    """
    process text.
    """
    data = [text]
    features = []
    res = []
    ids = []
    for i in data:
        # NOTE: `f` is assumed to be a log-file handle opened at module level;
        # it is not defined inside this function.
        f.write("text: " + str(i) + '\n')
        feature = process_one_example_p(tokenizer_, vocab, i, max_seq_len=bert_net_cfg.seq_length)
        features.append(feature)
        input_ids, input_mask, token_type_id = feature
        f.write("input_ids: " + str(input_ids) + '\n')
        f.write("input_mask: " + str(input_mask) + '\n')
        f.write("segment_ids: " + str(token_type_id) + '\n')
        # Convert the padded feature lists to MindSpore tensors.
        input_ids = Tensor(np.array(input_ids), mstype.int32)
        input_mask = Tensor(np.array(input_mask), mstype.int32)
        token_type_id = Tensor(np.array(token_type_id), mstype.int32)
        if use_crf:
            # CRF head: decode the best tag sequence from the backpointers.
            backpointers, best_tag_id = model.predict(input_ids, input_mask, token_type_id, Tensor(1))
            best_path = postprocess(backpointers, best_tag_id)
            logits = []
            for ele in best_path:
                logits.extend(ele)
            ids = logits
        else:
            # Softmax head: take the argmax over the label dimension.
            logits = model.predict(input_ids, input_mask, token_type_id, Tensor(1))
            ids = logits.asnumpy()
            ids = np.argmax(ids, axis=-1)
            ids = list(ids)
    f.write("pre_labels: " + str(ids) + '\n')
    res = label_generation(text=text, probs=ids, tag_to_index=tag_to_index)
    return res
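A minimal usage sketch follows, assuming `ner_model`, `tokenizer_`, and `tag_to_index` have already been built elsewhere (checkpoint loaded, tokenizer constructed from the vocabulary file, label map read in); those names and the vocabulary path are placeholders, not identifiers from the original code.

# Usage sketch only: `ner_model`, `tokenizer_` and `tag_to_index` are assumed
# to exist and are placeholders, not names defined in the original source.
entities = process(model=ner_model,
                   text="Some input sentence",
                   tokenizer_=tokenizer_,
                   use_crf=False,
                   tag_to_index=tag_to_index,
                   vocab="vocab.txt")
print(entities)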
def update(self, logits, labels):
    '''
    update F1 score
    '''
    labels = labels.asnumpy()
    labels = np.reshape(labels, -1)
    if cfg.use_crf:
        # CRF head: decode the best tag sequence from the backpointers.
        backpointers, best_tag_id = logits
        best_path = postprocess(backpointers, best_tag_id)
        logit_id = []
        for ele in best_path:
            logit_id.extend(ele)
    else:
        # Softmax head: take the argmax over the label dimension.
        logits = logits.asnumpy()
        logit_id = np.argmax(logits, axis=-1)
        logit_id = np.reshape(logit_id, -1)
    # Label indices 1..num_labels-1 are counted as entity tags; index 0 is
    # treated as the non-entity class.
    pos_eva = np.isin(logit_id, [i for i in range(1, cfg.num_labels)])
    pos_label = np.isin(labels, [i for i in range(1, cfg.num_labels)])
    self.TP += np.sum(pos_eva & pos_label)
    self.FP += np.sum(pos_eva & (~pos_label))
    self.FN += np.sum((~pos_eva) & pos_label)
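For completeness, a hedged sketch of how token-level precision, recall, and F1 would be derived from the accumulated `TP`, `FP`, and `FN` counters; the metric's actual `eval` method is not shown in this excerpt, so the helper below is illustrative only.

def f1_from_counts(tp, fp, fn):
    """Illustrative only: token-level precision/recall/F1 from accumulated counts."""
    precision = tp / (tp + fp) if (tp + fp) else 0.0
    recall = tp / (tp + fn) if (tp + fn) else 0.0
    f1 = 2 * precision * recall / (precision + recall) if (precision + recall) else 0.0
    return precision, recall, f1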
def process(model=None, text="", tokenizer_=None, use_crf="", label2id_file=""):
    """
    process text.
    """
    data = [text]
    features = []
    res = []
    ids = []
    for i in data:
        feature = process_one_example_p(tokenizer_, i, max_seq_len=bert_net_cfg.seq_length)
        features.append(feature)
        input_ids, input_mask, token_type_id = feature
        # Convert the padded feature lists to MindSpore tensors.
        input_ids = Tensor(np.array(input_ids), mstype.int32)
        input_mask = Tensor(np.array(input_mask), mstype.int32)
        token_type_id = Tensor(np.array(token_type_id), mstype.int32)
        # `use_crf` is a string flag here, hence the string comparison.
        if use_crf.lower() == "true":
            # CRF head: decode the best tag sequence from the backpointers.
            backpointers, best_tag_id = model.predict(input_ids, input_mask, token_type_id, Tensor(1))
            best_path = postprocess(backpointers, best_tag_id)
            logits = []
            for ele in best_path:
                logits.extend(ele)
            ids = logits
        else:
            # Softmax head: take the argmax over the label dimension.
            logits = model.predict(input_ids, input_mask, token_type_id, Tensor(1))
            ids = logits.asnumpy()
            ids = np.argmax(ids, axis=-1)
            ids = list(ids)
    res = label_generation(text=text, probs=ids, label2id_file=label2id_file)
    return res
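The comparison `use_crf.lower() == "true"` suggests the flag arrives as text, for example from a command-line option. A hedged sketch of such a call site; `parser`, `args`, `ner_model`, and `tokenizer_` are illustrative placeholders, not names taken from the original scripts.

# Sketch only: shows why `use_crf` is compared as a string. All names here are
# placeholders; the original evaluation script may wire this up differently.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--use_crf", type=str, default="false", choices=["true", "false"])
parser.add_argument("--label2id_file", type=str, default="label2id.json")
args = parser.parse_args()

res = process(model=ner_model, text="Some input sentence", tokenizer_=tokenizer_,
              use_crf=args.use_crf, label2id_file=args.label2id_file)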