예제 #1
0
def process(model=None,
            text="",
            tokenizer_=None,
            use_crf=False,
            tag_to_index=None,
            vocab=""):
    """
    Run inference on a single text and turn the predicted ids into labels.

    The text is converted to model inputs by ``process_one_example_p``; the
    text, the inputs and (on the non-CRF path only) the predicted ids are
    written to the module-level file handle ``f``.

    Args:
        model: object whose ``predict`` is called with the three input
            tensors followed by ``Tensor(1)``.
        text: the text to process.
        tokenizer_: forwarded to ``process_one_example_p``.
        use_crf: when truthy, the prediction is unpacked as
            ``(backpointers, best_tag_id)`` and decoded with ``postprocess``;
            otherwise it is treated as logits and reduced with ``argmax``
            over the last axis.
        tag_to_index: forwarded to ``label_generation``.
        vocab: forwarded to ``process_one_example_p``.

    Returns:
        The value returned by ``label_generation``.
    """
    def log_field(prefix, value):
        # Trace output goes to the file handle ``f`` defined outside this
        # function.
        f.write(prefix + str(value) + '\n')

    log_field("text: ", text)
    raw_ids, raw_mask, raw_segments = process_one_example_p(
        tokenizer_, vocab, text, max_seq_len=bert_net_cfg.seq_length)
    log_field("input_ids:  ", raw_ids)
    log_field("input_mask:  ", raw_mask)
    log_field("segment_ids: ", raw_segments)

    # All three inputs are converted the same way: array -> int32 Tensor.
    model_inputs = [Tensor(np.array(raw), mstype.int32)
                    for raw in (raw_ids, raw_mask, raw_segments)]
    prediction = model.predict(*model_inputs, Tensor(1))

    if use_crf:
        backpointers, best_tag_id = prediction
        # Flatten the decoded paths into one flat list of ids.
        ids = [tag
               for path in postprocess(backpointers, best_tag_id)
               for tag in path]
    else:
        ids = list(np.argmax(prediction.asnumpy(), axis=-1))
        log_field("pre_labels: ", ids)

    return label_generation(text=text, probs=ids, tag_to_index=tag_to_index)
예제 #2
0
 def update(self, logits, labels):
     '''
     Accumulate TP / FP / FN counts for the F1 score from one batch.

     Label ids in ``range(1, cfg.num_labels)`` count as positive; every
     other id counts as negative.

     Args:
         logits: when ``cfg.use_crf`` is set, a ``(backpointers,
             best_tag_id)`` pair that is decoded with ``postprocess``;
             otherwise an object with ``asnumpy()`` whose last axis is
             reduced with ``argmax``.
         labels: reference labels; must provide ``asnumpy()``.
     '''
     gold = np.reshape(labels.asnumpy(), -1)

     if cfg.use_crf:
         backpointers, best_tag_id = logits
         # Flatten the decoded paths into a single list of predicted ids.
         predicted = [tag
                      for path in postprocess(backpointers, best_tag_id)
                      for tag in path]
     else:
         predicted = np.reshape(np.argmax(logits.asnumpy(), axis=-1), -1)

     # Built once and shared by both membership tests.
     positive_ids = list(range(1, cfg.num_labels))
     pred_positive = np.isin(predicted, positive_ids)
     gold_positive = np.isin(gold, positive_ids)

     self.TP += np.sum(pred_positive & gold_positive)
     self.FP += np.sum(pred_positive & ~gold_positive)
     self.FN += np.sum(~pred_positive & gold_positive)
예제 #3
0
def process(model=None,
            text="",
            tokenizer_=None,
            use_crf="",
            label2id_file=""):
    """
    Run inference on a single text and turn the predicted ids into labels.

    Args:
        model: object whose ``predict`` is called with the three input
            tensors followed by ``Tensor(1)``.
        text: the text to process.
        tokenizer_: forwarded to ``process_one_example_p``.
        use_crf: CRF switch. The string ``"true"`` (any letter case) or the
            boolean ``True`` enables CRF decoding; any other value selects
            the argmax path.
        label2id_file: forwarded to ``label_generation``.

    Returns:
        The value returned by ``label_generation``.
    """
    # Normalise through str() so a real bool works as well as the string
    # flag; calling .lower() directly on a bool raised AttributeError.
    # String inputs give exactly the same result as before.
    crf_enabled = str(use_crf).lower() == "true"

    feature = process_one_example_p(tokenizer_,
                                    text,
                                    max_seq_len=bert_net_cfg.seq_length)
    input_ids, input_mask, token_type_id = feature
    input_ids = Tensor(np.array(input_ids), mstype.int32)
    input_mask = Tensor(np.array(input_mask), mstype.int32)
    token_type_id = Tensor(np.array(token_type_id), mstype.int32)

    if crf_enabled:
        # In CRF mode predict() yields a pair that postprocess() decodes
        # into paths; the paths are flattened into one list of ids.
        backpointers, best_tag_id = model.predict(input_ids, input_mask,
                                                  token_type_id, Tensor(1))
        best_path = postprocess(backpointers, best_tag_id)
        ids = [tag for path in best_path for tag in path]
    else:
        logits = model.predict(input_ids, input_mask, token_type_id,
                               Tensor(1))
        # The predicted id is the argmax over the last axis of the logits.
        ids = list(np.argmax(logits.asnumpy(), axis=-1))

    return label_generation(text=text, probs=ids, label2id_file=label2id_file)