Esempio n. 1
0
 def evaluate(self, sess, data_manager, id_to_tag):
     """
     Run the model over every batch and collect per-character tag lines.

     :param sess: session used to run the model
     :param data_manager: provider whose iter_batch() yields the batches
     :param id_to_tag: mapping from tag index to tag name
     :return: one list per sentence; each entry is the string
         "char gold pred" for one character of that sentence
     """
     transition = self.trans.eval()
     collected = []
     # Original notes: a batch is 4 x BatchSize x MaxLenSentence.
     for batch in data_manager.iter_batch():
         sentences = batch[0]  # BatchSize x MaxLenSentence (per original notes)
         gold_ids = batch[-1]  # BatchSize x MaxLenSentence (per original notes)
         # Original notes: scores are the self.logits values,
         # BatchSize x MaxLenSentence x num_tags.
         lengths, scores = self.run_step(sess, False, batch)
         # One sequence of predicted tag ids per sentence.
         decoded = self.decode(scores, lengths, transition)
         for idx, sentence in enumerate(sentences):
             size = lengths[idx]
             chars = sentence[:size]  # drop everything past the real length
             gold_tags = iobes_iob(
                 [id_to_tag[int(t)] for t in gold_ids[idx][:size]])
             pred_tags = iobes_iob(
                 [id_to_tag[int(t)] for t in decoded[idx][:size]])
             collected.append([
                 " ".join([ch, gold_tag, pred_tag])
                 for ch, gold_tag, pred_tag in zip(chars, gold_tags, pred_tags)
             ])
     # Each line holds the character, the gold tag and the predicted tag.
     return collected
Esempio n. 2
0
    def evaluate(self, sess, data, id_to_tag):
        """
        Evaluate the model on ``data`` with a single forward pass.

        :param sess: session used to run the model
        :param data: list of items; each item is read through the keys
            "string", "seqlen" and "tags"
        :param id_to_tag: mapping from tag index to tag name
        :return: tuple ``(results, mean_loss)``; ``results`` holds one list
            per item whose entries are "char gold pred" strings, and
            ``mean_loss`` is ``np.mean`` of the loss returned by run_step
        """
        results = []
        trans = self.trans.eval()

        # The third value returned by seperate_input is not needed here.
        chars, segs, _, tags, sequence_length = seperate_input(data)
        batch = [chars, segs, tags, sequence_length]
        # The lengths returned by run_step are not used; each item carries
        # its own "seqlen".
        _, scores, loss = self.run_step(sess, False, batch)
        for idx, item in enumerate(data):
            seqlen = item["seqlen"]
            # Decode this item's own slice of the scores. Passing the whole
            # score array with a one-element length list would decode the
            # first sentence's scores for every item.
            # NOTE(review): assumes scores[idx] corresponds to data[idx],
            # i.e. seperate_input keeps the order of `data` -- confirm.
            paths = self.decode(scores[idx:idx + 1], [seqlen], trans)
            text = item["string"][:seqlen]
            gold = iobes_iob(
                [id_to_tag[int(x)] for x in item["tags"][:seqlen]])
            pred = iobes_iob(
                [id_to_tag[int(x)] for x in paths[0][:seqlen]])
            results.append(
                [" ".join([c, g, p]) for c, g, p in zip(text, gold, pred)])
        return results, np.mean(loss)
 def evaluate(self, sess, data_manager, id_to_tag):
     """
     Decode every batch and pair each character with gold and predicted tags.

     :param sess: session used to run the model
     :param data_manager: provider whose iter_batch() yields the batches
     :param id_to_tag: mapping from tag index to tag name
     :return: list with one entry per sentence; each entry is a list of
         "char gold pred" strings
     """

     def to_tags(tag_ids):
         # Map tag ids to names, then convert them with iobes_iob.
         return iobes_iob([id_to_tag[int(x)] for x in tag_ids])

     output = []
     transition_matrix = self.trans.eval()  # evaluate the transition matrix
     for batch in data_manager.iter_batch():
         sentences, gold_ids = batch[0], batch[-1]
         # Forward pass in non-training mode: returns lengths and scores.
         lengths, scores = self.run_step(sess, False, batch)
         # Predicted tag ids for every character of every sentence.
         paths = self.decode(scores, lengths, transition_matrix)
         for idx, sentence in enumerate(sentences):
             n = lengths[idx]
             chars = sentence[:n]  # keep only the first n characters
             gold_tags = to_tags(gold_ids[idx][:n])
             pred_tags = to_tags(paths[idx][:n])
             lines = []
             for ch, g, p in zip(chars, gold_tags, pred_tags):
                 lines.append(" ".join([ch, g, p]))
             output.append(lines)
     return output
Esempio n. 4
0
    def get_reward(self, sess, batch, lengths, scores, id_to_tag):
        """
        Decode a batch and compute sentence-level rewards from its tags.

        :param sess: not used by the visible code
        :param batch: batch data; ``batch[0]`` holds the sentences and
            ``batch[-1]`` the gold tag ids
        :param lengths: per-sentence lengths used to trim each sequence
        :param scores: scores handed to ``self.decode``
        :param id_to_tag: mapping from tag index to tag name
        :return: the visible code returns nothing
            NOTE(review): ``rewards`` is computed but never returned here;
            confirm whether a ``return rewards`` is missing.
        """
        strings = batch[0]
        tags = batch[-1]
        trans = self.trans.eval()
        batch_paths = self.decode(scores, lengths, trans)

        gold_seqs = []
        pred_seqs = []
        for i in range(len(strings)):
            string = strings[i][:lengths[i]]
            gold = iobes_iob([id_to_tag[int(x)] for x in tags[i][:lengths[i]]])
            pred = iobes_iob([id_to_tag[int(x)] for x in batch_paths[i][:lengths[i]]])
            # Keep the tags as sequences instead of joining them into
            # "char gold pred" strings and splitting again: that round trip
            # shifts the columns when a character is itself a space.
            # zip() still truncates to the shortest of the three sequences.
            aligned = list(zip(string, gold, pred))
            gold_seqs.append([g for _, g, _ in aligned])
            pred_seqs.append([p for _, _, p in aligned])

        # The earlier per-sentence 0/1 reward list was dropped: it was
        # overwritten by this call before anything read it.
        # NOTE(review): the first argument carries the GOLD tags and the
        # second the PREDICTED tags. That is the order previously passed,
        # although the locals were named "predicts"/"targets". Confirm it
        # matches what compute_sentence_reward expects.
        rewards = self.effective_rl.compute_sentence_reward(gold_seqs, pred_seqs)

        assert len(strings) == len(gold_seqs)
Esempio n. 5
0
 def evaluate(self, sess, data, id_to_tag):
     """
     Evaluate the model one item at a time.

     :param sess: session used to run the model
     :param data: list of items; each item is read through the keys
         "string" and "tags" and is turned into a batch by create_input
     :param id_to_tag: mapping from tag index to tag name
     :return: one list per item whose entries are "char gold pred" strings
     """
     transition = self.trans.eval()
     collected = []
     for item in data:
         batch = create_input(item)
         text = item["string"]
         gold_ids = item["tags"]
         seq_len, scores = self.run_step(sess, False, batch)
         # decode receives the run_step length wrapped in a one-element list;
         # only the first decoded path is used.
         paths = self.decode(scores, [seq_len], transition)
         chars = text[:seq_len]
         gold_tags = iobes_iob([id_to_tag[int(t)] for t in gold_ids[:seq_len]])
         pred_tags = iobes_iob([id_to_tag[int(t)] for t in paths[0][:seq_len]])
         collected.append([
             " ".join([ch, g, p])
             for ch, g, p in zip(chars, gold_tags, pred_tags)
         ])
     return collected
Esempio n. 6
0
 def evaluate(self, sess, data_manager, id_to_tag):
     """
     Collect gold and predicted tags for every character of every sentence.

     :param sess: session used to run the model
     :param data_manager: provider whose iter_batch() yields the batches
     :param id_to_tag: mapping from tag index to tag name
     :return: list of sentences; each sentence is a list of
         "char gold pred" strings
     """
     trans_matrix = self.trans.eval()
     all_lines = []
     for batch in data_manager.iter_batch():
         lengths, scores = self.run_step(sess, False, batch)
         predicted = self.decode(scores, lengths, trans_matrix)
         sentences = batch[0]
         gold_ids = batch[-1]
         for pos, sentence in enumerate(sentences):
             limit = lengths[pos]
             chars = sentence[:limit]
             gold_names = [id_to_tag[int(v)] for v in gold_ids[pos][:limit]]
             gold_tags = iobes_iob(gold_names)
             pred_names = [id_to_tag[int(v)] for v in predicted[pos][:limit]]
             pred_tags = iobes_iob(pred_names)
             sentence_lines = [
                 " ".join(triple)
                 for triple in zip(chars, gold_tags, pred_tags)
             ]
             all_lines.append(sentence_lines)
     return all_lines
Esempio n. 7
0
def eval_model(id_to_char, id_to_tag, test_manager, device, model_name=None):
    """
    Load a saved model and evaluate it on the batches of ``test_manager``.

    :param id_to_char: index-to-character mapping; its length sizes the
        embedding table and it is passed to load_word2vec
    :param id_to_tag: mapping from tag index to tag name
    :param test_manager: provider whose iter_batch() yields 8-field batches
    :param device: device the model and input tensors are moved to
    :param model_name: file stem under ./models/; falls back to
        ``args.log_name`` when falsy
    :return: tuple ``(f1, res_info)`` where ``res_info`` is the stripped
        second line returned by test_ner and ``f1`` is its last
        whitespace-separated token parsed as a float
    """
    print("Eval ......")
    if not model_name:
        model_name = args.log_name
    old_weights = np.random.rand(len(id_to_char), args.word_embed_dim)
    pre_word_embed = load_word2vec("100.utf8", id_to_char, args.word_embed_dim,
                                   old_weights)
    e_model = Model(args, id_to_tag, device, pre_word_embed).to(device)
    # map_location lets a checkpoint saved on another device be loaded here.
    e_model.load_state_dict(
        torch.load("./models/" + model_name + ".pkl", map_location=device))
    print("model loaded ...")

    e_model.eval()
    all_results = []
    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        for batch in test_manager.iter_batch():
            strs, lens, chars, segs, subtypes, tags, adj, dep = batch
            chars_t = torch.LongTensor(chars).to(device)
            lens_t = torch.LongTensor(lens).to(device)
            subtypes_t = torch.LongTensor(subtypes).to(device)
            adj_t = torch.FloatTensor(adj).to(device)
            dep_t = torch.LongTensor(dep).to(device)
            logits, _ = e_model(chars_t, lens_t, subtypes_t, adj_t, dep_t)

            for i, sentence in enumerate(strs):
                length = lens[i]
                # Greedy decode. softmax is monotonic, so the argmax of the
                # raw logits selects the same tag as the argmax of the
                # probabilities.
                path = torch.argmax(logits[i][:length], dim=-1).tolist()
                # Gold tags are read from the raw batch field; the model
                # does not take them, so they need not live on the device.
                gold = iobes_iob(
                    [id_to_tag[int(x)] for x in tags[i][:length]])
                pred = iobes_iob([id_to_tag[int(x)] for x in path])
                all_results.append([
                    " ".join([char, g, p])
                    for char, g, p in zip(sentence[:length], gold, pred)
                ])

    all_eval_lines = test_ner(all_results, args.result_path, args.log_name)
    res_info = all_eval_lines[1].strip()
    f1 = float(res_info.split()[-1])
    print("eval: f1: {}".format(f1))
    return f1, res_info
Esempio n. 8
0
    def run_step(self, sess, is_train, batch, need_reward=False, id_to_tag=None):
        """
        Run one training or inference step on a batch.

        :param sess: session to run the batch
        :param is_train: a flag indicating whether it is a train batch
        :param batch: batch data; ``batch[0]`` holds the sentences and
            ``batch[-1]`` the gold tag ids (only read when need_reward is set)
        :param need_reward: when training, also compute tag rewards from the
            gold tags
        :param id_to_tag: mapping from tag index to tag name, needed only
            when need_reward is set; defaults to an empty mapping
        :return: ``(global_step, loss, rewards, action_prob)`` when training
            (``rewards`` is None unless need_reward is set), otherwise
            ``(lengths, logits)``
        """
        # Avoid a shared mutable default argument.
        if id_to_tag is None:
            id_to_tag = {}
        feed_dict = self.create_feed_dict(is_train, batch)
        if not is_train:
            lengths, logits = sess.run([self.lengths, self.logits], feed_dict)
            return lengths, logits

        global_step, loss, _, lengths, action_prob = sess.run(
            [self.global_step, self.loss, self.train_op, self.lengths, self.action_prob],
            feed_dict)

        rewards = None
        if need_reward:
            strings = batch[0]
            tags = batch[-1]
            # One gold tag sequence per sentence, trimmed to its length.
            target_tags = [
                iobes_iob([id_to_tag[int(x)] for x in tags[i][:lengths[i]]])
                for i in range(len(strings))
            ]
            rewards = self.effective_rl.compute_tag_reward(target_tags)

        return global_step, loss, rewards, action_prob
Esempio n. 9
0
 def evaluate(self, sess, data_num, char_to_id, tag_to_id, id_to_tag,
              data_type):
     """
     Evaluate the model on numbered .npy shards of the test or dev set.

     :param sess: session used to run the model
     :param data_num: number of shards; shard files are numbered from 1
     :param char_to_id: character-to-index mapping passed to
         convert_feature_tag
     :param tag_to_id: mapping from tag name to tag index
     :param id_to_tag: mapping from tag index to tag name
     :param data_type: "test" or "dev"; selects the NER_test/ or NER_dev/
         directory and the file name prefix
     :return: tuple ``(results, total_loss)``; ``results`` holds one list of
         "char gold pred" strings per sentence and ``total_loss`` one loss
         value per shard
     :raises ValueError: if data_type is neither "test" nor "dev"
     """
     # Fail early with a clear message instead of hitting an unbound path
     # variable inside the loop.
     if data_type not in ("test", "dev"):
         raise ValueError(
             "data_type must be 'test' or 'dev', got {!r}".format(data_type))
     folder = "NER_" + data_type + "/"

     results = []
     trans = self.trans.eval()
     total_loss = []
     for j in range(data_num):
         suffix = str(j + 1) + ".npy"
         tag_load = np.load(folder + data_type + "_label" + suffix)
         feature_load = np.load(folder + "elmo_result" + suffix)
         str_load = np.load(folder + data_type + "_sentence" + suffix)
         # Map every tag name of every sentence to its index.
         data_tags = [[tag_to_id[item] for item in sen] for sen in tag_load]
         batch = convert_feature_tag(str_load, char_to_id, feature_load,
                                     data_tags)
         strings = batch[0]
         tags = batch[5]
         lengths, scores, testloss = self.run_step(sess, False, batch)
         total_loss.append(testloss)
         batch_paths = self.decode(scores, lengths, trans)
         for i, sentence in enumerate(strings):
             length = lengths[i]
             chars = sentence[:length]
             gold = iobes_iob(
                 [id_to_tag[int(x)] for x in tags[i][:length]])
             pred = iobes_iob(
                 [id_to_tag[int(x)] for x in batch_paths[i][:length]])
             results.append(
                 [" ".join([c, g, p]) for c, g, p in zip(chars, gold, pred)])
     return results, total_loss
Esempio n. 10
0
    def evaluate(self, sess, data_manager, id_to_tag):
        """
        Decode every batch and line up characters with gold/predicted tags.

        :param sess: session used to run the model
        :param data_manager: provider whose iter_batch() yields the batches
        :param id_to_tag: mapping from tag index to tag name
        :return: one list per sentence whose entries are "char gold pred"
        """

        def names(tag_ids):
            # Tag ids -> tag names, converted with iobes_iob.
            return iobes_iob([id_to_tag[int(t)] for t in tag_ids])

        gathered = []
        for batch in data_manager.iter_batch():
            sentences, gold_ids = batch[0], batch[-1]
            lengths, scores = self.run_step(sess, False, batch)
            # This decode variant takes no transition matrix argument.
            paths = self.decode(scores, lengths)

            for k, sentence in enumerate(sentences):
                cut = lengths[k]
                chars = sentence[:cut]
                gold_tags = names(gold_ids[k][:cut])
                pred_tags = names(paths[k][:cut])
                gathered.append([
                    " ".join([ch, g, p])
                    for ch, g, p in zip(chars, gold_tags, pred_tags)
                ])
        return gathered
Esempio n. 11
0
def dev_epoch(epoch, model, dev_manager, id_to_tag, device):
    """
    Evaluate ``model`` on the dev batches and log the result line.

    :param epoch: zero-based epoch index; logged as ``epoch + 1``
    :param model: model to evaluate; it is switched to eval mode and left so
    :param dev_manager: provider whose iter_batch() yields 8-field batches
    :param id_to_tag: mapping from tag index to tag name
    :param device: device the input tensors are moved to
    :return: tuple ``(f1, model)`` where ``f1`` is the last
        whitespace-separated token of the second line returned by test_ner
    """
    model.eval()
    all_results = []
    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        for batch in dev_manager.iter_batch():
            strs, lens, chars, segs, subtypes, tags, adj, dep = batch
            chars_t = torch.LongTensor(chars).to(device)
            lens_t = torch.LongTensor(lens).to(device)
            subtypes_t = torch.LongTensor(subtypes).to(device)
            adj_t = torch.FloatTensor(adj).to(device)
            dep_t = torch.LongTensor(dep).to(device)
            # logits: [batch, seq, dim] (per the original note)
            logits, _ = model(chars_t, lens_t, subtypes_t, adj_t, dep_t)

            for i, sentence in enumerate(strs):
                length = lens[i]
                # Greedy decode. softmax is monotonic, so the argmax of the
                # raw logits selects the same tag as the argmax of the
                # probabilities.
                path = torch.argmax(logits[i][:length], dim=-1).tolist()
                # Gold tags are read from the raw batch field; the model
                # does not take them, so they need not live on the device.
                gold = iobes_iob(
                    [id_to_tag[int(x)] for x in tags[i][:length]])
                pred = iobes_iob([id_to_tag[int(x)] for x in path])
                all_results.append([
                    " ".join([char, g, p])
                    for char, g, p in zip(sentence[:length], gold, pred)
                ])

    all_eval_lines = test_ner(all_results, args.result_path, args.log_name)
    summary = all_eval_lines[1].strip()
    log_handler.info("epoch: {}, info: {}".format(epoch + 1, summary))
    f1 = float(summary.split()[-1])
    return f1, model
Esempio n. 12
0
    def evaluate(self, sess, dataset_name, id_to_tag, id_to_char):
        """
        Evaluate on a named dataset until its iterator is exhausted.

        :param sess: session used to run the model
        :param dataset_name: name passed to make_dataset_init to select and
            initialise the dataset
        :param id_to_tag: mapping from tag index to tag name
        :param id_to_char: mapping from character index to character
        :return: list of ``[chars, gold_tags, pred_tags]`` triples, one per
            sentence
        """
        sess.run(self.make_dataset_init(dataset_name))
        transition = self.trans.eval()
        collected = []
        try:
            # The loop only ends when OutOfRangeError is raised.
            while True:
                inputs, tags, lengths, scores = self.run_step(sess, False)
                paths = self.decode(scores, lengths, transition)
                for idx, row in enumerate(inputs):
                    n = lengths[idx]
                    chars = [id_to_char[c] for c in row[:n]]
                    gold_tags = iobes_iob(
                        [id_to_tag[int(t)] for t in tags[idx][:n]])
                    pred_tags = iobes_iob(
                        [id_to_tag[int(t)] for t in paths[idx][:n]])
                    collected.append([chars, gold_tags, pred_tags])
        except tf.errors.OutOfRangeError:
            pass

        return collected
Esempio n. 13
0
 def evaluate(self, sess, data_manager, id_to_tag):
     """
     Produce "char gold pred" lines for every sentence of every batch.

     :param sess: session used to run the model
     :param data_manager: provider whose iter_batch() yields the batches
     :param id_to_tag: mapping from tag index to tag name
     :return: list of sentences, each a list of "char gold pred" strings
     """
     matrix = self.trans.eval()
     report = []
     for batch in data_manager.iter_batch():
         sentences = batch[0]
         reference_ids = batch[-1]
         lengths, scores = self.run_step(sess, False, batch)
         predicted_ids = self.decode(scores, lengths, matrix)
         for row, sentence in enumerate(sentences):
             length = lengths[row]
             chars = sentence[:length]
             gold_seq = iobes_iob(
                 [id_to_tag[int(tag_id)] for tag_id in reference_ids[row][:length]])
             pred_seq = iobes_iob(
                 [id_to_tag[int(tag_id)] for tag_id in predicted_ids[row][:length]])
             entry = []
             for ch, gold_tag, pred_tag in zip(chars, gold_seq, pred_seq):
                 entry.append(" ".join([ch, gold_tag, pred_tag]))
             report.append(entry)
     return report
Esempio n. 14
0
    def evaluate(self, sess, data_manager, id_to_tag, precision_loc,
                 precision_per, precision_org):
        """
        Combine three tag predictions by voting and pair them with gold tags.

        :param sess: session used to run the model
        :param data_manager: provider whose iter_batch() yields the batches
        :param id_to_tag: mapping from tag index to tag name
        :param precision_loc: per-sentence tag sequences from the first
            predictor; also the fallback when there is no agreeing pair
        :param precision_per: per-sentence tag sequences from the second
            predictor
        :param precision_org: per-sentence tag sequences from the third
            predictor
        :return: one list per sentence whose entries are "char gold pred"

        NOTE(review): assumes the three prediction lists are ordered like the
        sentences produced by ``data_manager.iter_batch()`` -- confirm.
        """
        # Per-token vote: take the second/third value only when those two
        # agree with each other and differ from the first; otherwise keep the
        # first. A new list is built so the caller's inputs are not mutated.
        voted = []
        for i, loc_tags in enumerate(precision_loc):
            per_tags = precision_per[i]
            org_tags = precision_org[i]
            merged = []
            for j, loc_tag in enumerate(loc_tags):
                per_tag = per_tags[j]
                org_tag = org_tags[j]
                if per_tag == org_tag and loc_tag != org_tag:
                    merged.append(per_tag)
                else:
                    merged.append(loc_tag)
            voted.append(merged)

        results = []
        # Sentences already consumed by earlier batches. The voted list is
        # indexed across all batches, so the within-batch index alone would
        # pick the wrong prediction from the second batch onwards.
        offset = 0
        for batch in data_manager.iter_batch():
            strings = batch[0]
            tags = batch[-1]
            # Only the lengths are needed; the scores are not decoded here.
            lengths, _ = self.run_step(sess, False, batch)

            for i, sentence in enumerate(strings):
                chars = sentence[:lengths[i]]
                gold = iobes_iob(
                    [id_to_tag[int(x)] for x in tags[i][:lengths[i]]])
                pred = voted[offset + i]
                results.append(
                    [" ".join([c, g, p]) for c, g, p in zip(chars, gold, pred)])
            offset += len(strings)
        return results
Esempio n. 15
0
    def run_rl(self, sess, is_train, batch, id_to_tag):
        """
        Run a forward pass, compute tag rewards, then run the RL training op.

        :param sess: session used to run the graph
        :param is_train: flag forwarded to create_feed_dict
        :param batch: batch data; ``batch[0]`` holds the sentences and
            ``batch[-1]`` the gold tag ids
        :param id_to_tag: mapping from tag index to tag name
        :return: tuple ``(global_step, loss, rl_loss)``; ``global_step`` comes
            from the second run, ``loss`` from the first
        """
        feed = self.create_feed_dict(is_train, batch)
        forward_fetches = [
            self.global_step, self.loss, self.lengths, self.action_prob
        ]
        global_step, loss, lengths, action_prob = sess.run(forward_fetches, feed)

        sentences = batch[0]
        gold_ids = batch[-1]
        # One gold tag sequence per sentence, trimmed to its length.
        target_tags = [
            iobes_iob([id_to_tag[int(t)] for t in gold_ids[idx][:lengths[idx]]])
            for idx in range(len(sentences))
        ]
        # Feed the rewards into the same feed dict for the RL update.
        feed[self.rewards] = self.effective_rl.compute_tag_reward(target_tags)
        rl_fetches = [self.global_step, self.rl_loss, self.train_rl_op]
        global_step, rl_loss, _ = sess.run(rl_fetches, feed)
        return global_step, loss, rl_loss
Esempio n. 16
0
 def precision(self, sess, data_manager, id_to_tag):
     """
     Return the predicted tag sequence of every sentence in the data.

     :param sess: session used to run the model
     :param data_manager: provider whose iter_batch() yields the batches
     :param id_to_tag: mapping from tag index to tag name
     :return: list with one predicted tag sequence per sentence, in the
         order the batches are produced
     """
     preds = []
     trans = self.trans.eval()
     for batch in data_manager.iter_batch():
         strings = batch[0]
         lengths, scores = self.run_step(sess, False, batch)
         batch_paths = self.decode(scores, lengths, trans)
         # The sentences are only needed for their count; the text is unused.
         for i in range(len(strings)):
             preds.append(iobes_iob(
                 [id_to_tag[int(x)] for x in batch_paths[i][:lengths[i]]]))
     return preds
Esempio n. 17
0
    def predict(self, sess, batch, label_vocab):
        """
        Predict the label sequence of every sentence in one batch.

        Args:
            sess: session to run the model
            batch: [strings, char_ids, seg_ids, _]
            label_vocab: index to tag name

        Return:
            results: one list of labels per sentence, after iobes_iob and
                check_iob have been applied
        """
        results = []
        trans = self.trans.eval(sess)

        strings = batch[0]
        lengths, scores = self.run_step(sess, False, batch)
        batch_paths = self.decode(scores, lengths, trans)
        # The sentences are only needed for their count; the text is unused.
        for i in range(len(strings)):
            pred = iobes_iob(
                [label_vocab[int(x)] for x in batch_paths[i][:lengths[i]]])
            results.append(check_iob(pred))
        return results