Example #1
def evaluate():
    wordss, tagss, lengths = test_helper.gen_batch().__next__()
    sentence_in = prepare_sequence(wordss, word_to_ix)
    target_tag_seqs = prepare_sequence(tagss, tag_to_ix)
    predict_scores, predict_tag_seqs = model(sentence_in, lengths)
    for tag in ['a', 'b', 'c']:
        f1_score(target_tag_seqs, predict_tag_seqs, tag, tag_to_ix, lengths)
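
The five-argument f1_score used in Example #1 is not defined in this listing. A minimal sketch of a per-tag precision/recall/F1 over a padded batch, assuming target_tag_seqs and predict_tag_seqs are batches of tag-id sequences and lengths gives the valid length of each sequence (a hypothetical helper, for illustration only):

def f1_score(target_tag_seqs, predict_tag_seqs, tag, tag_to_ix, lengths):
    """Precision, recall and F1 for a single tag over a padded batch (sketch)."""
    tag_id = tag_to_ix[tag]
    tp = fp = fn = 0
    for target, predict, length in zip(target_tag_seqs, predict_tag_seqs, lengths):
        for t, p in zip(target[:length], predict[:length]):
            if p == tag_id and t == tag_id:
                tp += 1
            elif p == tag_id:
                fp += 1
            elif t == tag_id:
                fn += 1
    precision = tp / (tp + fp) if tp + fp else 0.0
    recall = tp / (tp + fn) if tp + fn else 0.0
    f1 = 2 * precision * recall / (precision + recall) if precision + recall else 0.0
    return precision, recall, f1
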
Example #2
 def pre_losses(self):
     if not self.all_pre_preds:
         return None
     pre_preds = np.concatenate(self.all_pre_preds)
     labels = np.concatenate(self.all_labels)
     micro = util.f1_score(pre_preds, labels, 0.5)
     macro = util.f1_score(pre_preds, labels, 0.5, average='macro')
     return micro, macro
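
The util.f1_score helper used in Example #2 (and in Examples #6, #7, #9, #10, #12 and #13 below) is not shown in this listing. A minimal sketch of a thresholded multi-label precision/recall/F1 with micro, macro, or per-class averaging, assuming probs and labels are (n_samples, n_classes) NumPy arrays, might look like this (an assumption, not the original implementation):

import numpy as np

def f1_score(probs, labels, threshold, average='micro'):
    """Thresholded multi-label precision, recall and F1 (sketch)."""
    preds = (probs >= threshold).astype(np.float64)
    labels = labels.astype(np.float64)
    axis = None if average == 'micro' else 0
    tp = np.sum(preds * labels, axis=axis)
    p = tp / np.maximum(np.sum(preds, axis=axis), 1e-12)
    r = tp / np.maximum(np.sum(labels, axis=axis), 1e-12)
    f = 2 * p * r / np.maximum(p + r, 1e-12)
    if average == 'macro':
        return p.mean(), r.mean(), f.mean()
    return p, r, f  # 'micro' gives scalars; average=None gives per-class arrays
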
Example #3
def find_context_index(question, answer, contexts):
    context_list = splitSentence(contexts)
    print(context_list)
    print("the length of the list is " + str(len(context_list)))
    sim = 0.0
    index = 0
    qas = answer + " " + question
    for i in range(len(context_list)):
        print("processing the " + str(i) + "-th sentence")
        score = f1_score(qas, context_list[i])
        if sim < score:
            sim = score
            index = i
            print("got a better index: " + str(index))
    print("the best index is: " + str(index))
    print("the best context is: " + context_list[index])
    return index
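
Here f1_score(qas, context_list[i]) compares two strings, as do Examples #5 and #8 below. A common definition consistent with that usage is the SQuAD-style token-overlap F1; a minimal sketch (an assumption about the helper, which this listing does not show):

from collections import Counter

def f1_score(prediction, ground_truth):
    """Token-overlap F1 between two whitespace-tokenized strings (sketch)."""
    pred_tokens = prediction.split()
    gt_tokens = ground_truth.split()
    common = Counter(pred_tokens) & Counter(gt_tokens)
    num_same = sum(common.values())
    if num_same == 0:
        return 0.0
    precision = num_same / len(pred_tokens)
    recall = num_same / len(gt_tokens)
    return 2 * precision * recall / (precision + recall)
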
Example #4
def calc_f1score():
    prs = [0.529, 0.5892]
    rrs = [0.6609, 0.6548]
    idx = [200, 800]

    for (i, pr, rr) in zip(idx, prs, rrs):
        print(i, pr, rr, round(f1_score(pr, rr), 4))
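
The two-argument f1_score(pr, rr) here (and again in Example #11) takes a precision and a recall directly, which suggests a plain harmonic mean. A minimal sketch consistent with that signature (an assumption, not the original code):

def f1_score(precision, recall):
    """Harmonic mean of precision and recall (sketch)."""
    if precision + recall == 0:
        return 0.0
    return 2 * precision * recall / (precision + recall)
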
Example #5
def decode_validate(model,
                    sess,
                    q_valid,
                    reverse_src_vocab,
                    reverse_tgt_vocab,
                    save_dir,
                    epoch,
                    sample=5,
                    print_decode=False):

    print_decode = print_decode if print_decode else FLAGS.print_decode
    num_decoded = 0

    # add f1, em measure on this decoding
    f1 = 0.
    em = 0.
    saved_list = []

    # since we did beam-decode, I can measure EM on the top-5 result

    with open(pjoin(save_dir, "valid_decode_e" + str(epoch) + ".txt"),
              "wb") as f:
        for source_tokens, source_mask, target_tokens, target_mask in pair_iter(
                q_valid, 1, FLAGS.input_len, FLAGS.query_len):
            # transpose them because how this model is set up
            source_tokens, source_mask, target_tokens, target_mask = source_tokens.T, source_mask.T, target_tokens.T, target_mask.T
            # seems like detokenize can handle batch
            src_sent = detokenize(source_tokens, reverse_src_vocab)
            tgt_sent = detokenize(target_tokens, reverse_tgt_vocab)

            # Encode
            encoder_output = model.encode(sess, source_tokens, source_mask)
            # Decode
            beam_toks, probs = decode_beam(model, sess, encoder_output,
                                           FLAGS.beam_size)
            # De-tokenize
            beam_strs = detokenize(beam_toks, reverse_tgt_vocab, decode=True)

            best_str = beam_strs[0]  # we can also get probability on them

            num_decoded += 1

            f1 += f1_score(best_str, " ".join(tgt_sent[1:]))
            # tgt_sent's first array element is always [""]
            em += exact_match_score(best_str, " ".join(tgt_sent[1:]))

            if num_decoded <= sample:
                logging.info("input: {}".format(" ".join(src_sent)))
                logging.info("truth: {}".format(" ".join(tgt_sent[1:])))
                logging.info("decoded: {}".format(best_str))
                logging.info("")

            saved_list.append({
                "input": src_sent,
                "truth": tgt_sent[1:],
                "decoded": best_str
            })

    return float(f1) / float(num_decoded), float(em) / float(
        num_decoded), saved_list
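
exact_match_score is likewise not defined in this listing. The usual SQuAD-style definition normalizes both strings before comparing them; a sketch under that assumption:

import re
import string

def normalize_answer(s):
    """Lowercase, drop punctuation and articles, collapse whitespace (sketch)."""
    s = s.lower()
    s = ''.join(ch for ch in s if ch not in set(string.punctuation))
    s = re.sub(r'\b(a|an|the)\b', ' ', s)
    return ' '.join(s.split())

def exact_match_score(prediction, ground_truth):
    """Return 1.0 if the normalized strings are identical, else 0.0 (sketch)."""
    return float(normalize_answer(prediction) == normalize_answer(ground_truth))
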
Example #6
 def output(self, step, train=True):
     p, r, f = util.f1_score(self.probs, self.labels, 0.5)
     ap = util.auc_pr(self.probs, self.labels)
     try:
         auc = util.auc_roc(self.probs, self.labels)
     except ValueError:
         auc = float('nan')
     print(
         "S:%d.  Precision: %.4f, Recall: %.4f, F-score: %.4f, AUC(PR): %.4f, AUC(ROC): %.4f, "
         % (step, p, r, f, ap, auc))
Example #7
 def losses(self, perclass=False, train=False):
     if not self.all_probs:
         return None
     probs = np.concatenate(self.all_probs)
     labels = np.concatenate(self.all_labels)
     # micro-averaged stats
     p, r, f = util.f1_score(probs, labels, 0.5)
     ap = util.auc_pr(probs, labels)
     try:
         auc = util.auc_roc(probs, labels)
     except ValueError:
         auc = float('nan')
     micro = [p, r, f, ap, auc]
     # macro-averaged stats
     p, r, f = util.f1_score(probs, labels, 0.5, average='macro')
     ap = util.auc_pr(probs, labels, average='macro')
     try:
         auc = util.auc_roc(probs, labels, average='macro')
     except ValueError:
         auc = float('nan')
     macro = [p, r, f, ap, auc]
     return micro, macro
Example #8
    def evaluate_answer(self, session, q, rev_src_vocab, rev_tgt_vocab, sample=100, print_every=100):
        # this is teacher-forcing evaluation, not even greedy decode
        f1 = 0.
        em = 0.
        size = 0.

        # python list: outputs = outputs[:outputs.index(data_utils.EOS_ID)]
        # but must make sure EOS_ID is in there otherwise this throws an error

        for inp_tokens, inp_mask, query_tokens, query_mask in pair_iter(q, self.batch_size, self.inp_len,
                                                                        self.query_len):
            # decoder_output = self.decode(session, inp_tokens, inp_mask, query_tokens, query_mask)
            encoder_output = self.get_encode(session, inp_tokens, inp_mask)
            decoder_output = self.decode_greedy_batch(session, encoder_output, self.batch_size)
            print(decoder_output)

            # decoder_tokens = np.argmax(decoder_output, axis=-1)

            # those are batched right now
            # decoder_tokens = np.squeeze(decoder_tokens) * query_mask
            # query_tokens = query_tokens * query_mask

            batch_size = inp_tokens.shape[0]
            # query_len = np.sum(query_mask, axis=1)

            for i in range(batch_size):
                decoder_token = self.detokenize(decoder_output[i,:], rev_tgt_vocab)
                query_token = self.detokenize(query_tokens[i,:], rev_tgt_vocab)

                f1 += f1_score(decoder_token, query_token)
                em += exact_match_score(decoder_token, query_token)

                size += 1

                if size % print_every == 0:
                    decoded_parse = decoder_token
                    true_parse = query_token
                    decoded_input = [rev_src_vocab[j] for j in inp_tokens[i, :] if j != data_util.PAD_ID]
                    print("input: {}".format(" ".join(decoded_input)))
                    print("decoded result: {}".format(decoded_parse))
                    print("ground truth result: {}".format(true_parse))

            if size >= sample:
                break

        f1 /= size
        em /= size

        return f1, em
Example #9
 def output(self, step, train=True):
     p, r, f = util.f1_score(self.probs, self.labels, 0.5)
     ap = util.auc_pr(self.probs, self.labels)
     try:
         auc = util.auc_roc(self.probs, self.labels)
     except ValueError:
         auc = float('nan')
     loss_str = "GS:%d, S:%d.  Loss: %.4f, Precision: %.4f, Recall: %.4f, F-score: %.4f, " \
                "AUC(PR): %.4f, AUC(ROC): %.4f" % (self.global_step, step, self.loss, p, r, f,
                                                   ap, auc)
     pr_strs = []
     for k in self.config.pr_at_k:
         pk = util.precision_at_k(self.probs, self.labels, k)
         rk = util.recall_at_k(self.probs, self.labels, k)
         pr_strs.append("Precision@%d: %.4f, Recall@%d: %.4f" %
                        (k, pk, k, rk))
     pr_str = ', '.join(pr_strs)
     wps_str = "WPS: %.2f" % self.wps
     print(', '.join([loss_str, pr_str, wps_str]))
Example #10
 def run_session(self, notes, lengths, labels, train=True):
     n_words = lengths.sum()
     start = time.time()
     notes = notes.tolist()
     lengths = lengths.tolist()
     X_raw = []
     for note, length in zip(notes, lengths):
         if not length:
             break
         note = note[1:length - 1]
         out_note = []
         for word in note:
             out_note.append(self.vocab.vocab[word])
         X_raw.append(' '.join(out_note))
     data = self.model.vectorizer.transform(X_raw, copy=False).toarray()
     labels = labels[:len(X_raw)]
     ops = [self.model.loss, self.model.probs, self.model.global_step]
     if train:
         ops.append(self.model.train_op)
     ret = self.session.run(ops,
                            feed_dict={
                                self.model.data: data,
                                self.model.labels: labels
                            })
     self.loss, self.probs, self.global_step = ret[:3]
     self.labels = labels
     # TODO remove this and use AUC(PR) to determine best hyperparameters:
     if self.config.bow_search and not train:
         prf = {}
         for thres in np.arange(0.1, 0.75, 0.1):
             prf[int(thres * 10)] = util.f1_score(self.probs,
                                                  labels,
                                                  thres,
                                                  average=None)[-1]
         self.current_stats.append(prf)
     end = time.time()
     self.wps = n_words / (end - start)
     self.accumulate()
Example #11
def group_eval(pred='./performance/result_val_sort.csv',
               label='../Data/val.csv'):
    pred_df = pd.read_csv(pred)
    label_df = pd.read_csv(label)

    pred_v = pred_df.values
    label_v = label_df.values

    print(pred_v.shape, label_v.shape)

    row, col = pred_v.shape

    prs = []
    rrs = []
    cnts = []

    sum_preds = np.sum(pred_v, axis=1)
    sum_labels = np.sum(label_v, axis=1)
    for i in range(row):
        cnt = 0
        for j in range(col):
            if pred_v[i][j] and label_v[i][j]:
                cnt += 1
        cnts.append(cnt)

        pr = cnt / sum_preds[i] if sum_preds[i] > 0 else 0
        rr = cnt / sum_labels[i] if sum_labels[i] > 0 else 0
        prs.append(pr)
        rrs.append(rr)

    # rank rows by true-positive count, descending
    idxs = np.argsort(cnts)[::-1]
    f1s = []
    for i, pr, rr in zip(idxs, np.array(prs)[idxs], np.array(rrs)[idxs]):
        f1 = f1_score(pr, rr)
        f1s.append(f1)
        print(i, round(100 * pr, 2), round(100 * rr, 2), round(100 * f1, 2))

    print('Macro rr: {}, pr: {}, f1: {}.'.format(
        np.round(100 * np.average(rrs), 2), np.round(100 * np.average(prs), 2),
        np.round(100 * np.average(f1s), 2)))

    cnt = 0
    for i in range(row):
        for j in range(col):
            if pred_v[i][j] and label_v[i][j]:
                cnt += 1

    pr = cnt / np.sum(pred_v)
    rr = cnt / np.sum(label_v)

    print('Micro rr: {}, pr: {}, f1: {}.'.format(
        round(100 * rr, 2), round(100 * pr, 2), round(100 * f1_score(rr, pr),
                                                      2)))

    arr_perts = sum_labels[idxs] / col
    # for i in range(len(idxs)):
    #     print(idxs[i], sum_labels[idxs][i], round(100*arr_perts[i], 2))
    arr_f1s = np.array(f1s)
    arr_rrs = np.array(rrs)[idxs]
    arr_prs = np.array(prs)[idxs]
    plt.figure()
    plt.title(pred.split('/')[-1])
    plt.plot(arr_prs, 'g-', label='pr')
    plt.plot(arr_rrs, 'b-', label='rr')
    plt.plot(arr_f1s, 'r-', label='f1')
    plt.plot(arr_perts, 'k.', label='percentage')
    plt.xticks(range(28), idxs)
    plt.legend()
    plt.grid()
    plt.xlim([0, 30])
    plt.ylim([0, 1])
    plt.show()
Example #12
 def pre_output(self, step, train=True):
     p, r, f = util.f1_score(self.pre_preds, self.labels, 0.5)
     print("GS:%d, S:%d.  Precision: %.4f, Recall: %.4f, F-score: %.4f" % (self.global_step,
                                                                           step, p, r, f))
Example #13
 def losses(self,
            perclass=False,
            train=False,
            max_samples_in_chunk=(30000, 50000)):
     '''Return the accumulated losses'''
     if not self.all_losses:
         return None
     if train:
         max_samples_in_chunk = max_samples_in_chunk[0]
     else:
         max_samples_in_chunk = max_samples_in_chunk[1]
     max_batches_in_chunk = max_samples_in_chunk / self.config.batch_size
     loss = np.mean(self.all_losses)
     splits = int(0.999 + (len(self.all_probs) / max_batches_in_chunk))
     chunk_size = int(0.999 + (len(self.all_probs) / splits))
     ret_micro = []
     ret_macro = []
     ret_perclass = []
     for i in range(0, len(self.all_probs), chunk_size):
         all_probs = self.all_probs[i:i + chunk_size]
         all_labels = self.all_labels[i:i + chunk_size]
         probs = np.concatenate(all_probs)
         labels = np.concatenate(all_labels)
         if self.config.test_labels > 0:
             probs = probs[:, :self.config.test_labels]
             labels = labels[:, :self.config.test_labels]
         # micro-averaged stats
         p, r, f = util.f1_score(probs, labels, 0.5)
         ap = util.auc_pr(probs, labels)
         try:
             auc = util.auc_roc(probs, labels)
         except ValueError:
             auc = float('nan')
         micro = [p, r, f, ap, auc]
         for k in self.config.pr_at_k:
             if train:
                 # don't spend time on this for train set
                 pk = float('nan')
                 rk = float('nan')
             else:
                 pk = util.precision_at_k(probs, labels, k)
                 rk = util.recall_at_k(probs, labels, k)
             micro.extend([pk, rk])
         # macro-averaged stats
         p, r, f = util.f1_score(probs, labels, 0.5, average='macro')
         if self.config.macro_auc:
             ap = util.auc_pr(probs, labels, average='macro')
             try:
                 auc = util.auc_roc(probs, labels, average='macro')
             except ValueError:
                 auc = float('nan')
         else:
             ap, auc = float('nan'), float('nan')
         macro = [p, r, f, ap, auc]
         # non-averaged stats for plotting
         if perclass:
             p, r, f = util.f1_score(probs, labels, 0.5, average=None)
             ap = util.auc_pr(probs, labels, average=None)
             try:
                 auc = util.auc_roc(probs, labels, average=None)
             except ValueError:
                 auc = float('nan')
             perclass = [p, r, f, ap, auc]
         else:
             perclass = float('nan')
         ret_micro.append(micro)
         ret_macro.append(macro)
         ret_perclass.append(perclass)
         if train:
             break
     return (loss, np.mean(ret_micro,
                           0), np.mean(ret_macro,
                                       0), np.mean(ret_perclass, 0))
Example #14
def train(net, trainIter, validIter, config):
    DEVICE = config['DEVICE']
    modelSavePath = config['modelSavePath']
    epochNum = config['model']['epochNum']
    learningRate = config['model']['learningRate']
    earlyStop = config['model']['earlyStop']

    # weight initialization
    for name, value in net.named_parameters():
        if 'pretrainedModel' not in name:
            if value.dim() > 1: nn.init.xavier_uniform_(value)

    # no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']

    # bert_param_no = [value for name, value in net.named_parameters() if name in no_decay and 'bertModel' in name]
    # bert_param_yes = [value for name, value in net.named_parameters() if name not in no_decay and 'bertModel' in name]

    # other_param_no = [value for name, value in net.named_parameters() if name in no_decay and 'bertModel' not in name]
    # other_param_yes = [value for name, value in net.named_parameters() if name not in no_decay and 'bertModel' not in name]

    # optimizer_grouped_parameters = [
    #     {'params': bert_param_yes, 'weight_decay': 0.01, 'lr': learningRate},
    #     {'params': bert_param_no, 'weight_decay': 0.0, 'lr': learningRate},
    #     {'params': other_param_yes, 'weight_decay': 0.01, 'lr': 0.001},
    #     {'params': other_param_no, 'weight_decay': 0.0, 'lr': 0.001}]

    bert_params = [
        value for name, value in net.named_parameters()
        if 'pretrainedModel' in name
    ]
    other_params = [
        value for name, value in net.named_parameters()
        if 'pretrainedModel' not in name
    ]

    params = [{
        'params': bert_params,
        'lr': 5e-5
    }, {
        'params': other_params,
        'lr': learningRate
    }]

    optimizer = AdamW(params, eps=1e-8)

    earlyNumber, beforeLoss = 0, sys.maxsize
    trainLossSave, validLossSave, f1ScoreSave, accurateSave, recallSave = 0, 0, 0, 0, 0

    for epoch in range(epochNum):
        print('Epoch %d\n' % (epoch + 1))
        # training
        net.train()
        trainLoss, number = 0, 0
        for batchSentence, batchTag, _, _ in tqdm(trainIter):
            batchSentence = batchSentence.to(DEVICE)
            batchTag = batchTag.to(DEVICE)
            net.zero_grad()
            loss = net(batchSentence, batchTag)
            # multi-GPU training
            if torch.cuda.device_count() > 1: loss = loss.mean()

            loss.backward()

            # gradient clipping
            nn.utils.clip_grad_norm_(net.parameters(), 1.0)

            optimizer.step()
            trainLoss += loss.item()
            number += 1
        trainLoss = trainLoss / number

        # validation
        net.eval()
        validLoss, number = 0, 0
        yTrue, yPre, ySentence, probArr = [], [], [], []
        with torch.no_grad():
            for batchSentence, batchTag, lenList, originSentence in tqdm(
                    validIter):
                batchSentence = batchSentence.to(DEVICE)
                batchTag = batchTag.to(DEVICE)
                loss = net(batchSentence, batchTag)
                # multi-GPU training
                if torch.cuda.device_count() > 1:
                    loss = loss.mean()
                    tagPre, prob = net.module.decode(batchSentence)
                else:
                    tagPre, prob = net.decode(batchSentence)
                tagTrue = [
                    element[:length]
                    for element, length in zip(batchTag.cpu().numpy(), lenList)
                ]
                yTrue.extend(tagTrue)
                yPre.extend(tagPre)
                ySentence.extend(originSentence)
                probArr.extend(prob)
                validLoss += loss.item()
                number += 1

        yTrue2tag = [[id2tag[element2] for element2 in element1]
                     for element1 in yTrue]
        yPre2tag = [[id2tag[element2] for element2 in element1]
                    for element1 in yPre]

        assert len(yTrue2tag) == len(yPre2tag)
        assert len(ySentence) == len(yTrue2tag)

        f1Score, accurate, recall = f1_score(y_true=yTrue2tag, y_pred=yPre2tag)

        validLoss = validLoss / number

        print('training loss: %f\n' % trainLoss)
        print('validation loss: %f / %f\n' % (validLoss, beforeLoss))
        print('f1_score, accurate, recall: %f, %f, %f\n' %
              (f1Score, accurate, recall))

        if validLoss < beforeLoss:
            beforeLoss = validLoss
            if torch.cuda.device_count() > 1:
                torch.save(net.module.state_dict(), modelSavePath)
            else:
                torch.save(net.state_dict(), modelSavePath)
            trainLossSave, validLossSave = trainLoss, validLoss
            f1ScoreSave, accurateSave, recallSave = f1Score, accurate, recall

            if 'validResultPath' in config.keys():
                path = config['validResultPath']
                f = open(path, 'w', encoding='utf-8', errors='ignore')
                for sentence, prob in zip(ySentence, probArr):
                    for sentenceEle, probEle in zip(sentence, prob):
                        probEle = '\t'.join(
                            [str(element) for element in probEle])
                        f.write('%s\t%s\n' % (sentenceEle, probEle))
                    f.write('\n')
                f.close()

        # early stopping
        if validLoss > beforeLoss:
            earlyNumber += 1
            print('earlyStop: %d / %d\n' % (earlyNumber, earlyStop))
        else:
            earlyNumber = 0
        if earlyNumber >= earlyStop:
            break

    # check the actual performance on the validation set

    ### temporary ###
    f = open('temp.txt', 'w', encoding='utf-8', errors='ignore')
    for sentence, trueTag, preTag in zip(ySentence, yTrue2tag, yPre2tag):
        trueEntity = '@'.join(
            acquireEntity([sentence], [trueTag], method='BIOES'))
        preEntity = '@'.join(
            acquireEntity([sentence], [preTag], method='BIOES'))

        if trueEntity != preEntity:
            f.write(''.join(sentence) + '\n')
            f.write('True:' + trueEntity + '\n')
            f.write('Pre:' + preEntity + '\n')
    f.close()

    return trainLossSave, validLossSave, f1ScoreSave, accurateSave, recallSave
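
Example #14 calls f1_score(y_true=..., y_pred=...) on lists of BIOES tag sequences and unpacks (f1, precision, recall). A sketch of an entity-level (span-level) scorer consistent with that call, using a hypothetical get_entities span extractor rather than the repo's acquireEntity:

def get_entities(tags):
    """Extract (start, end, type) spans from a BIOES tag sequence (sketch)."""
    entities, start, etype = [], None, None
    for i, tag in enumerate(tags):
        if tag.startswith('B-'):
            start, etype = i, tag[2:]
        elif tag.startswith('E-'):
            if start is not None and etype == tag[2:]:
                entities.append((start, i, etype))
            start, etype = None, None
        elif tag.startswith('S-'):
            entities.append((i, i, tag[2:]))
            start, etype = None, None
        elif not tag.startswith('I-'):  # 'O' or anything else ends an open span
            start, etype = None, None
    return entities

def f1_score(y_true, y_pred):
    """Entity-level F1, precision and recall over batches of tag sequences (sketch)."""
    tp = n_pred = n_true = 0
    for true_tags, pred_tags in zip(y_true, y_pred):
        true_set = set(get_entities(true_tags))
        pred_set = set(get_entities(pred_tags))
        tp += len(true_set & pred_set)
        n_pred += len(pred_set)
        n_true += len(true_set)
    precision = tp / n_pred if n_pred else 0.0
    recall = tp / n_true if n_true else 0.0
    f1 = 2 * precision * recall / (precision + recall) if precision + recall else 0.0
    return f1, precision, recall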