Example #1
def main():
    args = parse_args()
    print("Loading data file...")
    filename = args.data_dir + '/{}.json'.format(args.dataset)
    with open(filename, 'r', encoding='utf8') as infile:
        data = json.load(infile)
    labels = [d['relation'] for d in data]

    # read predictions

    #args.pred_files = ['./res_pkl/trans.pkl','./res_pkl/palstm.pkl']
    args.pred_files = ['./res_pkl/palstm.pkl']
    print("Loading {} prediction files...".format(len(args.pred_files)))
    scores_list = []
    for path in args.pred_files:
        with open(path, 'rb') as infile:
            scores = pickle.load(infile)
            scores_list += [scores]

    print("Calculating ensembled predictions...")
    predictions = []
    scores_by_examples = list(zip(*scores_list))
    assert len(scores_by_examples) == len(data)
    for scores in scores_by_examples:
        pred = ensemble(scores)
        predictions += [pred]
    id2label = dict([(v, k) for k, v in constant.LABEL_TO_ID.items()])
    predictions = [id2label[p] for p in predictions]
    scorer.score(labels, predictions, verbose=True)
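
The ensemble(...) helper used above is not shown in this snippet; a minimal sketch of what it might look like, assuming each element of scores is one model's probability vector for a single example and that ensembling is a plain average followed by argmax:

import numpy as np

def ensemble(scores):
    # Hypothetical stand-in for the helper above: average each model's
    # probability vector for one example and return the argmax label id.
    avg = np.mean(np.asarray(list(scores), dtype=float), axis=0)
    return int(np.argmax(avg))
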
Example #2
    def _valid_epoch(self):
        self.model.eval()
        val_loss = 0
        preds = []
        labels = []
        with torch.no_grad():
            for idx, inputs in enumerate(self.valid_data_loader):
                org_idx = inputs[-2]
                inputs = [
                    item.to(self.device) for item in inputs
                    if not isinstance(item, list)
                ]
                outputs, pooling_out = self.model(inputs[:-1])
                val_loss += self.criterion(outputs, inputs[-1])
                pred = F.softmax(outputs, 1).data.cpu().numpy().tolist()
                pred = np.argmax(pred, axis=1)
                preds.extend(pred)
                labels += inputs[-1].tolist()

        preds = [self.data_loader.id2label[pred] for pred in preds]
        labels = [self.data_loader.id2label[label] for label in labels]

        valid_p, valid_r, valid_f1 = scorer.score(labels, preds, verbose=True)
        self.logger.info(
            ' validation precision is : {:.3f}, validation recall is : {:.3f}, validation f1_macro is : {:.3f}, best score is : {}'
            .format(valid_p, valid_r, valid_f1, self.best_score))
        return val_loss / len(self.valid_data_loader), valid_f1
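
For context, a minimal sketch of how a _valid_epoch like the one above might be driven from a training loop; everything except _valid_epoch itself (the method names, the best-score handling, the checkpoint path) is an assumption, and torch and the logger are assumed to be available as in the class above:

    def train(self, num_epochs):
        for epoch in range(1, num_epochs + 1):
            self.model.train()
            self._train_epoch(epoch)                # assumed per-epoch training step
            val_loss, val_f1 = self._valid_epoch()  # (avg validation loss, macro F1)
            if self.best_score is None or val_f1 > self.best_score:
                self.best_score = val_f1
                torch.save(self.model.state_dict(), 'best_model.pt')  # illustrative path
            self.logger.info('epoch {}: val_loss = {:.4f}, val_f1 = {:.4f}'.format(
                epoch, val_loss, val_f1))
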
Example #3
def main():
    args = parse_args()
    print("Loading data file...")
    filename = args.data_dir + '/{}.json'.format(args.dataset)
    with open(filename, 'r', encoding='utf8') as infile:
        data = json.load(infile)
    labels = [d['relation'] for d in data]

    # read predictions
    print("Loading {} prediction files...".format(len(args.pred_files)))
    scores_list = []
    for path in args.pred_files:
        with open(path, 'rb') as infile:
            scores = pickle.load(infile)
            scores_list += [scores]

    print("Calculating ensembled predictions...")
    predictions = []
    scores_by_examples = list(zip(*scores_list))
    assert len(scores_by_examples) == len(data)
    for scores in scores_by_examples:
        pred = ensemble(scores)
        predictions += [pred]
    id2label = dict([(v, k) for k, v in constant.LABEL_TO_ID.items()])
    predictions = [id2label[p] for p in predictions]
    precision, recall, f1, mistake = scorer.score(labels,
                                                  predictions,
                                                  verbose=True)
    with open("logs.txt", 'a') as f:
        f.write("Temp:{}\tAlpha:{}\n".format(args.temp, args.alpha))
        f.write("Precision:%f\n" % precision)
        f.write("Recall:%f\n" % recall)
        f.write("F1:%f\n" % f1)
        f.write("-------------------\n")
Example #4
def select_samples(model_p, model_s, dataset_infer, k_samples, args,
                   default_distribution):
    max_upperbound = int(math.ceil(k_samples * args.selector_upperbound))
    # predictor selection
    meta_idxs_p, confidence_idxs_p = model_p.retrieve(
        dataset_infer, len(dataset_infer))  # retrieve all the samples
    print("Infer on predictor: ")  # Track performance of predictor alone
    gold, guess = [t[2] for t in meta_idxs_p[:k_samples]
                   ], [t[1] for t in meta_idxs_p[:k_samples]]
    scorer.score(gold, guess, verbose=False)
    scorer.score(gold, guess, verbose=False, NO_RELATION="-1")

    # for self-training
    if args.integrate_method == "p_only":
        return split_samples(dataset_infer, meta_idxs_p[:k_samples],
                             args.batch_size)

    # selector selection
    label_distribution = None
    if args.integrate_method == "s_only" or max_upperbound == 0:
        label_distribution = default_distribution
    else:
        label_distribution = get_relation_distribution(
            meta_idxs_p[:max_upperbound])

    def s_retrieve_fn(k_samples, label_distribution):
        return model_s.retrieve(dataset_infer,
                                k_samples,
                                label_distribution=label_distribution)

    ori_meta_idxs_s, _ = s_retrieve_fn(k_samples, label_distribution)
    print("Infer on selector: ")
    gold, guess = [t[2]
                   for t in ori_meta_idxs_s], [t[1] for t in ori_meta_idxs_s]
    scorer.score(gold, guess, verbose=False)
    scorer.score(gold, guess, verbose=False, NO_RELATION="-1")

    # If we only care about performance of selector
    if args.integrate_method == "s_only":
        return split_samples(dataset_infer, ori_meta_idxs_s)

    # integrate method
    if args.integrate_method == "intersection":
        meta_idxs, confidence_idxs_s = intersect_samples(
            meta_idxs_p, s_retrieve_fn, k_samples, label_distribution)
    else:
        raise NotImplementedError("integrate_method {} not implemented".format(
            args.integrate_method))
    confidence_dict_p = dict(
        (id, confidence) for id, confidence in confidence_idxs_p)
    confidence_dict_s = dict(
        (id, confidence) for id, confidence in confidence_idxs_s)
    return split_samples(dataset_infer,
                         meta_idxs,
                         conf_p=confidence_dict_p,
                         conf_s=confidence_dict_s)
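
get_relation_distribution(...) is referenced above but not defined in this snippet; a plausible sketch, assuming each meta index entry is an (id, predicted_label, gold_label) tuple as the surrounding list comprehensions suggest, and that the helper returns a normalized histogram over the predicted labels:

from collections import Counter

def get_relation_distribution(meta_idxs):
    # Hypothetical stand-in: relative frequency of each predicted label.
    counts = Counter(pred for _, pred, _ in meta_idxs)
    total = float(sum(counts.values())) or 1.0
    return {label: count / total for label, count in counts.items()}
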
Example #5
def evaluate_model(evalparams):

    torch.manual_seed(evalparams.seed)
    random.seed(1234)
    if evalparams.cpu:
        evalparams.cuda = False
    elif evalparams.cuda:
        torch.cuda.manual_seed(evalparams.seed)

    # load opt
    print(evalparams.model_dir, evalparams.model)
    #     model_file = evalparams.model_dir + "/" + evalparams.model
    model_file = 'best_model.pt'
    print("Loading model from {}".format(model_file))
    opt = torch_utils.load_config(model_file)
    model = RelationModel(opt)
    model.load(model_file)

    # load vocab
    vocab_file = evalparams.model_dir + '/vocab.pkl'
    vocab = Vocab(vocab_file, load=True)
    assert opt[
        'vocab_size'] == vocab.size, "Vocab size must match that in the saved model."

    # load data
    data_file = opt['data_dir'] + '/{}.json'.format(evalparams.dataset)
    print("Loading data from {} with batch size {}...".format(
        data_file, opt['batch_size']))
    batch = DataLoader(data_file,
                       opt['batch_size'],
                       opt,
                       vocab,
                       evaluation=True)

    helper.print_config(opt)
    id2label = dict([(v, k) for k, v in constant.LABEL_TO_ID.items()])

    predictions = []
    all_probs = []
    for i, b in enumerate(batch):
        preds, probs, _ = model.predict(b)
        predictions += preds
        all_probs += probs
    predictions = [id2label[p] for p in predictions]
    p, r, f1 = scorer.score(batch.gold(), predictions, verbose=True)

    # save probability scores
    if len(evalparams.out) > 0:
        helper.ensure_dir(os.path.dirname(evalparams.out))
        with open(evalparams.out, 'wb') as outfile:
            pickle.dump(all_probs, outfile)
        print("Prediction scores saved to {}.".format(evalparams.out))

    print("Evaluation ended.")

    return (batch.gold(), predictions, model)
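
The pickle written to evalparams.out here is the same kind of prediction file that the ensembling scripts in Examples #1 and #3 load back; a brief usage sketch with illustrative paths:

# Illustrative only: dump probability scores for one checkpoint, then point
# the ensembling script (Examples #1/#3) at the resulting pickle file(s).
evalparams.out = './res_pkl/palstm.pkl'
gold, predictions, model = evaluate_model(evalparams)

# In the ensembling script:
# args.pred_files = ['./res_pkl/palstm.pkl']   # add more files to ensemble
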
Example #6
def intersect_samples(meta_idxs1, s_retrieve_fn, k_samples,
                      prior_distribution):
    upperbound, meta_idxs, confidence_idxs_s = k_samples, [], []
    while len(meta_idxs) < min(k_samples, len(meta_idxs1)):
        upperbound = math.ceil(1.25 * upperbound)
        ori_meta_idxs_s, confidence_idxs_s = s_retrieve_fn(
            upperbound, prior_distribution)
        meta_idxs = sorted(
            set(meta_idxs1[:upperbound]).intersection(
                set(ori_meta_idxs_s)))[:k_samples]
        if upperbound > k_samples * 30:  # set a limit for growing upperbound
            break
    print("Infer on combination...")
    scorer.score([actual for _, _, actual in meta_idxs],
                 [pred for _, pred, _ in meta_idxs],
                 verbose=False)
    scorer.score([actual for _, _, actual in meta_idxs],
                 [pred for _, pred, _ in meta_idxs],
                 verbose=False,
                 NO_RELATION="-1")
    return meta_idxs, confidence_idxs_s
Example #7
def evaluate(model, dataset, evaluate_type='prf', verbose=False):
    rel_stoi, rel_itos = model.opt['rel_stoi'], model.opt['rel_itos']
    iterator_test = data.Iterator(dataset=dataset,
                                  batch_size=model.opt['batch_size'],
                                  repeat=False,
                                  train=True,
                                  shuffle=False,
                                  sort=True,
                                  sort_key=lambda x: -len(x.token),
                                  sort_within_batch=False)

    if evaluate_type == 'prf':
        predictions = []
        all_probs = []
        golds = []
        all_loss = 0
        for batch in iterator_test:
            inputs, target = batch_to_input(batch, model.opt['vocab_pad_id'])
            preds, probs, loss = model.predict(inputs, target)
            predictions += preds
            all_probs += probs
            all_loss += loss
            golds += target.data.tolist()
        predictions = [rel_itos[p] for p in predictions]
        golds = [rel_itos[p] for p in golds]
        p, r, f1 = scorer.score(golds, predictions, verbose=verbose)
        return p, r, f1, all_loss
    elif evaluate_type == 'auc':
        logits, labels = [], []
        for batch in iterator_test:
            inputs, target = batch_to_input(batch, model.opt['vocab_pad_id'])
            logits += model.predict(inputs)[0]
            labels += batch.relation.data.numpy().tolist()
        p, q = 0, 0
        for rel in range(len(rel_itos)):
            if rel == rel_stoi['no_relation']:
                continue
            logits_rel = [logit[rel] for logit in logits]
            labels_rel = [1 if label == rel else 0 for label in labels]
            ranking = list(zip(logits_rel, labels_rel))
            ranking = sorted(ranking, key=lambda x: x[0], reverse=True)
            logits_rel, labels_rel = zip(*ranking)
            p += scorer.AUC(logits_rel, labels_rel)
            q += 1

        dev_auc = p / q * 100
        return dev_auc, None, None, None
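
scorer.AUC is applied above to logits and 0/1 labels already sorted by descending logit; a minimal rank-based stand-in, assuming it is meant to compute the ROC AUC of that ranking (the real implementation is not shown here):

def rank_auc(logits_rel, labels_rel):
    # Hypothetical stand-in for scorer.AUC: Mann-Whitney / rank-sum form of
    # the ROC AUC, ignoring ties between scores.
    n_pos = sum(labels_rel)
    n_neg = len(labels_rel) - n_pos
    if n_pos == 0 or n_neg == 0:
        return 0.0
    order = sorted(range(len(logits_rel)), key=lambda i: logits_rel[i])  # ascending scores
    rank_sum = sum(rank + 1 for rank, i in enumerate(order) if labels_rel[i] == 1)
    return (rank_sum - n_pos * (n_pos + 1) / 2.0) / float(n_pos * n_neg)
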
Example #8
def get_scores(data_file, opt, vocab, model):
    print(
        "Loading data from {} with batch size {}...".format(
            data_file, opt["batch_size"]
        )
    )
    batch = DataLoader(data_file, opt["batch_size"], opt, vocab, evaluation=True)

    predictions = []
    all_probs = []
    for i, b in enumerate(batch):
        preds, probs, attn_weights, _ = model.predict(b)
        predictions += preds
        all_probs += probs
    predictions = [id2label[p] for p in predictions]
    
    # print("predictions")
    # for a, b in zip(batch.gold(), predictions):
    # 	print(f"{a:<28} {b:<28}")

    p, r, f1 = scorer.score(batch.gold(), predictions, verbose=False)
    return p, r, f1
Example #9
    for i, b in enumerate(batch_iter):
        preds, probs, loss = trainer.predict(b)
        predictions += preds
        all_probs += probs
        losses += loss
    prob_a = np.array(all_probs)
    prob_list.append(prob_a)

#prob_all = (prob_list[0] + prob_list[1] + prob_list[2] + prob_list[3] + prob_list[4])/5
prob_all = (prob_list[0] + prob_list[1] + prob_list[2] + prob_list[3]) / 4

# pa_tree_file = 'saved_models/other_method/pa_tree_pro.npy'
# pa_tree_pro = np.load(pa_tree_file)
#
# prob_all = 0.25 * pa_tree_pro + 0.75 * prob_all

prob_all = torch.from_numpy(prob_all)

pre_out = np.argmax(prob_all.data.cpu().numpy(), axis=1).tolist()

label_out = [id2label[p] for p in pre_out]

p, r, f1 = scorer.score(batch.gold(), label_out, verbose=True)
print("{} set evaluate result: {:.2f}\t{:.2f}\t{:.2f}".format(
    args.dataset, p, r, f1))
# losses = losses / len(batch_iter)
# print(losses)

print("Evaluation ended.")
print('a')
Example #10
            duration = time.time() - start_time
            print(format_str.format(datetime.now(), global_step, max_steps, epoch,\
                    opt['num_epoch'], loss, duration, current_lr))

    # eval on dev
    print("Evaluating on dev set...")
    dev_predictions = []
    dev_loss = 0
    for i, batch in enumerate(dev_batch):
        preds, _, loss = trainer.predict(batch)
        dev_predictions += preds
        dev_loss += loss
    dev_predictions = [id2label[p] for p in dev_predictions]
    dev_loss = dev_loss / dev_batch.num_examples * opt['batch_size']

    dev_metrics, _ = scorer.score(dev_batch.gold(), dev_predictions)
    dev_p, dev_r, dev_f1 = dev_metrics['precision'], dev_metrics[
        'recall'], dev_metrics['f1']
    print("epoch {}: train_loss = {:.6f}, dev_loss = {:.6f}, dev_f1 = {:.4f}".
          format(epoch, train_loss, dev_loss, dev_f1))
    dev_score = dev_f1
    file_logger.log("{}\t{:.6f}\t{:.6f}\t{:.4f}\t{:.4f}".format(
        epoch, train_loss, dev_loss, dev_score,
        max([dev_score] + dev_score_history)))
    current_dev_metrics = {'f1': dev_f1, 'precision': dev_p, 'recall': dev_r}

    # eval on test
    print("Evaluating on test set...")
    test_predictions = []
    for i, batch in enumerate(test_batch):
        preds, _, loss = trainer.predict(batch)
Example #11
            print(format_str.format(datetime.now(), global_step, max_steps, epoch,\
                    opt['num_epoch'], loss, duration, current_lr))

    # eval on train
    print("Evaluating on train set...")
    train_predictions = []
    train_eval_loss = 0
    for i, batch in enumerate(train_batch):
        preds, _, loss = trainer.predict(batch)
        train_predictions += preds
        train_eval_loss += loss
    train_predictions = [id2label[p] for p in train_predictions]
    train_eval_loss = train_eval_loss / train_batch.num_examples * opt[
        'batch_size']

    train_p, train_r, train_f1 = scorer.score(train_batch.gold(),
                                              train_predictions)
    print(
        "epoch {}: train_loss = {:.6f}, train_eval_loss = {:.6f}, dev_f1 = {:.4f}"
        .format(epoch, train_loss, train_eval_loss, train_f1))
    train_score = train_f1
    # file_logger.log("{}\t{:.6f}\t{:.6f}\t{:.4f}".format(epoch, train_loss, train_eval_loss, train_f1))

    # eval on dev
    print("Evaluating on dev set...")
    dev_predictions = []
    dev_loss = 0
    for i, batch in enumerate(dev_batch):
        preds, _, loss = trainer.predict(batch)
        dev_predictions += preds
        dev_loss += loss
    dev_predictions = [id2label[p] for p in dev_predictions]
Example #12
data_file = opt['data_dir'] + '/test.json'
print("Loading data from {} with batch size {}...".format(
    data_file, opt['batch_size']))
batch = DataLoader(data_file, opt['batch_size'], opt, vocab, evaluation=True)

helper.print_config(opt)
id2label = dict([(v, k) for k, v in constant.LABEL_TO_ID.items()])

predictions = []
all_probs = []
for i, b in enumerate(batch):
    preds, probs, _ = model.predict(b)
    predictions += preds
    all_probs += probs
predictions = [id2label[p] for p in predictions]
metrics, other_data = scorer.score(batch.gold(), predictions, verbose=True)

p = metrics['precision']
r = metrics['recall']
f1 = metrics['f1']

wrong_indices = other_data['wrong_indices']
correct_indices = other_data['correct_indices']
wrong_predictions = other_data['wrong_predictions']

raw_data = np.array(batch.raw_data)
wrong_data = raw_data[wrong_indices]
correct_data = raw_data[correct_indices]

wrong_ids = [d['id'] for d in wrong_data]
correct_ids = [d['id'] for d in correct_data]
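
A short follow-up sketch for dumping the misclassified examples collected above so they can be inspected by hand; the output path is illustrative, and it assumes wrong_predictions is aligned with wrong_indices and that each raw example carries 'id' and 'relation' fields as in the other examples:

import json

error_report = [
    {'id': d['id'], 'gold': d['relation'], 'predicted': pred}
    for d, pred in zip(wrong_data, wrong_predictions)
]
with open('errors.json', 'w') as f:     # illustrative output path
    json.dump(error_report, f, indent=2)
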
Example #13
                    opt['num_epoch'], loss, duration, current_lr))

    # eval on dev
    print("Evaluating on dev set...")
    predictions = []
    dev_loss = 0
    for i, batch in enumerate(dev_batch):
        preds, _, loss = trainer.predict(batch)
        predictions += preds
        dev_loss += loss
    predictions = [id2label[p] for p in predictions]
    train_loss = train_loss / train_batch.num_examples * opt[
        'batch_size']  # avg loss per batch
    dev_loss = dev_loss / dev_batch.num_examples * opt['batch_size']

    dev_p, dev_r, dev_f1 = scorer.score(dev_batch.gold(), predictions)
    print("epoch {}: train_loss = {:.6f}, dev_loss = {:.6f}, dev_f1 = {:.4f}".format(epoch,\
        train_loss, dev_loss, dev_f1))
    dev_score = dev_f1
    file_logger.log("{}\t{:.6f}\t{:.6f}\t{:.4f}\t{:.4f}".format(
        epoch, train_loss, dev_loss, dev_score,
        max([dev_score] + dev_score_history)))

    # save
    model_file = model_save_dir + '/checkpoint_epoch_{}.pt'.format(epoch)
    trainer.save(model_file, epoch)
    if epoch == 1 or dev_score > max(dev_score_history):
        copyfile(model_file, model_save_dir + '/best_model.pt')
        print("new best model saved.")
        file_logger.log("new best model saved at epoch {}: {:.2f}\t{:.2f}\t{:.2f}"\
            .format(epoch, dev_p*100, dev_r*100, dev_score*100))
Example #14
print("Loading model from {}".format(model_file))
opt = torch_utils.load_config(model_file)
model = RelationModel(opt)
model.load(model_file)

# load vocab
vocab_file = args.model_dir + '/vocab.pkl'
vocab = Vocab(vocab_file, load=True)
assert opt['vocab_size'] == vocab.size, "Vocab size must match that in the saved model."

# load data
data_file = opt['data_dir'] + '/{}.json'.format(args.dataset)
print("Loading data from {} with batch size {}...".format(data_file, opt['batch_size']))
eval_batch = BatchLoader(data_file, opt['batch_size'], opt, vocab, evaluation=True)

helper.print_config(opt)
id2label = dict([(v,k) for k,v in constant.LABEL_TO_ID.items()])

predictions = []
all_probs = []
for batch in eval_batch.data:
    preds, probs, _ = model.predict(batch)
    predictions += preds
    all_probs += probs
predictions = [id2label[p] for p in predictions]
p, r, f1 = scorer.score(eval_batch.gold(), predictions, verbose=True)
print("{} set evaluate result: {:.2f}\t{:.2f}\t{:.2f}".format(args.dataset,p,r,f1))

print("Evaluation ended.")

Example #15
def transre_search(ffn, connect, hidden_dim, trans_layers, multi_heads,
                   ffn_ex_size, initial, final):
    opt['weighted'] = False
    opt['rnn'] = False
    opt['ffn'] = ffn
    opt['connect'] = connect
    opt['hidden_dim'] = hidden_dim
    opt['trans_layers'] = trans_layers
    opt['multi_heads'] = multi_heads
    opt['ffn_ex_size'] = ffn_ex_size
    opt['initial'] = initial
    opt['final'] = final

    id = opt['id'] if len(opt['id']) > 1 else '0' + opt['id']
    model_name = str(opt['optim']) + '_' + str(opt['lr']) + str(ffn) + '_' + str(connect) + '_' \
                 + str(hidden_dim) + '_' + str(trans_layers) + '_' + str(multi_heads) + '_' \
                 + str(ffn_ex_size) + '_' + str(initial) + '_' + str(final)
    model_name = model_name + '' + str(opt['memo'])

    model_name = str(id) + "_" + model_name

    model_save_dir = opt['save_dir'] + '/' + model_name
    opt['model_save_dir'] = model_save_dir
    helper.ensure_dir(model_save_dir, verbose=True)

    # save config
    helper.save_config(opt, model_save_dir + '/config.json', verbose=True)
    vocab.save(model_save_dir + '/vocab.pkl')
    file_logger = helper.FileLogger(
        model_save_dir + '/' + opt['log'],
        header="# epoch\ttrain_loss\tdev_loss\tdev_score\tbest_dev_score")
    helper.print_config(opt)

    if not opt['load']:
        trainer = TransTrainer(opt, emb_matrix=emb_matrix)
    else:
        # load pre-train model
        model_file = opt['model_file']
        print("Loading model from {}".format(model_file))
        model_opt = torch_utils.load_config(model_file)
        model_opt['optim'] = opt['optim']
        trainer = TransTrainer(model_opt)
        trainer.load(model_file)

    id2label = dict([(v, k) for k, v in label2id.items()
                     ])  # the classification result
    dev_score_history = []
    dev_loss_history = []
    current_lr = opt['lr']

    global_step = 0
    format_str = '{}: step {}/{} (epoch {}/{}), loss = {:.6f} ({:.3f} sec/batch), lr: {:.6f}'
    max_steps = len(train_batch) * opt['num_epoch']

    best_result = "unknown"
    file_logger.log(str(opt['memo']))
    for epoch in range(1, opt['num_epoch'] + 1):
        train_loss = 0
        epoch_start_time = time.time()
        for i, batch in enumerate(train_batch):
            start_time = time.time()
            global_step += 1
            loss, norm = trainer.update(batch)
            train_loss += loss
            if global_step % opt['log_step'] == 0:
                duration = time.time() - start_time
                print(
                    format_str.format(datetime.now(), global_step, max_steps,
                                      epoch, opt['num_epoch'], loss, duration,
                                      current_lr))

        print("Evaluating on dev set...")
        predictions = []
        dev_loss = 0
        for i, batch in enumerate(dev_batch):
            preds, _, loss, _ = trainer.predict(batch)
            predictions += preds
            dev_loss += loss
        predictions = [id2label[p] for p in predictions]
        train_loss = train_loss / train_batch.num_examples * opt[
            'batch_size']  # avg loss per batch
        dev_loss = dev_loss / dev_batch.num_examples * opt['batch_size']

        acc, dev_p, dev_r, dev_f1 = scorer.score(dev_batch.gold(), predictions)
        print(
            "epoch {}: train_loss = {:.6f}, dev_loss = {:.6f}, dev_f1 = {:.4f}"
            .format(epoch, train_loss, dev_loss, dev_f1))
        dev_score = dev_f1
        file_logger.log("{}\t{:.3f}\t{:.6f}\t{:.6f}\t{:.4f}\t{:.4f}".format(
            epoch, acc, train_loss, dev_loss, dev_score,
            max([dev_score] + dev_score_history)))

        # save
        model_file = model_save_dir + '/checkpoint_epoch_{}.pt'.format(epoch)
        trainer.save(model_file, epoch)

        if epoch == 1 or dev_score > max(dev_score_history):
            copyfile(model_file, model_save_dir + '/best_model.pt')
            best_result = (model_name, dev_score)
            print("new best model saved.")
            file_logger.log(
                "new best model saved at epoch {}: {:.2f}\t{:.2f}\t{:.2f}".
                format(epoch, dev_p * 100, dev_r * 100, dev_score * 100))
        if epoch % opt['save_epoch'] != 0:
            os.remove(model_file)

        # lr schedule
        if len(dev_score_history
               ) > opt['decay_epoch'] and dev_score <= dev_score_history[
                   -1] and opt['optim'] in ['sgd', 'adagrad', 'adadelta']:
            current_lr *= opt['lr_decay']
            trainer.update_lr(current_lr)

        dev_score_history += [dev_score]
        dev_loss_history += [dev_loss]
        epoch_end_time = time.time()
        print("epoch time {:.3f}".format(epoch_end_time - epoch_start_time))
    return best_result
Example #16
batch = DataLoader(data_file, opt['batch_size'], opt, vocab, evaluation=True)

helper.print_config(opt)
label2id = constant.LABEL_TO_ID
id2label = dict([(v, k) for k, v in label2id.items()])

predictions = []
all_probs = []
batch_iter = tqdm(batch)
for i, b in enumerate(batch_iter):
    preds, probs, _ = trainer.predict(b)
    predictions += preds
    all_probs += probs

predictions = [id2label[p] for p in predictions]
p, r, f1 = scorer.score(batch.gold(), predictions)

fjson = open(data_file, 'r')
origin_data = json.load(fjson)
fjson.close()
with open("eval_output.txt", 'a') as f:
    f.write("True Label\tPrediction\tSubject\tObject\tSentence\n")
    for i in range(len(predictions)):
        if batch.gold()[i] != predictions[i]:
            ss = origin_data[i]['subj_start']
            se = origin_data[i]['subj_end']
            os = origin_data[i]['obj_start']
            oe = origin_data[i]['obj_end']

            token = origin_data[i]['token']
            subj = " ".join(token[ss:se + 1])
Example #17
def train_unbiased_model(args, biased_batch_probs):
    # make opt
    opt = vars(args)
    opt["num_class"] = len(constant.LABEL_TO_ID)

    # load vocab
    vocab_file = opt['vocab_dir'] + '/vocab.pkl'
    vocab = Vocab(vocab_file, load=True)
    opt['vocab_size'] = vocab.size
    emb_file = opt['vocab_dir'] + '/embedding.npy'
    emb_matrix = np.load(emb_file)
    assert emb_matrix.shape[0] == vocab.size
    assert emb_matrix.shape[1] == opt['emb_dim']

    # load data
    print("Loading data from {} with batch size {}...".format(
        opt["data_dir"], opt["batch_size"]))
    train_batch = DataLoader(
        opt["data_dir"] + "/" + args.data_name,
        opt["batch_size"],
        opt,
        vocab,
        evaluation=False,
    )
    dev_batch = DataLoader(opt["data_dir"] + "/dev.json",
                           opt["batch_size"],
                           opt,
                           vocab,
                           evaluation=True)

    model_id = opt["id"] if len(opt["id"]) > 1 else "0" + opt["id"]
    model_save_dir = opt["save_dir"] + "/" + model_id
    opt["model_save_dir"] = model_save_dir
    helper.ensure_dir(model_save_dir, verbose=True)

    # save config
    helper.save_config(opt, model_save_dir + "/config.json", verbose=True)
    vocab.save(model_save_dir + "/vocab.pkl")
    file_logger = helper.FileLogger(
        model_save_dir + "/" + opt["log"],
        header="# epoch\ttrain_loss\tdev_loss\tdev_f1")

    # print model info
    helper.print_config(opt)

    # model
    model = RelationModel(opt, emb_matrix=emb_matrix)

    id2label = dict([(v, k) for k, v in constant.LABEL_TO_ID.items()])
    dev_f1_history = []
    current_lr = opt["lr"]

    global_step = 0
    global_start_time = time.time()
    format_str = (
        "{}: step {}/{} (epoch {}/{}), loss = {:.6f} ({:.3f} sec/batch), lr: {:.6f}"
    )
    max_steps = len(train_batch) * opt["num_epoch"]

    # start training
    for epoch in range(1, opt["num_epoch"] + 1):
        train_loss = 0
        for i, batch in enumerate(train_batch):
            start_time = time.time()
            global_step += 1
            loss = model.update(batch,
                                torch.tensor(biased_batch_probs[i]).cuda())
            train_loss += loss
            if global_step % opt["log_step"] == 0:
                duration = time.time() - start_time
                print(
                    format_str.format(
                        datetime.now(),
                        global_step,
                        max_steps,
                        epoch,
                        opt["num_epoch"],
                        loss,
                        duration,
                        current_lr,
                    ))

        # eval on dev
        print("Evaluating on dev set...")
        predictions = []
        dev_loss = 0
        for i, batch in enumerate(dev_batch):
            preds, _, loss = model.predict(batch)
            predictions += preds
            dev_loss += loss
        predictions = [id2label[p] for p in predictions]
        dev_p, dev_r, dev_f1 = scorer.score(dev_batch.gold(), predictions)

        f = open("label.txt", "w+")
        f.write(str(dev_batch.gold()))
        f.close()

        train_loss = (train_loss / train_batch.num_examples * opt["batch_size"]
                      )  # avg loss per batch
        dev_loss = dev_loss / dev_batch.num_examples * opt["batch_size"]
        print(
            "epoch {}: train_loss = {:.6f}, dev_loss = {:.6f}, dev_f1 = {:.4f}"
            .format(epoch, train_loss, dev_loss, dev_f1))
        file_logger.log("{}\t{:.6f}\t{:.6f}\t{:.4f}".format(
            epoch, train_loss, dev_loss, dev_f1))

        # save
        model_file = model_save_dir + "/checkpoint_epoch_{}.pt".format(epoch)
        model.save(model_file, epoch)
        if epoch == 1 or dev_f1 > max(dev_f1_history):
            copyfile(model_file, model_save_dir + "/best_model.pt")
            print("new best model saved.")
        if epoch % opt["save_epoch"] != 0:
            os.remove(model_file)

        # lr schedule
        if (len(dev_f1_history) > 10 and dev_f1 <= dev_f1_history[-1]
                and opt["optim"] in ["sgd", "adagrad"]):
            current_lr *= opt["lr_decay"]
            model.update_lr(current_lr)

        dev_f1_history += [dev_f1]
        print("")

    print("Training ended with {} epochs.".format(epoch))
Example #18
        inputs['words'], inputs['length'] = batch.token
        inputs['pos'] = batch.pos
        inputs['ner'] = batch.ner
        inputs['subj_pst'] = batch.subj_pst
        inputs['obj_pst'] = batch.obj_pst
        inputs['masks'] = torch.eq(batch.token[0], opt['vocab_pad_id'])

        target = batch.relation

        preds, _, loss = model.predict(inputs, target)
        predictions += preds
        dev_loss += loss
        golds += target.data.tolist()
    predictions = [RELATION.vocab.itos[p] for p in predictions]
    golds = [RELATION.vocab.itos[p] for p in golds]
    dev_p, dev_r, dev_f1 = scorer.score(golds, predictions)
    
    # print training information
    train_loss = train_loss / len(iterator_train) * opt['batch_size'] # avg loss per batch
    dev_loss = dev_loss / len(iterator_dev) * opt['batch_size']
    print("epoch {}: train_loss = {:.6f}, dev_loss = {:.6f}, dev_f1 = {:.4f}".format(epoch,\
            train_loss, dev_loss, dev_f1))
    file_logger.log("{}\t{:.6f}\t{:.6f}\t{:.4f}".format(epoch, train_loss, dev_loss, dev_f1))

    # save the current model
    model_file = model_save_dir + '/checkpoint_epoch_{}.pt'.format(epoch)
    model.save(model_file, epoch)
    if epoch == 1 or dev_f1 > max(dev_f1_history):
        copyfile(model_file, model_save_dir + '/best_model.pt')
        print("new best model saved.")
    if epoch % opt['save_epoch'] != 0:
        os.remove(model_file)
Example #19
helper.print_config(opt)
label2id = constant.LABEL_TO_ID
id2label = dict([(v, k) for k, v in label2id.items()])

predictions = []
all_probs = []
all_ids = []
for i, b in enumerate(loaded):
    preds, probs, _, ids = trainer.predict_with_confidence(b)
    predictions += preds
    all_probs += probs
    all_ids += ids

predictions = [id2label[p] for p in predictions]
p, r, f1 = scorer.score(loaded.gold(), predictions, verbose=True)
print("{} set evaluate result: {:.2f}\t{:.2f}\t{:.2f}".format(
    args.dataset, p, r, f1))

if args.trace_file is not None:
    print(f'Creating trace file "{args.trace_file}"')

    with open(args.trace_file, 'w', encoding='utf-8',
              newline='') as trace_file:
        csv_writer = csv.writer(trace_file)

        csv_writer.writerow(['id', 'gold', 'predicted', 'probability'])

        for id, gold, prediction, probability in zip(all_ids, loaded.gold(),
                                                     predictions, all_probs):
            csv_writer.writerow([id, gold, prediction, probability])
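
The trace file written above can be re-read later for error analysis; a minimal sketch counting misses per gold label, assuming only the column layout used by the writer above:

import csv
from collections import Counter

def count_misses(trace_path):
    # Count, per gold label, how often the prediction differed, given a
    # trace CSV with columns id, gold, predicted, probability.
    misses = Counter()
    with open(trace_path, newline='', encoding='utf-8') as f:
        for row in csv.DictReader(f):
            if row['gold'] != row['predicted']:
                misses[row['gold']] += 1
    return misses
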
Example #20
sent_predictions = []
batch_iter = tqdm(batch)
for i, b in enumerate(batch_iter):
    preds, probs, _, sent_preds = trainer.predict(b)
    predictions += preds
    all_probs += probs
    sent_predictions += sent_preds

lens = [len(p) for p in predictions]

predictions = [[id2label[l + 1]] for p in predictions for l in p]
sent_predictions = [sent_id2label[p] for p in sent_predictions]
#print(len(predictions))
#print(len(batch.gold()))
p, r, f1 = scorer.score(batch.gold(),
                        predictions,
                        verbose=True,
                        verbose_output=args.per_class == 1)

print('scores from sklearn: ')
macro_f1 = f1_score(batch.gold(), predictions, average='macro')
micro_f1 = f1_score(batch.gold(), predictions, average='micro')
macro_p = precision_score(batch.gold(), predictions, average='macro')
micro_p = precision_score(batch.gold(), predictions, average='micro')
macro_r = recall_score(batch.gold(), predictions, average='macro')
micro_r = recall_score(batch.gold(), predictions, average='micro')
print('micro scores: ')
print('micro P: ', micro_p)
print('micro R: ', micro_r)
print('micro F1: ', micro_f1)
print("")
print("macro scores: ")
Example #21
        model_opt = torch_utils.load_config(model_file)
        model_opt['optim'] = opt['optim']
        model_opt['lr'] = opt['lr']
        model_opt['lr_decay'] = opt['lr_decay']
        trainer = GCNTrainer(model_opt)
        trainer.load(model_file)
        # model_file = "saved_models/02/" + subj + "_" + obj + "_" + "best_model.pt"
        print("Evaluating on dev set...")
        predictions = []
        dev_loss = 0
        for i, batch in enumerate(dev_batch):
            preds, probs, loss, samples = trainer.predict(batch)
            predictions += preds
            dev_loss += loss
        predictions = [id2label[p] for p in predictions]
        dev_p, dev_r, dev_f1 = scorer.score(dev_batch, predictions)

        test_loss = 0
        predictions = []
        for i, batch in enumerate(test_batch):
            preds, _, loss, samples = trainer.predict(batch)
            predictions += preds
            test_loss += loss
        predictions = [id2label[p] for p in predictions]
        test_loss = test_loss / test_batch.num_examples * opt['batch_size']
        test_p, test_r, test_f1 = scorer.score(test_batch, predictions)
        score_history += [dev_f1]
        test_score_history += [test_f1]
    stand = 3
    for epoch in range(1, opt['num_epoch'] + 1):
        # if (not train_batch.NoAugData()):
Example #22
def main():
    # set top-level random seeds
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)
    random.seed(args.seed)

    if args.cpu:
        args.cuda = False
    elif args.cuda:
        # force random seed for reproducibility
        # also apply same seed to numpy in every file
        torch.backends.cudnn.deterministic = True
        torch.cuda.manual_seed(args.seed)
        torch.cuda.manual_seed_all(args.seed)

    # make opt
    opt = vars(args)
    opt['num_class'] = len(constant.LABEL_TO_ID)

    # load vocab
    vocab_file = opt['vocab_dir'] + '/vocab.pkl'
    vocab = Vocab(vocab_file, load=True)

    # in some previous experiments we saw that lower vocab size can improve performance
    # but it was in a completely different project although on the same data
    # here it seems it's much harder to get this to work
    # uncomment the following line if this is solved:
    # new_vocab_size = 30000

    opt['vocab_size'] = vocab.size
    emb_file = opt['vocab_dir'] + '/embedding.npy'
    emb_matrix = np.load(emb_file)
    assert emb_matrix.shape[0] == vocab.size
    assert emb_matrix.shape[1] == opt['emb_dim']

    # load data
    print("Loading data from {} with batch size {}...".format(
        opt['data_dir'], opt['batch_size']))
    train_batch = DataLoader(opt['data_dir'] + '/train.json',
                             opt['batch_size'],
                             opt,
                             vocab,
                             evaluation=False)
    dev_batch = DataLoader(opt['data_dir'] + '/dev.json',
                           opt['batch_size'],
                           opt,
                           vocab,
                           evaluation=True)

    model_id = opt['id'] if len(opt['id']) > 1 else '0' + opt['id']
    model_save_dir = opt['save_dir'] + '/' + model_id
    opt['model_save_dir'] = model_save_dir
    helper.ensure_dir(model_save_dir, verbose=True)

    # save config
    helper.save_config(opt, model_save_dir + '/config.json', verbose=True)
    vocab.save(model_save_dir + '/vocab.pkl')
    file_logger = helper.FileLogger(
        model_save_dir + '/' + opt['log'],
        header="# epoch\ttrain_loss\tdev_loss\tdev_p\tdev_r\tdev_f1")

    # print model info
    helper.print_config(opt)

    # model
    model = RelationModel(opt, emb_matrix=emb_matrix)

    id2label = dict([(v, k) for k, v in constant.LABEL_TO_ID.items()])
    dev_f1_history = []
    current_lr = opt['lr']

    global_step = 0

    format_str = '{}: step {}/{} (epoch {}/{}), loss = {:.6f} ({:.3f} sec/batch), lr: {:.6f}'
    max_steps = len(train_batch) * opt['num_epoch']

    # setup the scheduler for lr decay
    # this doesn't seem to work well compared to what we already have
    # scheduler = ReduceLROnPlateau(model.optimizer, mode='min', factor=opt['lr_decay'], patience=1)

    # start training
    for epoch in range(1, opt['num_epoch'] + 1):
        # TODO: if lr warmup is used, the lr console output is not updated
        print(
            "Current params: " + " heads-" + str(opt["n_head"]) +
            " enc_layers-" + str(opt["num_layers_encoder"]),
            " drop-" + str(opt["dropout"]) + " scaled_drop-" +
            str(opt["scaled_dropout"]) + " lr-" + str(opt["lr"]),
            " lr_decay-" + str(opt["lr_decay"]) + " max_grad_norm-" +
            str(opt["max_grad_norm"]))
        print(
            " weight_no_rel-" + str(opt["weight_no_rel"]) + " weight_rest-" +
            str(opt["weight_rest"]) + " attn-" + str(opt["attn"]) +
            " attn_dim-" + str(opt["attn_dim"]),
            " obj_sub_pos-" + str(opt["obj_sub_pos"]) + " new_residual-" +
            str(opt["new_residual"]))
        print(
            " use_batch_norm-" + str(opt["use_batch_norm"]) +
            " relative_positions-" + str(opt["relative_positions"]),
            " decay_epoch-" + str(opt["decay_epoch"]) + " use_lemmas-" +
            str(opt["use_lemmas"]), " hidden_self-" + str(opt["hidden_self"]))

        train_loss = 0
        for i, batch in enumerate(train_batch):

            start_time = time.time()
            global_step += 1

            loss = model.update(batch)
            train_loss += float(loss)

            if global_step % opt['log_step'] == 0:
                duration = time.time() - start_time
                print(
                    format_str.format(datetime.now(), global_step, max_steps,
                                      epoch, opt['num_epoch'], loss, duration,
                                      current_lr))
            # do garbage collection,
            # as per https://discuss.pytorch.org/t/best-practices-for-maximum-gpu-utilization/13863/6
            del loss

        # eval on dev
        print("Evaluating on dev set...")
        predictions = []
        dev_loss = 0
        for i, batch in enumerate(dev_batch):
            preds, _, loss = model.predict(batch)
            predictions += preds
            dev_loss += float(loss)
            del loss

        predictions = [id2label[p] for p in predictions]
        dev_p, dev_r, dev_f1 = scorer.score(dev_batch.gold(), predictions)

        train_loss = train_loss / train_batch.num_examples * opt[
            'batch_size']  # avg loss per batch
        dev_loss = dev_loss / dev_batch.num_examples * opt['batch_size']
        print(
            "epoch {}: train_loss = {:.6f}, dev_loss = {:.6f}, dev_f1 = {:.4f}".format(epoch, \
                                                                                       train_loss, dev_loss, dev_f1)
        )
        file_logger.log("{}\t{:.6f}\t{:.6f}\t{:.4f}\t{:.4f}\t{:.4f}".format(
            epoch, train_loss, dev_loss, dev_p, dev_r, dev_f1))

        # save
        model_file = model_save_dir + '/checkpoint_epoch_{}.pt'.format(epoch)
        model.save(model_file, epoch)
        if epoch == 1 or dev_f1 > max(dev_f1_history):
            copyfile(model_file, model_save_dir + '/best_model.pt')
            print("new best model saved.")
        if epoch % opt['save_epoch'] != 0:
            os.remove(model_file)

        # reduce learning rate if it stagnates by a certain decay rate and within given epoch patience
        # this for some reason works worse than the implementation we have afterwards
        # scheduler.step(dev_loss)

        if opt["optim"] != "noopt_adam" and opt["optim"] != "noopt_nadam":

            # do warm_up_for sgd only instead of adam
            do_warmup_trick = False

            if do_warmup_trick:
                # print("do_warmup_trick")

                # 1 and 5 first worked kind of
                # 10 and 15
                current_lr = 10 * (360**(-0.5) *
                                   min(epoch**(-0.5), epoch * 15**(-1.5)))
                # print("current_lr", current_lr)
                model.update_lr(current_lr)

            else:
                # decay schedule # 15 is best!
                # simulate patience of x epochs
                if len(dev_f1_history
                       ) > opt['decay_epoch'] and dev_f1 <= dev_f1_history[-1]:
                    current_lr *= opt['lr_decay']
                    model.update_lr(current_lr)

        # else, update the learning rate in torch_utils.py

        dev_f1_history += [dev_f1]
        print("")

    print("Training ended with {} epochs.".format(epoch))
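
The inline warm-up trick above hard-codes its constants; a small refactor sketch that exposes them as parameters, with defaults mirroring the numbers in the snippet (everything else is an assumption):

def warmup_lr(epoch, model_size=360, warmup_epochs=15, scale=10.0):
    # Noam-style schedule matching the inline formula above:
    # lr = scale * model_size**-0.5 * min(epoch**-0.5, epoch * warmup_epochs**-1.5)
    # It rises during the first warmup_epochs, then decays as epoch**-0.5.
    return scale * (model_size ** -0.5) * min(epoch ** -0.5,
                                              epoch * warmup_epochs ** -1.5)

# usage inside the loop, replacing the hard-coded expression:
# current_lr = warmup_lr(epoch)
# model.update_lr(current_lr)
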
Example #23
final_predictions, inst_predictions, aux_predictions = [], [], []
all_final_probs, all_inst_probs, all_aux_probs = [], [], []
for i, b in enumerate(batch):
    final_preds, inst_preds, aux_preds, final_probs, inst_probs, aux_probs = student_model.predict_all(
        b)
    final_predictions += final_preds
    inst_predictions += inst_preds
    aux_predictions += aux_preds
    all_final_probs += final_probs
    all_inst_probs += inst_probs
    all_aux_probs += aux_probs
final_predictions = [id2label[p] for p in final_predictions]
inst_predictions = [id2label[p] for p in inst_predictions]
aux_predictions = [id2label[p] for p in aux_predictions]
print('\n >> Final Prediction:')
_, _, _ = scorer.score(batch.gold(), final_predictions, verbose=True)
print('\n >> Instance Prediction:')
_, _, _ = scorer.score(batch.gold(), inst_predictions, verbose=True)
print('\n >> Auxiliary Prediction:')
_, _, _ = scorer.score(batch.gold(), aux_predictions, verbose=True)

# save probability scores
# if len(args.out) > 0:
#     outfile = 'saved_models/' + args.model_id + '/' + args.out
#     with open(outfile, 'w') as fw:
#         for f_prob, i_prob, a_prob in zip(all_final_probs, all_inst_probs, all_aux_probs):
#             fw.write(json.dumps([round(p, 4) for p in f_prob]))
#             fw.write('\r\n')
#             fw.write(json.dumps([round(p, 4) for p in i_prob]))
#             fw.write('\r\n')
#             fw.write(json.dumps([round(p, 4) for p in a_prob]))
Example #24
id2label = dict([(v, k) for k, v in label2id.items()])
predictions = []
all_probs = []
all_ids = []

batch_tuples = zip(*model_data)

batch_tuple_iter = tqdm(batch_tuples)
for i, data in enumerate(batch_tuple_iter):
    preds, probs, ids = evaluator.predict(data, cuda)
    predictions += preds
    all_probs += probs
    all_ids += ids

predictions = [id2label[p] for p in predictions]
p, r, f1 = scorer.score(model_data[0].gold(), predictions, verbose=True)
print("{} set evaluate result: {:.2f}\t{:.2f}\t{:.2f}".format(
    args.dataset, p, r, f1))

if args.trace_file_for_misses is not None:
    print(
        f'Preparing miss information and writing it to "{args.trace_file_for_misses}"'
    )

    with open(args.trace_file_for_misses, 'w', encoding='utf-8',
              newline='') as trace_file_for_misses:
        csv_writer = csv.writer(trace_file_for_misses)
        csv_writer.writerow(['id', 'gold', 'predicted'])

        for gold, prediction, id in zip(model_data[0].gold(), predictions,
                                        all_ids):
Example #25
        train_loss += loss
        if global_step % opt['log_step'] == 0:
            duration = time.time() - start_time
            print(format_str.format(datetime.now(), global_step, max_steps, epoch,\
                    opt['num_epoch'], loss, duration, current_lr))

    # eval on dev
    print("Evaluating on dev set...")
    predictions = []
    dev_loss = 0
    for i, batch in enumerate(dev_batch):
        preds, _, loss = model.predict(batch)
        predictions += preds
        dev_loss += loss
    predictions = [id2label[p] for p in predictions]
    current_dev_metrics, _ = scorer.score(dev_batch.gold(), predictions)
    dev_f1 = current_dev_metrics['f1']

    train_loss = train_loss / train_batch.num_examples * opt[
        'batch_size']  # avg loss per batch
    dev_loss = dev_loss / dev_batch.num_examples * opt['batch_size']
    print("epoch {}: train_loss = {:.6f}, dev_loss = {:.6f}, dev_f1 = {:.4f}".
          format(epoch, train_loss, dev_loss, dev_f1))
    file_logger.log("{}\t{:.6f}\t{:.6f}\t{:.4f}".format(
        epoch, train_loss, dev_loss, dev_f1))

    print("Evaluating on test set...")
    predictions = []
    test_loss = 0
    test_preds = []
    for i, batch in enumerate(test_batch):
Example #26
predictions = []
all_probs = []
golds = []
for i, batch in enumerate(iterator_test):
    inputs = {}
    inputs['words'], inputs['length'] = batch.token
    inputs['pos'] = batch.pos
    inputs['ner'] = batch.ner
    inputs['subj_pst'] = batch.subj_pst
    inputs['obj_pst'] = batch.obj_pst
    inputs['masks'] = torch.eq(batch.token[0], opt['vocab_pad_id'])

    target = batch.relation

    preds, probs, _ = model.predict(inputs, target)
    predictions += preds
    all_probs += probs
    golds += target.data.tolist()
predictions = [RELATION.vocab.itos[p] for p in predictions]
golds = [RELATION.vocab.itos[p] for p in golds]
p, r, f1 = scorer.score(golds, predictions, verbose=True)

# save probability scores
if len(args.out) > 0:
    helper.ensure_dir(os.path.dirname(args.out))
    with open(args.out, 'wb') as outfile:
        pickle.dump(all_probs, outfile)
    print("Prediction scores saved to {}.".format(args.out))

print("Evaluation ended.")
Example #27
    trainer.load(model_file)
    batch = DataLoader([data_file],
                       opt['batch_size'],
                       opt,
                       vocab,
                       evaluation=True,
                       corefresolve=True)
    batch_iter = tqdm(batch)

    all_probs = []
    samples = []
    for i, b in enumerate(batch_iter):
        preds, probs, _, sample = trainer.predict(b)
        predictions += preds
        all_probs += probs
        # effsum+=lab_eff
        # lab_nums+=lab_num
        samples = samples + sample

    key += batch.gold()

    # with open('samples.json','w') as f:
    #     json.dump(samples,f,indent=4)

predictions = [id2label[p] for p in predictions]
p, r, f1 = scorer.score(batch, predictions, verbose=True)
print("{} set evaluate result: {:.2f}\t{:.2f}\t{:.2f}".format(
    args.dataset, p, r, f1))

print("Evaluation ended.")
Example #28
    dev_loss = 0
    for i, batch in enumerate(dev_batch):
        preds, _, loss, _ = trainer.predict(batch)
        predictions += preds
        dev_loss += loss
    predictions = [[id2label[l + 1]] for p in predictions for l in p]
    train_loss = train_loss / train_batch.num_examples * opt[
        'batch_size']  # avg loss per batch
    train_sent_loss = train_sent_loss / train_batch.num_examples * opt[
        'batch_size']  # avg loss per batch
    train_dep_path_loss = train_dep_path_loss / train_batch.num_examples * opt[
        'batch_size']  # avg loss per batch
    dev_loss = dev_loss / dev_batch.num_examples * opt['batch_size']

    dev_p, dev_r, dev_f1 = scorer.score(dev_batch.gold(),
                                        predictions,
                                        method='macro')
    print("epoch {}: train_loss = {:.6f}, train_sent_loss = {:.6f}, train_dep_path_loss = {:.6f}, dev_loss = {:.6f}, dev_f1 = {:.4f}".format(epoch,\
        train_loss, train_sent_loss, train_dep_path_loss, dev_loss, dev_f1))
    dev_score = dev_f1
    file_logger.log(
        "{}\t{:.6f}\t{:.6f}\t{:.6f}\t{:.6f}\t{:.4f}\t{:.4f}".format(
            epoch, train_loss, train_sent_loss, train_dep_path_loss, dev_loss,
            dev_score, max([dev_score] + dev_score_history)))

    # save
    model_file = model_save_dir + '/checkpoint_epoch_{}.pt'.format(epoch)
    trainer.save(model_file, epoch)
    if epoch == 1 or dev_score > max(dev_score_history):
        copyfile(model_file, model_save_dir + '/best_model.pt')
        print("new best model saved.")
Example #29
predictions = []
all_ids = []
for i, batch_tuple in enumerate(zip(*[model_stuff.data for model_stuff in models_stuff])):
    preds, ids = evaluator.predict(batch_tuple, cuda)
    all_ids += ids

    predictions += preds

predictions = [id2label[p] for p in predictions]

p, r, f1 = scorer.score(ud.data.gold(), predictions, verbose=True)
print("{} set evaluate result: {:.2f}\t{:.2f}\t{:.2f}".format(args.dataset,p,r,f1))


if args.trace_file_for_misses is not None:
    print(f'Preparing miss information and writing it to "{args.trace_file_for_misses}"')

    with open(args.trace_file_for_misses, 'w', encoding='utf-8', newline='') as trace_file_for_misses:
        csv_writer = csv.writer(trace_file_for_misses)
        csv_writer.writerow(['id', 'gold', 'predicted'])

        for gold, prediction, id in zip(ud.data.gold(), predictions, all_ids):
            if gold != prediction:
                csv_writer.writerow([id, gold, prediction])

Example #30
    #         for k in range(attn_list[layer][bat,:,:,:].size(0)):
    #             head =k
    #             attn_mat = attn_list[layer][bat,:,:,:][head,:,:]
    #             token_id = b[0][bat,:].data.cpu().numpy()
    #             token = viz_token(token_id,id2word)
    #             name = "layer"+str(layer)+"_"+"head"+str(head)
    #             #print(token)
    #             label =b[13][bat]
    #             viz_att(token,attn_mat.data.cpu().numpy(),name,label.data.cpu().numpy())
    #             print(name+".svg saved")

    predictions += preds
    all_probs += probs

predictions = [id2label[p] for p in predictions]
acc, p, r, f1 = scorer.score(batch.gold(), predictions, verbose=True)
print("{} set evaluate result: {:.2f}\t{:.2f}\t{:.2f}".format(
    args.dataset, p, r, f1))

print("Evaluation ended.")

# args.out = "./new_trans.pkl"
# with open(args.out, 'wb') as outfile:
#     pickle.dump(all_probs, outfile)
# print("Prediction scores saved to {}.".format(args.out))

# predictions = []
# all_probs = []
# for i, b in enumerate (batch_iter):
#     preds, probs, _ = trainer2.predict (b)
#