Example #1
import torch
from torch.autograd import Variable  # pre-0.4 PyTorch API used throughout these examples
from train import compute_score_with_logits  # project helper; see Examples #3 and #7

def evaluate(model, dataloader, q_dict, c_dict):
    score = 0
    vq_score = 0
    eq_score = 0
    print_freq = 50
    results = []
    for i, (qid, iid, v, q, a, c, l) in enumerate(dataloader):
        batch_size = v.size(0)
        v = Variable(v, volatile=True).cuda()  # volatile=True: inference-only graph in pre-0.4 PyTorch
        q = Variable(q, volatile=True).cuda()

        ans_pred, exp_pred = model.evaluate(v, q)
        vq_batch_score = compute_score_with_logits(ans_pred[0], a.cuda()).sum()
        eq_batch_score = compute_score_with_logits(ans_pred[1], a.cuda()).sum()
        batch_score = compute_score_with_logits(ans_pred[0] + ans_pred[1],
                                                a.cuda()).sum()
        vq_score += vq_batch_score
        eq_score += eq_batch_score
        score += batch_score

        for j in range(batch_size):
            if c[j][0] != 0:
                results.append({
                    'question_id': qid[j],
                    'image_id': iid[j],
                    'question': ' '.join([q_dict.idx2word[p] for p in q[j].data.tolist() if p > 0]),
                    'answer': dataloader.dataset.label2ans[ans_pred[0][j].max(0)[1].data[0]],
                    'explain_res': ' '.join([c_dict.idx2word[p] for p in exp_pred[j].data.tolist() if p > 2]),
                    'explain_gt': ' '.join([c_dict.idx2word[p] for p in c[j].tolist() if p > 2])
                })

        if i % print_freq == 0 and results:
            print('Batch: [%d/%d]\nQuestion: %s\t Answer Prediction: %s\n'
                  'Explain GT: %s\nExplain Prediction: %s' %
                  (i, len(dataloader), results[-1]['question'],
                   results[-1]['answer'], results[-1]['explain_gt'],
                   results[-1]['explain_res']))

    vq_score = vq_score / len(dataloader.dataset)
    eq_score = eq_score / len(dataloader.dataset)
    score = score / len(dataloader.dataset)

    scores = {'vq_score': vq_score, 'eq_score': eq_score, 'ens_score': score}
    return scores, results
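
Every example on this page calls compute_score_with_logits, which lives in the project's train.py (see the imports in Examples #3 and #7). Its exact definition is not shown here; the following is a minimal sketch of the usual VQA-style soft-accuracy implementation, assuming labels holds per-answer scores in [0, 1]:

def compute_score_with_logits(logits, labels):
    # Argmax answer per example, one-hot encode it, then read off the soft
    # VQA accuracy that `labels` stores for that answer.
    pred = torch.max(logits, 1)[1].data
    one_hots = torch.zeros(*labels.size()).cuda()
    one_hots.scatter_(1, pred.view(-1, 1), 1)
    return one_hots * labels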
Example #2
def evaluate(model, dataloader, q_dict, c_dict, gt=True):
    score = 0
    vq_score = 0
    eq_score = 0
    print_freq = 50
    results = []
    for i, (qid, iid, v, q, a, c, l) in enumerate(dataloader):
        batch_size = v.size(0)
        v = Variable(v, volatile=True).cuda()
        q = Variable(q, volatile=True).cuda()

        if not gt:
            c = None
        ans_pred, exp_pred = model.evaluate(v, q, c)
        if isinstance(ans_pred, list):
            vq_batch_score = compute_score_with_logits(ans_pred[0],
                                                       a.cuda()).sum()
            eq_batch_score = compute_score_with_logits(ans_pred[1],
                                                       a.cuda()).sum()
            vq_score += vq_batch_score
            eq_score += eq_batch_score
            ans_pred = ans_pred[0] + ans_pred[1]

        batch_score = compute_score_with_logits(ans_pred, a.cuda()).sum()
        score += batch_score

        # for j in range(batch_size):
        #     if c[j][0] != 0:
        #         results.append({
        #             'question_id': qid[j],
        #             'image_id': iid[j],
        #             'question': ' '.join([q_dict.idx2word[p] for p in q[j].data.tolist() if p > 0]),
        #             'answer': dataloader.dataset.label2ans[ans_pred[0][j].max(0)[1].data[0]],
        #             'explain_res': ' '.join([c_dict.idx2word[p] for p in exp_pred[j].data.tolist() if p > 2]),
        #             'explain_gt': ' '.join([c_dict.idx2word[p] for p in c[j].tolist() if p > 2])
        #         })

    score = score / len(dataloader.dataset)

    if vq_score == 0:
        # Single-head model: return just the ensemble score as a scalar.
        scores = score
    else:
        vq_score = vq_score / len(dataloader.dataset)
        eq_score = eq_score / len(dataloader.dataset)
        scores = {
            'vq_score': vq_score,
            'eq_score': eq_score,
            'ens_score': score
        }
    print('Scores:', scores)
    return scores, results
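
A hypothetical pair of calls showing what the gt flag above controls (model and loader construction are assumed, not shown on this page): with gt=True the ground-truth explanation c is passed to model.evaluate, with gt=False it is replaced by None.

scores_tf, _ = evaluate(model, val_loader, q_dict, c_dict, gt=True)   # explanations fed to the model
scores_fr, _ = evaluate(model, val_loader, q_dict, c_dict, gt=False)  # no ground-truth explanations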
Example #3
def update_per_batch(self,
                     model,
                     answers,
                     loss,
                     pred,
                     curr_size,
                     logits_key='logits'):
    # Accumulate running statistics for one batch on the tracker object.
    upper_bound = answers.max(1)[0].sum()
    self.upper_bound += upper_bound
    # self.total_norm += nn.utils.clip_grad_norm_(model.parameters(), 0.25)
    # self.count_norm += 1
    from train import compute_score_with_logits  # local import, likely to avoid a circular import
    batch_score = compute_score_with_logits(pred, answers.data,
                                            logits_key).sum()
    self.loss += loss.data * curr_size
    self.raw_score += batch_score
    self.num_examples += curr_size
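
Example #3 is a method on a statistics-tracker object whose class is not shown on this page. A hypothetical minimal host class, with attribute names taken from the method body above:

class BatchStatTracker:
    """Hypothetical container for the running statistics update_per_batch accumulates."""

    def __init__(self):
        self.upper_bound = 0.0   # sum of best-achievable per-example scores
        self.loss = 0.0          # loss accumulated weighted by batch size
        self.raw_score = 0.0     # summed compute_score_with_logits over batches
        self.num_examples = 0    # examples seen so far

    def averages(self):
        # Hypothetical helper: per-example averages at epoch end.
        n = max(self.num_examples, 1)
        return self.loss / n, self.raw_score / n, self.upper_bound / n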
Example #4
def evaluate(model, dataloader, q_dict, c_dict):
    score = 0
    vq_score = 0
    eq_score = 0
    print_freq = 50
    results = []
    for i, (qid, iid, v, q, a, c, l) in enumerate(dataloader):
        batch_size = v.size(0)
        v = Variable(v, volatile=True).cuda()
        q = Variable(q, volatile=True).cuda()
        c = Variable(c, volatile=True).cuda()

        ans_pred = model.evaluate(v, q, c)
        #ans_pred = model(v, q)
        c = c.data
        batch_score = compute_score_with_logits(ans_pred, a.cuda()).sum()
        score += batch_score

        for j in range(batch_size):
            results.append({
                'question_id': qid[j],
                'image_id': iid[j],
                'question': ' '.join([q_dict.idx2word[p] for p in q[j].data.tolist() if p > 0]),
                'answer_pred': dataloader.dataset.label2ans[ans_pred[j].max(0)[1].data[0]],
                'answer_gt': dataloader.dataset.label2ans[int(a[j].max(0)[1])],
                'explain_gt': ' '.join([c_dict.idx2word[p] for p in c[j].tolist() if p > 2])
            })

    results = sorted(results, key=lambda x: x['question_id'])
    score = score / len(dataloader.dataset)

    return score, results
Example #5
def evaluate(model, dataloader, q_dict, c_dict):
    total_others_score = 0
    total_yesno_score = 0
    total_number_score = 0
    total_all_score = 0
    vq_score = 0
    eq_score = 0
    print_freq = 50
    results = []

    ann_path = os.path.join('data', 'v2_mscoco_val2014_annotations.json')
    anns = json.load(open(ann_path))['annotations']
    qid2atype = {a['question_id']: a['answer_type'] for a in anns}
    #qid2qtype = {a['question_id']: a['question_type'] for a in anns}
    atype_map = {'other': 0, 'yes/no': 1, 'number': 2}

    others_cnt = 0
    yesno_cnt = 0
    number_cnt = 0
    for i, (qid, iid, v, q, a, c, l) in enumerate(dataloader):
        batch_size = v.size(0)
        v = Variable(v, volatile=True).cuda()
        q = Variable(q, volatile=True).cuda()

        ans_pred, exp_pred = model.evaluate(v, q)

        atype_list = torch.Tensor([atype_map[qid2atype[q_id]] for q_id in qid])
        # view(-1) keeps the index tensors 1-D even when a type occurs once in the batch
        others_idx = (atype_list == 0).nonzero().view(-1)
        yesno_idx = (atype_list == 1).nonzero().view(-1)
        number_idx = (atype_list == 2).nonzero().view(-1)

        others_cnt += len(others_idx)
        yesno_cnt += len(yesno_idx)
        number_cnt += len(number_idx)

        others_score = compute_score_with_logits(ans_pred[others_idx.cuda()],
                                                 a[others_idx].cuda()).sum()
        yesno_score = compute_score_with_logits(ans_pred[yesno_idx.cuda()],
                                                a[yesno_idx].cuda()).sum()
        number_score = compute_score_with_logits(ans_pred[number_idx.cuda()],
                                                 a[number_idx].cuda()).sum()

        total_others_score += others_score
        total_yesno_score += yesno_score
        total_number_score += number_score

        all_score = compute_score_with_logits(ans_pred, a.cuda()).sum()
        total_all_score += all_score

        for j in range(batch_size):
            results.append({
                'question_id': qid[j],
                'image_id': iid[j],
                'question': ' '.join([q_dict.idx2word[p] for p in q[j].data.tolist() if p > 0]),
                'answer_pred': dataloader.dataset.label2ans[ans_pred[j].max(0)[1].data[0]],
                'answer_gt': dataloader.dataset.label2ans[int(a[j].max(0)[1])],
                'explain_res': ' '.join([c_dict.idx2word[p] for p in exp_pred[j].data.tolist() if p > 2]),
                'explain_gt': ' '.join([c_dict.idx2word[p] for p in c[j].tolist() if p > 2])
            })

    results = sorted(results, key=lambda x: x['question_id'])
    total_all_score = total_all_score / len(dataloader.dataset)
    total_others_score /= others_cnt
    total_yesno_score /= yesno_cnt
    total_number_score /= number_cnt

    scores = {'all': total_all_score, 'others': total_others_score,
              'yes/no': total_yesno_score, 'number': total_number_score}
    return scores, results
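
A hypothetical call site for this evaluator (model, loader, and dictionaries are built elsewhere in the project and assumed here):

scores, results = evaluate(model, val_loader, q_dict, c_dict)
print('all: %.4f  yes/no: %.4f  number: %.4f  other: %.4f' %
      (scores['all'], scores['yes/no'], scores['number'], scores['others']))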
Example #6
def evaluate(model, dataloader, model_hps, args, device):
    model.eval()
    label2ans = dataloader.dataset.label2ans
    num_answers = len(label2ans)
    relation_type = dataloader.dataset.relation_type
    N = len(dataloader.dataset)
    results = []
    score = 0
    pbar = tqdm(total=len(dataloader))

    if args.save_logits:
        idx = 0
        pred_logits = np.zeros((N, num_answers))
        gt_logits = np.zeros((N, num_answers))

    for i, (v, norm_bb, q, target, qid, _, bb,
            spa_adj_matrix, sem_adj_matrix) in enumerate(dataloader):
        batch_size = v.size(0)
        num_objects = v.size(1)
        v = Variable(v).to(device)
        norm_bb = Variable(norm_bb).to(device)
        q = Variable(q).to(device)
        pos_emb, sem_adj_matrix, spa_adj_matrix = prepare_graph_variables(
            relation_type, bb, sem_adj_matrix, spa_adj_matrix, num_objects,
            model_hps.nongt_dim, model_hps.imp_pos_emb_dim,
            model_hps.spa_label_num, model_hps.sem_label_num, device)
        # No-grad forward pass: evaluation only, and it keeps pred safe to copy
        # into the numpy logit buffers below.
        with torch.no_grad():
            pred, att = model(v, norm_bb, q, pos_emb, sem_adj_matrix,
                              spa_adj_matrix, None)
        # Check if target is a placeholder or actual targets
        if target.size(-1) == num_answers:
            target = Variable(target).to(device)
            batch_score = compute_score_with_logits(
                pred, target, device).sum()
            score += batch_score
            if args.save_logits:
                gt_logits[idx:batch_size+idx, :] = target.cpu().numpy()

        if args.save_logits:
            pred_logits[idx:batch_size+idx, :] = pred.cpu().numpy()
            idx += batch_size

        if args.save_answers:
            qid = qid.cpu()
            pred = pred.cpu()
            current_results = make_json(pred, qid, dataloader)
            results.extend(current_results)

        pbar.update(1)

    pbar.close()
    score = score / N
    results_folder = f"{args.output_folder}/results"
    if args.save_logits:
        utils.create_dir(results_folder)
        save_to = f"{results_folder}/logits_{args.dataset}" +\
            f"_{args.split}.npy"
        np.save(save_to, pred_logits)

        utils.create_dir("./gt_logits")
        save_to = f"./gt_logits/{args.dataset}_{args.split}_gt.npy"
        if not os.path.exists(save_to):
            np.save(save_to, gt_logits)
    if args.save_answers:
        utils.create_dir(results_folder)
        save_to = f"{results_folder}/{args.dataset}_" +\
            f"{args.split}.json"
        json.dump(results, open(save_to, "w"))
    return score
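
A hypothetical follow-up that reads back the files written under --save_logits; the paths mirror the code above, and the dataset/split values are whatever args carried during the evaluate() run:

pred_logits = np.load(f"{args.output_folder}/results/logits_{args.dataset}_{args.split}.npy")
gt_logits = np.load(f"./gt_logits/{args.dataset}_{args.split}_gt.npy")
print(pred_logits.shape, gt_logits.shape)  # both (N, num_answers)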
Example #7

# One training pass: forward, BCE-with-logits loss, clipped backward step,
# and a running score, followed by an epoch summary and evaluation.
total_loss = 0
train_score = 0
for i, (v, b, q, a) in enumerate(train_loader):
    v = v.cuda()
    b = b.cuda()
    q = q.long().cuda()
    a = a.cuda()
    pred = model(v, b, q, a)
    loss = train.instance_bce_with_logits(pred, a)
    loss.backward()
    nn.utils.clip_grad_norm_(model.parameters(), 0.25)
    optim.step()
    optim.zero_grad()
    batch_score = train.compute_score_with_logits(pred, a.data).sum()
    total_loss += loss.data * v.size(0)
    train_score += batch_score
    if i % 5000 == 0:
        print('{}_iteration_done'.format(i))
total_loss /= len(train_loader.dataset)
train_score = 100 * train_score / len(train_loader.dataset)
model.train(False)
eval_score, bound = train.evaluate(model, eval_loader)

import argparse
import os
import time

import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import numpy as np

import utils  # project module providing create_dir, Logger, print_model, save_model
# instance_bce_with_logits and compute_score_with_logits are assumed to be
# defined in this same file (train.py).
def train(model,
          train_loader,
          eval_loader,
          num_epochs,
          output,
          opt=None,
          s_epoch=0):
    lr_default = 1e-3 if eval_loader is not None else 7e-4
    lr_decay_step = 2
    lr_decay_rate = .25
    lr_decay_epochs = range(10, 20, lr_decay_step)
    gradual_warmup_steps = [
        0.5 * lr_default, 1.0 * lr_default, 1.5 * lr_default, 2.0 * lr_default
    ]
    saving_epoch = 3
    grad_clip = .25

    utils.create_dir(output)
    optim = torch.optim.Adamax(filter(lambda p: p.requires_grad, model.parameters()), lr=lr_default) \
        if opt is None else opt
    logger = utils.Logger(os.path.join(output, 'log.txt'))
    best_eval_score = 0

    utils.print_model(model, logger)
    logger.write('optim: adamax lr=%.4f, decay_step=%d, decay_rate=%.2f, grad_clip=%.2f' % \
        (lr_default, lr_decay_step, lr_decay_rate, grad_clip))

    for epoch in range(s_epoch, num_epochs):
        total_loss = 0
        train_score = 0
        total_norm = 0
        count_norm = 0
        t = time.time()
        N = 0

        if epoch < len(gradual_warmup_steps):
            optim.param_groups[0]['lr'] = gradual_warmup_steps[epoch]
            logger.write('gradual warmup lr: %.4f' %
                         optim.param_groups[0]['lr'])
        elif epoch in lr_decay_epochs:
            optim.param_groups[0]['lr'] *= lr_decay_rate
            logger.write('decreased lr: %.4f' % optim.param_groups[0]['lr'])
        else:
            logger.write('lr: %.4f' % optim.param_groups[0]['lr'])

        for i, (v, b, p, e, n, a, idx, types) in enumerate(train_loader):
            v = v.cuda()
            b = b.cuda()
            p = p.cuda()
            e = e.cuda()
            a = a.cuda()

            _, logits = model(v, b, p, e, a)
            n_obj = logits.size(2)
            logits.squeeze_()

            merged_logit = torch.cat(
                tuple(logits[j, :, :n[j][0]] for j in range(n.size(0))),
                -1).permute(1, 0)
            merged_a = torch.cat(
                tuple(a[j, :n[j][0], :n_obj] for j in range(n.size(0))), 0)

            loss = instance_bce_with_logits(merged_logit, merged_a,
                                            'sum') / v.size(0)
            N += n.sum().float()

            batch_score = compute_score_with_logits(merged_logit,
                                                    merged_a.data).sum()

            loss.backward()
            total_norm += nn.utils.clip_grad_norm_(model.parameters(),
                                                   grad_clip)
            count_norm += 1
            optim.step()
            optim.zero_grad()
            total_loss += loss.item() * v.size(0)
            train_score += batch_score.item()

        total_loss /= N
        train_score = 100 * train_score / N
        if eval_loader is not None:
            model.train(False)
            eval_score, bound, entropy = evaluate(model, eval_loader)
            model.train(True)

        logger.write('epoch %d, time: %.2f' % (epoch, time.time() - t))
        logger.write('\ttrain_loss: %.2f, norm: %.4f, score: %.2f' %
                     (total_loss, total_norm / count_norm, train_score))
        if eval_loader is not None:
            logger.write('\teval score: %.2f/%.2f/%.2f (%.2f)' %
                         (100 * eval_score[0], 100 * eval_score[1],
                          100 * eval_score[2], 100 * bound))
            eval_score = eval_score[0]

        if eval_loader is not None and entropy is not None:
            info = ''
            for i in range(entropy.size(0)):
                info = info + ' %.2f' % entropy[i]
            logger.write('\tentropy: ' + info)

        if (eval_loader is not None and eval_score > best_eval_score) or (
                eval_loader is None and epoch >= saving_epoch):
            model_path = os.path.join(output, 'model_epoch%d.pth' % epoch)
            utils.save_model(model_path, model, epoch, optim)
            if eval_loader is not None:
                best_eval_score = eval_score
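
The loops above also call instance_bce_with_logits (with a 'sum' reduction inside train). Its definition is not shown on this page; the following is a minimal sketch of the implementation commonly used in this family of VQA codebases, assuming soft target scores per answer:

import torch.nn.functional as F

def instance_bce_with_logits(logits, labels, reduction='mean'):
    # Binary cross-entropy over every answer candidate at once.
    loss = F.binary_cross_entropy_with_logits(logits, labels, reduction=reduction)
    if reduction == 'mean':
        # Rescale so the value reflects a per-instance sum over answers.
        loss *= labels.size(1)
    return loss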