def train():
    """Run the main training loop.

    Iterates over ``train_Loader``, optimizing ``model`` with a summed
    KL-divergence loss, and periodically:
      - decays the learning rate (every ``opt.DECAY_STEPS`` iterations),
      - logs mean train loss and LR to TensorBoard (``opt.PRINT_INTERVAL``),
      - saves a checkpoint under ``./data`` (``opt.CHECKPOINT_INTERVAL``),
      - runs validation and tracks best accuracy (``opt.VAL_INTERVAL``),
      - runs test-dev evaluation (``opt.TESTDEV_INTERVAL``).

    Relies on module-level ``model``, ``optimizer``, ``opt``, ``writer``,
    ``train_Loader`` and ``folder`` being set up by the caller.
    """
    # size_average=False sums the loss over the batch; the per-sample mean is
    # recovered by dividing by opt.BATCH_SIZE when reporting below.
    criterion = nn.KLDivLoss(size_average=False)
    train_loss = np.zeros(opt.MAX_ITERATIONS + 1)
    results = []
    # Initialized up-front so the VAL_INTERVAL branch cannot hit a NameError
    # when it fires before the first PRINT_INTERVAL report.
    c_mean_loss = 0.0
    for iter_idx, (data, word_length, feature, answer, epoch) in enumerate(train_Loader):
        model.train()
        # The loader yields pre-batched tensors wrapped in an extra leading
        # dimension of size 1; strip it before use.
        data = np.squeeze(data, axis=0)
        word_length = np.squeeze(word_length, axis=0)
        feature = np.squeeze(feature, axis=0)
        answer = np.squeeze(answer, axis=0)
        epoch = epoch.numpy()

        data = Variable(data).cuda()
        word_length = word_length.cuda()
        img_feature = Variable(feature).cuda()
        label = Variable(answer).cuda().float()
        optimizer.zero_grad()
        pred = model(data, word_length, img_feature, 'train')
        loss = criterion(pred, label)
        loss.backward()
        optimizer.step()
        # .item() extracts the Python scalar (PyTorch >= 0.4); the old
        # loss.data[0] indexing raises on modern PyTorch versions.
        train_loss[iter_idx] = loss.item()
        if iter_idx % opt.DECAY_STEPS == 0 and iter_idx != 0:
            adjust_learning_rate(optimizer, opt.DECAY_RATE)
        if iter_idx % opt.PRINT_INTERVAL == 0 and iter_idx != 0:
            now = str(datetime.datetime.now())
            c_mean_loss = train_loss[iter_idx - opt.PRINT_INTERVAL:iter_idx].mean() / opt.BATCH_SIZE
            writer.add_scalar('mfh_baseline/train_loss', c_mean_loss, iter_idx)
            writer.add_scalar('mfh_baseline/lr', optimizer.param_groups[0]['lr'], iter_idx)
            print('{}\tTrain Epoch: {}\tIter: {}\tLoss: {:.4f}'.format(
                        now, epoch, iter_idx, c_mean_loss))
        if iter_idx % opt.CHECKPOINT_INTERVAL == 0 and iter_idx != 0:
            if not os.path.exists('./data'):
                os.makedirs('./data')
            save_path = './data/mfh_baseline_iter_' + str(iter_idx) + '.pth'
            # Bug fix: the original computed save_path but never wrote the
            # checkpoint. Save under the 'model' key, matching the format
            # that pred() loads (checkpoint['model']).
            torch.save({'model': model.state_dict()}, save_path)
        if iter_idx % opt.VAL_INTERVAL == 0 and iter_idx != 0:
            test_loss, acc_overall, acc_per_ques, acc_per_ans = exec_validation(model, opt, mode='val', folder=folder, it=iter_idx)
            writer.add_scalar('mfh_baseline/val_loss', test_loss, iter_idx)
            writer.add_scalar('mfh_baseline/accuracy', acc_overall, iter_idx)
            print('Test loss:', test_loss)
            print('Accuracy:', acc_overall)
            print('Test per ans', acc_per_ans)
            results.append([iter_idx, c_mean_loss, test_loss, acc_overall, acc_per_ques, acc_per_ans])
            # Column 3 of each results row is overall accuracy.
            best_result_idx = np.array([x[3] for x in results]).argmax()
            print('Best accuracy of', results[best_result_idx][3], 'was at iteration', results[best_result_idx][0])
            drawgraph(results, folder, opt.MFB_FACTOR_NUM, opt.MFB_OUT_DIM, prefix='mfb_baseline')
        if iter_idx % opt.TESTDEV_INTERVAL == 0 and iter_idx != 0:
            exec_validation(model, opt, mode='test-dev', folder=folder, it=iter_idx)
# Example 2
def pred(opt, folder, logger):
    """Restore a checkpointed model and report its validation metrics.

    Loads weights from ``opt.RESUME_PATH``, evaluates on the 'val' split
    with visualization enabled, and logs loss / accuracy figures.
    """
    assert opt.RESUME_PATH, 'please specify the model file'

    # Data provider also supplies the vocabulary sizes the model needs.
    provider = VQADataProvider(
        opt, batchsize=opt.VAL_BATCH_SIZE, mode='val', logger=logger)
    opt.quest_vob_size, opt.ans_vob_size = provider.get_vocab_size()

    logger.info('==> Resuming from checkpoint..')
    ckpt = torch.load(opt.RESUME_PATH, map_location='cpu')
    net = get_model(opt)
    net.load_state_dict(ckpt['model'])
    net = cuda_wrapper(net)

    metrics = exec_validation(
        net, opt, mode='val', folder=folder, it=0,
        visualize=True, dp=provider, logger=logger)
    test_loss, acc_overall, acc_per_ques, acc_per_ans = metrics
    logger.info('Test loss: {}'.format(test_loss))
    logger.info('Accuracy: {}'.format(acc_overall))
    logger.info('Test per ans: {}'.format(acc_per_ans))