Example #1
def train(args):
    """
        Train a VQA model using the training set
    """

    # set random seed
    torch.manual_seed(1000)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(1000)
    else:
        raise SystemExit('No CUDA available, script requires CUDA.')

    # Load the VQA training set
    print('Loading data')
    dataset = VQA_Dataset(args.data_dir, args.emb)
    loader = DataLoader(dataset,
                        batch_size=args.bsize,
                        shuffle=True,
                        num_workers=5,
                        collate_fn=collate_fn)

    # Load the VQA validation set
    val_dataset = VQA_Dataset(args.data_dir, args.emb, train=False)

    val_loader = DataLoader(val_dataset,
                            batch_size=args.bsize,
                            shuffle=False,
                            num_workers=4,
                            collate_fn=collate_fn)

    n_batches = len(dataset) // args.bsize

    # Print data and model parameters
    print('Parameters:\n\t'
          'vocab size: %d\n\tembedding dim: %d\n\tfeature dim: %d'
          '\n\thidden dim: %d\n\toutput dim: %d' %
          (dataset.q_words, args.emb, dataset.feat_dim, args.hid,
           dataset.n_answers))
    print('Initializing model')

    model = Model(vocab_size=dataset.q_words,
                  emb_dim=args.emb,
                  feat_dim=dataset.feat_dim,
                  hid_dim=args.hid,
                  out_dim=dataset.n_answers,
                  dropout=args.dropout,
                  neighbourhood_size=args.neighbourhood_size,
                  pretrained_wemb=dataset.pretrained_wemb)

    criterion = nn.MultiLabelSoftMarginLoss()

    # Move the model and criterion to the GPU; DataParallel splits each batch across GPUs
    model = nn.DataParallel(model).cuda()
    criterion = criterion.cuda()

    # Define the optimiser
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

    logger = Logger(os.path.join('save/', 'log.txt'))

    # Continue training from saved model
    start_ep = 0
    if args.model_path and os.path.isfile(args.model_path):
        print('Resuming from checkpoint %s' % (args.model_path))
        ckpt = torch.load(args.model_path)
        start_ep = ckpt['epoch']
        model.load_state_dict(ckpt['state_dict'])
        optimizer.load_state_dict(ckpt['optimizer'])

    # Update the learning rate
    for param_group in optimizer.param_groups:
        param_group['lr'] = args.lr

    # Learning rate scheduler
    scheduler = MultiStepLR(optimizer, milestones=[5, 10, 15], gamma=0.1)
    scheduler.last_epoch = start_ep - 1

    # Train iterations
    print('Start training.')
    for ep in range(start_ep, start_ep + args.ep):
        scheduler.step()
        ep_loss = 0.0
        ep_correct = 0.0
        ave_loss = 0.0
        ave_correct = 0.0
        losses = []

        for step, next_batch in tqdm(enumerate(loader)):

            model.train()
            # Move batch to cuda
            q_batch, a_batch, vote_batch, i_batch, k_batch, qlen_batch = \
                batch_to_cuda(next_batch)

            # forward pass
            output = model(q_batch, i_batch, k_batch, qlen_batch)

            # Supervise both prediction heads, then fuse them for scoring
            loss = criterion(output[0], a_batch) + criterion(
                output[1], a_batch)
            output = output[0] + output[1]

            # Compute batch accuracy based on vqa evaluation
            correct = total_vqa_score(output, vote_batch)
            ep_correct += correct
            ep_loss += loss.item()
            ave_correct += correct
            ave_loss += loss.item()
            losses.append(loss.item())

            # Report a running average every TRAIN_REPORT_INTERVAL steps
            if step % TRAIN_REPORT_INTERVAL == 0 and step != 0:
                logger.write(
                    '  Epoch %02d(%03d/%03d), avg loss: %.7f, avg accuracy: %.2f%%'
                    % (ep + 1, step, n_batches,
                       ave_loss / TRAIN_REPORT_INTERVAL, ave_correct * 100 /
                       (args.bsize * TRAIN_REPORT_INTERVAL)))

                ave_correct = 0
                ave_loss = 0

            # Compute gradient and do optimisation step
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # save model and compute accuracy for epoch
        epoch_loss = ep_loss / n_batches
        epoch_acc = ep_correct * 100 / (n_batches * args.bsize)

        print('Epoch %02d done, average loss: %.3f, average accuracy: %.2f%%' %
              (ep + 1, epoch_loss, epoch_acc))
        logger.write(
            'Epoch %02d done, average loss: %.3f, average accuracy: %.2f%%' %
            (ep + 1, epoch_loss, epoch_acc))

        # Save a checkpoint and compute validation accuracy at the end of the epoch
        model.eval()
        save(model,
             optimizer,
             ep,
             epoch_loss,
             epoch_acc,
             dir=args.save_dir,
             name=args.name + '_' + str(ep + 1))
        with torch.no_grad():
            test_correct = 0
            for data in val_loader:

                q_batch, a_batch, vote_batch, i_batch, k_batch, qlen_batch = \
                    batch_to_cuda(data)

                # forward pass
                output = model(q_batch, i_batch, k_batch, qlen_batch)

                output = output[0] + output[1]
                test_correct += total_vqa_score(output, vote_batch)
            acc = test_correct / len(val_dataset) * 100
            logger.write("Validation accuracy: {:.2f} %".format(acc))
Example #2
def trainval(args):
    """
        Train a VQA model using the training + validation set
    """

    # set random seed
    torch.manual_seed(1000)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(1000)
    else:
        raise SystemExit('No CUDA available, script requires CUDA.')

    # load train+val sets for training
    print('Loading data')
    dataset = VQA_Dataset_Test(args.data_dir, args.emb)
    loader = DataLoader(dataset,
                        batch_size=args.bsize,
                        shuffle=True,
                        num_workers=5,
                        collate_fn=collate_fn)
    n_batches = len(dataset) // args.bsize

    # Print data and model parameters
    print('Parameters:\n\t'
          'vocab size: %d\n\tembedding dim: %d\n\tfeature dim: %d'
          '\n\thidden dim: %d\n\toutput dim: %d' %
          (dataset.q_words, args.emb, dataset.feat_dim, args.hid,
           dataset.n_answers))
    print('Initializing model')

    model = Model(vocab_size=dataset.q_words,
                  emb_dim=args.emb,
                  feat_dim=dataset.feat_dim,
                  hid_dim=args.hid,
                  out_dim=dataset.n_answers,
                  dropout=args.dropout,
                  neighbourhood_size=args.neighbourhood_size,
                  pretrained_wemb=dataset.pretrained_wemb)

    criterion = nn.MultiLabelSoftMarginLoss()

    # Move it to GPU
    model = model.cuda()
    criterion = criterion.cuda()

    # Define the optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

    # Continue training from saved model
    start_ep = 0
    if args.model_path and os.path.isfile(args.model_path):
        print('Resuming from checkpoint %s' % (args.model_path))
        ckpt = torch.load(args.model_path)
        start_ep = ckpt['epoch']
        model.load_state_dict(ckpt['state_dict'])
        optimizer.load_state_dict(ckpt['optimizer'])

    # Override the checkpoint learning rate with the value passed in args
    for param_group in optimizer.param_groups:
        param_group['lr'] = args.lr

    # Learning rate scheduler
    scheduler = MultiStepLR(optimizer, milestones=[30], gamma=0.5)
    scheduler.last_epoch = start_ep - 1

    # Training script
    print('Start training.')
    for ep in range(start_ep, start_ep + args.ep):
        scheduler.step()
        ep_loss = 0.0
        ep_correct = 0.0
        ave_loss = 0.0
        ave_correct = 0.0
        losses = []
        for step, next_batch in tqdm(enumerate(loader)):
            model.train()
            # batch to gpu
            q_batch, a_batch, vote_batch, i_batch, k_batch, qlen_batch = \
                batch_to_cuda(next_batch)

            # Do model forward
            output = model(q_batch, i_batch, k_batch, qlen_batch)

            loss = criterion(output, a_batch)

            # compute accuracy based on vqa evaluation
            correct = total_vqa_score(output, vote_batch)
            ep_correct += correct
            ep_loss += loss.item()
            ave_correct += correct
            ave_loss += loss.item()
            losses.append(loss.item())
            # This is a 40 step average
            if step % 40 == 0 and step != 0:
                print(
                    '  Epoch %02d(%03d/%03d), ave loss: %.7f, ave accuracy: %.2f%%'
                    % (ep + 1, step, n_batches, ave_loss / 40,
                       ave_correct * 100 / (args.bsize * 40)))

                ave_correct = 0
                ave_loss = 0

            # compute gradient and do optim step
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # save model and compute accuracy for epoch
        epoch_loss = ep_loss / n_batches
        epoch_acc = ep_correct * 100 / (n_batches * args.bsize)

        save(model,
             optimizer,
             ep,
             epoch_loss,
             epoch_acc,
             dir=args.save_dir,
             name=args.name + '_' + str(ep + 1))

        print('Epoch %02d done, average loss: %.3f, average accuracy: %.2f%%' %
              (ep + 1, epoch_loss, epoch_acc))
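
Examples #1, #2, and #7 persist checkpoints through a save() helper whose keys have to line up with what the resume block reads back ('epoch', 'state_dict', 'optimizer'). A minimal sketch under that assumption; the file naming and the epoch-offset convention are illustrative, not taken from the original code.

import os
import torch

def save(model, optimizer, epoch, loss, accuracy, dir='save', name='model'):
    # write a checkpoint the resume logic above can read back
    os.makedirs(dir, exist_ok=True)
    torch.save({
        'epoch': epoch + 1,                      # epoch to resume from (assumed convention)
        'state_dict': model.state_dict(),
        'optimizer': optimizer.state_dict(),
        'loss': loss,
        'accuracy': accuracy,
    }, os.path.join(dir, name + '.pth.tar'))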
Example #3
    # Create the network
    net = WSDDN(base_net=args.base_net)

    if OFFSET != 0:
        state_path = os.path.join(BASE_DIR, "states", f"epoch_{OFFSET}.pt")
        net.load_state_dict(torch.load(state_path))
        tqdm.write(f"Loaded epoch {OFFSET}'s state.")

    net.to(DEVICE)
    net.train()

    # Set loss function and optimizer
    optimizer = optim.Adam(net.parameters(), lr=LR, weight_decay=WD)
    scheduler = MultiStepLR(optimizer, milestones=[10, 20], gamma=0.1)
    scheduler.last_epoch = OFFSET

    # Train the model
    for epoch in tqdm(range(OFFSET + 1, EPOCHS + 1), "Total"):

        epoch_loss = 0.0

        for (
                batch_img_ids,
                batch_imgs,
                batch_boxes,
                batch_scores,
                batch_target,
        ) in tqdm(train_dl, f"Epoch {epoch}"):
            optimizer.zero_grad()
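
Example #3 rewinds the scheduler by assigning last_epoch by hand. A brief sketch of the alternative supported by torch.optim.lr_scheduler: save and restore the scheduler's own state_dict alongside the model and optimizer, reusing the names from the snippet above (net, optimizer, scheduler, BASE_DIR, OFFSET). The checkpoint keys below are illustrative, not taken from the original code.

# When saving at the end of an epoch:
torch.save({
    "epoch": epoch,
    "model_state": net.state_dict(),
    "optimizer_state": optimizer.state_dict(),
    "scheduler_state": scheduler.state_dict(),
}, os.path.join(BASE_DIR, "states", f"epoch_{epoch}.pt"))

# When resuming (instead of setting scheduler.last_epoch manually):
ckpt = torch.load(os.path.join(BASE_DIR, "states", f"epoch_{OFFSET}.pt"))
net.load_state_dict(ckpt["model_state"])
optimizer.load_state_dict(ckpt["optimizer_state"])
scheduler.load_state_dict(ckpt["scheduler_state"])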
Example #4
def train(args):
    """
        Train a VQG model using the training set and validate on val set.
    """

    # Load the VQA training set
    print('Loading data...')
    dataset = VQA_Dataset(args.data_dir, args.emb)
    loader = DataLoader(dataset,
                        batch_size=args.bsize,
                        shuffle=True,
                        num_workers=0,
                        collate_fn=collate_fn)

    # Load the VQA validation set
    dataset_test = VQA_Dataset(args.data_dir, args.emb, train=False)
    loader_val = DataLoader(dataset_test,
                            batch_size=args.bsize,
                            shuffle=False,
                            num_workers=0,
                            collate_fn=collate_fn)

    n_batches = len(dataset) // args.bsize
    question_vocab = pickle.load(
        open('/mnt/data/xiaojinhui/wangtan_MM/vqa-project/data/train_q_dict.p',
             'rb'))

    # Print data and model parameters
    print('Parameters:\n\t'
          'vocab size: %d\n\tembedding dim: %d\n\tfeature dim: %d'
          '\n\thidden dim: %d\n\toutput dim: %d' %
          (dataset.n_answers, args.emb, dataset.feat_dim, args.hid,
           dataset.n_answers))

    print('Initializing model')
    model_gcn = conditional_GCN(nfeat=options['gcn']['nfeat'],
                                nhid=options['gcn']['nhid'],
                                nclass=options['gcn']['nclass'],
                                emb=options['gcn']['fliter_emb'],
                                dropout=options['gcn']['dropout'])
    # model_gcn_nofinding = layer_vqg.conditional_GCN_1(nfeat=options['gcn']['nfeat'],
    #                             nhid=options['gcn']['nhid'],
    #                             nclass=options['gcn']['nclass'],
    #                             emb = options['gcn']['fliter_emb'],
    #                             dropout=options['gcn']['dropout'])
    model_vqg = question_gen(vocab=question_vocab['wtoi'],
                             vocab_i2t=question_vocab['itow'],
                             opt=options['vqg'])
    # no_finding = layer_vqg.no_finding_area_top(in_feature=2652, hidden_feature=512, dropout=options['gcn']['dropout'])

    criterion = nn.CrossEntropyLoss()

    # Move it to GPU
    model_gcn = model_gcn.cuda()
    model_vqg = model_vqg.cuda()
    # model_gcn_nofinding = model_gcn_nofinding.cuda()
    # no_finding = no_finding.cuda()
    criterion = criterion.cuda()

    # Define the optimiser
    optimizer = torch.optim.Adam([{
        'params': model_gcn.parameters()
    }, {
        'params': model_vqg.parameters()
    }],
                                 lr=args.lr)

    # Continue training from saved model
    start_ep = 0
    if args.model_path and os.path.isfile(args.model_path):
        print('Resuming from checkpoint %s' % (args.model_path))
        ckpt = torch.load(args.model_path)
        start_ep = ckpt['epoch']
        model_gcn.load_state_dict(ckpt['state_dict_gcn'])
        model_vqg.load_state_dict(ckpt['state_dict_vqg'])
        optimizer.load_state_dict(ckpt['optimizer'])

    # Update the learning rate
    for param_group in optimizer.param_groups:
        param_group['lr'] = args.lr

    # Learning rate scheduler
    scheduler = MultiStepLR(optimizer, milestones=[30], gamma=0.5)
    scheduler.last_epoch = start_ep - 1

    # Train iterations
    print('Start training.')
    bleu_best = [0.0, 0.0, 0.0, 0.0]
    cider_best = 0.0
    meteor_best = 0.0
    rouge_best = 0.0
    for ep in range(start_ep, start_ep + args.ep):

        adjust_learning_rate(optimizer, ep)
        scheduler.step()
        ep_loss = 0.0
        ep_top3 = 0.0
        ep_top1 = 0.0
        ave_loss = 0.0
        ave_top3 = 0.0
        ave_top1 = 0.0
        iter_time_all = 0.0

        for step, next_batch in enumerate(loader):

            model_gcn.train()
            model_vqg.train()
            # Move batch to cuda
            target_q, an_feat, img_feat, adj_mat = \
                utils.batch_to_cuda(next_batch, volatile=True)

            # forward pass
            torch.cuda.synchronize()
            start = time.time()

            # img_feat_no = torch.mul(img_feat[:,:,None,:], img_feat[:,None,:,:]).view(-1, 2652)
            # adj_mat = no_finding(img_feat_no).view(-1,36,36)
            # adj_mat += torch.eye(36).cuda()
            # adj_mat = torch.clamp(adj_mat, max=1)
            feat_gcn, adj_new = model_gcn(img_feat, adj_mat)
            # feat_gcn, adj_new = model_gcn_nofinding(img_feat, adj_mat)
            output = model_vqg(feat_gcn, an_feat, target_q)
            # for i in range(256):
            #     dataset.drawarea[i]['adj'] = adj_new[i].detach().cpu().numpy().tolist()
            #     dataset.drawarea[i]['adj_diag'] = np.diag(adj_new[i].detach().cpu().numpy()).tolist()
            #
            # json.dump(dataset.drawarea, open('/mnt/data/xiaojinhui/wangtan_MM/vqa-project/draw/new_adj_t.json', 'w'))
            # output_bs = model_vqg.beam_search(feat_gcn[0].unsqueeze(0),
            #                                   an_feat[0].unsqueeze(0))
            target_q = target_q[:, 1:].contiguous()

            loss = criterion(
                output.view(output.size(0) * output.size(1), output.size(2)),
                target_q.view(target_q.size(0) * target_q.size(1)))

            # Compute batch accuracy

            top1 = utils.accuracy(output, target_q, 1)
            top3 = utils.accuracy(output, target_q, 3)

            ep_top1 += top1
            ep_top3 += top3
            ep_loss += loss.item()
            ave_top1 += top1
            ave_top3 += top3
            ave_loss += loss.item()

            # This is a 40 step average
            if step % 40 == 0 and step != 0:
                print(
                    '  Epoch %02d(%03d/%03d), ave loss: %.7f, top1: %.2f%%, top3: %.2f%%, iter time: %.4fs'
                    % (ep + 1, step, n_batches, ave_loss / 40, ave_top1 / 40,
                       ave_top3 / 40, iter_time_all / 40))

                ave_top1 = 0
                ave_top3 = 0
                ave_loss = 0
                iter_time_all = 0

            # Compute gradient and do optimisation step
            optimizer.zero_grad()
            loss.backward()
            # clip_grad_norm_(model_gcn.parameters(), 2.)
            # clip_grad_norm_(model_gcn.parameters(), 2.)
            # clip_grad_norm_(no_finding.parameters(), 2.)
            optimizer.step()

            end = time.time()
            iter_time = end - start
            iter_time_all += iter_time

            # run validation and save the best model once per epoch (at the first step)
            if step == 0:
                with torch.no_grad():
                    epoch_loss = ep_loss / n_batches
                    epoch_top1 = ep_top1 / n_batches
                    epoch_top3 = ep_top3 / n_batches

                    # compute validation metrics over the validation set
                    model_gcn.eval()
                    model_vqg.eval()

                    output_all = []
                    output_all_bs = {}
                    ref_all = []

                    flag_val = 0

                    for valstep, val_batch in tqdm(enumerate(loader_val)):
                        # test_batch = next(loader_test)
                        target_q, an_feat, img_feat, adj_mat = \
                            utils.batch_to_cuda(val_batch, volatile=True)
                        # img_feat_no = torch.mul(img_feat[:, :, None, :], img_feat[:, None, :, :]).view(-1, 2652)
                        # adj_mat = no_finding(img_feat_no).view(-1, 36, 36)
                        # adj_mat += torch.eye(36).cuda()
                        # adj_mat = torch.clamp(adj_mat, max=1)
                        # feat_gcn, _ = model_gcn_nofinding(img_feat, adj_mat)
                        feat_gcn, adj_new = model_gcn(img_feat, adj_mat)
                        output = model_vqg.generate(feat_gcn, an_feat)

                        for j in range(feat_gcn.size(0)):
                            output_bs = model_vqg.beam_search(
                                feat_gcn[j].unsqueeze(0),
                                an_feat[j].unsqueeze(0))
                            output_all_bs[flag_val] = output_bs
                            flag_val += 1

                        output_all.append(output.cpu().numpy())
                        ref_all.append(target_q[:, :-1].cpu().numpy())

                    gen, ref = utils.idx2question(
                        np.concatenate(output_all, 0),
                        np.concatenate(ref_all, 0), question_vocab['itow'])
                    print(list(gen.values())[:10])

                    # save the best
                    bleu, cider, meteor, rouge = main.main(ref, gen)
                    bleu_best, cider_best, meteor_best, rouge_best, choice = utils.save_the_best(
                        bleu, cider, meteor, rouge, bleu_best, cider_best,
                        meteor_best, rouge_best)
                    if choice:
                        utils.save(model_gcn,
                                   model_vqg,
                                   optimizer,
                                   ep,
                                   epoch_loss,
                                   epoch_top1,
                                   dir=args.save_dir,
                                   name=args.name + '_' + str(ep + 1))

                    print('use beam search...')
                    bleu, cider, meteor, rouge = main.main(ref, output_all_bs)
                    bleu_best, cider_best, meteor_best, rouge_best, choice = utils.save_the_best(
                        bleu, cider, meteor, rouge, bleu_best, cider_best,
                        meteor_best, rouge_best)
                    if choice:
                        utils.save(model_gcn,
                                   model_vqg,
                                   optimizer,
                                   ep,
                                   epoch_loss,
                                   epoch_top1,
                                   dir=args.save_dir,
                                   name=args.name + '_' + str(ep + 1))

                    print(
                        'the best bleu: %s, cider: %.6s, meteor: %.6s, rouge: %.6s'
                        % (bleu_best, cider_best, meteor_best, rouge_best))
                    print(list(output_all_bs.values())[:10])

                model_gcn.train(True)
                model_vqg.train(True)
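
Example #4 tracks top-1 and top-3 token accuracy through utils.accuracy. A rough sketch of such a helper, assuming output is [batch, seq_len, vocab] logits and target is [batch, seq_len] token indices, and that it returns a percentage (all assumptions; the real utility may differ, e.g. by masking padding).

import torch

def accuracy(output, target, k):
    # top-k token indices at every decoding position
    _, topk = output.topk(k, dim=-1)                      # [batch, seq_len, k]
    correct = (topk == target.unsqueeze(-1)).any(dim=-1)  # [batch, seq_len]
    return correct.float().mean().item() * 100.0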
Example #5
def train(model, train_loader, eval_loader, opt):
    utils.create_dir(opt.output)
    optim = torch.optim.Adam(model.parameters(), lr=opt.learning_rate, betas=(0.9, 0.999), eps=1e-08,
                             weight_decay=opt.weight_decay)

    
    logger = utils.Logger(os.path.join(opt.output, 'log.txt'))

    utils.print_model(model, logger)
    for param_group in optim.param_groups:
        param_group['lr'] = opt.learning_rate

    scheduler = MultiStepLR(optim, milestones=[100], gamma=0.8)

    scheduler.last_epoch = opt.s_epoch

    

    best_eval_score = 0
    for epoch in range(opt.s_epoch, opt.num_epochs):
        total_loss = 0
        total_norm = 0
        count_norm = 0
        train_score = 0
        t = time.time()
        N = len(train_loader.dataset)
        scheduler.step()

        for i, (v, b, a, _, qa_text, _, _, q_t, bias) in enumerate(train_loader):
            v = v.cuda()
            b = b.cuda()
            a = a.cuda()
            bias = bias.cuda()
            qa_text = qa_text.cuda()
            rand_index = random.sample(range(0, opt.train_candi_ans_num), opt.train_candi_ans_num)
            qa_text = qa_text[:,rand_index,:]
            a = a[:,rand_index]
            bias = bias[:,rand_index]

            if opt.lp == 0:
                logits = model(qa_text, v, b, epoch, 'train')
                loss = instance_bce_with_logits(logits, a, reduction='mean')
            elif opt.lp == 1:
                logits = model(qa_text, v, b, epoch, 'train')
                loss_pos = instance_bce_with_logits(logits, a, reduction='mean')
                index = random.sample(range(0, v.shape[0]), v.shape[0])
                v_neg = v[index]
                b_neg = b[index]
                logits_neg = model(qa_text, v_neg, b_neg, epoch, 'train')
                self_loss = compute_self_loss(logits_neg, a)
                loss = loss_pos + opt.self_loss_weight * self_loss
            elif opt.lp == 2:
                logits, loss = model(qa_text, v, b, epoch, 'train', bias, a)
            else:
                raise ValueError('unsupported value for opt.lp: %s' % opt.lp)
           
            loss.backward()

            total_norm += nn.utils.clip_grad_norm_(model.parameters(), opt.grad_clip)
            count_norm += 1

            optim.step()
            optim.zero_grad()

            score = compute_score_with_logits(logits, a.data).sum()
            train_score += score.item()
            total_loss += loss.item() * v.size(0)

            if i != 0 and i % 100 == 0:
                print(
                    'training: %d/%d, train_loss: %.6f, train_acc: %.6f' %
                    (i, len(train_loader), total_loss / (i * v.size(0)),
                     100 * train_score / (i * v.size(0))))
        total_loss /= N
        if eval_loader is not None:
            model.train(False)
            eval_score, bound = evaluate(model, eval_loader, opt)
            model.train(True)

        logger.write('\nlr: %.7f' % optim.param_groups[0]['lr'])
        logger.write('epoch %d, time: %.2f' % (epoch, time.time() - t))
        logger.write(
            '\ttrain_loss: %.2f, norm: %.4f, score: %.2f' % (total_loss, total_norm / count_norm, train_score))
        if eval_loader is not None:
            logger.write('\teval score: %.2f (%.2f)' % (100 * eval_score, 100 * bound))


        if (eval_loader is not None and eval_score > best_eval_score):
            if opt.lp == 0:
                model_path = os.path.join(opt.output, 'SAR_top'+str(opt.train_candi_ans_num)+'_best_model.pth')
            elif opt.lp == 1:
                model_path = os.path.join(opt.output, 'SAR_SSL_top'+str(opt.train_candi_ans_num)+'_best_model.pth')
            elif opt.lp == 2:
                model_path = os.path.join(opt.output, 'SAR_LMH_top'+str(opt.train_candi_ans_num)+'_best_model.pth')
            utils.save_model(model_path, model, epoch, optim)
            if eval_loader is not None:
                best_eval_score = eval_score
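
Examples #5 and #6 both train with instance_bce_with_logits. A plausible sketch following the convention common in VQA codebases of rescaling the mean binary cross-entropy by the number of answer candidates; treat the body below as an assumption, not the repository's exact implementation.

import torch.nn.functional as F

def instance_bce_with_logits(logits, labels, reduction='mean'):
    loss = F.binary_cross_entropy_with_logits(logits, labels, reduction=reduction)
    if reduction == 'mean':
        # rescale so the value reflects a per-instance loss rather than a per-class mean
        loss = loss * labels.size(1)
    return loss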
Example #6
def train(model, train_loader, eval_loader, opt):

    utils.create_dir(opt.output)
    optim = torch.optim.Adam(model.parameters(),
                             lr=opt.learning_rate,
                             betas=(0.9, 0.999),
                             eps=1e-08,
                             weight_decay=opt.weight_decay)
    logger = utils.Logger(os.path.join(opt.output, 'log.txt'))

    utils.print_model(model, logger)

    # load snapshot
    if opt.checkpoint_path is not None:
        print('loading %s' % opt.checkpoint_path)
        model_data = torch.load(opt.checkpoint_path)
        model.load_state_dict(model_data.get('model_state', model_data))
        optim.load_state_dict(model_data.get('optimizer_state', model_data))
        opt.s_epoch = model_data['epoch'] + 1

    for param_group in optim.param_groups:
        param_group['lr'] = opt.learning_rate

    scheduler = MultiStepLR(optim,
                            milestones=[10, 15, 20, 25, 30, 35],
                            gamma=0.5)
    scheduler.last_epoch = opt.s_epoch

    best_eval_score = 0
    for epoch in range(opt.s_epoch, opt.num_epochs):
        total_loss = 0
        total_bce_loss = 0
        self_loss = 0
        total_self_loss = 0
        train_score_pos = 0
        train_score_neg = 0
        total_norm = 0
        count_norm = 0
        t = time.time()
        N = len(train_loader.dataset)
        scheduler.step()

        for i, (v, b, q, a, _) in enumerate(train_loader):
            v = v.cuda()
            q = q.cuda()
            a = a.cuda()

            # for the labeled samples
            if epoch < opt.pretrain_epoches:
                logits_pos, _ = model(q, v, False)
                if opt.ml_loss:
                    bce_loss_pos = instance_bce_with_logits(logits_pos,
                                                            a,
                                                            reduction='mean')
                else:
                    bce_loss_pos = instance_bce(logits_pos, a)
                loss = bce_loss_pos
            else:
                logits_pos, logits_neg, _, _ = model(q, v, True)
                if opt.ml_loss:  #use multi-label loss
                    bce_loss_pos = instance_bce_with_logits(logits_pos,
                                                            a,
                                                            reduction='mean')
                else:  #use cross-entropy loss
                    bce_loss_pos = instance_bce(logits_pos, a)

                self_loss = compute_self_loss(logits_neg, a)

                loss = bce_loss_pos + opt.self_loss_weight * self_loss

            loss.backward()
            total_norm += nn.utils.clip_grad_norm_(model.parameters(),
                                                   opt.grad_clip)
            count_norm += 1
            optim.step()
            optim.zero_grad()

            score_pos = compute_score_with_logits(logits_pos, a.data).sum()
            train_score_pos += score_pos.item()
            total_loss += loss.item() * v.size(0)
            total_bce_loss += bce_loss_pos.item() * v.size(0)

            if epoch < opt.pretrain_epoches:  #pretrain
                total_self_loss = 0
                train_score_neg = 0
            else:  # finetune
                score_neg = compute_score_with_logits(logits_neg, a.data).sum()
                total_self_loss += self_loss.item() * v.size(0)
                train_score_neg += score_neg.item()
            if i != 0 and i % 100 == 0:
                print(
                    'training: %d/%d, train_loss: %.6f, bce_loss: %.6f, self_loss: %.6f, neg_train_acc: %.6f, pos_train_acc: %.6f'
                    %
                    (i, len(train_loader), total_loss /
                     (i * v.size(0)), total_bce_loss /
                     (i * v.size(0)), total_self_loss /
                     (i * v.size(0)), 100 * train_score_neg /
                     (i * v.size(0)), 100 * train_score_pos / (i * v.size(0))))

        total_loss /= N
        total_bce_loss /= N
        total_self_loss /= N
        train_score_pos = 100 * train_score_pos / N
        if eval_loader is not None:
            model.train(False)
            eval_score, bound, entropy = evaluate(model, eval_loader)
            model.train(True)

        logger.write('\nlr: %.7f' % optim.param_groups[0]['lr'])
        logger.write('epoch %d, time: %.2f' % (epoch, time.time() - t))
        logger.write('\ttrain_loss: %.2f, norm: %.4f, score: %.2f' %
                     (total_loss, total_norm / count_norm, train_score_pos))
        if eval_loader is not None:
            logger.write('\teval score: %.2f (%.2f)' %
                         (100 * eval_score, 100 * bound))

        if eval_loader is not None and entropy is not None:
            info = '' + ' %.2f' % entropy
            logger.write('\tentropy: ' + info)

        if (eval_loader is not None and eval_score > best_eval_score):
            model_path = os.path.join(opt.output, 'best_model.pth')
            utils.save_model(model_path, model, epoch, optim)
            if eval_loader is not None:
                best_eval_score = eval_score
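
The training score in Examples #5 and #6 comes from compute_score_with_logits. In bottom-up-attention style VQA code this usually credits each example with the soft-label mass of its arg-max prediction; a sketch under that assumption (the caller sums the result over the batch).

import torch

def compute_score_with_logits(logits, labels):
    pred = torch.argmax(logits, dim=1)                                     # [batch]
    one_hot = torch.zeros_like(labels).scatter_(1, pred.unsqueeze(1), 1.0)
    return one_hot * labels   # per-example score; caller applies .sum()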
Example #7
def train(args):
    """
        Train a VQA model using the training set
    """

    # set random seed
    torch.manual_seed(1000)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(1000)
    else:
        raise SystemExit('No CUDA available, script requires CUDA.')

    # Load the VQA training set
    print('Loading data')
    dataset = VQA_Dataset(args.data_dir, args.emb)
    loader = DataLoader(dataset,
                        batch_size=args.bsize,
                        shuffle=True,
                        num_workers=5,
                        collate_fn=collate_fn)

    # Load the VQA validation set
    dataset_test = VQA_Dataset(args.data_dir, args.emb, train=False)
    test_sampler = RandomSampler(dataset_test)
    loader_test = DataLoader(dataset_test,
                             batch_size=args.bsize,
                             sampler=test_sampler,
                             num_workers=4,
                             collate_fn=collate_fn)

    n_batches = len(dataset) // args.bsize

    # Print data and model parameters
    print('Parameters:\n\t'
          'vocab size: %d\n\tembedding dim: %d\n\tfeature dim: %d'
          '\n\thidden dim: %d\n\toutput dim: %d' %
          (dataset.q_words, args.emb, dataset.feat_dim, args.hid,
           dataset.n_answers))
    print('Initializing model')

    model = Model(vocab_size=dataset.q_words,
                  emb_dim=args.emb,
                  feat_dim=dataset.feat_dim,
                  hid_dim=args.hid,
                  out_dim=dataset.n_answers,
                  dropout=args.dropout,
                  neighbourhood_size=args.neighbourhood_size,
                  pretrained_wemb=dataset.pretrained_wemb)

    criterion = nn.MultiLabelSoftMarginLoss()

    # Move it to GPU
    model = model.cuda()
    criterion = criterion.cuda()

    # Define the optimiser
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

    # Continue training from saved model
    start_ep = 0
    if args.model_path and os.path.isfile(args.model_path):
        print('Resuming from checkpoint %s' % (args.model_path))
        ckpt = torch.load(args.model_path)
        start_ep = ckpt['epoch']
        model.load_state_dict(ckpt['state_dict'])
        optimizer.load_state_dict(ckpt['optimizer'])

    # Update the learning rate
    for param_group in optimizer.param_groups:
        param_group['lr'] = args.lr

    # Learning rate scheduler
    scheduler = MultiStepLR(optimizer, milestones=[30], gamma=0.5)
    scheduler.last_epoch = start_ep - 1

    # Split incoming data across gpus
    net = nn.DataParallel(model.train(True))

    # Train iterations
    print('Start training.')
    for ep in range(start_ep, start_ep + args.ep):

        scheduler.step()
        ep_loss = 0.0
        ep_correct = 0.0
        ave_loss = 0.0
        ave_correct = 0.0
        losses = []

        for step, next_batch in tqdm(enumerate(loader)):

            model.train()
            # Move batch to cuda
            q_batch, a_batch, vote_batch, i_batch, k_batch, qlen_batch = \
                batch_to_cuda(next_batch)

            # forward pass
            output, adjacency_matrix = net(q_batch, i_batch, k_batch,
                                           qlen_batch)

            loss = criterion(output, a_batch)

            # Compute batch accuracy based on vqa evaluation
            correct = total_vqa_score(output, vote_batch)
            ep_correct += correct
            ep_loss += loss.item()
            ave_correct += correct
            ave_loss += loss.item()
            losses.append(loss.item())

            # This is a 40 step average
            if step % 40 == 0 and step != 0:
                print(
                    '  Epoch %02d(%03d/%03d), ave loss: %.7f, ave accuracy: %.2f%%'
                    % (ep + 1, step, n_batches, ave_loss / 40,
                       ave_correct * 100 / (args.bsize * 40)))

                ave_correct = 0
                ave_loss = 0

            # Compute gradient and do optimisation step
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # save model and compute validation accuracy every 400 steps
            if step % 400 == 0:
                epoch_loss = ep_loss / n_batches
                epoch_acc = ep_correct * 100 / (n_batches * args.bsize)

                save(model,
                     optimizer,
                     ep,
                     epoch_loss,
                     epoch_acc,
                     dir=args.save_dir,
                     name=args.name + '_' + str(ep + 1))

                # compute validation accuracy over a small subset of the validation set
                test_correct = 0
                net = nn.DataParallel(model.train(False))
                test_iter = iter(loader_test)

                for i in range(10):
                    test_batch = next(test_iter)
                    q_batch, a_batch, vote_batch, i_batch, k_batch, qlen_batch = \
                        batch_to_cuda(test_batch, volatile=True)
                    output, _ = net(q_batch, i_batch, k_batch, qlen_batch)
                    test_correct += total_vqa_score(output, vote_batch)

                net = nn.DataParallel(model.train(True))
                acc = test_correct / (10 * args.bsize) * 100
                print("Validation accuracy: {:.2f} %".format(acc))

        # save model and compute accuracy for epoch
        epoch_loss = ep_loss / n_batches
        epoch_acc = ep_correct * 100 / (n_batches * args.bsize)

        save(model,
             optimizer,
             ep,
             epoch_loss,
             epoch_acc,
             dir=args.save_dir,
             name=args.name + '_' + str(ep + 1))

        print('Epoch %02d done, average loss: %.3f, average accuracy: %.2f%%' %
              (ep + 1, epoch_loss, epoch_acc))
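
The mid-epoch validation in Example #7 still passes a volatile flag to batch_to_cuda; volatile was removed in PyTorch 0.4, and gradient tracking is now disabled with torch.no_grad(). A sketch of that validation block in the current style, assuming batch_to_cuda also works without the flag (it is called that way in Example #1):

with torch.no_grad():
    test_correct = 0
    net = nn.DataParallel(model.train(False))
    test_iter = iter(loader_test)
    for _ in range(10):
        test_batch = next(test_iter)
        q_batch, a_batch, vote_batch, i_batch, k_batch, qlen_batch = \
            batch_to_cuda(test_batch)
        output, _ = net(q_batch, i_batch, k_batch, qlen_batch)
        test_correct += total_vqa_score(output, vote_batch)
    net = nn.DataParallel(model.train(True))
    print("Validation accuracy: {:.2f} %".format(test_correct / (10 * args.bsize) * 100))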