def _build_dataset(opt, split):
    """Construct the dataset for *split* ("train", "val", or "trainval").

    Uses RCNN region features when ``opt.use_rcnn`` is set, otherwise the
    plain image dataset.
    """
    if opt.use_rcnn:
        return RCNN_Dataset(opt.data_path, opt.data_name, split,
                            opt.seq_per_img)
    return Dataset(opt.data_path, opt.data_name, split, opt.seq_per_img,
                   opt.img_name, opt.size_scale, use_h5py=opt.use_h5py)


def _build_loader(opt, dataset):
    """Wrap *dataset* in a DataLoader configured from the run options."""
    return DataLoader(dataset,
                      batch_size=opt.batch_size,
                      shuffle=opt.shuffle,
                      num_workers=opt.num_workers,
                      pin_memory=opt.pin_memory,
                      drop_last=opt.drop_last,
                      use_thread=opt.use_thread)


def main(opt):
    """Build datasets, model, and optimizer from *opt*, then run training.

    Parameters
    ----------
    opt : argparse.Namespace-like options object (paths, hyper-parameters,
        GPU ids, etc.). ``opt.trainval != 0`` trains on the combined
        train+val split with no held-out validation loader.
    """
    Initializer.manual_seed(opt.seed)
    print("Constructing the dataset...")
    if opt.trainval == 0:
        trainset = _build_dataset(opt, "train")
        trainLoader = _build_loader(opt, trainset)
        valset = _build_dataset(opt, "val")
        valLoader = _build_loader(opt, valset)
    else:
        # Train on train+val combined; no validation pass during training.
        trainset = _build_dataset(opt, "trainval")
        trainLoader = _build_loader(opt, trainset)
        valset = None
        valLoader = None

    idx2word = trainset.idx2word
    ans_pool = torch.from_numpy(trainset.ans_pool)

    print("Building model...")
    word_embedded = LargeEmbedding(len(idx2word),
                                   300,
                                   padding_idx=0,
                                   devices=opt.gpus)
    word_embedded.load_pretrained_vectors(opt.word_vectors)

    if opt.predict_type in ["sum_attn", "cat_attn", "prod_attn"]:
        num_ans = ans_pool.size(0)
        model = DCN(opt, num_ans) if not opt.use_rcnn else DCNWithRCNN(
            opt, num_ans)
    else:
        # Embed the whole answer pool once up front; volatile => no autograd
        # history is kept for this forward pass.
        ans = word_embedded(Variable(ans_pool.cuda(opt.gpus[0]),
                                     volatile=True)).data
        ans_mask = ans_pool.ne(0).float()  # 0 is the padding index
        model = DCNWithAns(opt, ans, ans_mask) if not opt.use_rcnn else \
            DCNWithRCNNAns(opt, ans, ans_mask)

    criterion = BinaryLoss()
    evaluation = Accuracy()

    dict_checkpoint = opt.train_from
    if dict_checkpoint:
        print("Loading model from checkpoint at %s" % dict_checkpoint)
        checkpoint = torch.load(dict_checkpoint)
        model.load_state_dict(checkpoint["model"])
        checkpoint = None  # release the loaded state dict before training

    if len(opt.gpus) >= 1:
        model.cuda(opt.gpus[0])

    if len(opt.gpus) > 1:
        model = nn.DataParallel(model, opt.gpus, dim=0)
    # NOTE: when wrapped in DataParallel this sets the attribute on the
    # wrapper, and trainEpoch reads it via model.word_embedded.
    model.word_embedded = word_embedded

    optimizer = Adam([p for p in model.parameters() if p.requires_grad],
                     lr=opt.lr,
                     weight_decay=opt.weight_decay,
                     record_step=opt.record_step)
    scheduler = lr_scheduler.StepLR(optimizer, opt.step_size, gamma=opt.gamma)
    optim_wrapper = OptimWrapper(optimizer, scheduler)

    # Count trainable parameters, excluding the CNN backbone, the word
    # embedding table, and the fixed answer representations.
    named_parameters = (model.module.named_parameters()
                        if len(opt.gpus) > 1 else model.named_parameters())
    nparams = [param.numel() for name, param in named_parameters
               if not name.startswith(("resnet", "word_embedded", "ans"))]
    print("* Number of parameters: %d" % sum(nparams))

    timer = Timer()
    timer.tic()
    try:
        with torch.cuda.device(opt.gpus[0]):
            trainModel(trainLoader, valLoader, model, criterion, evaluation,
                       optim_wrapper, opt)
    except KeyboardInterrupt:
        print("It took %.2f hours to train the network" % (timer.toc() / 3600))
        sys.exit("Training interrupted")

    print("It took %.2f hours to train the network" % (timer.toc() / 3600))
def trainEpoch(epoch, dataloader, model, criterion, evaluation, optim, opt,
               writer):
    """Run one training epoch and return (epoch loss avg, epoch accuracy avg).

    Three Meter pairs accumulate the same per-batch values over different
    horizons: index 0 is flushed to TensorBoard every 10 iterations, index 1
    is flushed to the console every ``opt.log_interval`` iterations, and
    index 2 spans the whole epoch.
    """
    model.train()
    loss_meters = [Meter() for _ in range(3)]
    acc_meters = [Meter() for _ in range(3)]
    clock = Timer()

    clock.tic()
    optim.step_epoch()
    n_batches = len(dataloader)
    for step, batch in enumerate(dataloader):
        if opt.use_rcnn:
            img, ques, img_mask, ques_mask, _, ans_idx = batch
        else:
            img, ques, ques_mask, _, ans_idx = batch

        img = Variable(img) if opt.use_rcnn else Variable(img, volatile=True)
        img_mask = Variable(img_mask) if opt.use_rcnn else None
        ques = Variable(ques, volatile=True)
        ques_mask = Variable(ques_mask)
        ans_idx = Variable(ans_idx)

        img, img_mask, ques, ques_mask, ans_idx = move_to_cuda(
            (img, img_mask, ques, ques_mask, ans_idx), devices=opt.gpus)
        # Embed the question tokens, then detach from the embedding graph so
        # gradients do not flow back into the embedding table.
        ques = Variable(model.word_embedded(ques).data)

        optim.zero_grad()
        if opt.use_rcnn:
            score = model(img, ques, img_mask, ques_mask)
        else:
            score = model(img, ques, img_mask, ques_mask, is_train=True)

        loss = criterion(score, ans_idx)
        loss.backward()
        # Evaluate accuracy on detached, volatile copies (no autograd graph).
        accuracy = evaluation(Variable(score.data, volatile=True),
                              Variable(ans_idx.data, volatile=True))
        _, ratio, updates, params = optim.step()

        batch_loss = loss.data[0] / opt.batch_size
        batch_acc = accuracy.data[0]
        for loss_meter, acc_meter in zip(loss_meters, acc_meters):
            loss_meter.update(batch_loss)
            acc_meter.update(batch_acc)

        global_step = epoch * n_batches + step
        if ratio is not None:
            writer.add_scalar("statistics/update_to_param_ratio", ratio,
                              global_step=global_step)
            writer.add_scalar("statistics/absolute_updates", updates,
                              global_step=global_step)
            writer.add_scalar("statistics/absolute_params", params,
                              global_step=global_step)

        if (step + 1) % 10 == 0:
            writer.add_scalar("iter/train_loss", loss_meters[0].avg,
                              global_step=global_step)
            writer.add_scalar("iter/train_accuracy", acc_meters[0].avg,
                              global_step=global_step)
            loss_meters[0].reset()
            acc_meters[0].reset()

        if (step + 1) % opt.log_interval == 0:
            print(
                "Epoch %5d; iter %6i; loss: %8.2f; accuracy: %8.2f; %6.0fs elapsed"
                % (epoch, step + 1, loss_meters[1].avg, acc_meters[1].avg,
                   clock.toc(average=False)))
            loss_meters[1].reset()
            acc_meters[1].reset()
            clock.tic()

    writer.add_scalar("epoch/train_loss",
                      loss_meters[2].avg,
                      global_step=epoch)
    writer.add_scalar("epoch/train_accuracy",
                      acc_meters[2].avg,
                      global_step=epoch)

    return loss_meters[2].avg, acc_meters[2].avg
# --- Example #3: excerpt from train.py (project veda10/VQA) ---
# NOTE(review): this second listing was truncated by the scrape. The opening
# of the `optimizer = Adam(...)` call is missing; only its trailing keyword
# arguments survived:
#     lr=opt['lr'],
#     weight_decay=opt['weight_decay'],
#     record_step=opt['record_step'])
# Step-decay learning-rate schedule wrapped together with the optimizer.
scheduler = lr_scheduler.StepLR(optimizer,
                                opt['step_size'],
                                gamma=opt['gamma'])
optim_wrapper = OptimWrapper(optimizer, scheduler)

# Count trainable parameters, excluding the CNN backbone, the word embedding
# table, and the fixed answer representations.
named_parameters = (model.module.named_parameters()
                    if len(opt['gpus']) > 1 else model.named_parameters())
nparams = [param.numel() for name, param in named_parameters
           if not name.startswith(("resnet", "word_embedded", "ans"))]
print("* Number of parameters: %d" % sum(nparams))

checkpoint = None  # release any loaded checkpoint before training starts
timer = Timer()
timer.tic()
try:
    with torch.cuda.device(opt['gpus'][0]):
        print('Training model....')
        trainModel(trainLoader, valLoader, model, criterion, evaluation,
                   optim_wrapper, opt)
except KeyboardInterrupt:
    print("It took %.2f hours to train the network" % (timer.toc() / 3600))
    sys.exit("Training interrupted")

print("It took %.2f hours to train the network" % (timer.toc() / 3600))