Example #1
def main(args):
    tf = '_tf' if args.tf else ''
    run_dir = os.path.join(
        'results/', args.config + tf + '_%.4f' % args.lambda_ient +
        '_%.4f' % args.lambda_tent)
    # vocabulary built by the preprocessing step
    word_dict = json.load(open(args.data + '/word_dict.json', 'r'))
    vocabulary_size = len(word_dict)

    # encoder/decoder pair, optimizer and LR schedule
    encoder = Encoder(args.network, args.config)
    decoder = Decoder(vocabulary_size, encoder.dim, args.tf)
    optimizer = optim.Adam(decoder.parameters(), lr=args.lr)
    scheduler = optim.lr_scheduler.StepLR(optimizer, args.step_size)

    # move models and the loss to the GPU
    encoder.cuda()
    decoder.cuda()
    cross_entropy_loss = nn.CrossEntropyLoss().cuda()

    # checkpointing and TensorBoard logging
    saver = Saver(Trainer(encoder, decoder, optimizer, scheduler), run_dir)
    writer = SummaryWriter(saver.log_dir)
    # data loaders for the train / val / test splits
    train_loader = torch.utils.data.DataLoader(
        ImageCaptionDataset(data_transforms, args.data),
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=1)

    val_loader = torch.utils.data.DataLoader(
        ImageCaptionDataset(data_transforms, args.data, split_type='val'),
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=1)

    test_loader = torch.utils.data.DataLoader(
        ImageCaptionDataset(data_transforms, args.data, split_type='test'),
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=1)

    print('Starting training with {}'.format(args))
    for epoch in range(saver.start_epoch, args.epochs + 1):
        train(epoch, encoder, decoder, optimizer, cross_entropy_loss,
              train_loader, word_dict, args.alpha_c, args.log_interval, writer,
              saver, val_loader, args)
        saver.save_model(epoch)
        validate(epoch, encoder, decoder, cross_entropy_loss, val_loader,
                 word_dict, args.alpha_c, args.log_interval, writer, saver)
        test(epoch, encoder, decoder, cross_entropy_loss, test_loader,
             word_dict, args.alpha_c, args.log_interval, writer, saver)
        old_lr = optimizer.param_groups[0]['lr']
        scheduler.step()
        lr = optimizer.param_groups[0]['lr']
        print('learning rate %.7f -> %.7f' % (old_lr, lr))
    writer.close()
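
For reference, main expects an args namespace carrying the hyperparameters read inside the function. Below is a minimal argparse sketch: the flag names mirror the attributes used above, but every default value is an illustrative assumption rather than a setting from the original project.

import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='image captioning training (illustrative)')
    parser.add_argument('--data', default='data/')
    parser.add_argument('--config', default='default')
    parser.add_argument('--network', default='resnet')
    parser.add_argument('--tf', action='store_true', help='use teacher forcing')
    parser.add_argument('--lambda_ient', type=float, default=0.0)
    parser.add_argument('--lambda_tent', type=float, default=0.0)
    parser.add_argument('--lr', type=float, default=1e-4)
    parser.add_argument('--step_size', type=int, default=5)
    parser.add_argument('--batch_size', type=int, default=64)
    parser.add_argument('--epochs', type=int, default=20)
    parser.add_argument('--alpha_c', type=float, default=1.0)
    parser.add_argument('--log_interval', type=int, default=100)
    main(parser.parse_args())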
Example #2
def train(args, model, train_set):
    # to cuda
    model.cuda()
    model.train()

    # dataloader
    train_loader = torch.utils.data.DataLoader(
        train_set,
        batch_size=args.batch_size,
        drop_last=True,
        shuffle=True,
        num_workers=int(args.num_threads))

    # optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    scheduler = torch.optim.lr_scheduler.StepLR(
        optimizer,
        step_size=args.scheduler_step_size,
        gamma=args.scheduler_gamma)

    # saver
    saver = Saver(args)

    # loss function
    criterion = torch.nn.L1Loss()

    # time
    time_start_train = time.time()

    # misc
    num_batch = len(train_set) // args.batch_size
    counter = 0
    backup_codes(args)

    # compute paras
    params = network_paras(model)
    log = "num of parameters: {:,}".format(params)
    saver.save_log(log)
    print(log)

    # init weights
    def weights_init(m):
        if isinstance(m, torch.nn.Conv2d):
            torch.nn.init.kaiming_normal_(m.weight)

    if not args.is_finetuning:
        model.apply(weights_init)

    # start training
    print('{:=^40}'.format(' training start '))
    for epoch in range(args.epochs):
        running_loss = 0.0
        for bidx, (_, im_lr, im_hr) in enumerate(train_loader):
            # move the batch to the GPU (Variable/volatile are deprecated in recent PyTorch)
            im_lr = im_lr.cuda()
            im_hr = im_hr.cuda()

            # zero the parameter gradients
            model.zero_grad()

            # forward
            output = model(im_lr)

            # loss
            loss = criterion(output, im_hr)

            # backward & update
            loss.backward()
            optimizer.step()

            # accumulate running loss (loss.item() already returns a Python float on the CPU)
            running_loss += loss.item()

            # log every args.step_print_loss batches
            if counter % args.step_print_loss == 0:
                # time
                acc_time = time.time() - time_start_train

                # log
                log = 'epoch: (%d/%d) [%5d/%5d], loss: %.6f | time: %s' % \
                    (epoch, args.epochs, bidx, num_batch, running_loss, str(datetime.timedelta(seconds=acc_time)))

                print(log)
                saver.save_log(log)
                running_loss = 0.0

                print_lr(optimizer)

            if counter and counter % args.step_save == 0:
                # save
                saver.save_model(model)

            # counter increment
            counter += 1

        # step the LR schedule once per epoch (the old scheduler.step(epoch) call
        # at the top of the loop is deprecated in recent PyTorch)
        scheduler.step()

    print('{:=^40}'.format(' Finish '))
    runtime = time.time() - time_start_train
    print('training time:', str(datetime.timedelta(seconds=runtime)) + '\n\n')
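
The excerpt relies on two helpers, network_paras and print_lr, that are defined elsewhere in the project. A minimal sketch of what such helpers plausibly do (an assumption, not the project's actual implementation):

def network_paras(model):
    # total number of trainable parameters
    return sum(p.numel() for p in model.parameters() if p.requires_grad)


def print_lr(optimizer):
    # current learning rate of every parameter group
    for i, group in enumerate(optimizer.param_groups):
        print('param group %d, lr: %.7f' % (i, group['lr']))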