예제 #1
0
def train(**kwargs):
    """Train the caption model and periodically save its weights.

    Every keyword argument is copied onto the module-level ``opt`` config
    object (``setattr(opt, key, value)``) before the dataloader and model
    are built, so callers can override any option by name.

    The loop runs ``opt.max_epoch`` epochs.  After each epoch the loss of the
    last processed batch is printed, and every ``opt.save_model`` epochs the
    model ``state_dict`` is written to ``checkpoints/<epoch>.pth`` (``epoch``
    is the zero-based epoch index).

    Args:
        **kwargs: option overrides applied to ``opt``.
    """
    import os  # local import: only needed to create the checkpoint directory

    device = t.device('cuda') if t.cuda.is_available() else t.device('cpu')
    for k, v in kwargs.items():
        setattr(opt, k, v)

    dataloader = get_dataloader(opt)
    model = CaptionModel(opt, dataloader.dataset.word2ix,
                         dataloader.dataset.id2ix)
    if opt.model_path:
        # Load onto CPU first so the checkpoint restores regardless of the
        # device it was saved from; the model is moved to `device` below.
        model.load_state_dict(t.load(opt.model_path, map_location='cpu'))
    # NOTE(review): cuDNN is disabled globally here; the reason is not
    # visible in this function -- confirm it is still required.
    t.backends.cudnn.enabled = False
    model = model.to(device)

    optimizer = Adam(model.parameters(), opt.lr)
    criterion = t.nn.CrossEntropyLoss()
    for epoch in range(opt.max_epoch):
        # Reset per epoch so an empty dataloader neither raises NameError
        # nor reports a stale loss from a previous epoch.
        loss = None
        # Wrap the dataloader itself (not enumerate(...)) so tqdm can read
        # its length and display a total / ETA.
        for imgs, (captions, lengths), _ in tqdm.tqdm(dataloader):
            imgs = imgs.to(device)
            captions = captions.to(device)
            pred, _ = model(imgs, captions, lengths)
            # pack_padded_sequence(...)[0] is the flattened `data` tensor of
            # the packed sequence, i.e. the captions with padding removed.
            # assumes captions is (seq_len, batch) sorted by decreasing
            # length (batch_first defaults to False) -- TODO confirm
            target_captions = pack_padded_sequence(captions, lengths)[0]

            loss = criterion(pred, target_captions)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        if loss is not None:
            print("Current Loss: ", loss.item())
        if (epoch + 1) % opt.save_model == 0:
            # t.save does not create missing parent directories.
            os.makedirs("checkpoints", exist_ok=True)
            t.save(model.state_dict(), "checkpoints/{}.pth".format(epoch))
예제 #2
0
def main():
    """Parse CLI arguments, build the caption model and run train/eval epochs.

    Side effects:
        * Rebinds the module-level ``args`` to the parsed arguments.
        * Creates ``<args.results_dir>/<args.save>`` (``args.save`` defaults
          to a timestamp when empty) and logs to ``log.txt`` inside it.
        * After every epoch, calls ``model.save_checkpoint`` with the path
          ``checkpoint_epoch_<n>.pth.tar`` in that directory (``n`` is the
          one-based epoch number).

    For each epoch in ``[args.start_epoch, args.epochs)`` the optimizer is
    re-configured via ``adjust_optimizer`` with an exponentially decayed
    learning rate, the model is trained on the training iterator and then
    evaluated on the validation iterator; both perplexities are logged.
    """
    global args
    # Local import: torch.set_grad_enabled replaces the removed `volatile`
    # flag; only `nn`, `cudnn`, ... are known to be imported at file level.
    import torch

    args = parser.parse_args()
    # `==`, not `is`: identity comparison against a literal is
    # implementation-dependent (and a SyntaxWarning on Python 3.8+).
    if args.save == '':
        args.save = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    save_path = os.path.join(args.results_dir, args.save)
    os.makedirs(save_path, exist_ok=True)

    setup_logging(os.path.join(save_path, 'log.txt'))
    checkpoint_file = os.path.join(save_path, 'checkpoint_epoch_%s.pth.tar')

    logging.debug("run arguments: %s", args)
    logging.info("using pretrained cnn %s", args.cnn)
    cnn = resnet.__dict__[args.cnn](pretrained=True)

    vocab = build_vocab()
    model = CaptionModel(cnn, vocab,
                         embedding_size=args.embedding_size,
                         rnn_size=args.rnn_size,
                         num_layers=args.num_layers,
                         share_embedding_weights=args.share_weights)

    train_data = get_iterator(get_coco_data(vocab, train=True),
                              batch_size=args.batch_size,
                              max_length=args.max_length,
                              shuffle=True,
                              num_workers=args.workers)
    val_data = get_iterator(get_coco_data(vocab, train=False),
                            batch_size=args.eval_batch_size,
                            max_length=args.max_length,
                            shuffle=False,
                            num_workers=args.workers)

    if 'cuda' in args.type:
        cudnn.benchmark = True
        model.cuda()

    optimizer = select_optimizer(
        args.optimizer, params=model.parameters(), lr=args.lr)

    def regime(e):
        """Return the optimizer settings for epoch ``e`` (lr decays as lr_decay**e)."""
        return {'lr': args.lr * (args.lr_decay ** e),
                'momentum': args.momentum,
                'weight_decay': args.weight_decay}

    model.finetune_cnn(False)

    def forward(model, data, training=True, optimizer=None):
        """Run one pass over ``data`` and return the average perplexity.

        When ``training`` is true the model is put in train mode and
        ``optimizer`` is stepped after every batch (with gradient clipping
        on ``model.rnn``); otherwise the model is put in eval mode and
        autograd is disabled.  Perplexity is ``exp`` of the per-batch
        cross-entropy loss, accumulated in an ``AverageMeter``.

        Reads ``epoch`` from the enclosing ``main`` scope for log messages,
        so it must only be called from inside the epoch loop below.
        """
        use_cuda = 'cuda' in args.type
        loss = nn.CrossEntropyLoss()
        perplexity = AverageMeter()
        batch_time = AverageMeter()
        data_time = AverageMeter()

        if training:
            model.train()
        else:
            model.eval()

        end = time.time()
        for i, (imgs, (captions, lengths)) in enumerate(data):
            data_time.update(time.time() - end)
            if use_cuda:
                imgs = imgs.cuda()
                # `async` is a reserved word since Python 3.7; PyTorch
                # renamed the argument to `non_blocking`.
                captions = captions.cuda(non_blocking=True)
            # Inputs drop the last time step along dim 0.
            # assumes captions is (seq_len, batch) -- TODO confirm
            input_captions = captions[:-1]
            target_captions = pack_padded_sequence(captions, lengths)[0]

            # Replaces Variable(..., volatile=not training): no autograd
            # graph is built during evaluation.
            with torch.set_grad_enabled(training):
                pred, _ = model(imgs, input_captions, lengths)
                err = loss(pred, target_captions)
            # .item() instead of .data[0]: the loss is a 0-dim tensor.
            perplexity.update(math.exp(err.item()))

            if training:
                optimizer.zero_grad()
                err.backward()
                # In-place variant; the un-suffixed name is deprecated.
                nn.utils.clip_grad_norm_(model.rnn.parameters(),
                                         args.grad_clip)
                optimizer.step()

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()
            if i % args.print_freq == 0:
                logging.info('{phase} - Epoch: [{0}][{1}/{2}]\t'
                             'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                             'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                             'Perplexity {perp.val:.4f} ({perp.avg:.4f})'.format(
                                 epoch, i, len(data),
                                 phase='TRAINING' if training else 'EVALUATING',
                                 batch_time=batch_time,
                                 data_time=data_time, perp=perplexity))

        return perplexity.avg

    for epoch in range(args.start_epoch, args.epochs):
        if epoch >= args.finetune_epoch:
            model.finetune_cnn(True)
        optimizer = adjust_optimizer(
            optimizer, epoch, regime)
        # Train
        train_perp = forward(
            model, train_data, training=True, optimizer=optimizer)
        # Evaluate
        val_perp = forward(model, val_data, training=False)

        logging.info('\n Epoch: {0}\t'
                     'Training Perplexity {train_perp:.4f} \t'
                     'Validation Perplexity {val_perp:.4f} \n'
                     .format(epoch + 1, train_perp=train_perp, val_perp=val_perp))
        model.save_checkpoint(checkpoint_file % (epoch + 1))
예제 #3
0
    logger.info('Building model...')
    model = CaptionModel(opt)

    xe_criterion = CrossEntropyCriterion()
    rl_criterion = RewardCriterion()

    if torch.cuda.is_available():
        model.cuda()
        xe_criterion.cuda()
        rl_criterion.cuda()

    logger.info('Start training...')
    start = datetime.now()

    optimizer = optim.Adam(model.parameters(), lr=opt.learning_rate)
    infos = train(model,
                  xe_criterion,
                  optimizer,
                  train_loader,
                  val_loader,
                  opt,
                  rl_criterion=rl_criterion)
    logger.info('Best val %s score: %f. Best iter: %d. Best epoch: %d',
                opt.eval_metric, infos['best_score'], infos['best_iter'],
                infos['best_epoch'])

    logger.info('Training time: %s', datetime.now() - start)

    if opt.result_file:
        logger.info('Start testing...')
                                 shuffle=False,
                                 num_workers=workers)

    model = CaptionModel(cnn,
                         vocab,
                         embedding_size=embedding_size,
                         rnn_size=rnn_size,
                         num_layers=num_layers,
                         share_embedding_weights=share_weights)

    if 'cuda' in type:
        cudnn.benchmark = True
        model.cuda()

    optimizer = select_optimizer(optimizer,
                                 params=model.parameters(),
                                 lr=learning_rate)
    regime = lambda e: {
        'lr': learning_rate * (lr_decay**e),
        'momentum': momentum,
        'weight_decay': weight_decay
    }
    model.finetune_cnn(False)

    def forward(model, data, training=True, optimizer=None):
        use_cuda = 'cuda' in type
        loss = nn.CrossEntropyLoss()
        perplexity = AverageMeter()
        batch_time = AverageMeter()
        data_time = AverageMeter()
예제 #5
0
                                               transform=mytransform,
                                               train=True)
    flicker8k_val = FlickrDataLoader.Flicker8k(img_dir,
                                               cap_path,
                                               val_txt,
                                               transform=mytransform,
                                               train=True)
    with open('feat6k.npy', 'r') as f:
        feat_tr = np.load(f)

    with open('capt6k.pkl', 'r') as f:
        caption_trn = pickle.load(f)

    with open('feat.pkl', 'r') as f:
        feat_val = pickle.load(f)

    with open('capt1k.pkl', 'r') as f:
        caption_val = pickle.load(f)

    model = CaptionModel(bsz=1,
                         feat_dim=(196, 512),
                         n_voc=5834,
                         n_embed=512,
                         n_hidden=1024).cuda()
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    train(epoches=1)
    with open('model_t.pth', 'r') as f:
        model.load_state_dict(torch.load(f))
예제 #6
0
    train_loader = DataLoader(train_dataset, batch_size, collate_fn=my_collate)

    caption_model = CaptionModel(model_info['vocab_size'],
                                 embedding_dim,
                                 hidden_size=hidden_size,
                                 embedding_matrix=embedding_matrix,
                                 embedding_train=True)

    init_weights(caption_model, embedding_pretrained=True)

    caption_model.to(device)

    # we will ignore the pad token in true target set
    criterion = nn.CrossEntropyLoss(ignore_index=0)

    optimizer = torch.optim.Adam(caption_model.parameters(), lr=0.01)

    clip = 1
    start = time()
    print(f'Training...')
    for i in tqdm(range(EPOCHS * 6)):

        loss = train(caption_model, train_loader, optimizer, criterion, clip,
                     model_info['vocab_size'])
        print(f'loss = {loss}')

    # reduce the learning rate
    for param_group in optimizer.param_groups:
        param_group['lr'] = 1e-4

    for i in tqdm(range(EPOCHS * 6)):