Example #1
attention_dim = 512  # dimension of attention linear layers
decoder_dim = 512  # dimension of decoder RNN
dropout = 0.1
device = torch.device("cuda" if torch.cuda.is_available() else
                      "cpu")  # sets device for model and PyTorch tensors
encoder_lr = 5e-4  # learning rate for encoder if fine-tuning
decoder_lr = 5e-4  # learning rate for decoder
grad_clip = 5.  # clip gradients at an absolute value of 5
alpha_c = 1.  # regularization parameter for 'doubly stochastic attention', as in the paper
fine_tune_encoder = True  # fine-tune encoder?
decoder = DecoderWithAttention(attention_dim=attention_dim,
                               embed_dim=emb_dim,
                               decoder_dim=decoder_dim,
                               vocab_size=len(vocab),
                               dropout=dropout)
decoder.fine_tune_embeddings(True)
decoder_optimizer = torch.optim.Adam(params=filter(lambda p: p.requires_grad,
                                                   decoder.parameters()),
                                     lr=decoder_lr)
encoder = Encoder()
encoder.fine_tune(fine_tune_encoder)
encoder_optimizer = torch.optim.Adam(
    params=filter(lambda p: p.requires_grad, encoder.parameters()),
    lr=encoder_lr) if fine_tune_encoder else None

decoder_sched = torch.optim.lr_scheduler.CosineAnnealingLR(decoder_optimizer,
                                                           5,
                                                           eta_min=1e-5,
                                                           last_epoch=-1)
encoder_sched = torch.optim.lr_scheduler.CosineAnnealingLR(encoder_optimizer,
                                                           5,
                                                           eta_min=1e-5,
                                                           last_epoch=-1)

def main():
    """
    Training and validation.
    """

    global best_bleu4, epochs_since_improvement, checkpoint, start_epoch, fine_tune_encoder, data_name, word_map, rev_word_map

    # Read word map
    word_map_file = os.path.join(data_folder, 'WORDMAP_' + data_name + '.json')
    with open(word_map_file, 'r') as j:
        word_map = json.load(j)

    rev_word_map = {v: k for k, v in word_map.items()}

    # Initialize / load checkpoint
    if checkpoint is None:
        decoder = DecoderWithAttention(attention_dim=attention_dim,
                                       embed_dim=emb_dim,
                                       decoder_dim=decoder_dim,
                                       vocab_size=len(word_map),
                                       dropout=dropout)
        pretrained_embs, pretrained_embs_dim = load_embeddings(
            '/home/Iwamura/datasets/datasets/GloVe/glove.6B.300d.txt',
            word_map)
        assert pretrained_embs_dim == decoder.embed_dim
        decoder.load_pretrained_embeddings(pretrained_embs)
        decoder.fine_tune_embeddings(True)

        decoder_optimizer = torch.optim.Adam(params=filter(
            lambda p: p.requires_grad, decoder.parameters()),
                                             lr=decoder_lr)
        encoder = Encoder()
        encoder_opt = Encoder()
        encoder.fine_tune(fine_tune_encoder)
        encoder_opt.fine_tune(fine_tune_encoder_opt)
        encoder_optimizer = torch.optim.Adam(
            params=filter(lambda p: p.requires_grad, encoder.parameters()),
            lr=encoder_lr) if fine_tune_encoder else None
        encoder_optimizer_opt = torch.optim.Adam(
            params=filter(lambda p: p.requires_grad, encoder_opt.parameters()),
            lr=encoder_opt_lr) if fine_tune_encoder_opt else None

    else:

        checkpoint = torch.load(checkpoint)
        start_epoch = checkpoint['epoch'] + 1
        epochs_since_improvement = checkpoint['epochs_since_improvement']
        best_bleu4 = checkpoint['bleu-4']
        decoder = checkpoint['decoder']
        decoder_optimizer = checkpoint['decoder_optimizer']
        encoder_opt = checkpoint['encoder_opt']
        encoder_optimizer_opt = checkpoint['encoder_optimizer_opt']

        # if fine_tune_encoder is True and encoder_optimizer is None and encoder_optimizer_opt is None
        if fine_tune_encoder_opt is True and encoder_optimizer_opt is None:
            encoder_opt.fine_tune(fine_tune_encoder_opt)

            encoder_optimizer_opt = torch.optim.Adam(params=filter(
                lambda p: p.requires_grad, encoder_opt.parameters()),
                                                     lr=encoder_opt_lr)

    # Move to GPU, if available
    decoder = decoder.to(device)

    encoder_opt = encoder_opt.to(device)

    # Loss function
    criterion = nn.CrossEntropyLoss().to(device)

    # Custom dataloaders

    normalize_opt = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])

    train_loader = torch.utils.data.DataLoader(CaptionDataset(
        data_folder,
        data_name,
        'TRAIN',
        transform=transforms.Compose([normalize_opt])),
                                               batch_size=batch_size,
                                               shuffle=True,
                                               num_workers=workers,
                                               pin_memory=True)
    val_loader = torch.utils.data.DataLoader(CaptionDataset(
        data_folder,
        data_name,
        'VAL',
        transform=transforms.Compose([normalize_opt])),
                                             batch_size=batch_size,
                                             shuffle=True,
                                             num_workers=workers,
                                             pin_memory=True)

    # Epochs
    for epoch in range(start_epoch, epochs):

        # Terminate training if there is no improvement for 10 consecutive epochs; decay learning rate every 4 epochs
        if epochs_since_improvement == 10:
            break
        if epoch > 0 and epoch % 4 == 0:
            adjust_learning_rate(decoder_optimizer, 0.8)

            if fine_tune_encoder_opt:
                adjust_learning_rate(encoder_optimizer_opt, 0.8)

        # One epoch's training
        train(train_loader=train_loader,
              encoder_opt=encoder_opt,
              decoder=decoder,
              criterion=criterion,
              encoder_optimizer_opt=encoder_optimizer_opt,
              decoder_optimizer=decoder_optimizer,
              epoch=epoch)

        # One epoch's validation
        recent_bleu4 = validate(val_loader=val_loader,
                                encoder_opt=encoder_opt,
                                decoder=decoder,
                                criterion=criterion)

        # Check if there was an improvement
        is_best = recent_bleu4 > best_bleu4
        best_bleu4 = max(recent_bleu4, best_bleu4)
        if not is_best:
            epochs_since_improvement += 1
            print("\nEpochs since last improvement: %d\n" %
                  (epochs_since_improvement, ))
        else:
            epochs_since_improvement = 0

        # Save checkpoint
        save_checkpoint(data_name, epoch, epochs_since_improvement,
                        encoder_opt, decoder, encoder_optimizer_opt,
                        decoder_optimizer, recent_bleu4, is_best)
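
The adjust_learning_rate helper called in the epoch loop above is not part of this listing. A minimal sketch, assuming it does nothing more than scale every parameter group's learning rate by the given shrink factor (a hypothetical reconstruction, not necessarily the author's exact code):

def adjust_learning_rate(optimizer, shrink_factor):
    """Shrink the learning rate of every parameter group by shrink_factor."""
    print("\nDECAYING learning rate.")
    for param_group in optimizer.param_groups:
        # scale the current learning rate in place
        param_group['lr'] = param_group['lr'] * shrink_factor
    print("The new learning rate is %f\n" % (optimizer.param_groups[0]['lr'],))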
Example #3
def fit(t_params, checkpoint=None, m_params=None, logger=None):

    # info
    data_name = t_params['data_name']
    imgs_path = t_params['imgs_path']
    df_path = t_params['df_path']
    vocab = t_params['vocab']

    start_epoch = 0
    epochs_since_improvement = 0
    best_bleu4 = 0
    epochs = t_params['epochs']
    batch_size = t_params['batch_size']
    workers = t_params['workers']
    encoder_lr = t_params['encoder_lr']
    decoder_lr = t_params['decoder_lr']
    fine_tune_encoder = t_params['fine_tune_encoder']

    # pretrained word embeddings
    pretrained_embeddings = t_params['pretrained_embeddings']
    if pretrained_embeddings:
        fine_tune_embeddings = t_params['fine_tune_embeddings']
        embeddings_matrix = m_params['embeddings_matrix']

    # init / load checkpoint
    if checkpoint is None:

        # getting hyperparameters
        attention_dim = m_params['attention_dim']
        embed_dim = m_params['embed_dim']
        decoder_dim = m_params['decoder_dim']
        encoder_dim = m_params['encoder_dim']
        dropout = m_params['dropout']

        decoder = DecoderWithAttention(attention_dim=attention_dim,
                                       embed_dim=embed_dim,
                                       decoder_dim=decoder_dim,
                                       encoder_dim=encoder_dim,
                                       vocab_size=len(vocab),
                                       dropout=dropout)

        if pretrained_embeddings:
            decoder.load_pretrained_embeddings(
                torch.tensor(embeddings_matrix, dtype=torch.float32))
            decoder.fine_tune_embeddings(fine_tune=fine_tune_embeddings)

        decoder_optimizer = torch.optim.RMSprop(params=filter(
            lambda p: p.requires_grad, decoder.parameters()),
                                                lr=decoder_lr)

        encoder = Encoder()
        encoder.fine_tune(fine_tune_encoder)
        encoder_optimizer = torch.optim.RMSprop(
            params=filter(lambda p: p.requires_grad, encoder.parameters()),
            lr=encoder_lr) if fine_tune_encoder else None

    # load checkpoint
    else:
        checkpoint = torch.load(checkpoint)
        print('Loaded Checkpoint!!')
        start_epoch = checkpoint['epoch'] + 1
        print(f"Starting Epoch: {start_epoch}")
        epochs_since_improvement = checkpoint['epochs_since_improvement']
        best_bleu4 = checkpoint['bleu-4']
        decoder = checkpoint['decoder']
        decoder_optimizer = checkpoint['decoder_optimizer']
        encoder = checkpoint['encoder']
        encoder_optimizer = checkpoint['encoder_optimizer']
        if fine_tune_encoder is True and encoder_optimizer is None:
            encoder.fine_tune(fine_tune_encoder)
            encoder_optimizer = torch.optim.RMSprop(params=filter(
                lambda p: p.requires_grad, encoder.parameters()),
                                                    lr=encoder_lr)

    # Schedulers
    decoder_scheduler = ReduceLROnPlateau(decoder_optimizer,
                                          patience=2,
                                          verbose=True)
    if fine_tune_encoder:
        encoder_scheduler = ReduceLROnPlateau(encoder_optimizer,
                                              patience=2,
                                              verbose=True)

    # move to gpu, if available
    decoder = decoder.to(device)
    encoder = encoder.to(device)

    # loss function
    criterion = nn.CrossEntropyLoss().to(device)

    # dataloaders
    transform = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])

    print('Loading Data')
    train_loader, val_loader = get_loaders(batch_size, imgs_path, df_path,
                                           transform, vocab, False, workers)
    print('_' * 50)

    print('-' * 20, 'Fitting', '-' * 20)
    for epoch in range(start_epoch, epochs):

        # if epochs_since_improvement > 0 and epochs_since_improvement % 2 == 0:
        #     adjust_learning_rate(decoder_optimizer, 0.8)
        #     if fine_tune_encoder:
        #         adjust_learning_rate(encoder_optimizer, 0.8)

        print('_' * 50)
        print('-' * 20, 'Training', '-' * 20)
        # one epoch of training
        epoch_time = AverageMeter()
        start_time = time.time()
        train(train_loader=train_loader,
              encoder=encoder,
              decoder=decoder,
              criterion=criterion,
              encoder_optimizer=encoder_optimizer,
              decoder_optimizer=decoder_optimizer,
              epoch=epoch,
              logger=logger)
        epoch_time.update(time.time() - start_time)
        print(f"Epoch train time {epoch_time.val:.3f} ({epoch_time.avg:.3f})")

        # one epoch of validation
        epoch_time = AverageMeter()
        start_time = time.time()
        print('-' * 20, 'Validation', '-' * 20)
        b1, b2, b3, recent_bleu4 = validate(val_loader=val_loader,
                                            encoder=encoder,
                                            decoder=decoder,
                                            criterion=criterion,
                                            vocab=vocab,
                                            epoch=epoch,
                                            logger=logger)
        epoch_time.update(time.time() - start_time)
        # tensorboard
        logger.add_scalar(f'b-1/valid', b1, epoch)
        logger.add_scalar(f'b-2/valid', b2, epoch)
        logger.add_scalar(f'b-3/valid', b3, epoch)
        logger.add_scalar(f'b-4/valid', recent_bleu4, epoch)
        # logger.add_scalar(f'Meteor/valid', m, epoch)
        print(
            f"Epoch validation time {epoch_time.val:.3f} ({epoch_time.avg:.3f})")

        # check for improvement
        is_best = recent_bleu4 > best_bleu4
        best_bleu4 = max(recent_bleu4, best_bleu4)
        if not is_best:
            epochs_since_improvement += 1
            print(
                f'\nEpochs since last improvement: {epochs_since_improvement}'
            )
        else:
            # reset
            epochs_since_improvement = 0

        # stop training if no improvement for 5 epochs
        if epochs_since_improvement == 5:
            print('No improvement for 5 consecutive epochs, terminating...')
            break

        # learning rate scheduler
        decoder_scheduler.step(recent_bleu4)
        if fine_tune_encoder:
            encoder_scheduler.step(recent_bleu4)

        save_checkpoint(data_name, epoch, epochs_since_improvement, encoder,
                        decoder, encoder_optimizer, decoder_optimizer,
                        recent_bleu4, is_best)
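
The AverageMeter used above to time each epoch is not shown in the listing either. A minimal sketch, assuming the usual running-average helper with val/avg/sum/count fields and an update() method:

class AverageMeter:
    """Track the most recent value and the running average of a quantity."""

    def __init__(self):
        self.val = 0.0
        self.avg = 0.0
        self.sum = 0.0
        self.count = 0

    def update(self, val, n=1):
        # record the latest value and fold it into the running average
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count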
Example #4
def main():
    """
    Training and validation.
    """

    global best_bleu4, epochs_since_improvement, checkpoint, start_epoch, fine_tune_encoder, data_name, word_map, glove_path, emb_dim, rev_word_map

    # Read word map
    word_map_file = os.path.join(data_folder, 'WORDMAP_' + data_name + '.json')
    with open(word_map_file, 'r') as j:
        word_map = json.load(j)

    rev_word_map = {v: k for k, v in word_map.items()}
    # get GloVe vectors
    vectors = bcolz.open(f'{glove_path}/6B.300.dat')[:]
    words = pickle.load(open(f'{glove_path}/6B.300_words.pkl', 'rb'))
    word2idx = pickle.load(open(f'{glove_path}/6B.300_idx.pkl', 'rb'))

    glove = {w: vectors[word2idx[w]] for w in words}
    matrix_len = len(word_map)
    weights_matrix = np.zeros((matrix_len, emb_dim))
    words_found = 0

    for i, word in enumerate(word_map.keys()):
        try:
            weights_matrix[i] = glove[word]
            words_found += 1
        except KeyError:
            # word not in GloVe: initialize it with a random vector
            weights_matrix[i] = np.random.normal(scale=0.6, size=(emb_dim, ))

    pretrained_embedding = torch.FloatTensor(weights_matrix)

    # Initialize / load checkpoint
    if checkpoint is None:
        decoder = DecoderWithAttention(attention_dim=attention_dim,
                                       embed_dim=emb_dim,
                                       decoder_dim=decoder_dim,
                                       vocab_size=len(word_map),
                                       dropout=dropout)
        decoder.load_pretrained_embeddings(
            pretrained_embedding
        )  # pretrained_embeddings should be of dimensions (len(word_map), emb_dim)
        decoder.fine_tune_embeddings(True)  # or False
        decoder_optimizer = torch.optim.Adam(params=filter(
            lambda p: p.requires_grad, decoder.parameters()),
                                             lr=decoder_lr)
        encoder = Encoder()
        encoder.fine_tune(fine_tune_encoder)
        encoder_optimizer = torch.optim.Adam(
            params=filter(lambda p: p.requires_grad, encoder.parameters()),
            lr=encoder_lr) if fine_tune_encoder else None

    else:
        checkpoint = torch.load(checkpoint)
        start_epoch = checkpoint['epoch'] + 1
        epochs_since_improvement = checkpoint['epochs_since_improvement']
        best_bleu4 = checkpoint['bleu-4']
        decoder = checkpoint['decoder']
        decoder_optimizer = checkpoint['decoder_optimizer']
        encoder = checkpoint['encoder']
        encoder_optimizer = checkpoint['encoder_optimizer']
        if fine_tune_encoder is True and encoder_optimizer is None:
            encoder.fine_tune(fine_tune_encoder)
            encoder_optimizer = torch.optim.Adam(params=filter(
                lambda p: p.requires_grad, encoder.parameters()),
                                                 lr=encoder_lr)

    # Move to GPU, if available
    decoder = decoder.to(device)
    encoder = encoder.to(device)

    # Loss function
    criterion = nn.CrossEntropyLoss().to(device)

    # Custom dataloaders
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    train_loader = torch.utils.data.DataLoader(CaptionDataset(
        data_folder,
        data_name,
        'TRAIN',
        transform=transforms.Compose([normalize])),
                                               batch_size=batch_size,
                                               shuffle=True,
                                               num_workers=workers,
                                               pin_memory=True)
    val_loader = torch.utils.data.DataLoader(CaptionDataset(
        data_folder,
        data_name,
        'VAL',
        transform=transforms.Compose([normalize])),
                                             batch_size=batch_size,
                                             shuffle=True,
                                             num_workers=workers,
                                             pin_memory=True)

    # Epochs
    for epoch in range(start_epoch, epochs):

        # Decay learning rate if there is no improvement for 8 consecutive epochs, and terminate training after 20
        if epochs_since_improvement == 20:
            break
        if epochs_since_improvement > 0 and epochs_since_improvement % 8 == 0:
            adjust_learning_rate(decoder_optimizer, 0.8)
            if fine_tune_encoder:
                adjust_learning_rate(encoder_optimizer, 0.8)

        # One epoch's training (commented out in this example; only validation runs)
        # train(train_loader=train_loader,
        #       encoder=encoder,
        #       decoder=decoder,
        #       criterion=criterion,
        #       encoder_optimizer=encoder_optimizer,
        #       decoder_optimizer=decoder_optimizer,
        #       epoch=epoch)

        # One epoch's validation
        recent_bleu4 = validate(val_loader=val_loader,
                                encoder=encoder,
                                decoder=decoder,
                                criterion=criterion)

        # Check if there was an improvement
        is_best = recent_bleu4 > best_bleu4
        best_bleu4 = max(recent_bleu4, best_bleu4)
        if not is_best:
            epochs_since_improvement += 1
            print("\nEpochs since last improvement: %d\n" %
                  (epochs_since_improvement, ))
        else:
            epochs_since_improvement = 0

        # Save checkpoint
        save_checkpoint(data_name, epoch, epochs_since_improvement, encoder,
                        decoder, encoder_optimizer, decoder_optimizer,
                        recent_bleu4, is_best)
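
save_checkpoint is called at the end of every epoch but is not defined in these listings. A minimal sketch, assuming it stores exactly the keys the checkpoint-loading branch above reads back ('epoch', 'epochs_since_improvement', 'bleu-4', 'encoder', 'decoder', 'encoder_optimizer', 'decoder_optimizer') and keeps a separate copy of the best model:

import torch

def save_checkpoint(data_name, epoch, epochs_since_improvement, encoder, decoder,
                    encoder_optimizer, decoder_optimizer, bleu4, is_best):
    """Save a training checkpoint; also keep a 'BEST_' copy when BLEU-4 improved."""
    state = {'epoch': epoch,
             'epochs_since_improvement': epochs_since_improvement,
             'bleu-4': bleu4,
             'encoder': encoder,
             'decoder': decoder,
             'encoder_optimizer': encoder_optimizer,
             'decoder_optimizer': decoder_optimizer}
    filename = 'checkpoint_' + data_name + '.pth.tar'
    torch.save(state, filename)
    if is_best:
        torch.save(state, 'BEST_' + filename)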
Example #5
def main():
    """
    Training and validation.
    """

    global best_bleu4, epochs_since_improvement, checkpoint, start_epoch, fine_tune_encoder, data_name, word_map

    # Read word map
    word_map_file = os.path.join(data_folder, 'WORDMAP_' + data_name + '.json')
    with open(word_map_file, 'r') as j:
        word_map = json.load(j)

    # Initialize / load checkpoint
    if checkpoint is None:

        emb_dim = 100  # remove if not using pretrained embeddings
        decoder = DecoderWithAttention(attention_dim=attention_dim,
                                       embed_dim=emb_dim,
                                       decoder_dim=decoder_dim,
                                       vocab_size=len(word_map),
                                       dropout=dropout)
        pretrained_embeddings = decoder.create_pretrained_embedding_matrix(word_map)
        decoder.load_pretrained_embeddings(
            pretrained_embeddings)  # pretrained_embeddings should be of dimensions (len(word_map), emb_dim)
        decoder.fine_tune_embeddings(True)

        decoder_optimizer = torch.optim.Adam(params=filter(lambda p: p.requires_grad, decoder.parameters()),
                                             lr=decoder_lr)
        encoder = Encoder()
        encoder.fine_tune(fine_tune_encoder)
        encoder_optimizer = torch.optim.Adam(params=filter(lambda p: p.requires_grad, encoder.parameters()),
                                             lr=encoder_lr) if fine_tune_encoder else None

    else:
        checkpoint = torch.load(checkpoint)
        start_epoch = checkpoint['epoch'] + 1
        epochs_since_improvement = checkpoint['epochs_since_improvement']
        best_bleu4 = checkpoint['bleu-4']
        decoder = checkpoint['decoder']
        decoder_optimizer = checkpoint['decoder_optimizer']
        encoder = checkpoint['encoder']
        encoder_optimizer = checkpoint['encoder_optimizer']
        if fine_tune_encoder is True and encoder_optimizer is None:
            encoder.fine_tune(fine_tune_encoder)
            encoder_optimizer = torch.optim.Adam(params=filter(lambda p: p.requires_grad, encoder.parameters()),
                                                 lr=encoder_lr)

    # Move to GPU, if available
    decoder = decoder.to(device)
    encoder = encoder.to(device)

    # Loss function
    criterion = nn.CrossEntropyLoss().to(device)

    # Custom dataloaders
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    train_loader = torch.utils.data.DataLoader(
        CaptionDataset(data_folder, data_name, 'TRAIN', transform=transforms.Compose([normalize])),
        batch_size=batch_size, shuffle=True, num_workers=workers, pin_memory=True)
    val_loader = torch.utils.data.DataLoader(
        CaptionDataset(data_folder, data_name, 'VAL', transform=transforms.Compose([normalize])),
        batch_size=batch_size, shuffle=True, num_workers=workers, pin_memory=True)

    # Epochs
    for epoch in range(start_epoch, epochs):

        # Decay learning rate if there is no improvement for 8 consecutive epochs, and terminate training after 20
        if epochs_since_improvement == 20:
            break
        if epochs_since_improvement > 0 and epochs_since_improvement % 8 == 0:
            adjust_learning_rate(decoder_optimizer, 0.8)
            if fine_tune_encoder:
                adjust_learning_rate(encoder_optimizer, 0.8)

        # One epoch's training
        train(train_loader=train_loader,
              encoder=encoder,
              decoder=decoder,
              criterion=criterion,
              encoder_optimizer=encoder_optimizer,
              decoder_optimizer=decoder_optimizer,
              epoch=epoch)

        # One epoch's validation
        recent_bleu4, val_loss_avg, val_accu_avg = validate(val_loader=val_loader,
                                encoder=encoder,
                                decoder=decoder,
                                criterion=criterion)
        # write to TensorBoard
        writer.add_scalar('validation_loss', val_loss_avg, epoch)
        writer.add_scalar('validation_accuracy', val_accu_avg, epoch)
        writer.add_scalar('validation_bleu4', recent_bleu4, epoch)

        # Check if there was an improvement
        is_best = recent_bleu4 > best_bleu4
        best_bleu4 = max(recent_bleu4, best_bleu4)
        if not is_best:
            epochs_since_improvement += 1
            print("\nEpochs since last improvement: %d\n" % (epochs_since_improvement,))
        else:
            epochs_since_improvement = 0

        # Save checkpoint
        print("Saving model to file", ckpt_name.format(epoch, bleu=recent_bleu4, loss=val_loss_avg, acc=val_accu_avg))
        save_checkpoint(ckpt_name.format(epoch, bleu=recent_bleu4, loss=val_loss_avg, acc=val_accu_avg), 
                        epoch, epochs_since_improvement, encoder, decoder, encoder_optimizer,
                        decoder_optimizer, recent_bleu4, is_best)

    # close TensorBoard writer
    writer.close()
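
All of the examples rely on decoder.load_pretrained_embeddings() and decoder.fine_tune_embeddings(). A trimmed-down sketch of how such methods are typically implemented inside DecoderWithAttention, assuming the decoder owns an nn.Embedding layer called self.embedding (the attention and LSTM parts of the real decoder are omitted here):

import torch.nn as nn

class DecoderWithAttention(nn.Module):
    """Sketch showing only the embedding-related methods used above."""

    def __init__(self, vocab_size, embed_dim):
        super().__init__()
        self.embed_dim = embed_dim
        self.embedding = nn.Embedding(vocab_size, embed_dim)

    def load_pretrained_embeddings(self, embeddings):
        # embeddings: FloatTensor of shape (vocab_size, embed_dim)
        self.embedding.weight = nn.Parameter(embeddings)

    def fine_tune_embeddings(self, fine_tune=True):
        # unfreeze (True) or freeze (False) gradient updates for the embeddings
        for p in self.embedding.parameters():
            p.requires_grad = fine_tune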