Ejemplo n.º 1
0
def main():
    """Train a vanilla-RNN captioning model on a small COCO load and save plots.

    Loads the COCO data with ``max_train=50``, trains a ``CaptioningRNN``
    (``cell_type='rnn'``) with ``CaptioningSolver`` for 50 epochs, writes the
    loss curve to ``loss_rnn.png``, then samples captions for two images
    from each of the 'train' and 'val' splits and writes one annotated
    image per sample to ``<split>_rnn_<i>.png``.
    """
    # The dataset (987M) can be downloaded from
    # https://drive.google.com/file/d/1Wgeq3NZ4R1letnZEKLo-DTSSgcTsgkmq/view?usp=sharing
    # The dataset contains the feature of images in MSCOCO dataset
    # The data should be in the same folder as the code
    # Load COCO data from disk; this returns a dictionary
    small_data = coco_utils.load_coco_data(max_train=50)

    # Experiment with vanilla RNN
    small_rnn_model = CaptioningRNN(
        cell_type='rnn',
        word_to_idx=small_data['word_to_idx'],
        input_dim=small_data['train_features'].shape[1],
        hidden_dim=512,
        wordvec_dim=256,
    )

    small_rnn_solver = CaptioningSolver(
        small_rnn_model,
        small_data,
        update_rule='adam',
        num_epochs=50,
        batch_size=25,
        optim_config={
            'learning_rate': 5e-3,
        },
        lr_decay=0.95,
        verbose=True,
        print_every=10,
    )

    small_rnn_solver.train()

    # Plot the training losses
    plt.plot(small_rnn_solver.loss_history)
    plt.xlabel('Iteration')
    plt.ylabel('Loss')
    plt.title('Training loss history')
    # Save BEFORE show(): with an interactive backend the figure is gone once
    # show() returns, so saving afterwards would write an empty image.
    plt.savefig('loss_rnn.png')
    plt.show()
    plt.close()

    for split in ['train', 'val']:
        # some images might be deprecated. You may rerun the code several times
        # to successfully get the sample images from url.
        minibatch = coco_utils.sample_coco_minibatch(
            small_data, split=split, batch_size=2, seed=0)
        gt_captions, features, urls = minibatch
        gt_captions = coco_utils.decode_captions(gt_captions,
                                                 small_data['idx_to_word'])

        sample_captions = small_rnn_model.sample(features)
        sample_captions = coco_utils.decode_captions(sample_captions,
                                                     small_data['idx_to_word'])

        samples = zip(gt_captions, sample_captions, urls)
        for i, (gt_caption, sample_caption, url) in enumerate(samples):
            plt.imshow(image_from_url(url))
            plt.title('%s\n%s\nGT:%s' % (split, sample_caption, gt_caption))
            plt.axis('off')
            # Same ordering as above: write the file first, then display.
            plt.savefig('%s_rnn_%d.png' % (split, i))
            plt.show()
            plt.close()
Ejemplo n.º 2
0
def main():
    """Train an LSTM captioning model for one epoch and plot its loss history.

    Loads the COCO data via ``load_coco_data()``, builds a ``CaptioningRNN``
    with ``cell_type='lstm'``, trains it with ``CaptioningSolver`` and then
    displays the recorded ``loss_history``.
    """
    # Load COCO data from disk
    coco = load_coco_data()

    # Caption model: LSTM cell, float32 parameters
    captioner = CaptioningRNN(
        cell_type='lstm',
        word_to_idx=coco['word_to_idx'],
        input_dim=coco['train_features'].shape[1],
        hidden_dim=512,
        wordvec_dim=256,
        dtype=np.float32,
    )

    # Solver settings gathered in one place for readability
    solver_settings = {
        'update_rule': 'adam',
        'num_epochs': 1,
        'batch_size': 100,
        'optim_config': {'learning_rate': 5e-3},
        'lr_decay': 0.995,
        'verbose': True,
        'print_every': 10,
        'eval_every': 100,
    }
    trainer = CaptioningSolver(captioner, coco, **solver_settings)
    trainer.train()

    # Plot defaults: figure size, interpolation and colormap
    plt.rcParams.update({
        'figure.figsize': (10.0, 8.0),
        'image.interpolation': 'nearest',
        'image.cmap': 'gray',
    })

    # Plot the training losses
    losses = trainer.loss_history
    plt.plot(losses)
    plt.xlabel('Iteration')
    plt.ylabel('Loss')
    plt.title('Training loss history')
    plt.show()
Ejemplo n.º 3
0
# Train an LSTM captioning model on a small COCO load (max_train=50) and
# plot the resulting training loss.
small_data = load_coco_data(max_train=50)

# Build the model from the same dictionary the solver trains on. The earlier
# version read `data[...]`, a name this snippet never defines.
small_lstm_model = CaptioningRNN(
    cell_type='lstm',
    word_to_idx=small_data['word_to_idx'],
    input_dim=small_data['train_features'].shape[1],
    hidden_dim=512,
    wordvec_dim=256,
    dtype=np.float32,
)

small_lstm_solver = CaptioningSolver(
    small_lstm_model,
    small_data,
    update_rule='adam',
    num_epochs=40,
    batch_size=25,
    optim_config={
        'learning_rate': 6e-3,
    },
    lr_decay=0.995,
    verbose=True,
    print_every=10,
)

small_lstm_solver.train()

# Plot the training losses
plt.plot(small_lstm_solver.loss_history)
plt.xlabel('Iteration')
plt.ylabel('Loss')
plt.title('Training loss history')
plt.show()

#LSTM test-time sampling
Ejemplo n.º 4
0
def _plot_loss_history(loss_history):
    """Plot a solver's recorded loss history and display the figure."""
    plt.plot(loss_history)
    plt.xlabel('Iteration')
    plt.ylabel('Loss')
    plt.title('Training loss history')
    plt.show()


def _show_sample_captions(model, data, splits=('train', 'val'), batch_size=2):
    """Display sampled and ground-truth captions for a minibatch per split.

    For each split, draws a minibatch with ``sample_coco_minibatch``, samples
    captions from ``model`` for its features, decodes both caption sets with
    ``data['idx_to_word']`` and shows each image titled with the split name,
    the sampled caption and the ground-truth caption.
    """
    for split in splits:
        minibatch = sample_coco_minibatch(data,
                                          split=split,
                                          batch_size=batch_size)
        gt_captions, features, urls = minibatch
        gt_captions = decode_captions(gt_captions, data['idx_to_word'])

        sample_captions = model.sample(features)
        sample_captions = decode_captions(sample_captions,
                                          data['idx_to_word'])

        for gt_caption, sample_caption, url in zip(gt_captions,
                                                   sample_captions, urls):
            plt.imshow(image_from_url(url))
            plt.title('%s\n%s\nGT:%s' % (split, sample_caption, gt_caption))
            plt.axis('off')
            plt.show()


def _train_and_report(model, data, lr_decay):
    """Train ``model`` on ``data``, then plot its loss and show sample captions.

    All solver settings except ``lr_decay`` are shared by both experiments.
    """
    solver = CaptioningSolver(
        model,
        data,
        update_rule='adam',
        num_epochs=50,
        batch_size=25,
        optim_config={
            'learning_rate': 5e-3,
        },
        lr_decay=lr_decay,
        verbose=True,
        print_every=10,
    )
    solver.train()

    # Plot the training losses
    _plot_loss_history(solver.loss_history)
    _show_sample_captions(model, data)


def main():
    """Run the vanilla-RNN and LSTM captioning experiments on a small COCO load.

    Both experiments use the data loaded with ``max_train=50`` and identical
    solver settings apart from the learning-rate decay (0.95 for the RNN,
    0.995 for the LSTM).
    """
    # The dataset can be downloaded in https://drive.google.com/drive/folders/1zCq7kS9OXc2mgaOzDimAwiBblECWeBtO?usp=sharing
    # The dataset contains the feature of images in MSCOCO dataset
    # Load COCO data from disk; this returns a dictionary
    small_data = load_coco_data(max_train=50)

    # Experiment with vanilla RNN
    small_rnn_model = CaptioningRNN(
        cell_type='rnn',
        word_to_idx=small_data['word_to_idx'],
        input_dim=small_data['train_features'].shape[1],
        hidden_dim=512,
        wordvec_dim=256,
    )
    _train_and_report(small_rnn_model, small_data, lr_decay=0.95)

    ##################################################################################################

    # Experiment with LSTM
    small_lstm_model = CaptioningRNN(
        cell_type='lstm',
        word_to_idx=small_data['word_to_idx'],
        input_dim=small_data['train_features'].shape[1],
        hidden_dim=512,
        wordvec_dim=256,
        dtype=np.float32,
    )
    _train_and_report(small_lstm_model, small_data, lr_decay=0.995)