Example #1
def check_gradient_on_captioning_rnn():
    """
    Perform numeric gradient checking on the CaptioningRNN class; you should see
    relative errors on the order of e-6 or less.
    """
    np.random.seed(231)

    batch_size = 2
    timesteps = 3
    input_dim = 4
    wordvec_dim = 5
    hidden_dim = 6
    word_to_idx = {'<NULL>': 0, 'cat': 2, 'dog': 3}
    vocab_size = len(word_to_idx)

    captions = np.random.randint(vocab_size, size=(batch_size, timesteps))
    features = np.random.randn(batch_size, input_dim)

    model = CaptioningRNN(word_to_idx,
                          input_dim=input_dim,
                          wordvec_dim=wordvec_dim,
                          hidden_dim=hidden_dim,
                          cell_type='rnn',
                          dtype=np.float64)

    loss, grads = model.loss(features, captions)

    for param_name in sorted(grads):
        f = lambda _: model.loss(features, captions)[0]
        param_grad_num = eval_numerical_gradient(f,
                                                 model.params[param_name],
                                                 verbose=False,
                                                 h=1e-6)
        e = rel_error(param_grad_num, grads[param_name])
        print('%s relative error: %e' % (param_name, e))
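
# For reference: the check above leans on the course utilities `rel_error` and
# `eval_numerical_gradient`. A minimal sketch of a centered-difference gradient
# checker of that shape, assuming in-place perturbation of a float array (the
# course implementation may differ in details):
import numpy as np

def eval_numerical_gradient(f, x, verbose=True, h=1e-5):
    """Numerically estimate df/dx at x with centered differences, one entry at a time."""
    grad = np.zeros_like(x)
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        ix = it.multi_index
        oldval = x[ix]
        x[ix] = oldval + h        # evaluate f(x + h)
        fxph = f(x)
        x[ix] = oldval - h        # evaluate f(x - h)
        fxmh = f(x)
        x[ix] = oldval            # restore the original value
        grad[ix] = (fxph - fxmh) / (2 * h)
        if verbose:
            print(ix, grad[ix])
        it.iternext()
    return grad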
Example #2
def check_lstm_captioning_model():
    """You should see a difference on the order of e-10 or less."""
    N, D, W, H = 10, 20, 30, 40
    word_to_idx = {'<NULL>': 0, 'cat': 2, 'dog': 3}
    V = len(word_to_idx)
    T = 13

    model = CaptioningRNN(word_to_idx,
                          input_dim=D,
                          wordvec_dim=W,
                          hidden_dim=H,
                          cell_type='lstm',
                          dtype=np.float64)

    # Set all model parameters to fixed values
    for k, v in model.params.items():
        model.params[k] = np.linspace(-1.4, 1.3, num=v.size).reshape(*v.shape)

    features = np.linspace(-0.5, 1.7, num=N * D).reshape(N, D)
    captions = (np.arange(N * T) % V).reshape(N, T)

    loss, grads = model.loss(features, captions)
    expected_loss = 9.82445935443

    print('loss: ', loss)
    print('expected loss: ', expected_loss)
    print('difference: ', abs(loss - expected_loss))
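
# For reference: the `cell_type='lstm'` path tested above is built from a
# per-timestep LSTM update. A minimal sketch of one forward step, assuming the
# usual packed-gate layout (Wx of shape (D, 4H), Wh of shape (H, 4H)); the
# course's `lstm_step_forward` may differ in details:
import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def lstm_step_forward(x, prev_h, prev_c, Wx, Wh, b):
    """x: (N, D); prev_h, prev_c: (N, H); returns (next_h, next_c)."""
    H = prev_h.shape[1]
    a = x.dot(Wx) + prev_h.dot(Wh) + b   # (N, 4H) pre-activations for all gates
    i = sigmoid(a[:, 0:H])               # input gate
    f = sigmoid(a[:, H:2 * H])           # forget gate
    o = sigmoid(a[:, 2 * H:3 * H])       # output gate
    g = np.tanh(a[:, 3 * H:])            # candidate cell values
    next_c = f * prev_c + i * g          # gated cell update
    next_h = o * np.tanh(next_c)         # expose a squashed view of the cell
    return next_h, next_c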
Example #3
def overfit_small_data():
    """
    Similar to the Solver class that we used to train image classification models on the
    previous assignment, on this assignment we use a CaptioningSolver class to train
    image captioning models. Open the file cs231n/captioning_solver.py and read through
    the CaptioningSolver class; it should look very familiar.

    Once you have familiarized yourself with the API, run the following to make sure
    your model overfits a small sample of 50 training examples. You should see a final
    loss of less than 0.1.
    """
    np.random.seed(231)

    small_data = load_coco_data(max_train=50)
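    # NOTE: `data` below refers to the full COCO metadata dict, assumed loaded
    # elsewhere (e.g. data = load_coco_data(pca_features=True); see Example #8).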

    small_rnn_model = CaptioningRNN(
        cell_type='rnn',
        word_to_idx=data['word_to_idx'],
        input_dim=data['train_features'].shape[1],
        hidden_dim=512,
        wordvec_dim=256,
    )
    small_rnn_solver = CaptioningSolver(
        small_rnn_model,
        small_data,
        update_rule='adam',
        num_epochs=50,
        batch_size=25,
        optim_config={
            'learning_rate': 5e-3,
        },
        lr_decay=0.95,
        verbose=True,
        print_every=10,
    )
    small_rnn_solver.train()

    # Plot the training losses
    plt.plot(small_rnn_solver.loss_history)
    plt.xlabel('Iteration')
    plt.ylabel('Loss')
    plt.title('Training loss history')
    plt.show()

    for split in ['train', 'val']:
        gt_captions, features, urls = sample_coco_minibatch(small_data,
                                                            split=split,
                                                            batch_size=2)
        gt_captions = decode_captions(gt_captions, data['idx_to_word'])

        sample_captions = small_rnn_model.sample(features)
        sample_captions = decode_captions(sample_captions, data['idx_to_word'])

        for gt_caption, sample_caption, url in zip(gt_captions,
                                                   sample_captions, urls):
            plt.imshow(image_from_url(url))
            plt.title('%s\n%s\nGT:%s' % (split, sample_caption, gt_caption))
            plt.axis('off')
            plt.show()
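
# For reference: both loops above rely on `decode_captions` to turn index
# arrays back into strings. A minimal sketch under the usual conventions
# (skip <NULL>, stop at <END>); the course helper may differ in details:
def decode_captions(captions, idx_to_word):
    """Convert an (N, T) int array (or a single length-T caption) into strings."""
    singleton = (captions.ndim == 1)
    if singleton:
        captions = captions[None]        # promote one caption to a batch of one
    decoded = []
    for caption in captions:
        words = []
        for idx in caption:
            word = idx_to_word[idx]
            if word == '<END>':
                break                    # everything after <END> is padding
            if word != '<NULL>':
                words.append(word)
        decoded.append(' '.join(words))
    return decoded[0] if singleton else decoded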
Example #4
def overfit_lstm_captioning_model():
    """You should see a final loss less than 0.5."""
    np.random.seed(231)

    small_data = load_coco_data(max_train=50)

    small_lstm_model = CaptioningRNN(
        cell_type='lstm',
        word_to_idx=data['word_to_idx'],
        input_dim=data['train_features'].shape[1],
        hidden_dim=512,
        wordvec_dim=256,
        dtype=np.float32,
    )

    small_lstm_solver = CaptioningSolver(
        small_lstm_model,
        small_data,
        update_rule='adam',
        num_epochs=50,
        batch_size=25,
        optim_config={
            'learning_rate': 5e-3,
        },
        lr_decay=0.995,
        verbose=True,
        print_every=10,
    )

    small_lstm_solver.train()

    # Plot the training losses
    plt.plot(small_lstm_solver.loss_history)
    plt.xlabel('Iteration')
    plt.ylabel('Loss')
    plt.title('Training loss history')
    plt.show()

    for split in ['train', 'val']:
        minibatch = sample_coco_minibatch(small_data,
                                          split=split,
                                          batch_size=2)
        gt_captions, features, urls = minibatch
        gt_captions = decode_captions(gt_captions, data['idx_to_word'])

        sample_captions = small_lstm_model.sample(features)
        sample_captions = decode_captions(sample_captions, data['idx_to_word'])

        for gt_caption, sample_caption, url in zip(gt_captions,
                                                   sample_captions, urls):
            plt.imshow(image_from_url(url))
            plt.title('%s\n%s\nGT:%s' % (split, sample_caption, gt_caption))
            plt.axis('off')
            plt.show()
Example #5
    def __init__(self, data, max_train_data=2000, pca=True, hidden_dim=512,
                 wordvec_dim=256, num_epochs=50, batch_size=50, lr=5e-3, lr_decay=0.997):
        np.random.seed(231)

        self.small_data = load_coco_data(max_train=max_train_data, pca_features=pca)

        self.small_lstm_model = CaptioningRNN(
            cell_type='lstm',
            word_to_idx=data['word_to_idx'],
            input_dim=data['train_features'].shape[1],
            hidden_dim=hidden_dim,
            wordvec_dim=wordvec_dim,
            dtype=np.float32,
        )

        self.small_lstm_solver = CaptioningSolver(
            self.small_lstm_model,
            self.small_data,
            update_rule='adam',
            num_epochs=num_epochs,
            batch_size=batch_size,
            optim_config={
                'learning_rate': lr,
            },
            lr_decay=lr_decay,
            verbose=True,
            print_every=10,
        )
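
# A hedged usage sketch for the constructor above; the enclosing class name
# (`LSTMCaptioner` here) is not shown in the snippet and is an assumption:
data = load_coco_data(pca_features=True)
captioner = LSTMCaptioner(data, max_train_data=2000, num_epochs=50, lr=5e-3)
captioner.small_lstm_solver.train()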
Example #6
def check_rnn_for_image_captioning():
    """
    Now that you have implemented the necessary layers, you can combine them to build an
    image captioning model. Open the file cs231n/classifiers/rnn.py and look at the
    CaptioningRNN class.

    Implement the forward and backward pass of the model in the loss function. For now
    you only need to implement the case where cell_type='rnn' for vanilla RNNs; you will
    implement the LSTM case later. After doing so, run the following to check your
    forward pass using a small test case; you should see error on the order of e-10 or less.
    """
    N, D, W, H = 10, 20, 30, 40
    word_to_idx = {'<NULL>': 0, 'cat': 2, 'dog': 3}
    V = len(word_to_idx)
    T = 13

    model = CaptioningRNN(word_to_idx,
                          input_dim=D,
                          wordvec_dim=W,
                          hidden_dim=H,
                          cell_type='rnn',
                          dtype=np.float64)

    # Set all model parameters to fixed values
    for k, v in model.params.items():
        model.params[k] = np.linspace(-1.4, 1.3, num=v.size).reshape(*v.shape)

    features = np.linspace(-1.5, 0.3, num=(N * D)).reshape(N, D)
    captions = (np.arange(N * T) % V).reshape(N, T)

    loss, grads = model.loss(features, captions)
    expected_loss = 9.83235591003

    print('loss: ', loss)
    print('expected loss: ', expected_loss)
    print('difference: ', abs(loss - expected_loss))
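
# For reference: the forward pass being checked typically composes the course
# layers like this (a sketch only; backward-pass caches are elided and the
# helper names follow the standard assignment layout):
def captioning_rnn_forward(model, features, captions):
    # Feed captions[:, :-1] into the RNN and predict captions[:, 1:].
    captions_in, captions_out = captions[:, :-1], captions[:, 1:]
    mask = captions_out != model._null   # do not score <NULL> padding

    W_proj, b_proj = model.params['W_proj'], model.params['b_proj']
    W_embed = model.params['W_embed']
    Wx, Wh, b = model.params['Wx'], model.params['Wh'], model.params['b']
    W_vocab, b_vocab = model.params['W_vocab'], model.params['b_vocab']

    h0, _ = affine_forward(features, W_proj, b_proj)          # image -> initial hidden state
    x, _ = word_embedding_forward(captions_in, W_embed)       # indices -> word vectors
    h, _ = rnn_forward(x, h0, Wx, Wh, b)                      # run the RNN over T steps
    scores, _ = temporal_affine_forward(h, W_vocab, b_vocab)  # hidden states -> vocab scores
    loss, _ = temporal_softmax_loss(scores, captions_out, mask)
    return loss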
Example #7
db_num = eval_numerical_gradient_array(fb, b, dout)

print('dx error: ', rel_error(dx_num, dx))
print('dh0 error: ', rel_error(dh0_num, dh0))
print('dWx error: ', rel_error(dWx_num, dWx))
print('dWh error: ', rel_error(dWh_num, dWh))
print('db error: ', rel_error(db_num, db))

N, D, W, H = 10, 20, 30, 40
word_to_idx = {'<NULL>': 0, 'cat': 2, 'dog': 3}
V = len(word_to_idx)
T = 13

model = CaptioningRNN(word_to_idx,
                      input_dim=D,
                      wordvec_dim=W,
                      hidden_dim=H,
                      cell_type='lstm',
                      dtype=np.float64)

# Set all model parameters to fixed values
for k, v in model.params.items():
    model.params[k] = np.linspace(-1.4, 1.3, num=v.size).reshape(*v.shape)

features = np.linspace(-0.5, 1.7, num=N * D).reshape(N, D)
captions = (np.arange(N * T) % V).reshape(N, T)

loss, grads = model.loss(features, captions)
expected_loss = 9.82445935443

print('loss: ', loss)
print('expected loss: ', expected_loss)
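
# For reference: checks like the ones above compare analytic layer gradients
# against numeric ones. A minimal sketch of the vanilla tanh recurrence step
# and its backward pass (the LSTM variant adds the gated cell update from
# Example #2's sketch):
import numpy as np

def rnn_step_forward(x, prev_h, Wx, Wh, b):
    """next_h = tanh(x @ Wx + prev_h @ Wh + b)."""
    next_h = np.tanh(x.dot(Wx) + prev_h.dot(Wh) + b)
    cache = (x, prev_h, Wx, Wh, next_h)
    return next_h, cache

def rnn_step_backward(dnext_h, cache):
    x, prev_h, Wx, Wh, next_h = cache
    da = dnext_h * (1 - next_h ** 2)   # backprop through tanh
    dx = da.dot(Wx.T)
    dprev_h = da.dot(Wh.T)
    dWx = x.T.dot(da)
    dWh = prev_h.T.dot(da)
    db = da.sum(axis=0)
    return dx, dprev_h, dWx, dWh, db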
Example #8
        print('Average BLEU score for %s: %f' % (split, BLEUscores[split]))


max_train = 10000
batch_size = 128
num_epochs = 1

data = load_coco_data(pca_features=True)
#np.random.seed(231)

small_data = load_coco_data(max_train=max_train)
############################################################
small_lstm_model = CaptioningRNN(
    cell_type='lstm',
    word_to_idx=data['word_to_idx'],
    input_dim=data['train_features'].shape[1],
    hidden_dim=512,
    wordvec_dim=256,
    dtype=np.float32,
)

small_lstm_solver = CaptioningSolver(
    small_lstm_model,
    small_data,
    update_rule='adam',
    num_epochs=num_epochs,
    batch_size=batch_size,
    optim_config={
        'learning_rate': 5e-3,
    },
    lr_decay=0.995,
    verbose=True,
    print_every=10,
)

small_lstm_solver.train()
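
# The stray print at the top of this example references a BLEUscores dict keyed
# by split. A hedged sketch of how one such score might be computed with NLTK;
# the helper name and token filtering are assumptions, not the course's code:
from nltk.translate.bleu_score import sentence_bleu

def BLEU_score(gt_caption, sample_caption):
    """Unigram BLEU between one reference caption and one sampled caption."""
    reference = [w for w in gt_caption.split()
                 if w not in ('<START>', '<END>', '<UNK>')]
    hypothesis = [w for w in sample_caption.split()
                  if w not in ('<START>', '<END>', '<UNK>')]
    return sentence_bleu([reference], hypothesis, weights=(1.0,))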
Example #9
# # RNN for image captioning
# Now that you have implemented the necessary layers, you can combine them to build an image captioning model. Open the file `cs231n/classifiers/rnn.py` and look at the `CaptioningRNN` class.
#
# Implement the forward and backward pass of the model in the `loss` function. For now you only need to implement the case where `cell_type='rnn'` for vanilla RNNs; you will implement the LSTM case later. After doing so, run the following to check your forward pass using a small test case; you should see error less than `1e-10`.

# In[ ]:

N, D, W, H = 10, 20, 30, 40
word_to_idx = {'<NULL>': 0, 'cat': 2, 'dog': 3}
V = len(word_to_idx)
T = 13

model = CaptioningRNN(word_to_idx,
                      input_dim=D,
                      wordvec_dim=W,
                      hidden_dim=H,
                      cell_type='rnn',
                      dtype=np.float64)

# Set all model parameters to fixed values
for k, v in model.params.items():
    model.params[k] = np.linspace(-1.4, 1.3, num=v.size).reshape(*v.shape)

features = np.linspace(-1.5, 0.3, num=(N * D)).reshape(N, D)
captions = (np.arange(N * T) % V).reshape(N, T)

loss, grads = model.loss(features, captions)
expected_loss = 9.83235591003

print('loss: ', loss)
print('expected loss: ', expected_loss)
print('difference: ', abs(loss - expected_loss))
Example #10
## Similar to the `Solver` class that we used to train image classification models on the previous assignment, on this assignment we use a `CaptioningSolver` class to train image captioning models. Open the file `cs231n/captioning_solver.py` and read through the `CaptioningSolver` class; it should look very familiar.
##
## Once you have familiarized yourself with the API, run the following to make sure your model overfits a small sample of 50 training examples. You should see losses around 1.

## In[ ]:

small_data = load_coco_data(max_train=50)

small_rnn_model = CaptioningRNN(
    cell_type='rnn',
    word_to_idx=data['word_to_idx'],
    input_dim=data['train_features'].shape[1],
    hidden_dim=512,
    wordvec_dim=256,
)

small_rnn_solver = CaptioningSolver(small_rnn_model, small_data,
                                    update_rule='adam',
                                    num_epochs=1,
                                    batch_size=25,
                                    optim_config={
                                        'learning_rate': 5e-3,
                                    },
                                    lr_decay=0.95,
                                    verbose=True, print_every=10,
                                    )

small_rnn_solver.train()
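
# Every solver in these examples passes update_rule='adam'. A minimal sketch of
# the Adam update such a solver typically applies to each parameter (defaults
# shown are the common ones, not necessarily the course's):
import numpy as np

def adam(w, dw, config=None):
    """One Adam step; `config` carries the running moment estimates."""
    if config is None:
        config = {}
    config.setdefault('learning_rate', 1e-3)
    config.setdefault('beta1', 0.9)
    config.setdefault('beta2', 0.999)
    config.setdefault('epsilon', 1e-8)
    config.setdefault('m', np.zeros_like(w))
    config.setdefault('v', np.zeros_like(w))
    config.setdefault('t', 0)

    config['t'] += 1
    config['m'] = config['beta1'] * config['m'] + (1 - config['beta1']) * dw
    config['v'] = config['beta2'] * config['v'] + (1 - config['beta2']) * (dw * dw)
    m_hat = config['m'] / (1 - config['beta1'] ** config['t'])  # bias-corrected 1st moment
    v_hat = config['v'] / (1 - config['beta2'] ** config['t'])  # bias-corrected 2nd moment
    next_w = w - config['learning_rate'] * m_hat / (np.sqrt(v_hat) + config['epsilon'])
    return next_w, config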
Example #11
captions = (np.arange(N * T) % V).reshape(N, T)
loss, grads = model.loss(features, captions)
expected_loss = 9.82445935443

print('loss: ', loss)
print('expected loss: ', expected_loss)
print('difference: ', abs(loss - expected_loss))
'''

# train
small_data = load_coco_data(max_train=50)

small_lstm_model = CaptioningRNN(
    cell_type='lstm',
    word_to_idx=data['word_to_idx'],
    input_dim=data['train_features'].shape[1],
    hidden_dim=512,
    wordvec_dim=256,
    dtype=np.float32,
)

small_lstm_solver = CaptioningSolver(small_lstm_model, small_data,
                                     update_rule='adam',
                                     num_epochs=50,
                                     batch_size=25,
                                     optim_config={
                                         'learning_rate': 5e-3,
                                     },
                                     lr_decay=0.995,
                                     verbose=True, print_every=10,
                                     )
'''
batch_size = 2
timesteps = 3
input_dim = 4
wordvec_dim = 5
hidden_dim = 6
word_to_idx = {'<NULL>': 0, 'cat': 2, 'dog': 3}
vocab_size = len(word_to_idx)

captions = np.random.randint(vocab_size, size=(batch_size, timesteps))
features = np.random.randn(batch_size, input_dim)

model = CaptioningRNN(
    word_to_idx,
    input_dim=input_dim,
    wordvec_dim=wordvec_dim,
    hidden_dim=hidden_dim,
    cell_type='rnn',
    dtype=np.float64,
)

loss, grads = model.loss(features, captions)

for param_name in sorted(grads):
    f = lambda _: model.loss(features, captions)[0]
    param_grad_num = eval_numerical_gradient(f,
                                             model.params[param_name],
                                             verbose=False,
                                             h=1e-6)
    e = rel_error(param_grad_num, grads[param_name])
    print('%s relative error: %e' % (param_name, e))
'''

# Inspect the loaded COCO data (the loop head below is reconstructed; only the
# loop body survived in the snippet).
for k, v in data.items():
    if type(v) == np.ndarray:
        print(k, type(v), v.shape, v.dtype)
    else:
        print(k, type(v), len(v))




from cs231n.rnn_layers import rnn_step_forward, rnn_step_backward
N, D, W, H = 10, 20, 30, 40
word_to_idx = {'<NULL>': 0, 'cat': 2, 'dog': 3}
V = len(word_to_idx)
T = 13

model = CaptioningRNN(word_to_idx,
                      input_dim=D,
                      wordvec_dim=W,
                      hidden_dim=H,
                      cell_type='rnn',
                      dtype=np.float64)

# Set all model parameters to fixed values
for k, v in model.params.items():
    model.params[k] = np.linspace(-1.4, 1.3, num=v.size).reshape(*v.shape)

features = np.linspace(-1.5, 0.3, num=(N * D)).reshape(N, D)
captions = (np.arange(N * T) % V).reshape(N, T)

loss, grads = model.loss(features, captions)
expected_loss = 9.83235591003

print('loss: ', loss)
print('expected loss: ', expected_loss)
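
# The expected_loss values in these checks come from a time-distributed, masked
# softmax. A minimal sketch of that loss computation (gradient elided; shapes:
# scores (N, T, V), y (N, T), mask (N, T)):
import numpy as np

def temporal_softmax_loss(scores, y, mask):
    N, T, V = scores.shape
    flat = scores.reshape(N * T, V)
    flat_y = y.reshape(N * T)
    flat_mask = mask.reshape(N * T)

    # Numerically stable softmax over the vocabulary dimension.
    probs = np.exp(flat - flat.max(axis=1, keepdims=True))
    probs /= probs.sum(axis=1, keepdims=True)

    # Average the masked negative log-likelihoods over the batch.
    loss = -np.sum(flat_mask * np.log(probs[np.arange(N * T), flat_y])) / N
    return loss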
Example #15
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
get_ipython().run_line_magic('load_ext', 'autoreload')
get_ipython().run_line_magic('autoreload', '2')

def rel_error(x, y):
    """ returns relative error """
    return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y))))

data = load_coco_data(pca_features=True)

big_coco_data = load_coco_data(max_train=500)

big_lstm_model = CaptioningRNN(
    cell_type='lstm',
    word_to_idx=data['word_to_idx'],
    input_dim=data['train_features'].shape[1],
    hidden_dim=1024,
    wordvec_dim=512,
    dtype=np.float32,
)

big_lstm_solver = CaptioningSolver(big_lstm_model, big_coco_data,
                                   update_rule='adam',
                                   num_epochs=30,
                                   batch_size=25,
                                   optim_config={'learning_rate': 5e-3,},
                                   lr_decay=0.995,
                                   verbose=True, print_every=100,
                                  )

big_lstm_solver.train()
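
# After train() finishes, captions come from model.sample(features). A minimal
# sketch of the greedy decoding that method typically performs for the vanilla
# RNN case (a sketch, not the course's exact implementation; the LSTM case
# would also carry a cell state across steps):
import numpy as np

def greedy_sample(model, features, max_length=30):
    N = features.shape[0]
    captions = np.full((N, max_length), model._null, dtype=np.int32)

    W_proj, b_proj = model.params['W_proj'], model.params['b_proj']
    W_embed = model.params['W_embed']
    Wx, Wh, b = model.params['Wx'], model.params['Wh'], model.params['b']
    W_vocab, b_vocab = model.params['W_vocab'], model.params['b_vocab']

    h = features.dot(W_proj) + b_proj                  # image features -> initial hidden state
    word = np.full(N, model._start, dtype=np.int32)    # every caption starts at <START>
    for t in range(max_length):
        x = W_embed[word]                              # embed the previous word
        h = np.tanh(x.dot(Wx) + h.dot(Wh) + b)         # one vanilla-RNN step
        scores = h.dot(W_vocab) + b_vocab
        word = scores.argmax(axis=1)                   # greedy: pick the highest-scoring word
        captions[:, t] = word
    return captions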