Esempio n. 1
0
def check_gradien_on_captioning_rnn():
    """
    Numerically gradient-check ``CaptioningRNN.loss`` on a tiny RNN problem.

    A small random batch of features and captions is generated under a fixed
    seed, the analytic gradients returned by ``model.loss`` are compared with
    numerical gradients for every parameter, and one relative error per
    parameter is printed. Errors around the order of e-6 or less are expected.
    """
    np.random.seed(231)

    # Problem sizes are deliberately tiny so the numerical gradient is cheap.
    batch_size, timesteps = 2, 3
    input_dim, wordvec_dim, hidden_dim = 4, 5, 6
    word_to_idx = {'<NULL>': 0, 'cat': 2, 'dog': 3}
    vocab_size = len(word_to_idx)

    # Keep this draw order (captions, then features, then the model): all of
    # them happen after the seed call, so reordering would change the data.
    captions = np.random.randint(vocab_size, size=(batch_size, timesteps))
    features = np.random.randn(batch_size, input_dim)

    model = CaptioningRNN(
        word_to_idx,
        input_dim=input_dim,
        wordvec_dim=wordvec_dim,
        hidden_dim=hidden_dim,
        cell_type='rnn',
        dtype=np.float64,
    )

    # Only the analytic gradients are needed here; the loss value is unused.
    _loss, grads = model.loss(features, captions)

    def loss_only(_):
        # The argument is ignored: this presumably relies on
        # eval_numerical_gradient perturbing the parameter array in place
        # (TODO confirm against its implementation).
        return model.loss(features, captions)[0]

    for param_name in sorted(grads):
        numeric_grad = eval_numerical_gradient(
            loss_only, model.params[param_name], verbose=False, h=1e-6)
        err = rel_error(numeric_grad, grads[param_name])
        print('%s relative error: %e' % (param_name, err))
Esempio n. 2
0
def check_lstm_captioning_model():
    """
    Forward-pass sanity check for ``CaptioningRNN`` with ``cell_type='lstm'``.

    Every model parameter is replaced by a fixed, evenly spaced ramp, and the
    features and captions are built deterministically, so the loss can be
    compared with a hard-coded reference value. Prints the computed loss, the
    expected loss and their absolute difference; the difference should be on
    the order of e-10 or less.
    """
    N, D, W, H = 10, 20, 30, 40
    T = 13
    word_to_idx = {'<NULL>': 0, 'cat': 2, 'dog': 3}
    V = len(word_to_idx)

    model = CaptioningRNN(
        word_to_idx,
        input_dim=D,
        wordvec_dim=W,
        hidden_dim=H,
        cell_type='lstm',
        dtype=np.float64,
    )

    # Overwrite each parameter with fixed values of the same shape. Iterate a
    # snapshot of the keys because entries are reassigned inside the loop.
    for name in list(model.params):
        current = model.params[name]
        ramp = np.linspace(-1.4, 1.3, num=current.size)
        model.params[name] = ramp.reshape(*current.shape)

    features = np.linspace(-0.5, 1.7, num=N * D).reshape(N, D)
    # Caption indices cycle through 0..V-1.
    captions = (np.arange(N * T) % V).reshape(N, T)

    expected_loss = 9.82445935443
    loss, _grads = model.loss(features, captions)

    report = (
        ('loss: ', loss),
        ('expected loss: ', expected_loss),
        ('difference: ', abs(loss - expected_loss)),
    )
    for label, value in report:
        print(label, value)
Esempio n. 3
0
def check_rnn_for_image_captioning():
    """
    Forward-pass sanity check for ``CaptioningRNN`` with ``cell_type='rnn'``.

    The model's parameters are overwritten with fixed, evenly spaced values
    and the loss is evaluated on deterministic features and captions. The
    computed loss, the hard-coded expected loss and their absolute difference
    are printed; the difference should be on the order of e-10 or less.
    """
    N, D, W, H = 10, 20, 30, 40
    T = 13
    word_to_idx = {'<NULL>': 0, 'cat': 2, 'dog': 3}
    V = len(word_to_idx)

    model = CaptioningRNN(
        word_to_idx,
        input_dim=D,
        wordvec_dim=W,
        hidden_dim=H,
        cell_type='rnn',
        dtype=np.float64,
    )

    # Replace every parameter by a fixed ramp of identical shape. The keys are
    # snapshotted first since the mapping is written to during the loop.
    for key in list(model.params):
        old_value = model.params[key]
        fixed = np.linspace(-1.4, 1.3, num=old_value.size)
        model.params[key] = fixed.reshape(*old_value.shape)

    n_feature_values = N * D
    features = np.linspace(-1.5, 0.3, num=n_feature_values).reshape(N, D)
    # Caption indices repeat 0, 1, ..., V-1 across the flattened batch.
    captions = (np.arange(N * T) % V).reshape(N, T)

    expected_loss = 9.83235591003
    loss, _grads = model.loss(features, captions)
    difference = abs(loss - expected_loss)

    print('loss: ', loss)
    print('expected loss: ', expected_loss)
    print('difference: ', difference)
Esempio n. 4
0
# Forward-pass sanity check for CaptioningRNN with cell_type='lstm'.
# NOTE(review): word_to_idx, N, D, W, H, T and V are not defined in this
# fragment; presumably they are set by earlier code -- confirm.
model = CaptioningRNN(word_to_idx,
                      input_dim=D,
                      wordvec_dim=W,
                      hidden_dim=H,
                      cell_type='lstm',
                      dtype=np.float64)

# Set all model parameters to fixed values so the loss can be compared with
# the hard-coded reference below. `.items()` (rather than the Python 2-only
# `.iteritems()`) works on both Python 2 and Python 3.
for k, v in model.params.items():
    model.params[k] = np.linspace(-1.4, 1.3, num=v.size).reshape(*v.shape)

features = np.linspace(-0.5, 1.7, num=N * D).reshape(N, D)
# Caption indices cycle through 0..V-1.
captions = (np.arange(N * T) % V).reshape(N, T)

loss, grads = model.loss(features, captions)
expected_loss = 9.82445935443

# print() calls instead of Python 2 print statements, matching the other
# examples in this file.
print('loss: ', loss)
print('expected loss: ', expected_loss)
print('difference: ', abs(loss - expected_loss))

# NOTE(review): the model construction that follows reads `data[...]`, not
# `small_data` -- verify which name is intended.
small_data = load_coco_data(max_train=50)

small_lstm_model = CaptioningRNN(
          cell_type='lstm',
          word_to_idx=data['word_to_idx'],
          input_dim=data['train_features'].shape[1],
          hidden_dim=512,
          wordvec_dim=256,
          dtype=np.float32,
Esempio n. 5
0
# Forward-pass sanity check for CaptioningRNN with cell_type='rnn'.
# NOTE(review): word_to_idx, N, D, W, H, T and V are not defined in this
# fragment; presumably they are set by earlier code -- confirm.
model = CaptioningRNN(word_to_idx,
          input_dim=D,
          wordvec_dim=W,
          hidden_dim=H,
          cell_type='rnn',
          dtype=np.float64)

# Set all model parameters to fixed values (an evenly spaced ramp from -1.4
# to 1.3, reshaped to each parameter's original shape) so the loss can be
# compared with the hard-coded reference below.
for k, v in model.params.items():
    model.params[k] = np.linspace(-1.4, 1.3, num=v.size).reshape(*v.shape)

# Deterministic inputs: an N x D ramp of features and caption indices that
# cycle through 0..V-1.
features = np.linspace(-1.5, 0.3, num=(N * D)).reshape(N, D)
captions = (np.arange(N * T) % V).reshape(N, T)

# Only the loss is inspected here; grads is not used in this fragment.
loss, grads = model.loss(features, captions)
expected_loss = 9.83235591003

print('loss: ', loss)
print('expected loss: ', expected_loss)
print('difference: ', abs(loss - expected_loss))


# Run the following cell to perform numeric gradient checking on the `CaptioningRNN` class; you should see errors around `5e-6` or less.

# In[ ]:

# Seed NumPy's global RNG so the random draws that follow are reproducible.
np.random.seed(231)

batch_size = 2
timesteps = 3

# Forward-pass sanity check for CaptioningRNN with cell_type='rnn'.
# NOTE(review): word_to_idx, N, D, W, H, T and V are not defined in this
# fragment; presumably they are set by earlier code -- confirm.
model = CaptioningRNN(word_to_idx,
                      input_dim=D,
                      wordvec_dim=W,
                      hidden_dim=H,
                      cell_type='rnn',
                      dtype=np.float64)

# Set all model parameters to fixed values so the loss can be compared with
# the hard-coded reference below. `.items()` (rather than the Python 2-only
# `.iteritems()`) works on both Python 2 and Python 3.
for k, v in model.params.items():
    model.params[k] = np.linspace(-1.4, 1.3, num=v.size).reshape(*v.shape)

features = np.linspace(-1.5, 0.3, num=(N * D)).reshape(N, D)
# Caption indices cycle through 0..V-1.
captions = (np.arange(N * T) % V).reshape(N, T)

loss, grads = model.loss(features, captions)
expected_loss = 9.83235591003

# print() calls instead of Python 2 print statements, matching the
# print(...) style already used earlier in this file.
print('loss: ', loss)
print('expected loss: ', expected_loss)
print('difference: ', abs(loss - expected_loss))

# Tiny problem sizes for the numeric gradient check.
# batch_size and timesteps are assigned the same values again here as in the
# statements right after np.random.seed(231) above.
batch_size = 2
timesteps = 3
input_dim = 4
wordvec_dim = 5
hidden_dim = 6
word_to_idx = {'<NULL>': 0, 'cat': 2, 'dog': 3}
# Number of entries in the mapping (3), not the largest index plus one.
vocab_size = len(word_to_idx)

# Random caption indices drawn from [0, vocab_size), i.e. 0, 1 or 2; index 3
# ('dog') is therefore never sampled.
captions = np.random.randint(vocab_size, size=(batch_size, timesteps))