Ejemplo n.º 1
0
 def predict(self, X):
     """Return the arg-max output index for every sequence in ``X``.

     ``X`` is unpacked as a 3-D array ``(N, T, D)``. The recurrent layer is
     run from an all-zero initial state of shape ``(N, self.hidden_dim)``,
     the slice of its output at time index ``T - 1`` is fed to the dense
     layer, and ``np.argmax`` over axis 1 of the dense output is returned.
     """
     batch, steps, _ = X.shape
     initial_state = np.zeros((batch, self.hidden_dim))
     hidden_seq, _ = rnn_layers.rnn_forward(
         X, initial_state, self.Wx, self.Wh, self.b, self.non_liniearity)
     # Only the last time step is used for the prediction.
     last_step = hidden_seq[:, steps - 1, :]
     scores, _ = layers.dense_forward(last_step, self.W1, self.b1)
     return np.argmax(scores, axis=1)
def test_rnn_layer():
    """Gradient-check ``rnn_layers.rnn_forward`` / ``rnn_layers.rnn_backward``.

    Random inputs and parameters are drawn, the gradients returned by
    ``rnn_backward`` are compared against numerical gradients obtained from
    ``eval_numerical_gradient_array``, and ``rel_error`` of each pair is
    printed. Nothing is asserted or returned; the errors are only reported.
    """
    N, D, T, H = 2, 3, 10, 5

    x = np.random.randn(N, T, D)
    h0 = np.random.randn(N, H)
    Wx = np.random.randn(D, H)
    Wh = np.random.randn(H, H)
    b = np.random.randn(H)

    out, cache = rnn_layers.rnn_forward(x, h0, Wx, Wh, b)
    dout = np.random.randn(*out.shape)

    dx, dh0, dWx, dWh, db = rnn_layers.rnn_backward(dout, cache)

    # Each lambda's parameter shadows exactly one of the variables above, so
    # the forward output becomes a function of that single argument while the
    # remaining arguments are taken from the enclosing scope.
    fx = lambda x: rnn_layers.rnn_forward(x, h0, Wx, Wh, b)[0]
    fh0 = lambda h0: rnn_layers.rnn_forward(x, h0, Wx, Wh, b)[0]
    fWx = lambda Wx: rnn_layers.rnn_forward(x, h0, Wx, Wh, b)[0]
    fWh = lambda Wh: rnn_layers.rnn_forward(x, h0, Wx, Wh, b)[0]
    fb = lambda b: rnn_layers.rnn_forward(x, h0, Wx, Wh, b)[0]

    dx_num = eval_numerical_gradient_array(fx, x, dout)
    dh0_num = eval_numerical_gradient_array(fh0, h0, dout)
    dWx_num = eval_numerical_gradient_array(fWx, Wx, dout)
    dWh_num = eval_numerical_gradient_array(fWh, Wh, dout)
    db_num = eval_numerical_gradient_array(fb, b, dout)

    # A single pre-formatted argument prints the same text under the Python 2
    # print statement and the Python 3 print function. The two spaces after
    # the colon reproduce the separator the original "label: ", value form
    # emitted.
    print('Testing rnn layers')
    print('dx error:  %s' % (rel_error(dx_num, dx),))
    print('dh0 error:  %s' % (rel_error(dh0_num, dh0),))
    print('dWx error:  %s' % (rel_error(dWx_num, dWx),))
    print('dWh error:  %s' % (rel_error(dWh_num, dWh),))
    print('db error:  %s' % (rel_error(db_num, db),))
def test_rnn_layer():
    """Gradient-check ``rnn_layers.rnn_forward`` / ``rnn_layers.rnn_backward``.

    Random inputs and parameters are drawn, the gradients returned by
    ``rnn_backward`` are compared against numerical gradients obtained from
    ``eval_numerical_gradient_array``, and ``rel_error`` of each pair is
    printed. Nothing is asserted or returned; the errors are only reported.
    """
    N, D, T, H = 2, 3, 10, 5

    x = np.random.randn(N, T, D)
    h0 = np.random.randn(N, H)
    Wx = np.random.randn(D, H)
    Wh = np.random.randn(H, H)
    b = np.random.randn(H)

    out, cache = rnn_layers.rnn_forward(x, h0, Wx, Wh, b)
    dout = np.random.randn(*out.shape)

    dx, dh0, dWx, dWh, db = rnn_layers.rnn_backward(dout, cache)

    # Each lambda's parameter shadows exactly one of the variables above, so
    # the forward output becomes a function of that single argument while the
    # remaining arguments are taken from the enclosing scope.
    fx = lambda x: rnn_layers.rnn_forward(x, h0, Wx, Wh, b)[0]
    fh0 = lambda h0: rnn_layers.rnn_forward(x, h0, Wx, Wh, b)[0]
    fWx = lambda Wx: rnn_layers.rnn_forward(x, h0, Wx, Wh, b)[0]
    fWh = lambda Wh: rnn_layers.rnn_forward(x, h0, Wx, Wh, b)[0]
    fb = lambda b: rnn_layers.rnn_forward(x, h0, Wx, Wh, b)[0]

    dx_num = eval_numerical_gradient_array(fx, x, dout)
    dh0_num = eval_numerical_gradient_array(fh0, h0, dout)
    dWx_num = eval_numerical_gradient_array(fWx, Wx, dout)
    dWh_num = eval_numerical_gradient_array(fWh, Wh, dout)
    db_num = eval_numerical_gradient_array(fb, b, dout)

    # A single pre-formatted argument prints the same text under the Python 2
    # print statement and the Python 3 print function. The two spaces after
    # the colon reproduce the separator the original "label: ", value form
    # emitted.
    print('Testing rnn layers')
    print('dx error:  %s' % (rel_error(dx_num, dx),))
    print('dh0 error:  %s' % (rel_error(dh0_num, dh0),))
    print('dWx error:  %s' % (rel_error(dWx_num, dWx),))
    print('dWh error:  %s' % (rel_error(dWh_num, dWh),))
    print('db error:  %s' % (rel_error(db_num, db),))
Ejemplo n.º 4
0
# Numerically estimate the gradient, with respect to x, of the first element
# returned by temporal_softmax_loss (presumably the scalar loss -- confirm).
# NOTE(review): x, y, mask and dx are not defined in this excerpt; they must
# come from earlier code.
dx_num = eval_numerical_gradient(
    lambda x: temporal_softmax_loss(x, y, mask)[0], x, verbose=False)

# Report the relative error between the previously computed dx and the
# numerical estimate.
print('dx error: ', rel_error(dx, dx_num))

from rnn_layers import rnn_forward, rnn_backward

# Deterministic check of rnn_forward on a small, fixed problem.
N, T, D, H = 2, 3, 4, 5

# Evenly spaced values (np.linspace) make every input reproducible without
# relying on the random number generator.
x = np.linspace(-0.1, 0.3, num=N * T * D).reshape(N, T, D)
h0 = np.linspace(-0.3, 0.1, num=N * H).reshape(N, H)
Wx = np.linspace(-0.2, 0.4, num=D * H).reshape(D, H)
Wh = np.linspace(-0.4, 0.1, num=H * H).reshape(H, H)
b = np.linspace(-0.7, 0.1, num=H)

# Only the first element of rnn_forward's return value is checked.
h, _ = rnn_forward(x, h0, Wx, Wh, b)
# Hard-coded reference output of shape (N, T, H) = (2, 3, 5) for the inputs
# above. NOTE(review): provenance of these values is not shown here.
expected_h = np.asarray(
    [[
        [-0.42070749, -0.27279261, -0.11074945, 0.05740409, 0.22236251],
        [-0.39525808, -0.22554661, -0.0409454, 0.14649412, 0.32397316],
        [-0.42305111, -0.24223728, -0.04287027, 0.15997045, 0.35014525],
    ],
     [[-0.55857474, -0.39065825, -0.19198182, 0.02378408, 0.23735671],
      [-0.27150199, -0.07088804, 0.13562939, 0.33099728, 0.50158768],
      [-0.51014825, -0.30524429, -0.06755202, 0.17806392, 0.40333043]]])
print('h error: ', rel_error(expected_h, h))

# Fix NumPy's RNG seed so any random draws that follow are repeatable.
np.random.seed(231)

# Dimensions for a subsequent check; the code that uses them is outside this
# excerpt.
N, D, T, H = 2, 3, 10, 5
Ejemplo n.º 5
0
    def loss(self, features, captions):
        """
        Training-time loss and parameter gradients for the captioning RNN.

        Image features and their ground-truth captions are pushed through a
        forward pass (feature projection, word embedding, RNN, vocabulary
        scores, temporal softmax loss) followed by the matching backward pass.

        Inputs:
        - features: Input image features, of shape (N, D)
        - captions: Ground-truth captions; an integer array of shape (N, T) where
          each element is in the range 0 <= y[i, t] < V

        Returns a tuple of:
        - loss: Scalar loss
        - grads: Dictionary of gradients parallel to self.params
        """
        # The RNN consumes every word except the last and is expected to emit
        # every word except the first, so inputs and targets are the same
        # caption shifted by one position.
        inputs, targets = captions[:, :-1], captions[:, 1:]

        # True wherever the target differs from self._null.
        valid = (targets != self._null)

        params = self.params
        # Affine map from image features to the initial hidden state.
        W_proj, b_proj = params['W_proj'], params['b_proj']
        # Word embedding matrix.
        W_embed = params['W_embed']
        # Input-to-hidden, hidden-to-hidden weights and bias of the RNN.
        Wx, Wh, b = params['Wx'], params['Wh'], params['b']
        # Hidden-to-vocabulary weights and bias.
        W_vocab, b_vocab = params['W_vocab'], params['b_vocab']

        # ---- forward pass ----
        h0, proj_cache = layers.fc_forward(features, W_proj, b_proj)
        word_vecs, embed_cache = rnn_layers.word_embedding_forward(
            inputs, W_embed)
        # rnn_forward receives the sequence with its first two axes swapped;
        # the output is swapped back before the vocabulary projection.
        hidden, rnn_cache = rnn_layers.rnn_forward(
            word_vecs.transpose(1, 0, 2), h0, Wx, Wh, b)
        scores, vocab_cache = rnn_layers.temporal_fc_forward(
            hidden.transpose(1, 0, 2), W_vocab, b_vocab)
        loss, dscores = rnn_layers.temporal_softmax_loss(
            scores, targets, valid)

        # ---- backward pass (mirrors the forward pass in reverse) ----
        dhidden, dW_vocab, db_vocab = rnn_layers.temporal_fc_backward(
            dscores, vocab_cache)
        dword_vecs, dh0, dWx, dWh, db = rnn_layers.rnn_backward(
            dhidden.transpose(1, 0, 2), rnn_cache)
        dW_embed = rnn_layers.word_embedding_backward(
            dword_vecs.transpose(1, 0, 2), embed_cache)
        # The gradient with respect to the input features is not returned.
        _, dW_proj, db_proj = layers.fc_backward(dh0, proj_cache)

        return loss, {
            'W_embed': dW_embed,
            'W_proj': dW_proj,
            'W_vocab': dW_vocab,
            'Wh': dWh,
            'Wx': dWx,
            'b': db,
            'b_proj': db_proj,
            'b_vocab': db_vocab,
        }
Ejemplo n.º 6
0
    def train(self,
              X,
              y,
              learning_rate=1e-2,
              opt='sgd',
              n_iters=5000,
              batch_size=200,
              verbose=1):
        """Train the recurrent classifier with randomly sampled mini-batches.

        Every iteration samples ``batch_size`` row indices with
        ``np.random.choice``, runs the RNN from an all-zero initial state,
        feeds the output at time index ``T - 1`` through the dense layer and
        the softmax loss, back-propagates, and updates ``Wx``, ``Wh``, ``b``,
        ``W1`` and ``b1`` through ``optimizers.optimize``.

        Inputs:
        - X: array unpacked as (N, T, D); rows are selected along axis 0.
        - y: targets indexed with the same sampled row indices as X.
        - learning_rate: passed to the optimizer as ``lr``.
        - opt: optimizer name forwarded to ``optimizers.optimize``.
        - n_iters: number of update iterations.
        - batch_size: number of rows sampled per iteration.
        - verbose: when equal to 1, the loss is printed every 500 iterations.

        Returns nothing. Each iteration's loss is appended to
        ``self.loss_history`` and the parameters are updated in place on
        ``self``.
        """
        lr = learning_rate
        # Unpacking also enforces that X is 3-D; only T is needed below.
        _, T, _ = X.shape
        # range (not xrange) keeps the loop valid on both Python 2 and 3.
        for i in range(n_iters):
            ids = np.random.choice(X.shape[0], batch_size)
            h0 = np.zeros((batch_size, self.hidden_dim))
            layer1, l1cache = rnn_layers.rnn_forward(X[ids], h0, self.Wx,
                                                     self.Wh, self.b,
                                                     self.non_liniearity)
            # Only the last time step feeds the classifier.
            final_layer = layer1[:, T - 1, :]
            layer2, l2cache = layers.dense_forward(final_layer, self.W1,
                                                   self.b1)
            loss, l3cache = layers.softmax_loss_forward(layer2, y[ids])
            self.loss_history.append(loss)

            if verbose == 1 and i % 500 == 0:
                # A single pre-formatted argument prints identically under
                # the Python 2 print statement and the Python 3 function.
                print('Iteration %d: loss %g' % (i, loss))

            # Upstream gradient handed to the loss layer's backward pass.
            dlayer3 = 1.0
            dlayer2 = layers.softmax_loss_backward(dlayer3, l3cache)
            dlayer1, dW1, db1 = layers.dense_backward(dlayer2, l2cache)
            # The dense layer only saw time index T - 1, so every other time
            # step receives a zero upstream gradient.
            dh = np.zeros((batch_size, T, self.hidden_dim))
            dh[:, T - 1, :] = dlayer1
            _, _, dWx, dWh, db = rnn_layers.rnn_backward(dh, l1cache)

            # Apply the optimizer to each parameter in the original order;
            # optimize returns the updated params state and the new value.
            for name, grad in (('Wx', dWx), ('Wh', dWh), ('b', db),
                               ('W1', dW1), ('b1', db1)):
                self.params, updated = optimizers.optimize(
                    self.params,
                    getattr(self, name),
                    grad,
                    lr=lr,
                    name=name,
                    opt=opt)
                setattr(self, name, updated)
Ejemplo n.º 7
0
    def build_model(self):
        """
        Assemble the captioning graph and return its logits and loss.

        Place Holder:
        - features: input image features of shape (N, D)
        - captions: ground-truth captions; an integer array of shape (N, T) where
          each element is in the range [0, V)

        Returns
        - logits: score of shape (N, T, V)
        - loss: Scalar loss
        """
        # Hyper-parameters. self.M and self.D are read here as well, although
        # nothing below uses them.
        n_steps, batch, vocab, hidden = self.T, self.N, self.V, self.H
        embed_dim, feature_dim = self.M, self.D

        # Placeholders for the image features and the captions.
        feats = self.features
        caps = self.captions

        # Inputs keep the first T tokens and targets drop the first token.
        # The explicit stop index T stands in for captions[:, :-1]; the
        # original note says TensorFlow did not offer negative stop slices.
        caps_in = caps[:, :n_steps]
        caps_out = caps[:, 1:]
        not_null = tf.not_equal(caps_out, self._null)

        params = self.params
        # word embedding matrix
        W_embed = params['W_embed']
        # (cnn_features)-to-(initial_hidden)
        W_proj, b_proj = params['W_proj'], params['b_proj']
        # input-to-hidden, hidden-to-hidden
        Wx, Wh, b = params['Wx'], params['Wh'], params['b']
        # hidden-to-vocab
        W_vocab, b_vocab = params['W_vocab'], params['b_vocab']

        # Option dictionaries expected by the layer helpers.
        step_cfg = {'n_time_step': n_steps}
        shape_cfg = {
            'batch_size': batch,
            'n_time_step': n_steps,
            'dim_hidden': hidden,
            'vocab_size': vocab,
        }

        # Initial hidden state from the cnn features: (N, H)
        h0 = affine_forward(feats, W_proj, b_proj)

        # Word vectors for the input tokens: (N, T, M)
        x = word_embedding_forward(caps_in, W_embed)

        # Recurrent pass: plain RNN when cell_type is 'rnn', LSTM otherwise.
        recurrent = rnn_forward if self.cell_type == 'rnn' else lstm_forward
        h = recurrent(x, h0, Wx, Wh, b, step_cfg)

        # hidden-to-vocab scores, then the masked softmax loss over them
        logits = temporal_affine_forward(h, W_vocab, b_vocab, shape_cfg)
        loss = temporal_softmax_loss(logits, caps_out, not_null, shape_cfg)

        return logits, loss