Example #1
    def test_forward_step(self):
        N, D, H = 3, 4, 5

        # Create the layer
        layer = LSTMRecurrentLayer(D, H)
        layer.Wx = np.linspace(-2.1, 1.3, num=4 * D * H).reshape(D, 4 * H)
        layer.Wh = np.linspace(-0.7, 2.2, num=4 * H * H).reshape(H, 4 * H)
        layer.b = np.linspace(0.3, 0.7, num=4 * H)

        # Create some arbitrary inputs
        x = np.linspace(-0.4, 1.2, num=N * D).reshape(N, D)
        prev_h = np.linspace(-0.3, 0.7, num=N * H).reshape(N, H)
        prev_c = np.linspace(-0.4, 0.9, num=N * H).reshape(N, H)

        next_h, next_c, _ = layer._forward_step(x, prev_h, prev_c)

        expected = np.asarray(
            [[0.24635157, 0.28610883, 0.32240467, 0.35525807, 0.38474904],
             [0.49223563, 0.55611431, 0.61507696, 0.66844003, 0.7159181],
             [0.56735664, 0.66310127, 0.74419266, 0.80889665, 0.858299]])
        self.assertAlmostEqual(rel_error(expected, next_h), 1e-9, places=2)

        expected = np.asarray(
            [[0.32986176, 0.39145139, 0.451556, 0.51014116, 0.56717407],
             [0.66382255, 0.76674007, 0.87195994, 0.97902709, 1.08751345],
             [0.74192008, 0.90592151, 1.07717006, 1.25120233, 1.42395676]])
        self.assertAlmostEqual(rel_error(expected, next_c), 1e-9, places=2)
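The expected values above follow from the standard LSTM cell equations. For reference, a minimal sketch of what _forward_step is assumed to compute (the standalone helper names are illustrative, not the layer's actual code; the cache it returns is omitted here):

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def lstm_forward_step(x, prev_h, prev_c, Wx, Wh, b):
    H = prev_h.shape[1]
    # One affine map produces all four gate pre-activations, shape (N, 4H).
    a = x.dot(Wx) + prev_h.dot(Wh) + b
    i = sigmoid(a[:, :H])            # input gate
    f = sigmoid(a[:, H:2 * H])       # forget gate
    o = sigmoid(a[:, 2 * H:3 * H])   # output gate
    g = np.tanh(a[:, 3 * H:])        # candidate cell update
    next_c = f * prev_c + i * g      # gated cell state
    next_h = o * np.tanh(next_c)     # gated hidden state
    return next_h, next_c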
Example #2
    def test_backward(self):
        np.random.seed(271)

        N, T, H, M = 2, 3, 4, 5 

        # Create the layer
        layer = TemporalAffineLayer(H, M)
        W = np.random.randn(H, M)
        b = np.random.randn(M)

        # Create some arbitrary inputs
        x = np.random.randn(N, T, H)

        out = layer.forward(x, W=W, b=b)
        grad_out = np.random.randn(*out.shape)
        grad_x, grad_W, grad_b = layer.backward(grad_out)

        fx = lambda x: layer.forward(x, W=W, b=b)
        fw = lambda W: layer.forward(x, W=W, b=b)
        fb = lambda b: layer.forward(x, W=W, b=b)

        grad_x_num = eval_numerical_gradient_array(fx, x, grad_out)
        grad_W_num = eval_numerical_gradient_array(fw, W, grad_out)
        grad_b_num = eval_numerical_gradient_array(fb, b, grad_out)

        self.assertAlmostEqual(rel_error(grad_x_num, grad_x), 1e-9, places=2)
        self.assertAlmostEqual(rel_error(grad_W_num, grad_W), 1e-9, places=2)
        self.assertAlmostEqual(rel_error(grad_b_num, grad_b), 1e-9, places=2)
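TemporalAffineLayer applies one affine map at every time step by collapsing the batch and time axes. A minimal sketch of the forward/backward math this test checks (the standalone function names are illustrative, not the layer's API):

import numpy as np

def temporal_affine_forward(x, W, b):
    N, T, H = x.shape
    # Flatten (N, T, H) -> (N*T, H), apply the affine map, restore the shape.
    return x.reshape(N * T, H).dot(W).reshape(N, T, -1) + b

def temporal_affine_backward(grad_out, x, W):
    N, T, H = x.shape
    grad_flat = grad_out.reshape(N * T, -1)
    grad_x = grad_flat.dot(W.T).reshape(N, T, H)
    grad_W = x.reshape(N * T, H).T.dot(grad_flat)
    grad_b = grad_flat.sum(axis=0)
    return grad_x, grad_W, grad_b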
Example #3
def validate_gradient():
    """
    Validates the implementation of the gradient computation.
    Should be used together with gradient_check.py.
    Gradient checking like this is useful whenever you implement your
    own gradient calculations.
    It is not required for this assignment.
    """
    from gradient_check import eval_numerical_gradient, rel_error
    # randomly initialize W
    dim = 4
    num_classes = 4
    num_inputs = 5
    params = {}
    std = 0.001
    params['W1'] = std * np.random.randn(dim, 10)
    params['b1'] = np.zeros(10)
    params['W2'] = std * np.random.randn(10, num_classes)
    params['b2'] = np.zeros(num_classes)

    X = np.random.randn(num_inputs, dim)
    y = np.array([0, 1, 2, 2, 1])

    loss, grads = compute_neural_net_loss(params, X, y, reg=0.1)
    # these should all be less than 1e-8 or so
    for param_name in params:
        # The lambda ignores its argument: eval_numerical_gradient perturbs
        # params[param_name] in place, so re-evaluating the loss is enough.
        f = lambda W: compute_neural_net_loss(params, X, y, reg=0.1)[0]
        param_grad_num = eval_numerical_gradient(f, params[param_name], verbose=False)
        print('%s max relative error: %e' % (param_name, rel_error(param_grad_num, grads[param_name])))
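validate_gradient leans on the rel_error helper from gradient_check.py; a sketch of its assumed definition:

import numpy as np

def rel_error(x, y):
    # Maximum elementwise relative error, guarded against division by zero.
    return np.max(np.abs(x - y) / np.maximum(1e-8, np.abs(x) + np.abs(y)))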
Example #4
    def test_forward(self):
        N, D, H, T = 2, 5, 4, 3

        # Create the layer
        layer = LSTMRecurrentLayer(D, H)
        layer.Wx = np.linspace(-0.2, 0.9, num=4 * D * H).reshape(D, 4 * H)
        layer.Wh = np.linspace(-0.3, 0.6, num=4 * H * H).reshape(H, 4 * H)
        layer.b = np.linspace(0.2, 0.7, num=4 * H)

        # Create some arbitrary inputs
        x = np.linspace(-0.4, 0.6, num=N * T * D).reshape(N, T, D)
        h0 = np.linspace(-0.4, 0.8, num=N * H).reshape(N, H)

        hidden_state_over_time = layer.forward(x, h0)

        expected = np.asarray(
            [[[0.01764008, 0.01823233, 0.01882671, 0.0194232],
              [0.11287491, 0.12146228, 0.13018446, 0.13902939],
              [0.31358768, 0.33338627, 0.35304453, 0.37250975]],
             [[0.45767879, 0.4761092, 0.4936887, 0.51041945],
              [0.6704845, 0.69350089, 0.71486014, 0.7346449],
              [0.81733511, 0.83677871, 0.85403753, 0.86935314]]])

        self.assertAlmostEqual(rel_error(hidden_state_over_time, expected),
                               1e-9,
                               places=2)
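The full-sequence forward pass just threads a single LSTM step through time, stacking each hidden state; since forward takes no initial cell state, it is assumed to start at zero. A sketch under those assumptions, reusing lstm_forward_step from the sketch after Example #1:

import numpy as np

def lstm_forward(x, h0, Wx, Wh, b):
    N, T, D = x.shape
    H = h0.shape[1]
    h = np.zeros((N, T, H))
    prev_h, prev_c = h0, np.zeros((N, H))  # cell state assumed to start at zero
    for t in range(T):
        prev_h, prev_c = lstm_forward_step(x[:, t, :], prev_h, prev_c, Wx, Wh, b)
        h[:, t, :] = prev_h
    return h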
Example #5
    def test_backward(self):
        np.random.seed(271)

        N, D, T, H = 2, 3, 10, 6

        # Create the layer
        layer = LSTMRecurrentLayer(D, H)
        Wx = np.random.randn(D, 4 * H)
        Wh = np.random.randn(H, 4 * H)
        b = np.random.randn(4 * H)

        # Create some arbitrary inputs
        x = np.random.randn(N, T, D)
        h0 = np.random.randn(N, H)

        hidden_state_over_time = layer.forward(x, h0, Wx=Wx, Wh=Wh, b=b)

        grad_h_over_time = np.random.randn(*hidden_state_over_time.shape)

        grad_x, grad_h0, grad_Wx, grad_Wh, grad_b = layer.backward(
            grad_h_over_time)

        fx = lambda x: layer.forward(x, h0, Wx=Wx, Wh=Wh, b=b)
        fh0 = lambda h0: layer.forward(x, h0, Wx=Wx, Wh=Wh, b=b)
        fWx = lambda Wx: layer.forward(x, h0, Wx=Wx, Wh=Wh, b=b)
        fWh = lambda Wh: layer.forward(x, h0, Wx=Wx, Wh=Wh, b=b)
        fb = lambda b: layer.forward(x, h0, Wx=Wx, Wh=Wh, b=b)

        grad_x_num = eval_numerical_gradient_array(fx, x, grad_h_over_time)
        grad_h0_num = eval_numerical_gradient_array(fh0, h0, grad_h_over_time)
        grad_Wx_num = eval_numerical_gradient_array(fWx, Wx, grad_h_over_time)
        grad_Wh_num = eval_numerical_gradient_array(fWh, Wh, grad_h_over_time)
        grad_b_num = eval_numerical_gradient_array(fb, b, grad_h_over_time)

        self.assertAlmostEqual(rel_error(grad_x_num, grad_x), 1e-9, places=2)
        self.assertAlmostEqual(rel_error(grad_h0_num, grad_h0), 1e-9, places=2)
        self.assertAlmostEqual(rel_error(grad_Wx_num, grad_Wx), 1e-9, places=2)
        self.assertAlmostEqual(rel_error(grad_Wh_num, grad_Wh), 1e-9, places=2)
        self.assertAlmostEqual(rel_error(grad_b_num, grad_b), 1e-9, places=2)
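eval_numerical_gradient_array builds the numerical gradient of an array-valued function by centered finite differences, contracting each perturbed output with the upstream gradient. A sketch of the assumed implementation:

import numpy as np

def eval_numerical_gradient_array(f, x, df, h=1e-5):
    grad = np.zeros_like(x)
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        ix = it.multi_index
        old = x[ix]
        x[ix] = old + h
        pos = f(x).copy()   # f(x + h)
        x[ix] = old - h
        neg = f(x).copy()   # f(x - h)
        x[ix] = old         # restore the entry
        # Chain rule: contract the local slope with the upstream gradient.
        grad[ix] = np.sum((pos - neg) * df) / (2 * h)
        it.iternext()
    return grad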
Example #6
    def test_temporal_softmax_loss(self):
        N, T, V = 7, 8, 9
        score = np.random.randn(N, T, V)
        y = np.random.randint(V, size=(N, T))
        mask = (np.random.rand(N, T) > 0.5)

        _, grad_score = temporal_softmax_loss(score, y, mask, verbose=False)
        grad_score_num = eval_numerical_gradient(
            lambda x: temporal_softmax_loss(x, y, mask)[0],
            score,
            verbose=False)

        self.assertAlmostEqual(rel_error(grad_score, grad_score_num),
                               1e-9,
                               places=2)
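temporal_softmax_loss flattens the (N, T, V) scores, takes a numerically stable softmax over the vocabulary, and averages the masked cross-entropy over the batch; the mask zeroes out padded time steps in both the loss and the gradient. A sketch of the assumed implementation:

import numpy as np

def temporal_softmax_loss(score, y, mask, verbose=False):
    N, T, V = score.shape
    flat = score.reshape(N * T, V)
    y_flat = y.reshape(N * T)
    mask_flat = mask.reshape(N * T)
    # Stable softmax: subtract the row max before exponentiating.
    probs = np.exp(flat - flat.max(axis=1, keepdims=True))
    probs /= probs.sum(axis=1, keepdims=True)
    loss = -np.sum(mask_flat * np.log(probs[np.arange(N * T), y_flat])) / N
    grad = probs.copy()
    grad[np.arange(N * T), y_flat] -= 1
    grad = (grad * mask_flat[:, None] / N).reshape(N, T, V)
    if verbose:
        print('loss: %f' % loss)
    return loss, grad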
Example #7
    def gradient_check(self, X, y):
        """Runs gradient check on every parameter of the model

        Args:
            X: Input data in matrix form, of any shape
            y: Vector of labels
        """
        print "Running numeric gradient check with reg = %s" % self.reg

        loss, grads = self.loss(X, y)
        for param_key in sorted(self.params):
            f = lambda _: self.loss(X, y)[0]
            num_grad = eval_numerical_gradient(f,
                                               self.params[param_key],
                                               verbose=False)
            print "%s relative error: %.2e" % (
                param_key, rel_error(num_grad, grads[param_key]))
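A hypothetical usage: run the check on a tiny random batch so each numerical pass stays cheap (the model class and its constructor arguments below are placeholders, not part of the example above):

import numpy as np

model = TwoLayerNet(input_dim=4, num_classes=3, reg=0.1)  # hypothetical model
X_small = np.random.randn(5, 4)
y_small = np.random.randint(3, size=5)
model.gradient_check(X_small, y_small)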
Example #8
    def test_backward(self):
        np.random.seed(271)

        N, T, V, D = 50, 3, 5, 6

        # Create the layer
        layer = WordEmbeddingLayer(V, D)
        layer.W = np.random.randn(V, D)

        # Create some arbitrary inputs
        x = np.random.randint(V, size=(N, T))

        out = layer.forward(x)

        grad_out = np.random.randn(*out.shape)

        grad_W = layer.backward(grad_out)

        f = lambda W: layer.forward(x, W=W)
        grad_W_num = eval_numerical_gradient_array(f, layer.W, grad_out)

        self.assertAlmostEqual(rel_error(grad_W_num, grad_W), 1e-9, places=2)
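The embedding backward pass is a scatter-add: each row of grad_out accumulates onto the embedding row its token index selected. A sketch of the assumed implementation (np.add.at handles repeated indices correctly, where plain fancy-index assignment would drop duplicates):

import numpy as np

def word_embedding_backward(grad_out, x, W):
    grad_W = np.zeros_like(W)
    np.add.at(grad_W, x, grad_out)  # grad_W[x[n, t]] += grad_out[n, t] for all n, t
    return grad_W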
Example #9
    def test_forward(self):
        V, D = 5, 3

        # Create the layer
        layer = WordEmbeddingLayer(V, D)
        layer.W = np.linspace(0, 1, num=V * D).reshape(V, D)

        # Create some arbitrary inputs
        x = np.asarray([[0, 3, 1, 2], [2, 1, 0, 3]])

        out = layer.forward(x)

        expected = np.asarray([[[0., 0.07142857, 0.14285714],
                                [0.64285714, 0.71428571, 0.78571429],
                                [0.21428571, 0.28571429, 0.35714286],
                                [0.42857143, 0.5, 0.57142857]],
                               [[0.42857143, 0.5, 0.57142857],
                                [0.21428571, 0.28571429, 0.35714286],
                                [0., 0.07142857, 0.14285714],
                                [0.64285714, 0.71428571, 0.78571429]]])

        self.assertAlmostEqual(rel_error(expected, out), 1e-9, places=2)
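The forward pass this test pins down is plain integer fancy indexing: the (N, T) index array gathers rows of W into an (N, T, D) output. A one-line sketch under that assumption:

def word_embedding_forward(x, W):
    # W[x] broadcasts the integer indices, returning shape x.shape + (D,).
    return W[x]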