Example 1
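These snippets are test methods from pylearn2's sandbox RNN test suite; a plausible set of module-level imports for running them (assuming the pylearn2 sandbox layout) is:

    import numpy as np
    from theano import function, tensor

    from pylearn2.costs.mlp import Default
    from pylearn2.models.mlp import Linear
    from pylearn2.sandbox.rnn.models.rnn import RNN, Recurrent
    from pylearn2.sandbox.rnn.space import SequenceSpace
    from pylearn2.space import VectorSpace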
    def test_gradient(self):
        """
        Test that the gradient of the cost can be computed.
        """
        rnn = RNN(input_space=SequenceSpace(VectorSpace(dim=1)),
                  layers=[Recurrent(dim=2, layer_name='recurrent',
                                    irange=0, nonlinearity=lambda x: x),
                          Linear(dim=1, layer_name='linear', irange=0)])

        X_data, X_mask = rnn.get_input_space().make_theano_batch()
        y_data, y_mask = rnn.get_output_space().make_theano_batch()

        default_cost = Default()
        cost = default_cost.expr(rnn, ((X_data, X_mask), (y_data, y_mask)))
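        # The test passes if the gradient graph can be built at all;
        # disconnected_inputs='ignore' avoids an error for any parameter
        # the cost happens not to depend on.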
        tensor.grad(cost, rnn.get_params(), disconnected_inputs='ignore')
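Example 1 only checks that the symbolic gradient can be built without raising. A standalone sketch of the Theano behaviour it relies on (variable names here are illustrative, not from the test): with disconnected_inputs='ignore', tensor.grad returns a zero gradient for parameters the cost does not depend on instead of raising.

    import theano
    from theano import tensor

    w = theano.shared(2.0, name='w')
    unused = theano.shared(1.0, name='unused')  # never enters the graph
    x = tensor.dscalar('x')
    cost = (w * x) ** 2

    # d cost / d w = 2 * w * x**2; 'unused' is disconnected, so its
    # gradient defaults to zero rather than raising an error.
    grads = tensor.grad(cost, [w, unused], disconnected_inputs='ignore')
    f = theano.function([x], grads)
    assert [float(g) for g in f(3.0)] == [36.0, 0.0]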
Example 2
    def test_cost(self):
        """
        Use an RNN to calculate Mersenne number sequences of different
        lengths and check whether the costs make sense.
        """
        rnn = RNN(input_space=SequenceSpace(VectorSpace(dim=1)),
                  layers=[
                      Recurrent(dim=1,
                                layer_name='recurrent',
                                irange=0,
                                nonlinearity=lambda x: x),
                      Linear(dim=1, layer_name='linear', irange=0)
                  ])
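        # Hand-set weights: W = 1 (input to hidden), U = 2 (hidden to
        # hidden), zero bias, and a pass-through linear output layer, so
        # the network computes h_t = x_t + 2 * h_{t-1} exactly.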
        W, U, b = rnn.layers[0].get_params()
        W.set_value([[1]])
        U.set_value([[2]])

        W, b = rnn.layers[1].get_params()
        W.set_value([[1]])

        X_data, X_mask = rnn.get_input_space().make_theano_batch()
        y_data, y_mask = rnn.get_output_space().make_theano_batch()
        y_data_hat, y_mask_hat = rnn.fprop((X_data, X_mask))

        seq_len = 20
        X_data_vals = np.ones((seq_len, seq_len, 1))
        X_mask_vals = np.triu(np.ones((seq_len, seq_len)))
        y_data_vals = np.tile((2**np.arange(1, seq_len + 1) - 1),
                              (seq_len, 1)).T[:, :, np.newaxis]
        y_mask_vals = np.triu(np.ones((seq_len, seq_len)))
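        # Column b of the np.triu masks is 1 for the first b + 1 time steps,
        # so the batch covers every sequence length from 1 to seq_len; the
        # targets at each step are the Mersenne numbers 2**t - 1.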

        f = function([X_data, X_mask, y_data, y_mask],
                     rnn.cost((y_data, y_mask), (y_data_hat, y_mask_hat)),
                     allow_input_downcast=True)
        # The cost of two identical sequences should be zero
        assert f(X_data_vals, X_mask_vals, y_data_vals, y_mask_vals) == 0
        # If the input is different, the cost should be non-zero
        assert f(X_data_vals + 1, X_mask_vals, y_data_vals, y_mask_vals) != 0
        # Likewise for the targets; with a squared L2 cost, a uniform shift
        # of 1 gives a cost of exactly 1
        assert f(X_data_vals, X_mask_vals, y_data_vals + 1, y_mask_vals) == 1
        # But if only the masked-out entries change, the cost should remain
        # the same
        X_data_vals_plus = X_data_vals + (1 - X_mask_vals[:, :, None])
        assert f(X_data_vals_plus, X_mask_vals, y_data_vals, y_mask_vals) == 0
        y_data_vals_plus = y_data_vals + (1 - y_mask_vals[:, :, None])
        assert f(X_data_vals, X_mask_vals, y_data_vals_plus, y_mask_vals) == 0
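Why these targets are Mersenne numbers: with the weights set above, the recurrence unrolls to h_t = 1 + 2 * h_{t-1} with h_0 = 0, i.e. h_t = 2**t - 1, and the pass-through output layer reproduces h_t unchanged. A quick standalone NumPy check (no pylearn2 required):

    import numpy as np

    seq_len = 20
    h, states = 0.0, []
    for t in range(seq_len):
        h = 1.0 + 2.0 * h  # h_t = W * x_t + U * h_{t-1} with W=1, U=2, x_t=1
        states.append(h)

    # 1, 3, 7, 15, ... -- exactly the Mersenne numbers 2**t - 1
    assert np.allclose(states, 2.0 ** np.arange(1, seq_len + 1) - 1)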