def test_fprop(self):
    """
    Use an RNN without non-linearity to create the Mersenne numbers
    (2 ** n - 1) to check whether fprop works correctly.
    """
    rnn = RNN(input_space=SequenceSpace(VectorSpace(dim=1)),
              layers=[Recurrent(dim=1, layer_name='recurrent',
                                irange=0.1, indices=[-1],
                                nonlinearity=lambda x: x)])
    W, U, b = rnn.layers[0].get_params()
    W.set_value([[1]])
    U.set_value([[2]])

    X_data, X_mask = rnn.get_input_space().make_theano_batch()
    y_hat = rnn.fprop((X_data, X_mask))

    seq_len = 20
    X_data_vals = np.ones((seq_len, seq_len, 1))
    X_mask_vals = np.triu(np.ones((seq_len, seq_len)))

    f = function([X_data, X_mask], y_hat, allow_input_downcast=True)
    np.testing.assert_allclose(2 ** np.arange(1, seq_len + 1) - 1,
                               f(X_data_vals, X_mask_vals).flatten())
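# Why Mersenne numbers: with an identity nonlinearity the layer computes the
# linear recurrence h_t = W * x_t + U * h_{t-1} + b, so with W = 1, U = 2,
# b = 0 and x_t = 1 this becomes h_t = 2 * h_{t-1} + 1, whose closed form is
# h_t = 2 ** t - 1. A minimal NumPy sketch of that recurrence (a standalone
# sanity check, not part of the test suite; the helper name is hypothetical):

import numpy as np

def mersenne_by_recurrence(seq_len):
    # h_t = 1 * x_t + 2 * h_{t-1} + 0, with x_t = 1 and h_0 = 0
    h = 0.0
    states = []
    for _ in range(seq_len):
        h = 1.0 + 2.0 * h
        states.append(h)
    return np.array(states)

assert np.allclose(mersenne_by_recurrence(5), 2 ** np.arange(1, 6) - 1)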
def outer(self):
    target_space = get_target_space(self)
    if (not self.rnn_friendly and self._requires_reshape and
            not isinstance(target_space,
                           (SequenceSpace, SequenceDataSpace))):
        # Wrap the non-sequence target space so that it matches the
        # sequence type of the MLP's input space
        if isinstance(self.mlp.input_space, SequenceSpace):
            return SequenceSpace(target_space)
        elif isinstance(self.mlp.input_space, SequenceDataSpace):
            return SequenceDataSpace(target_space)
    # Already-sequential or non-reshaped spaces pass through unchanged
    return target_space
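# The branching in `outer`, illustrated with stand-in classes (hypothetical
# names, not the pylearn2 API): only a non-sequence target space of a
# reshaped, non-RNN-friendly layer gets wrapped, and it is wrapped to match
# the sequence type of the MLP's input space.

class Seq(object):
    def __init__(self, inner):
        self.inner = inner

def wrap_target_space(target, mlp_input,
                      requires_reshape=True, rnn_friendly=False):
    # Mirrors `outer` for a single sequence type
    if (not rnn_friendly and requires_reshape and
            not isinstance(target, Seq)):
        if isinstance(mlp_input, Seq):
            return Seq(target)
    return target

assert isinstance(wrap_target_space('vec', Seq('vec')), Seq)
assert wrap_target_space(Seq('vec'), Seq('vec')).inner == 'vec'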
def set_input_space(self, space):
    if ((not isinstance(space, SequenceSpace) and
         not isinstance(space, SequenceDataSpace)) or
            not isinstance(space.space, VectorSpace)):
        raise ValueError("Recurrent layer needs a SequenceSpace("
                         "VectorSpace) or SequenceDataSpace(VectorSpace) "
                         "as input but received %s instead" % (space,))
    self.input_space = space

    if self.indices is not None:
        if len(self.indices) > 1:
            # A CompositeSpace([VectorSpace(dim=self.dim)] *
            # len(self.indices)) output space would be needed here, but
            # returning multiple indices is not supported yet
            raise ValueError("Only indices = [-1] is supported right now")
        assert self.indices == [-1], "Only indices = [-1] works now"
        self.output_space = VectorSpace(dim=self.dim)
    else:
        if isinstance(self.input_space, SequenceSpace):
            self.output_space = SequenceSpace(VectorSpace(dim=self.dim))
        elif isinstance(self.input_space, SequenceDataSpace):
            self.output_space = \
                SequenceDataSpace(VectorSpace(dim=self.dim))

    # Initialize the parameters
    rng = self.mlp.rng
    if self.irange is None:
        raise ValueError("Recurrent layer requires an irange value in "
                         "order to initialize its weight matrices")

    input_dim = self.input_space.dim
    # W is the input-to-hidden matrix, with one block of weights per gate
    W = rng.uniform(-self.irange, self.irange,
                    (input_dim, self.dim * 4))
    # U is the hidden-to-hidden transition matrix, initialized block-wise
    # with random orthogonal matrices (the Q factor of a QR decomposition
    # of a random Gaussian matrix)
    U = np.zeros((self.dim, self.dim * 4))
    for i in xrange(4):
        u = rng.randn(self.dim, self.dim)
        U[:, i * self.dim:(i + 1) * self.dim], _ = scipy.linalg.qr(u)
    # b is the bias
    b = np.zeros((self.dim * 4,))

    self._params = [
        sharedX(W, name=(self.layer_name + '_W')),
        sharedX(U, name=(self.layer_name + '_U')),
        sharedX(b + self.init_bias, name=(self.layer_name + '_b'))
    ]
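# The QR trick above keeps only the orthogonal Q factor, so each of the four
# gate blocks of U starts as a random orthogonal matrix, which preserves the
# norm of the hidden state at initialization. A minimal self-contained
# sketch of that scheme (the helper name is hypothetical):

import numpy as np
import scipy.linalg

def orthogonal_blocks(rng, dim, n_blocks=4):
    # Build a (dim, dim * n_blocks) matrix whose square blocks are each
    # a random orthogonal matrix
    U = np.zeros((dim, dim * n_blocks))
    for i in range(n_blocks):
        Q, _ = scipy.linalg.qr(rng.randn(dim, dim))
        U[:, i * dim:(i + 1) * dim] = Q
    return U

rng = np.random.RandomState(0)
U = orthogonal_blocks(rng, dim=3)
for i in range(4):
    block = U[:, i * 3:(i + 1) * 3]
    assert np.allclose(block.T.dot(block), np.eye(3))  # Q^T Q = I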
def test_gradient(self):
    """
    Testing to see whether the gradient can be calculated.
    """
    rnn = RNN(input_space=SequenceSpace(VectorSpace(dim=1)),
              layers=[Recurrent(dim=2, layer_name='recurrent',
                                irange=0, nonlinearity=lambda x: x),
                      Linear(dim=1, layer_name='linear', irange=0)])

    X_data, X_mask = rnn.get_input_space().make_theano_batch()
    y_data, y_mask = rnn.get_output_space().make_theano_batch()

    default_cost = Default()
    cost = default_cost.expr(rnn, ((X_data, X_mask), (y_data, y_mask)))

    tensor.grad(cost, rnn.get_params(), disconnected_inputs='ignore')
def test_cost(self):
    """
    Use an RNN to calculate Mersenne number sequences of different
    lengths and check whether the costs make sense.
    """
    rnn = RNN(input_space=SequenceSpace(VectorSpace(dim=1)),
              layers=[Recurrent(dim=1, layer_name='recurrent',
                                irange=0, nonlinearity=lambda x: x),
                      Linear(dim=1, layer_name='linear', irange=0)])
    W, U, b = rnn.layers[0].get_params()
    W.set_value([[1]])
    U.set_value([[2]])
    W, b = rnn.layers[1].get_params()
    W.set_value([[1]])

    X_data, X_mask = rnn.get_input_space().make_theano_batch()
    y_data, y_mask = rnn.get_output_space().make_theano_batch()
    y_data_hat, y_mask_hat = rnn.fprop((X_data, X_mask))

    seq_len = 20
    X_data_vals = np.ones((seq_len, seq_len, 1))
    X_mask_vals = np.triu(np.ones((seq_len, seq_len)))
    y_data_vals = np.tile((2 ** np.arange(1, seq_len + 1) - 1),
                          (seq_len, 1)).T[:, :, np.newaxis]
    y_mask_vals = np.triu(np.ones((seq_len, seq_len)))

    f = function([X_data, X_mask, y_data, y_mask],
                 rnn.cost((y_data, y_mask), (y_data_hat, y_mask_hat)),
                 allow_input_downcast=True)

    # The cost for two exact sequences should be zero
    assert f(X_data_vals, X_mask_vals, y_data_vals, y_mask_vals) == 0
    # If the input is different, the cost should be non-zero
    assert f(X_data_vals + 1, X_mask_vals, y_data_vals, y_mask_vals) != 0
    # And the same for the target data; the cost is a squared L2 norm,
    # so a unit offset gives a cost of exactly 1
    assert f(X_data_vals, X_mask_vals, y_data_vals + 1, y_mask_vals) == 1
    # But if the masked data changes, the cost should remain the same
    X_data_vals_plus = X_data_vals + (1 - X_mask_vals[:, :, None])
    assert f(X_data_vals_plus, X_mask_vals, y_data_vals, y_mask_vals) == 0
    y_data_vals_plus = y_data_vals + (1 - y_mask_vals[:, :, None])
    assert f(X_data_vals, X_mask_vals, y_data_vals_plus, y_mask_vals) == 0
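# The masking assertions above hold for a cost that sums per-step squared
# errors weighted by the mask and normalizes by the number of unmasked steps,
# so a unit offset in the targets costs exactly 1. A minimal NumPy sketch of
# such a masked cost (an assumed convention for illustration, not the
# pylearn2 implementation):

import numpy as np

def masked_squared_error(y, y_hat, mask):
    # y, y_hat: (seq_len, batch, dim); mask: (seq_len, batch) of 0/1
    per_step = ((y - y_hat) ** 2).sum(axis=2)
    # Masked steps contribute nothing, so changing them leaves the cost
    # unchanged; normalizing by mask.sum() makes a unit offset cost 1
    return (per_step * mask).sum() / mask.sum()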