def test_lnlstm_precompute():
    num_batch, seq_len, n_features1 = 2, 3, 4
    num_units = 2
    in_shp = (num_batch, seq_len, n_features1)
    l_inp = InputLayer(in_shp)
    l_mask_inp = InputLayer(in_shp[:2])

    x_in = np.random.random(in_shp).astype('float32')
    mask_in = np.ones((num_batch, seq_len), dtype='float32')

    # need to set random seed.
    lasagne.random.get_rng().seed(1234)
    l_lstm_precompute = LNLSTMLayer(
        l_inp, num_units=num_units, precompute_input=True,
        mask_input=l_mask_inp)
    lasagne.random.get_rng().seed(1234)
    l_lstm_no_precompute = LNLSTMLayer(
        l_inp, num_units=num_units, precompute_input=False,
        mask_input=l_mask_inp)
    output_precompute = helper.get_output(
        l_lstm_precompute).eval({l_inp.input_var: x_in,
                                 l_mask_inp.input_var: mask_in})
    output_no_precompute = helper.get_output(
        l_lstm_no_precompute).eval({l_inp.input_var: x_in,
                                    l_mask_inp.input_var: mask_in})

    # test that precomputing the input gives the same output
    np.testing.assert_almost_equal(output_precompute, output_no_precompute)

def test_CustomRecurrentLayer_child_kwargs():
    in_shape = (2, 3, 4)
    n_hid = 5
    # Construct mock for input-to-hidden layer
    in_to_hid = Mock(
        Layer,
        output_shape=(in_shape[0] * in_shape[1], n_hid),
        input_shape=(in_shape[0] * in_shape[1], in_shape[2]),
        input_layer=InputLayer((in_shape[0] * in_shape[1], in_shape[2])))
    # These two functions get called, need to return dummy values for them
    in_to_hid.get_output_for.return_value = T.matrix()
    in_to_hid.get_params.return_value = []
    # As above, for hidden-to-hidden layer
    hid_to_hid = Mock(
        Layer,
        output_shape=(in_shape[0], n_hid),
        input_shape=(in_shape[0], n_hid),
        input_layer=InputLayer((in_shape[0], n_hid)))
    hid_to_hid.get_output_for.return_value = T.matrix()
    hid_to_hid.get_params.return_value = []
    # Construct a CustomRecurrentLayer using these Mocks
    l_rec = lasagne.layers.CustomRecurrentLayer(
        InputLayer(in_shape), in_to_hid, hid_to_hid)
    # Call get_output with a kwarg; it should be passed on to in_to_hid
    # and hid_to_hid
    helper.get_output(l_rec, foo='bar')
    # Retrieve the arguments used to call in_to_hid.get_output_for
    args, kwargs = in_to_hid.get_output_for.call_args
    # Should be one argument - the Theano expression
    assert len(args) == 1
    # One keyword argument - should be 'foo' -> 'bar'
    assert kwargs == {'foo': 'bar'}
    # Same as with in_to_hid
    args, kwargs = hid_to_hid.get_output_for.call_args
    assert len(args) == 1
    assert kwargs == {'foo': 'bar'}

def test_recurrent_unroll_scan_fwd():
    num_batch, seq_len, n_features1 = 2, 3, 4
    num_units = 2
    in_shp = (num_batch, seq_len, n_features1)
    l_inp = InputLayer(in_shp)
    l_mask_inp = InputLayer(in_shp[:2])

    x_in = np.random.random(in_shp).astype('float32')
    mask_in = np.ones(in_shp[:2]).astype('float32')

    # need to set random seed.
    lasagne.random.get_rng().seed(1234)
    l_rec_scan = RecurrentLayer(l_inp, num_units=num_units, backwards=False,
                                unroll_scan=False, mask_input=l_mask_inp)
    lasagne.random.get_rng().seed(1234)
    l_rec_unroll = RecurrentLayer(l_inp, num_units=num_units, backwards=False,
                                  unroll_scan=True, mask_input=l_mask_inp)
    output_scan = helper.get_output(l_rec_scan)
    output_unrolled = helper.get_output(l_rec_unroll)

    output_scan_val = output_scan.eval(
        {l_inp.input_var: x_in, l_mask_inp.input_var: mask_in})
    output_unrolled_val = output_unrolled.eval(
        {l_inp.input_var: x_in, l_mask_inp.input_var: mask_in})

    np.testing.assert_almost_equal(output_scan_val, output_unrolled_val)

def test_recurrent_precompute():
    num_batch, seq_len, n_features1 = 2, 3, 4
    num_units = 2
    in_shp = (num_batch, seq_len, n_features1)
    l_inp = InputLayer(in_shp)
    l_mask_inp = InputLayer(in_shp[:2])

    x_in = np.random.random(in_shp).astype('float32')
    mask_in = np.ones((num_batch, seq_len), dtype='float32')

    # need to set random seed.
    lasagne.random.get_rng().seed(1234)
    l_rec_precompute = RecurrentLayer(l_inp, num_units=num_units,
                                      precompute_input=True,
                                      mask_input=l_mask_inp)
    lasagne.random.get_rng().seed(1234)
    l_rec_no_precompute = RecurrentLayer(l_inp, num_units=num_units,
                                         precompute_input=False,
                                         mask_input=l_mask_inp)
    output_precompute = helper.get_output(l_rec_precompute).eval(
        {l_inp.input_var: x_in, l_mask_inp.input_var: mask_in})
    output_no_precompute = helper.get_output(l_rec_no_precompute).eval(
        {l_inp.input_var: x_in, l_mask_inp.input_var: mask_in})

    np.testing.assert_almost_equal(output_precompute, output_no_precompute)

def test_gru_unroll_scan_fwd():
    num_batch, seq_len, n_features1 = 2, 3, 4
    num_units = 2
    in_shp = (num_batch, seq_len, n_features1)
    l_inp = InputLayer(in_shp)
    l_mask_inp = InputLayer(in_shp[:2])

    x_in = np.random.random(in_shp).astype('float32')
    mask_in = np.ones(in_shp[:2]).astype('float32')

    # need to set random seed.
    lasagne.random.get_rng().seed(1234)
    l_gru_scan = GRULayer(l_inp, num_units=num_units, backwards=False,
                          unroll_scan=False, mask_input=l_mask_inp)
    lasagne.random.get_rng().seed(1234)
    l_gru_unrolled = GRULayer(l_inp, num_units=num_units, backwards=False,
                              unroll_scan=True, mask_input=l_mask_inp)
    output_scan = helper.get_output(l_gru_scan)
    output_unrolled = helper.get_output(l_gru_unrolled)

    output_scan_val = output_scan.eval(
        {l_inp.input_var: x_in, l_mask_inp.input_var: mask_in})
    output_unrolled_val = output_unrolled.eval(
        {l_inp.input_var: x_in, l_mask_inp.input_var: mask_in})

    np.testing.assert_almost_equal(output_scan_val, output_unrolled_val)

def test_gru_unroll_scan_bck():
    num_batch, seq_len, n_features1 = 2, 5, 4
    num_units = 2
    x = T.tensor3()
    in_shp = (num_batch, seq_len, n_features1)
    l_inp = InputLayer(in_shp)
    x_in = np.random.random(in_shp).astype('float32')

    # need to set random seed.
    lasagne.random.get_rng().seed(1234)
    l_gru_scan = GRULayer(l_inp, num_units=num_units, backwards=True,
                          unroll_scan=False)
    lasagne.random.get_rng().seed(1234)
    l_gru_unrolled = GRULayer(l_inp, num_units=num_units, backwards=True,
                              unroll_scan=True)
    output_scan = helper.get_output(l_gru_scan, x)
    output_unrolled = helper.get_output(l_gru_unrolled, x)

    output_scan_val = output_scan.eval({x: x_in})
    output_unrolled_val = output_unrolled.eval({x: x_in})

    np.testing.assert_almost_equal(output_scan_val, output_unrolled_val)

def test_gru_precompute():
    num_batch, seq_len, n_features1 = 2, 3, 4
    num_units = 2
    in_shp = (num_batch, seq_len, n_features1)
    l_inp = InputLayer(in_shp)
    l_mask_inp = InputLayer(in_shp[:2])

    x_in = np.random.random(in_shp).astype('float32')
    mask_in = np.ones((num_batch, seq_len), dtype='float32')

    # need to set random seed.
    lasagne.random.get_rng().seed(1234)
    l_gru_precompute = GRULayer(l_inp, num_units=num_units,
                                precompute_input=True,
                                mask_input=l_mask_inp)
    lasagne.random.get_rng().seed(1234)
    l_gru_no_precompute = GRULayer(l_inp, num_units=num_units,
                                   precompute_input=False,
                                   mask_input=l_mask_inp)
    output_precompute = helper.get_output(
        l_gru_precompute).eval({l_inp.input_var: x_in,
                                l_mask_inp.input_var: mask_in})
    output_no_precompute = helper.get_output(
        l_gru_no_precompute).eval({l_inp.input_var: x_in,
                                   l_mask_inp.input_var: mask_in})

    # test that precomputing the input gives the same output
    np.testing.assert_almost_equal(output_precompute, output_no_precompute)

def step(input_n, hid_previous, *args):
    # Compute the hidden-to-hidden activation
    hid_pre = helper.get_output(self.hidden_to_hidden,
                                hid_previous, **kwargs)
    # out_layers = helper.get_all_layers(self.output_to_hidden)
    # out_layers[1].incoming_layer = self.hidden_to_hidden
    hid_pre += helper.get_output(self.output_to_hidden,
                                 hid_previous, **kwargs)

    # If the dot product is precomputed then add it, otherwise
    # calculate the input_to_hidden values and add them
    if self.precompute_input:
        hid_pre += input_n
    else:
        hid_pre += helper.get_output(self.input_to_hidden,
                                     input_n, **kwargs)

    # Clip gradients
    if self.grad_clipping:
        hid_pre = theano.gradient.grad_clip(hid_pre,
                                            -self.grad_clipping,
                                            self.grad_clipping)

    return self.nonlinearity(hid_pre)

def test_get_output_with_unused_kwarg(self, layers, get_output):
    l1, l2, l3 = layers
    unused_kwarg = object()
    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter('always')
        get_output(l3, kwagg=unused_kwarg)
    assert len(w) == 1
    assert issubclass(w[0].category, UserWarning)
    assert 'perhaps you meant kwarg' in str(w[0].message)

def test_layer_from_shape_valid_get_output(self, layer_from_shape,
                                           get_output):
    layer = layer_from_shape
    inputs = {layer: theano.tensor.matrix()}
    assert get_output(layer, inputs) is inputs[layer]
    inputs = {None: theano.tensor.matrix()}
    layer.get_output_for = Mock()
    assert get_output(layer, inputs) is layer.get_output_for.return_value
    layer.get_output_for.assert_called_with(inputs[None])

def step(input_n, hid_previous, *args):
    # Compute the hidden-to-hidden activation
    hid_pre = helper.get_output(
        self.hidden_to_hidden, hid_previous, **kwargs)
    hid_pre = T.concatenate([hid_pre, input_n], axis=1)
    hid_pre = helper.get_output(self.post_concat, hid_pre, **kwargs)

    if self.grad_clipping:
        hid_pre = theano.gradient.grad_clip(
            hid_pre, -self.grad_clipping, self.grad_clipping)

    return hid_pre

def test_get_output_input_is_a_mapping_no_key(self, layers, get_output):
    l1, l2, l3 = layers
    output = get_output(l3, {})
    # expected: l3.get_output_for(l2.get_output_for(l1.input_var))
    assert output is l3.get_output_for.return_value
    l3.get_output_for.assert_called_with(l2.get_output_for.return_value)
    l2.get_output_for.assert_called_with(l1.input_var)

def test_lstm_grad(num_units):
    num_batch, seq_len, n_features = 5, 3, 10
    l_inp = InputLayer((num_batch, seq_len, n_features))
    l_lstm = LSTMLayer(l_inp, num_units=num_units)
    output = helper.get_output(l_lstm)
    g = T.grad(T.mean(output), lasagne.layers.get_all_params(l_lstm))
    assert isinstance(g, (list, tuple))

def get_output_for(self, inputs, **kwargs):
    input = inputs[0]
    hid_init = None
    if self.hid_init_incoming_index > 0:
        hid_init = inputs[self.hid_init_incoming_index]

    # Input should be provided as (n_batch, n_time_steps, n_features)
    # but scan requires the iterable dimension to be first
    # So, we need to dimshuffle to (n_time_steps, n_batch, n_features)
    input = input.dimshuffle(1, 0, *range(2, input.ndim))
    seq_len, num_batch = input.shape[0], input.shape[1]

    # precompute inputs before scanning
    trailing_dims = tuple(input.shape[n] for n in range(2, input.ndim))
    input = T.reshape(input, (seq_len * num_batch,) + trailing_dims)
    input = helper.get_output(self.input_to_hidden, input, **kwargs)

    # Reshape back to (seq_len, batch_size, trailing dimensions...)
    trailing_dims = tuple(input.shape[n] for n in range(1, input.ndim))
    input = T.reshape(input, (seq_len, num_batch) + trailing_dims)

    # pass params to step
    non_seqs = helper.get_all_params(self.hidden_to_hidden)
    non_seqs += helper.get_all_params(self.post_concat)

    # Create single recurrent computation step function
    def step(input_n, hid_previous, *args):
        # Compute the hidden-to-hidden activation
        hid_pre = helper.get_output(self.hidden_to_hidden,
                                    hid_previous, **kwargs)
        hid_pre = T.concatenate([hid_pre, input_n], axis=1)
        hid_pre = helper.get_output(self.post_concat, hid_pre, **kwargs)

        if self.grad_clipping:
            hid_pre = theano.gradient.grad_clip(
                hid_pre, -self.grad_clipping, self.grad_clipping)

        return hid_pre

    sequences = input
    step_fun = step

    if not isinstance(self.hid_init, Layer):
        # repeats self.hid_init num_batch times in first dimension
        dot_dims = (list(range(1, self.hid_init.ndim - 1)) +
                    [0, self.hid_init.ndim - 1])
        hid_init = T.dot(T.ones((num_batch, 1)),
                         self.hid_init.dimshuffle(dot_dims))

    hid_out = theano.scan(
        fn=step_fun,
        sequences=sequences,
        go_backwards=False,
        outputs_info=[hid_init],
        non_sequences=non_seqs,
        truncate_gradient=-1,
        strict=True)[0]

    # dimshuffle back to (n_batch, n_time_steps, n_features)
    hid_out = hid_out.dimshuffle(1, 0, *range(2, hid_out.ndim))

    return hid_out

def test_get_output_without_arguments(self, layers, get_output):
    l1, l2, l3 = layers
    output = get_output(l3)
    # expected: l3.get_output_for(l2.get_output_for(l1.input_var))
    assert output is l3.get_output_for.return_value
    l3.get_output_for.assert_called_with(l2.get_output_for.return_value)
    l2.get_output_for.assert_called_with(l1.input_var)

def test_embedding_2D_input():
    import numpy as np
    import theano
    import theano.tensor as T
    from lasagne.layers import EmbeddingLayer, InputLayer, helper

    x = T.imatrix()
    batch_size = 2
    seq_len = 3
    emb_size = 5
    vocab_size = 3
    l_in = InputLayer((None, seq_len))
    W = np.arange(
        vocab_size*emb_size).reshape((vocab_size, emb_size)).astype('float32')
    l1 = EmbeddingLayer(l_in, input_size=vocab_size, output_size=emb_size,
                        W=W)
    x_test = np.array([[0, 1, 2], [0, 0, 2]], dtype='int32')

    # check output shape
    assert helper.get_output_shape(
        l1, (batch_size, seq_len)) == (batch_size, seq_len, emb_size)
    output = helper.get_output(l1, x)
    f = theano.function([x], output)
    np.testing.assert_array_almost_equal(f(x_test), W[x_test])

def get_functions():
    input_layer = layers.InputLayer(shape=(BATCH_SIZE, INPUT_LENGTH))
    print("input_layer size: " + str(input_layer.shape[0]) + "," +
          str(input_layer.shape[1]))
    layer = input_layer
    for layer_num in range(len(NUM_UNITS_HIDDEN_LAYER)):
        print("layer_num-" + str(layer_num))
        layer = layers.DenseLayer(layer,
                                  num_units=NUM_UNITS_HIDDEN_LAYER[layer_num],
                                  W=lasagne.init.Normal(0.01),
                                  nonlinearity=nonlinearities.tanh)
    output_layer = layers.DenseLayer(layer, num_units=OUTPUT_SIZE,
                                     nonlinearity=nonlinearities.softmax)

    network_output = get_output(output_layer)
    expected_output = T.ivector()

    loss_train = aggregate(
        categorical_crossentropy(network_output, expected_output),
        mode='mean')
    all_weights = layers.get_all_params(output_layer)
    update_rule = lasagne.updates.nesterov_momentum(
        loss_train, all_weights, learning_rate=LEARNING_RATE)

    print("input_layer_end size: " + str(input_layer.shape[0]) + "," +
          str(input_layer.shape[1]))
    train_function = theano.function(
        inputs=[input_layer.input_var, expected_output],
        outputs=loss_train, updates=update_rule, allow_input_downcast=True)

    prediction = T.argmax(network_output, axis=1)
    accuracy = T.mean(T.eq(prediction, expected_output),
                      dtype=theano.config.floatX)  # @UndefinedVariable
    test_function = theano.function(
        inputs=[input_layer.input_var, expected_output],
        outputs=[loss_train, accuracy, prediction],
        allow_input_downcast=True)

    output_function = theano.function([input_layer.input_var],
                                      get_output(output_layer),
                                      allow_input_downcast=True)

    return train_function, test_function, output_function

def test_recurrent_grad():
    num_batch, seq_len, n_features = 5, 3, 10
    num_units = 6
    l_inp = InputLayer((num_batch, seq_len, n_features))
    l_rec = RecurrentLayer(l_inp, num_units=num_units)
    output = helper.get_output(l_rec)
    g = T.grad(T.mean(output), lasagne.layers.get_all_params(l_rec))
    assert isinstance(g, (list, tuple))

def test_lstm_return_final():
    num_batch, seq_len, n_features = 2, 3, 4
    num_units = 2
    in_shp = (num_batch, seq_len, n_features)
    x_in = np.random.random(in_shp).astype('float32')

    l_inp = InputLayer(in_shp)
    lasagne.random.get_rng().seed(1234)
    l_rec_final = LSTMLayer(l_inp, num_units, only_return_final=True)
    lasagne.random.get_rng().seed(1234)
    l_rec_all = LSTMLayer(l_inp, num_units, only_return_final=False)

    output_final = helper.get_output(l_rec_final).eval(
        {l_inp.input_var: x_in})
    output_all = helper.get_output(l_rec_all).eval({l_inp.input_var: x_in})

    assert output_final.shape == (output_all.shape[0], output_all.shape[2])
    assert output_final.shape == lasagne.layers.get_output_shape(l_rec_final)
    assert np.allclose(output_final, output_all[:, -1])

def test_tuple_shape(self, func, input_layer, ExpressionLayer):
    from lasagne.layers.helper import get_output
    X, expected = self.np_result(func, input_layer)

    layer = ExpressionLayer(input_layer, func, output_shape=expected.shape)
    assert layer.get_output_shape_for(X.shape) == expected.shape

    output = get_output(layer, X).eval()
    assert np.allclose(output, expected)

def test_gru_return_final():
    num_batch, seq_len, n_features = 2, 3, 4
    num_units = 2
    in_shp = (num_batch, seq_len, n_features)
    x_in = np.random.random(in_shp).astype('float32')

    l_inp = InputLayer(in_shp)
    lasagne.random.get_rng().seed(1234)
    l_rec_final = GRULayer(l_inp, num_units, only_return_final=True)
    lasagne.random.get_rng().seed(1234)
    l_rec_all = GRULayer(l_inp, num_units, only_return_final=False)

    output_final = helper.get_output(l_rec_final).eval(
        {l_inp.input_var: x_in})
    output_all = helper.get_output(l_rec_all).eval({l_inp.input_var: x_in})

    assert output_final.shape == (output_all.shape[0], output_all.shape[2])
    assert output_final.shape == lasagne.layers.get_output_shape(l_rec_final)
    assert np.allclose(output_final, output_all[:, -1])

def test_cifar_deterministic(cls, **kwargs):
    model = cls.cifar_model(**kwargs)
    data = floatX(numpy.random.normal(0.0, 1.0, (64, 3, 32, 32)))
    input_var = tensor.tensor4('inputs')
    activation = get_output(model, input_var, deterministic=True)
    func = function([input_var], activation)
    output = func(data)
    del output

def test_lstm_grad():
    num_batch, seq_len, n_features = 5, 3, 10
    num_units = 6
    x = T.tensor3()
    mask = T.matrix()
    l_inp = InputLayer((num_batch, seq_len, n_features))
    l_lstm = LSTMLayer(l_inp, num_units=num_units)
    l_out = helper.get_output(l_lstm, x, mask=mask)
    g = T.grad(T.mean(l_out), lasagne.layers.get_all_params(l_lstm))
    assert isinstance(g, (list, tuple))

def test_recurrent_unroll_scan_bck():
    num_batch, seq_len, n_features1 = 2, 3, 4
    num_units = 2
    x = T.tensor3()
    in_shp = (num_batch, seq_len, n_features1)
    l_inp = InputLayer(in_shp)
    x_in = np.random.random(in_shp).astype("float32")

    # need to set random seed.
    np.random.seed(1234)
    l_rec_scan = RecurrentLayer(l_inp, num_units=num_units, backwards=True,
                                unroll_scan=False)
    np.random.seed(1234)
    l_rec_unroll = RecurrentLayer(l_inp, num_units=num_units, backwards=True,
                                  unroll_scan=True)
    output_scan = helper.get_output(l_rec_scan, x)
    output_unrolled = helper.get_output(l_rec_unroll, x)

    output_scan_val = output_scan.eval({x: x_in})
    output_unrolled_val = output_unrolled.eval({x: x_in})

    np.testing.assert_almost_equal(output_scan_val, output_unrolled_val)

def step(input_n, hid_previous, *args):
    # Compute the hidden-to-hidden activation
    hid_pre = helper.get_output(
        self.hidden_to_hidden, hid_previous, **kwargs)

    # If the dot product is precomputed then add it, otherwise
    # calculate the input_to_hidden values and add them
    if self.precompute_input:
        hid_pre += input_n
    else:
        hid_pre += helper.get_output(
            self.input_to_hidden, input_n, **kwargs)

    # Clip gradients
    if self.grad_clipping:
        hid_pre = theano.gradient.grad_clip(
            hid_pre, -self.grad_clipping, self.grad_clipping)

    return self.nonlinearity(hid_pre)

def test_get_output_with_single_argument(self, layers, get_output):
    l1, l2, l3 = layers
    inputs, kwarg = theano.tensor.matrix(), object()
    output = get_output(l3, inputs, kwarg=kwarg)
    # expected: l3.get_output_for(l2.get_output_for(inputs, kwarg=kwarg),
    #                             kwarg=kwarg)
    assert output is l3.get_output_for.return_value
    l3.get_output_for.assert_called_with(l2.get_output_for.return_value,
                                         kwarg=kwarg)
    l2.get_output_for.assert_called_with(inputs, kwarg=kwarg)

def step(input_n, hid_previous, *args):
    # Compute the hidden-to-hidden activation
    hid_to_hid = helper.get_output(self.hidden_to_hidden,
                                   hid_previous, **kwargs)

    # Compute the input-to-hidden activation
    if self.precompute_input:
        # if the input is precomputed
        in_to_hid = input_n
    else:
        # compute the input
        in_to_hid = helper.get_output(self.input_to_hidden,
                                      input_n, **kwargs)

    # Compute the second order term
    if self.a_g is not None:
        second_order_term = (self.a_g * in_to_hid * hid_to_hid)
        # second_order_term = in_to_hid * hid_to_hid
    else:
        second_order_term = 0

    # Compute the first order hidden-to-hidden term
    if self.b_g_hid_to_hid is not None:
        f_o_hid_to_hid = self.b_g_hid_to_hid * hid_to_hid
    else:
        f_o_hid_to_hid = 0

    # Compute first order input to hidden term
    if self.b_g_in_to_hid is not None:
        f_o_in_to_hid = self.b_g_in_to_hid * in_to_hid
    else:
        # if all else is None, it will output zeros of the right size
        f_o_in_to_hid = T.zeros_like(in_to_hid)

    hid_pre = second_order_term + f_o_in_to_hid + f_o_hid_to_hid

    if self.b is not None:
        hid_pre = hid_pre + self.b

    return self.nonlinearity(hid_pre)

def step(input_n, hid_prevprev, hid_previous, *args):
    # Compute the hidden-to-hidden activation
    hid_pre = helper.get_output(self.hidden_to_hidden,
                                hid_previous, **kwargs)

    # If the dot product is precomputed then add it, otherwise
    # calculate the input_to_hidden values and add them
    if self.precompute_input:
        hid_pre += input_n
    else:
        hid_pre += helper.get_output(
            self.input_to_hidden, input_n, **kwargs)

    # Clip gradients
    if self.grad_clipping:
        hid_pre = theano.gradient.grad_clip(hid_pre,
                                            -self.grad_clipping,
                                            self.grad_clipping)

    hid_pre += self.gamma * hid_prevprev * T.clip(
        T.tile(T.reshape(T.diagonal(T.dot(hid_prevprev, hid_previous.T)),
                         (1, hid_previous.shape[0])),
               (hid_previous.shape[1], 1)).T,
        0.0, 100.0)

    return self.nonlinearity(hid_pre)

def test_recurrent_precompute():
    num_batch, seq_len, n_features1 = 2, 3, 4
    num_units = 2
    x = T.tensor3()
    mask = T.matrix()
    in_shp = (num_batch, seq_len, n_features1)
    l_inp = InputLayer(in_shp)
    x_in = np.random.random(in_shp).astype("float32")
    mask_in = np.ones((num_batch, seq_len), dtype="float32")

    # need to set random seed.
    np.random.seed(1234)
    l_rec_precompute = RecurrentLayer(l_inp, num_units=num_units,
                                      precompute_input=True)
    np.random.seed(1234)
    l_rec_no_precompute = RecurrentLayer(l_inp, num_units=num_units,
                                         precompute_input=False)
    output_precompute = helper.get_output(
        l_rec_precompute, x, mask=mask).eval({x: x_in, mask: mask_in})
    output_no_precompute = helper.get_output(
        l_rec_no_precompute, x, mask=mask).eval({x: x_in, mask: mask_in})

    np.testing.assert_almost_equal(output_precompute, output_no_precompute)

def get_output_for(self, input, **kwargs):
    rs = input.reshape((input.shape[0], input.shape[1],
                        input.shape[2], 1))  # B,V,S,1
    z1 = T.tile(rs, (1, 1, 1, input.shape[2]))
    z2 = z1.transpose((0, 1, 3, 2))
    Z = T.concatenate([z1, z2], axis=1)
    Y = helper.get_output(self.subnet, Z)
    if self.pooling == 'mean':
        return T.mean(Y, axis=3)
    elif self.pooling == 'max':
        return T.max(Y, axis=3)
    else:
        return self.pooling(Y)

def test_get_output_input_is_a_mapping(self, layers, get_output):
    l1, l2, l3 = layers
    p = PropertyMock()
    type(l1).input_var = p
    inputs = {l3: theano.tensor.matrix()}
    # expected: inputs[l3]
    assert get_output(l3, inputs) is inputs[l3]
    # l3.get_output_for, l2.get_output_for should not have been called
    assert l3.get_output_for.call_count == 0
    assert l2.get_output_for.call_count == 0
    # l1.input_var should not have been accessed
    assert p.call_count == 0

def test_get_output_without_arguments(self, layers, get_output):
    l1, l2, l3 = layers
    output = get_output(l3)
    # expected: l3.get_output_for([l2[0].get_output_for(l1[0].input_var),
    #                              l2[1].get_output_for(l1[1].input_var)])
    assert output is l3.get_output_for.return_value
    l3.get_output_for.assert_called_with([
        l2[0].get_output_for.return_value,
        l2[1].get_output_for.return_value,
    ])
    l2[0].get_output_for.assert_called_with(l1[0].input_var)
    l2[1].get_output_for.assert_called_with(l1[1].input_var)

def test_get_output_input_is_a_mapping_no_key(self, layers, get_output):
    l1, l2, l3 = layers
    output = get_output(l3, {})
    # expected: l3.get_output_for([l2[0].get_output_for(l1[0].input_var),
    #                              l2[1].get_output_for(l1[1].input_var)])
    assert output is l3.get_output_for.return_value
    l3.get_output_for.assert_called_with([
        l2[0].get_output_for.return_value,
        l2[1].get_output_for.return_value,
    ])
    l2[0].get_output_for.assert_called_with(l1[0].input_var)
    l2[1].get_output_for.assert_called_with(l1[1].input_var)

def test_lstm_bck():
    num_batch, seq_len, n_features1 = 2, 3, 4
    num_units = 2
    x = T.tensor3()
    in_shp = (num_batch, seq_len, n_features1)
    l_inp = InputLayer(in_shp)

    x_in = np.ones(in_shp).astype('float32')

    # need to set random seed.
    np.random.seed(1234)
    l_lstm_fwd = LSTMLayer(l_inp, num_units=num_units, backwards=False)
    np.random.seed(1234)
    l_lstm_bck = LSTMLayer(l_inp, num_units=num_units, backwards=True)

    l_out_fwd = helper.get_output(l_lstm_fwd, x)
    l_out_bck = helper.get_output(l_lstm_bck, x)

    f_lstm = theano.function([x], [l_out_fwd, l_out_bck])
    f_out_fwd, f_out_bck = f_lstm(x_in)

    # test that the backwards model reverses its final input
    np.testing.assert_almost_equal(f_out_fwd, f_out_bck[:, ::-1])

def test_callable_shape(self, func, input_layer, ExpressionLayer):
    from lasagne.layers.helper import get_output
    X, expected = self.np_result(func, input_layer)

    def get_shape(input_shape):
        return func(np.empty(shape=input_shape)).shape

    layer = ExpressionLayer(input_layer, func, output_shape=get_shape)
    assert layer.get_output_shape_for(X.shape) == expected.shape

    output = get_output(layer, X).eval()
    assert np.allclose(output, expected)

def test_get_output_input_is_a_mapping_to_array(self, layers, get_output):
    l1, l2, l3 = layers
    p = PropertyMock()
    type(l1).input_var = p
    inputs = {l3: [[1, 2, 3]]}
    output = get_output(l3, inputs)
    # expected: inputs[l3]
    assert numpy.all(output.eval() == inputs[l3])
    # l3.get_output_for, l2.get_output_for should not have been called
    assert l3.get_output_for.call_count == 0
    assert l2.get_output_for.call_count == 0
    # l1.input_var should not have been accessed
    assert p.call_count == 0

def test(precompute, order, learn_init, unroll_scan):
    in_l1 = L.InputLayer((5, 3, 12), name="input")
    in_l2 = L.InputLayer((5, 3, 13), name="input")
    n_in = 6 if order == "TND" else 10
    step_l = L.RENStep((n_in, 25), 10, 25, name="cell",
                       pre_compute_input=precompute, learn_init=learn_init)
    rec_l = L.RNNLayer((in_l1, in_l2), step_l, name="rec",
                       in_order=order, unroll_scan=unroll_scan)
    r1 = theano.shared(np.random.randn(5, 3, 12).astype(theano.config.floatX))
    r2 = theano.shared(np.random.randn(5, 3, 13).astype(theano.config.floatX))
    out = h.get_output(rec_l, inputs={in_l1: r1, in_l2: r2}).eval()
    print("Predicted:", h.get_output_shape(rec_l))
    print("Actual:   ", out.shape)
    print("Min-max [{:.3f}, {:.3f}]".format(np.min(out), np.max(out)))

def test_gru_bck():
    num_batch, seq_len, n_features1 = 2, 3, 4
    num_units = 2
    x = T.tensor3()
    in_shp = (num_batch, seq_len, n_features1)
    l_inp = InputLayer(in_shp)

    x_in = np.ones(in_shp).astype('float32')

    # need to set random seed.
    lasagne.random.get_rng().seed(1234)
    l_gru_fwd = GRULayer(l_inp, num_units=num_units, backwards=False)
    lasagne.random.get_rng().seed(1234)
    l_gru_bck = GRULayer(l_inp, num_units=num_units, backwards=True)

    output_fwd = helper.get_output(l_gru_fwd, x)
    output_bck = helper.get_output(l_gru_bck, x)

    output_fwd_val = output_fwd.eval({x: x_in})
    output_bck_val = output_bck.eval({x: x_in})

    # test that the backwards model reverses its final input
    np.testing.assert_almost_equal(output_fwd_val, output_bck_val[:, ::-1])

def test_get_output_for(self, invlayer_vars):
    from lasagne.layers.helper import get_output
    invlayer = invlayer_vars['invlayer']
    layer = invlayer_vars['layer']
    W = layer.W.get_value()
    input = theano.shared(np.random.rand(*layer.input_shape))
    results = get_output(invlayer, inputs=input)

    # Check that the output of the invlayer is the output of the
    # dot product of the output of the dense layer and the
    # transposed weights
    assert np.allclose(results.eval(),
                       np.dot(np.dot(input.get_value(), W), W.T))

def test_recurrent_bck():
    num_batch, seq_len, n_features1 = 2, 3, 4
    num_units = 2
    x = T.tensor3()
    in_shp = (num_batch, seq_len, n_features1)
    l_inp = InputLayer(in_shp)

    x_in = np.ones(in_shp).astype("float32")

    # need to set random seed.
    np.random.seed(1234)
    l_rec_fwd = RecurrentLayer(l_inp, num_units=num_units, backwards=False)
    np.random.seed(1234)
    l_rec_bck = RecurrentLayer(l_inp, num_units=num_units, backwards=True)

    l_out_fwd = helper.get_output(l_rec_fwd, x)
    l_out_bck = helper.get_output(l_rec_bck, x)

    # evaluate both output expressions on the same input
    output_fwd = l_out_fwd.eval({x: x_in})
    output_bck = l_out_bck.eval({x: x_in})

    # test that the backwards model reverses its final input
    np.testing.assert_almost_equal(output_fwd, output_bck[:, ::-1])

def test_lstm_variable_input_size():
    # check that seq_len and batch_size set to None work
    num_batch, n_features1 = 6, 5
    num_units = 13
    x = T.tensor3()

    in_shp = (None, None, n_features1)
    l_inp = InputLayer(in_shp)
    x_in1 = np.ones((num_batch+1, 3+1, n_features1)).astype('float32')
    x_in2 = np.ones((num_batch, 3, n_features1)).astype('float32')

    l_rec = LSTMLayer(l_inp, num_units=num_units, backwards=False)

    output = helper.get_output(l_rec, x)
    output_val1 = output.eval({x: x_in1})
    output_val2 = output.eval({x: x_in2})

def test_gru_return_shape():
    num_batch, seq_len, n_features1, n_features2 = 5, 3, 10, 11
    num_units = 6
    x = T.tensor4()
    in_shp = (num_batch, seq_len, n_features1, n_features2)
    l_inp = InputLayer(in_shp)
    l_rec = GRULayer(l_inp, num_units=num_units)

    x_in = np.random.random(in_shp).astype('float32')
    output = helper.get_output(l_rec, x)
    output_val = output.eval({x: x_in})

    assert helper.get_output_shape(l_rec, x_in.shape) == output_val.shape
    assert output_val.shape == (num_batch, seq_len, num_units)