def getTrainedRNN():
    """Read the parameters from file and set them on the network."""
    # TODO: Refactor so as to do this only once
    input_size = 39
    hidden_size = 50
    num_output_classes = 29
    learning_rate = 0.001
    output_size = num_output_classes + 1
    batch_size = None
    input_seq_length = None
    gradient_clipping = 5

    l_in = InputLayer(shape=(batch_size, input_seq_length, input_size))
    # Unnecessary in this version; collected only so the output can be
    # reshaped back to the original shape.
    n_batch, n_time_steps, n_features = l_in.input_var.shape
    # h_1 = DenseLayer(l_in, num_units=hidden_size, nonlinearity=clipped_relu)
    l_rec_forward = RecurrentLayer(l_in, num_units=hidden_size,
                                   grad_clipping=gradient_clipping,
                                   nonlinearity=clipped_relu)
    l_rec_backward = RecurrentLayer(l_in, num_units=hidden_size,
                                    grad_clipping=gradient_clipping,
                                    nonlinearity=clipped_relu, backwards=True)
    l_rec_accumulation = ElemwiseSumLayer([l_rec_forward, l_rec_backward])
    l_rec_reshaped = ReshapeLayer(l_rec_accumulation, (-1, hidden_size))
    l_h2 = DenseLayer(l_rec_reshaped, num_units=hidden_size,
                      nonlinearity=clipped_relu)
    l_out = DenseLayer(l_h2, num_units=output_size,
                       nonlinearity=lasagne.nonlinearities.linear)
    # Reshaping back
    l_out_reshaped = ReshapeLayer(l_out, (n_batch, n_time_steps, output_size))
    l_out_softmax = NonlinearityLayer(l_out,
                                      nonlinearity=lasagne.nonlinearities.softmax)
    l_out_softmax_reshaped = ReshapeLayer(l_out_softmax,
                                          (n_batch, n_time_steps, output_size))

    with np.load('CTC_model.npz') as f:
        param_values = [f['arr_%d' % i] for i in range(len(f.files))]
    lasagne.layers.set_all_param_values(l_out_softmax_reshaped, param_values,
                                        trainable=True)
    output = lasagne.layers.get_output(l_out_softmax_reshaped)
    return l_in, output
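# The snippet below is a minimal usage sketch, not part of the original code:
# it assumes Theano is importable and that 'CTC_model.npz' is available, and
# shows how the symbolic graph returned by getTrainedRNN() could be compiled
# into a prediction function.
import theano

l_in, output = getTrainedRNN()
# Map a batch of feature sequences (batch, time, 39) to per-frame class probabilities.
predict = theano.function([l_in.input_var], output)
# probs = predict(features)  # features: float32 array of shape (batch, time, 39)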
def test_recurrent_unroll_scan_bck():
    num_batch, seq_len, n_features1 = 2, 3, 4
    num_units = 2
    x = T.tensor3()
    in_shp = (num_batch, seq_len, n_features1)
    l_inp = InputLayer(in_shp)
    x_in = np.random.random(in_shp).astype('float32')

    # need to set random seed.
    lasagne.random.get_rng().seed(1234)
    l_rec_scan = RecurrentLayer(l_inp, num_units=num_units, backwards=True,
                                unroll_scan=False)
    lasagne.random.get_rng().seed(1234)
    l_rec_unroll = RecurrentLayer(l_inp, num_units=num_units, backwards=True,
                                  unroll_scan=True)
    output_scan = helper.get_output(l_rec_scan, x)
    output_unrolled = helper.get_output(l_rec_unroll, x)

    output_scan_val = output_scan.eval({x: x_in})
    output_unrolled_val = output_unrolled.eval({x: x_in})

    np.testing.assert_almost_equal(output_scan_val, output_unrolled_val)
def test_recurrent_unroll_scan_fwd():
    num_batch, seq_len, n_features1 = 2, 3, 4
    num_units = 2
    in_shp = (num_batch, seq_len, n_features1)
    l_inp = InputLayer(in_shp)
    l_mask_inp = InputLayer(in_shp[:2])
    x_in = np.random.random(in_shp).astype('float32')
    mask_in = np.ones(in_shp[:2]).astype('float32')

    # need to set random seed.
    lasagne.random.get_rng().seed(1234)
    l_rec_scan = RecurrentLayer(l_inp, num_units=num_units, backwards=False,
                                unroll_scan=False, mask_input=l_mask_inp)
    lasagne.random.get_rng().seed(1234)
    l_rec_unroll = RecurrentLayer(l_inp, num_units=num_units, backwards=False,
                                  unroll_scan=True, mask_input=l_mask_inp)
    output_scan = helper.get_output(l_rec_scan)
    output_unrolled = helper.get_output(l_rec_unroll)

    output_scan_val = output_scan.eval(
        {l_inp.input_var: x_in, l_mask_inp.input_var: mask_in})
    output_unrolled_val = output_unrolled.eval(
        {l_inp.input_var: x_in, l_mask_inp.input_var: mask_in})

    np.testing.assert_almost_equal(output_scan_val, output_unrolled_val)
def test_recurrent_precompute():
    num_batch, seq_len, n_features1 = 2, 3, 4
    num_units = 2
    in_shp = (num_batch, seq_len, n_features1)
    l_inp = InputLayer(in_shp)
    l_mask_inp = InputLayer(in_shp[:2])
    x_in = np.random.random(in_shp).astype('float32')
    mask_in = np.ones((num_batch, seq_len), dtype='float32')

    # need to set random seed.
    lasagne.random.get_rng().seed(1234)
    l_rec_precompute = RecurrentLayer(l_inp, num_units=num_units,
                                      precompute_input=True,
                                      mask_input=l_mask_inp)
    lasagne.random.get_rng().seed(1234)
    l_rec_no_precompute = RecurrentLayer(l_inp, num_units=num_units,
                                         precompute_input=False,
                                         mask_input=l_mask_inp)
    output_precompute = helper.get_output(l_rec_precompute).eval(
        {l_inp.input_var: x_in, l_mask_inp.input_var: mask_in})
    output_no_precompute = helper.get_output(l_rec_no_precompute).eval(
        {l_inp.input_var: x_in, l_mask_inp.input_var: mask_in})

    np.testing.assert_almost_equal(output_precompute, output_no_precompute)
def exe_rnn(use_embedd, length, num_units, position, binominal):
    batch_size = BATCH_SIZE

    input_var = T.tensor3(name='inputs', dtype=theano.config.floatX)
    target_var = T.ivector(name='targets')

    layer_input = lasagne.layers.InputLayer(shape=(None, length, 1),
                                            input_var=input_var, name='input')
    if use_embedd:
        layer_position = construct_position_input(batch_size, length, num_units)
        layer_input = lasagne.layers.concat([layer_input, layer_position], axis=2)

    layer_rnn = RecurrentLayer(layer_input, num_units,
                               nonlinearity=nonlinearities.tanh,
                               only_return_final=True,
                               W_in_to_hid=lasagne.init.GlorotUniform(),
                               W_hid_to_hid=lasagne.init.GlorotUniform(),
                               b=lasagne.init.Constant(0.), name='RNN')
    # W = layer_rnn.W_hid_to_hid.sum()
    # U = layer_rnn.W_in_to_hid.sum()
    # b = layer_rnn.b.sum()

    layer_output = DenseLayer(layer_rnn, num_units=1,
                              nonlinearity=nonlinearities.sigmoid, name='output')

    return train(layer_output, layer_rnn, input_var, target_var, batch_size,
                 length, position, binominal)
def test_recurrent_tensor_init():
    # check if passing in a TensorVariable to hid_init works
    num_units = 5
    batch_size = 3
    seq_len = 2
    n_inputs = 4
    in_shp = (batch_size, seq_len, n_inputs)
    l_inp = InputLayer(in_shp)
    hid_init = T.matrix()
    x = T.tensor3()

    l_rec = RecurrentLayer(l_inp, num_units, learn_init=True, hid_init=hid_init)
    # check that the tensor is used
    assert hid_init == l_rec.hid_init

    # b, W_hid_to_hid and W_in_to_hid; hid_init should not be returned
    assert len(lasagne.layers.get_all_params(l_rec, trainable=True)) == 3

    # b; hid_init should not be returned
    assert len(lasagne.layers.get_all_params(l_rec, regularizable=False)) == 1

    # check that it compiles and runs
    output = lasagne.layers.get_output(l_rec, x)

    x_test = np.ones(in_shp, dtype='float32')
    hid_init_test = np.ones((batch_size, num_units), dtype='float32')

    output_val = output.eval({x: x_test, hid_init: hid_init_test})
    assert isinstance(output_val, np.ndarray)
def test_recurrent_grad():
    num_batch, seq_len, n_features = 5, 3, 10
    num_units = 6
    l_inp = InputLayer((num_batch, seq_len, n_features))
    l_rec = RecurrentLayer(l_inp, num_units=num_units)
    output = helper.get_output(l_rec)
    g = T.grad(T.mean(output), lasagne.layers.get_all_params(l_rec))
    assert isinstance(g, (list, tuple))
def test_recurrent_hid_init_layer_eval():
    # Test `hid_init` as a `Layer` with some dummy input. Compare the output of
    # a network with a `Layer` as input to `hid_init` to a network with a
    # `np.array` as input to `hid_init`
    n_units = 7
    n_test_cases = 2
    in_shp = (n_test_cases, 2, 3)
    in_h_shp = (1, n_units)

    # dummy inputs
    X_test = np.ones(in_shp, dtype=theano.config.floatX)
    Xh_test = np.ones(in_h_shp, dtype=theano.config.floatX)
    Xh_test_batch = np.tile(Xh_test, (n_test_cases, 1))

    # network with `Layer` initializer for hid_init
    l_inp = InputLayer(in_shp)
    l_inp_h = InputLayer(in_h_shp)
    l_rec_inp_layer = RecurrentLayer(l_inp, n_units, hid_init=l_inp_h,
                                     nonlinearity=None)

    # network with `np.array` initializer for hid_init
    l_rec_nparray = RecurrentLayer(l_inp, n_units, hid_init=Xh_test,
                                   nonlinearity=None)

    # copy network parameters from l_rec_inp_layer to l_rec_nparray
    l_il_param = dict([(p.name, p) for p in l_rec_inp_layer.get_params()])
    l_rn_param = dict([(p.name, p) for p in l_rec_nparray.get_params()])
    for k, v in l_rn_param.items():
        if k in l_il_param:
            v.set_value(l_il_param[k].get_value())

    # build the theano functions
    X = T.tensor3()
    Xh = T.matrix()
    output_inp_layer = lasagne.layers.get_output(l_rec_inp_layer,
                                                 {l_inp: X, l_inp_h: Xh})
    output_nparray = lasagne.layers.get_output(l_rec_nparray, {l_inp: X})

    # test both nets with dummy input
    output_val_inp_layer = output_inp_layer.eval({X: X_test,
                                                  Xh: Xh_test_batch})
    output_val_nparray = output_nparray.eval({X: X_test})

    # check output given `Layer` is the same as with `np.array`
    assert np.allclose(output_val_inp_layer, output_val_nparray)
def test_recurrent_return_final():
    num_batch, seq_len, n_features = 2, 3, 4
    num_units = 2
    in_shp = (num_batch, seq_len, n_features)
    x_in = np.random.random(in_shp).astype('float32')

    l_inp = InputLayer(in_shp)
    lasagne.random.get_rng().seed(1234)
    l_rec_final = RecurrentLayer(l_inp, num_units, only_return_final=True)
    lasagne.random.get_rng().seed(1234)
    l_rec_all = RecurrentLayer(l_inp, num_units, only_return_final=False)

    output_final = helper.get_output(l_rec_final).eval({l_inp.input_var: x_in})
    output_all = helper.get_output(l_rec_all).eval({l_inp.input_var: x_in})

    assert output_final.shape == (output_all.shape[0], output_all.shape[2])
    assert output_final.shape == lasagne.layers.get_output_shape(l_rec_final)
    assert np.allclose(output_final, output_all[:, -1])
def test_recurrent_nparams_learn_init():
    l_inp = InputLayer((2, 2, 3))
    l_rec = RecurrentLayer(l_inp, 5, learn_init=True)

    # b, W_hid_to_hid and W_in_to_hid + hid_init
    assert len(lasagne.layers.get_all_params(l_rec, trainable=True)) == 4

    # b + hid_init
    assert len(lasagne.layers.get_all_params(l_rec, regularizable=False)) == 2
def test_recurrent_hid_init_layer():
    # test that you can set hid_init to be a layer
    l_inp = InputLayer((2, 2, 3))
    l_inp_h = InputLayer((2, 5))
    l_rec = RecurrentLayer(l_inp, 5, hid_init=l_inp_h)

    x = T.tensor3()
    h = T.matrix()

    output = lasagne.layers.get_output(l_rec, {l_inp: x, l_inp_h: h})
def test_recurrent_grad_clipping():
    num_units = 5
    batch_size = 3
    seq_len = 2
    n_inputs = 4
    in_shp = (batch_size, seq_len, n_inputs)
    l_inp = InputLayer(in_shp)
    x = T.tensor3()
    l_rec = RecurrentLayer(l_inp, num_units, grad_clipping=1.0)
    output = lasagne.layers.get_output(l_rec, x)
def test_gradient_steps_error():
    # Check that an error is raised if gradient_steps is not -1 and
    # unroll_scan is True
    l_in = InputLayer((2, 2, 3))
    with pytest.raises(ValueError):
        RecurrentLayer(l_in, 5, gradient_steps=3, unroll_scan=True)

    with pytest.raises(ValueError):
        LSTMLayer(l_in, 5, gradient_steps=3, unroll_scan=True)

    with pytest.raises(ValueError):
        GRULayer(l_in, 5, gradient_steps=3, unroll_scan=True)
def test_unroll_none_input_error():
    # Test that a ValueError is raised if unroll_scan is True and the input
    # sequence length is specified as None.
    l_in = InputLayer((2, None, 3))
    with pytest.raises(ValueError):
        RecurrentLayer(l_in, 5, unroll_scan=True)

    with pytest.raises(ValueError):
        LSTMLayer(l_in, 5, unroll_scan=True)

    with pytest.raises(ValueError):
        GRULayer(l_in, 5, unroll_scan=True)
def test_recurrent_bck():
    num_batch, seq_len, n_features1 = 2, 3, 4
    num_units = 2
    x = T.tensor3()
    in_shp = (num_batch, seq_len, n_features1)
    l_inp = InputLayer(in_shp)
    x_in = np.ones(in_shp).astype('float32')

    # need to set random seed.
    lasagne.random.get_rng().seed(1234)
    l_rec_fwd = RecurrentLayer(l_inp, num_units=num_units, backwards=False)
    lasagne.random.get_rng().seed(1234)
    l_rec_bck = RecurrentLayer(l_inp, num_units=num_units, backwards=True)
    l_out_fwd = helper.get_output(l_rec_fwd, x)
    l_out_bck = helper.get_output(l_rec_bck, x)

    # evaluate both expressions by feeding the input tensor x
    output_fwd = l_out_fwd.eval({x: x_in})
    output_bck = l_out_bck.eval({x: x_in})

    # test that the backwards model produces the forward output in reverse time order
    np.testing.assert_almost_equal(output_fwd, output_bck[:, ::-1])
def test_recurrent_hid_init_mask():
    # test that you can set hid_init to be a layer when a mask is provided
    l_inp = InputLayer((2, 2, 3))
    l_inp_h = InputLayer((2, 5))
    l_inp_msk = InputLayer((2, 2))
    l_rec = RecurrentLayer(l_inp, 5, hid_init=l_inp_h, mask_input=l_inp_msk)

    x = T.tensor3()
    h = T.matrix()
    msk = T.matrix()

    inputs = {l_inp: x, l_inp_h: h, l_inp_msk: msk}
    output = lasagne.layers.get_output(l_rec, inputs)
def test_recurrent_variable_input_size():
    # check that seq_len and batch_size set to None work
    num_batch, n_features1 = 6, 5
    num_units = 13
    x = T.tensor3()

    in_shp = (None, None, n_features1)
    l_inp = InputLayer(in_shp)
    x_in1 = np.ones((num_batch + 1, 10, n_features1)).astype('float32')
    x_in2 = np.ones((num_batch, 15, n_features1)).astype('float32')
    l_rec = RecurrentLayer(l_inp, num_units=num_units, backwards=False)

    output = helper.get_output(l_rec, x)
    output_val1 = output.eval({x: x_in1})
    output_val2 = output.eval({x: x_in2})
def test_recurrent_return_shape():
    num_batch, seq_len, n_features1, n_features2 = 5, 3, 10, 11
    num_units = 6
    x = T.tensor4()
    in_shp = (num_batch, seq_len, n_features1, n_features2)
    l_inp = InputLayer(in_shp)
    l_rec = RecurrentLayer(l_inp, num_units=num_units)

    x_in = np.random.random(in_shp).astype('float32')
    output = helper.get_output(l_rec, x)
    output_val = output.eval({x: x_in})

    assert helper.get_output_shape(l_rec, x_in.shape) == output_val.shape
    assert output_val.shape == (num_batch, seq_len, num_units)
def rnn_model(M, K=20, hh=.0001, ep=5000, d=0, wsp=0.0001, hsp=0, spb=3, bt=0,
              al='rmsprop', t=5):
    # Copy key variables to GPU
    _M = Th.matrix('_M')

    # Input and forward transform
    I = InputLayer(shape=(None, M.shape[0]), input_var=_M)

    # First layer is the transform to a non-negative subspace
    H0 = DenseLayer(I, num_units=K, nonlinearity=lambda x: psoftplus(x, spb),
                    b=None)

    # Optional dropout
    H = DropoutLayer(H0, d)

    # Compute output
    R = RecurrentLayer(H, num_units=M.T.shape[1],
                       nonlinearity=lambda x: psoftplus(x, spb),
                       gradient_steps=t, b=None)

    # Cost function
    Ro = get_output(R) + eps
    cost = Th.mean(_M * (Th.log(_M + eps) - Th.log(Ro)) - _M + Ro) \
        + hsp * Th.mean(get_output(H0))

    # Train it using Lasagne
    opt = downhill.build(al, loss=cost, inputs=[_M], params=get_all_params(R))
    train = downhill.Dataset(M.T.astype(float32), batch_size=bt)
    er = downhill_train(opt, train, hh, ep, None)

    # Get approximation
    _r = nget(R, _M, M.T.astype(float32)).T
    _h = nget(H, _M, M.T.astype(float32)).T

    return _r, (R.W_in_to_hid.get_value(), R.W_hid_to_hid.get_value()), er, _h
def test_recurrent_nparams_hid_init_layer():
    # test that you can see layers through hid_init
    l_inp = InputLayer((2, 2, 3))
    l_inp_h = InputLayer((2, 5))
    l_inp_h_de = DenseLayer(l_inp_h, 7)
    l_rec = RecurrentLayer(l_inp, 7, hid_init=l_inp_h_de)

    # directly check the layers can be seen through hid_init
    assert lasagne.layers.get_all_layers(l_rec) == [l_inp, l_inp_h,
                                                    l_inp_h_de, l_rec]

    # b, W_hid_to_hid and W_in_to_hid + W + b
    assert len(lasagne.layers.get_all_params(l_rec, trainable=True)) == 5

    # b (recurrent) + b (dense)
    assert len(lasagne.layers.get_all_params(l_rec, regularizable=False)) == 2
def create_rnn(input_vars, num_inputs, hidden_layer_size, num_outputs):
    network = InputLayer((None, None, num_inputs), input_vars)
    batch_size_theano, seqlen, _ = network.input_var.shape
    network = GaussianNoiseLayer(network, sigma=0.05)

    for i in range(1):
        network = RecurrentLayer(network, hidden_layer_size,
                                 W_hid_to_hid=GlorotUniform(),
                                 W_in_to_hid=GlorotUniform(),
                                 b=Constant(1.0),
                                 nonlinearity=leaky_rectify,
                                 learn_init=True)
    network = ReshapeLayer(network, (-1, hidden_layer_size))
    network = DenseLayer(network, num_outputs, nonlinearity=softmax)
    network = ReshapeLayer(network, (batch_size_theano, seqlen, num_outputs))
    return network
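# A hedged usage sketch for create_rnn(), not part of the original snippet;
# the input variable name and the layer sizes below are illustrative assumptions.
import theano
import theano.tensor as T
from lasagne.layers import get_output

input_vars = T.tensor3('inputs')
network = create_rnn(input_vars, num_inputs=26, hidden_layer_size=100, num_outputs=10)
# deterministic=True disables the GaussianNoiseLayer for prediction.
probs = get_output(network, deterministic=True)
predict = theano.function([input_vars], probs)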
def main():
    input_var = T.tensor3(name='inputs', dtype=theano.config.floatX)
    target_var = T.ivector(name='targets')

    layer_input = lasagne.layers.InputLayer(shape=(None, LENGTH, 1),
                                            input_var=input_var, name='input')
    layer_rnn = RecurrentLayer(layer_input, NUM_UNITS,
                               nonlinearity=nonlinearities.tanh,
                               only_return_final=True,
                               W_in_to_hid=lasagne.init.Constant(1),
                               W_hid_to_hid=lasagne.init.Constant(2),
                               b=None, name='RNN')
    W = layer_rnn.W_hid_to_hid
    U = layer_rnn.W_in_to_hid

    output = lasagne.layers.get_output(layer_rnn)
    output = output.mean(axis=1)
    prediction = T.switch(T.gt(output, 0), 1, -1)
    acc = T.eq(prediction, target_var)
    acc = acc.sum()

    # recover the pre-activation value (inverse of tanh), then map it to a probability
    epsilon = 1e-6
    prob = 0.5 * T.log((1 + output + epsilon) / (1 - output + epsilon))
    prob = nonlinearities.sigmoid(prob)
    loss = -0.5 * ((1 + target_var) * T.log(prob)
                   + (1 - target_var) * T.log(1 - prob))
    loss = loss.sum()

    batch_size = 100
    learning_rate = 0.01
    steps_per_epoch = 1000
    params = lasagne.layers.get_all_params(layer_rnn, trainable=True)
    updates = lasagne.updates.sgd(loss, params=params,
                                  learning_rate=learning_rate)
    train_fn = theano.function([input_var, target_var],
                               [loss, acc, W, U, output], updates=updates)

    for epoch in range(3):
        print 'Epoch %d (learning rate=%.4f)' % (epoch, learning_rate)
        loss = 0.0
        correct = 0.0
        num_back = 0
        for step in range(steps_per_epoch):
            x, y = get_batch(batch_size)
            err, corr, w, u, pred = train_fn(x, y)
            # print x
            # print y
            # print pred
            loss += err
            correct += corr
            num_inst = (step + 1) * batch_size

            # update log
            sys.stdout.write("\b" * num_back)
            log_info = 'inst: %d loss: %.4f, corr: %d, acc: %.2f%%, W: %.6f, U: %.6f' % (
                num_inst, loss / num_inst, correct, correct * 100 / num_inst,
                w.sum(), u.sum())
            sys.stdout.write(log_info)
            num_back = len(log_info)
            # raw_input()

        # update training log after each epoch
        sys.stdout.write("\b" * num_back)
        assert num_inst == batch_size * steps_per_epoch
        print 'inst: %d loss: %.4f, corr: %d, acc: %.2f%%' % (
            num_inst, loss / num_inst, correct, correct * 100 / num_inst)
def get_model():
    # Prepare Theano variables for inputs and targets
    input_var = T.tensor4('inputs')
    target_var = T.matrix('targets')

    # Input layer with unspecified batch size
    layer_both_0 = InputLayer(shape=(None, 30, 64, 64), input_var=input_var)  # Z-score?

    # Shared trunk: convolution, batch normalisation and activation, twice,
    # then a padded max-pooling layer followed by a dropout layer
    layer_both_1 = batch_norm(Conv2DLayer(layer_both_0, 64, (3, 3), pad='same',
                                          nonlinearity=leaky_rectify))
    layer_both_2 = batch_norm(Conv2DLayer(layer_both_1, 64, (3, 3), pad='valid',
                                          nonlinearity=leaky_rectify))
    layer_both_3 = MaxPool2DLayer(layer_both_2, pool_size=(2, 2), stride=(2, 2),
                                  pad=(1, 1))
    layer_both_4 = DropoutLayer(layer_both_3, p=0.25)

    # Systole branch: first convolutional block (conv + batch norm + activation,
    # twice, then padded max-pooling and dropout)
    layer_systole_0 = batch_norm(Conv2DLayer(layer_both_4, 96, (3, 3), pad='same',
                                             nonlinearity=leaky_rectify))
    layer_systole_1 = batch_norm(Conv2DLayer(layer_systole_0, 96, (3, 3), pad='valid',
                                             nonlinearity=leaky_rectify))
    layer_systole_2 = MaxPool2DLayer(layer_systole_1, pool_size=(2, 2),
                                     stride=(2, 2), pad=(1, 1))
    layer_systole_3 = DropoutLayer(layer_systole_2, p=0.25)

    # Diastole branch: first convolutional block
    layer_diastole_0 = batch_norm(Conv2DLayer(layer_both_4, 96, (3, 3), pad='same',
                                              nonlinearity=leaky_rectify))
    layer_diastole_1 = batch_norm(Conv2DLayer(layer_diastole_0, 96, (3, 3), pad='valid',
                                              nonlinearity=leaky_rectify))
    layer_diastole_2 = MaxPool2DLayer(layer_diastole_1, pool_size=(2, 2),
                                      stride=(2, 2), pad=(1, 1))
    layer_diastole_3 = DropoutLayer(layer_diastole_2, p=0.25)

    # Systole branch: second convolutional block
    layer_systole_4 = batch_norm(Conv2DLayer(layer_systole_3, 128, (3, 3), pad='same',
                                             nonlinearity=leaky_rectify))
    layer_systole_5 = batch_norm(Conv2DLayer(layer_systole_4, 128, (3, 3), pad='valid',
                                             nonlinearity=leaky_rectify))
    layer_systole_6 = MaxPool2DLayer(layer_systole_5, pool_size=(2, 2),
                                     stride=(2, 2), pad=(1, 1))
    layer_systole_7 = DropoutLayer(layer_systole_6, p=0.25)

    # Diastole branch: second convolutional block
    layer_diastole_4 = batch_norm(Conv2DLayer(layer_diastole_3, 128, (3, 3), pad='same',
                                              nonlinearity=leaky_rectify))
    layer_diastole_5 = batch_norm(Conv2DLayer(layer_diastole_4, 128, (3, 3), pad='valid',
                                              nonlinearity=leaky_rectify))
    layer_diastole_6 = MaxPool2DLayer(layer_diastole_5, pool_size=(2, 2),
                                      stride=(2, 2), pad=(1, 1))
    layer_diastole_7 = DropoutLayer(layer_diastole_6, p=0.25)

    # Systole: last layers
    layer_systole_8 = FlattenLayer(layer_systole_7)
    layer_systole_9 = DenseLayer(layer_systole_8, 1024, nonlinearity=leaky_rectify)
    layer_systole_10 = DropoutLayer(layer_systole_9, p=0.5)
    layer_systole_11 = DenseLayer(layer_systole_10, 600, nonlinearity=softmax)

    # Diastole: last layers
    layer_diastole_8 = FlattenLayer(layer_diastole_7)
    layer_diastole_9 = DenseLayer(layer_diastole_8, 1024, nonlinearity=leaky_rectify)
    layer_diastole_10 = DropoutLayer(layer_diastole_9, p=0.5)
    layer_diastole_11 = DenseLayer(layer_diastole_10, 600, nonlinearity=softmax)

    # Add recurrent layer and merge layer for the output
    layer_recurrent = RecurrentLayer(ConcatLayer([layer_systole_9, layer_diastole_9]),
                                     512)
    layer_both_5 = ConcatLayer([layer_systole_11, layer_diastole_11])

    # Loss
    prediction = get_output(layer_both_5)
    loss = squared_error(prediction, target_var)
    loss = loss.mean() + regularize_layer_params(layer_systole_9, l2) \
        + regularize_layer_params(layer_diastole_9, l2)

    # Updates: Stochastic Gradient Descent (SGD) with Nesterov momentum
    params = get_all_params(layer_both_5, trainable=True)
    updates = nesterov_momentum(loss, params, learning_rate=0.01, momentum=0.9)

    # Create a loss expression for validation/testing. The crucial difference
    # here is the deterministic forward pass through the network, which
    # disables the dropout layers.
    test_prediction = get_output(layer_both_5, deterministic=True)
    test_loss = squared_error(test_prediction, target_var)
    test_loss = test_loss.mean()

    # Compile a function performing a training step on a mini-batch (by giving
    # the updates dictionary) and returning the corresponding training loss:
    train_fn = theano.function([input_var, target_var], loss, updates=updates)

    # Compile a second function computing the validation loss:
    val_fn = theano.function([input_var, target_var], test_loss)

    # Compile a third function computing the prediction:
    predict_fn = theano.function([input_var], test_prediction)

    return [layer_both_5, train_fn, val_fn, predict_fn]
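# A hedged smoke test of the functions returned by get_model(), not part of
# the original code. The shapes follow the InputLayer (None, 30, 64, 64) and
# the two concatenated 600-way outputs; the batch size of 4 is an arbitrary
# assumption.
import numpy as np

layer_out, train_fn, val_fn, predict_fn = get_model()
X_batch = np.zeros((4, 30, 64, 64), dtype='float32')
y_batch = np.zeros((4, 1200), dtype='float32')
print(train_fn(X_batch, y_batch))   # one training step, returns the training loss
print(val_fn(X_batch, y_batch))     # deterministic validation loss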
def rnn_sep(M, W1, W2, hh=.0001, ep=5000, d=0, sp=.0001, spb=3, al='rmsprop',
            t=5):
    # Get dictionary shapes
    K = [W1[0].shape[0], W2[0].shape[0]]

    # GPU cached data
    _M = theano.shared(M.T.astype(float32))
    dum = Th.vector('dum')

    # We have weights to discover
    H = theano.shared(
        sqrt(2. / (K[0] + K[1] + M.shape[1])) *
        random.rand(M.T.shape[0], K[0] + K[1]).astype(float32))
    fI = InputLayer(shape=(M.T.shape[0], K[0] + K[1]), input_var=H)

    # Split in two pathways
    fW1 = SliceLayer(fI, indices=slice(0, K[0]), axis=1)
    fW2 = SliceLayer(fI, indices=slice(K[0], K[0] + K[1]), axis=1)

    # Dropout?
    dfW1 = DropoutLayer(fW1, dum[0])
    dfW2 = DropoutLayer(fW2, dum[0])

    # Compute source modulators using previously learned dictionaries
    R1 = RecurrentLayer(dfW1, num_units=M.T.shape[1], b=None,
                        W_in_to_hid=W1[0].astype(float32),
                        W_hid_to_hid=W1[1].astype(float32),
                        nonlinearity=lambda x: psoftplus(x, spb),
                        gradient_steps=5)
    R2 = RecurrentLayer(dfW2, num_units=M.T.shape[1], b=None,
                        W_in_to_hid=W2[0].astype(float32),
                        W_hid_to_hid=W2[1].astype(float32),
                        nonlinearity=lambda x: psoftplus(x, spb),
                        gradient_steps=5)

    # Add the two approximations
    R = ElemwiseSumLayer([R1, R2])

    # Cost function
    Ro = get_output(R) + eps
    cost = (_M * (Th.log(_M + eps) - Th.log(Ro + eps)) - _M + Ro).mean() \
        + sp * Th.mean(abs(H)) + 0 * Th.mean(dum)

    # Train it using Lasagne
    opt = downhill.build(al, loss=cost, inputs=[dum], params=[H])
    train = downhill.Dataset(array([d]).astype(float32), batch_size=0)
    er = downhill_train(opt, train, hh, ep, None)

    # Get outputs
    _r = nget(R, dum, array([0]).astype(float32)).T + eps
    _r1 = nget(R1, dum, array([0]).astype(float32)).T
    _r2 = nget(R2, dum, array([0]).astype(float32)).T

    return _r, _r1, _r2, er
def test_recurrent_init_val_error():
    # check that a ValueError is raised when hid_init is a non-matrix tensor
    hid_init = T.vector()
    with pytest.raises(ValueError):
        l_rec = RecurrentLayer(InputLayer((2, 2, 3)), 5, hid_init=hid_init)
        pulse_end = pulse_start + PULSE_WIDTH
        X[batch_i, pulse_start:pulse_end, 0] = OFF
        t[batch_i, :, 0] = X[batch_i, :, 0].copy()
    X += noise()
    return X, t


X_val, t_val = gen_data()

# Configure layers
layers = [InputLayer(shape=SHAPE)]
for i in range(N_HIDDEN_LAYERS):
    layer = RecurrentLayer(
        layers[-1], N_UNITS_PER_LAYER,
        nonlinearity=tanh,
        W_in_to_hid=Normal(std=1.0 / np.sqrt(layers[-1].get_output_shape()[-1])),
        gradient_steps=100)
    layers.append(layer)
layers.append(ReshapeLayer(layers[-1],
                           (N_SEQ_PER_BATCH * SEQ_LENGTH, N_UNITS_PER_LAYER)))
layers.append(MixtureDensityLayer(layers[-1],
                                  num_units=t_val.shape[-1],
                                  num_components=N_COMPONENTS,
                                  min_sigma=0))
print("Total parameters: {}".format(sum([