import numpy as np
import theano
import theano.tensor as T

import lasagne
from lasagne.layers import InputLayer, LSTMLayer


def test_lstm_hid_init_layer_eval():
    # Test `hid_init` as a `Layer` with some dummy input. Compare the output
    # of a network with a `Layer` as input to `hid_init` to a network with a
    # `np.array` as input to `hid_init`.
    n_units = 7
    n_test_cases = 2
    in_shp = (n_test_cases, 2, 3)
    in_h_shp = (1, n_units)
    in_cell_shp = (1, n_units)

    # dummy inputs
    X_test = np.ones(in_shp, dtype=theano.config.floatX)
    Xh_test = np.ones(in_h_shp, dtype=theano.config.floatX)
    Xc_test = np.ones(in_cell_shp, dtype=theano.config.floatX)
    Xh_test_batch = np.tile(Xh_test, (n_test_cases, 1))
    Xc_test_batch = np.tile(Xc_test, (n_test_cases, 1))

    # network with `Layer` initializers for hid_init and cell_init
    l_inp = InputLayer(in_shp)
    l_inp_h = InputLayer(in_h_shp)
    l_inp_cell = InputLayer(in_cell_shp)
    l_rec_inp_layer = LSTMLayer(l_inp, n_units, hid_init=l_inp_h,
                                cell_init=l_inp_cell, nonlinearity=None)

    # network with `np.array` initializers for hid_init and cell_init
    l_rec_nparray = LSTMLayer(l_inp, n_units, hid_init=Xh_test,
                              cell_init=Xc_test, nonlinearity=None)

    # copy network parameters from l_rec_inp_layer to l_rec_nparray
    l_il_param = dict([(p.name, p) for p in l_rec_inp_layer.get_params()])
    l_rn_param = dict([(p.name, p) for p in l_rec_nparray.get_params()])
    for k, v in l_rn_param.items():
        if k in l_il_param:
            v.set_value(l_il_param[k].get_value())

    # build the theano expressions
    X = T.tensor3()
    Xh = T.matrix()
    Xc = T.matrix()
    output_inp_layer = lasagne.layers.get_output(
        l_rec_inp_layer, {l_inp: X, l_inp_h: Xh, l_inp_cell: Xc})
    output_nparray = lasagne.layers.get_output(l_rec_nparray, {l_inp: X})

    # evaluate both nets on the dummy input
    output_val_inp_layer = output_inp_layer.eval(
        {X: X_test, Xh: Xh_test_batch, Xc: Xc_test_batch})
    output_val_nparray = output_nparray.eval({X: X_test})

    # check that the output given a `Layer` matches the `np.array` case
    assert np.allclose(output_val_inp_layer, output_val_nparray)
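# A minimal usage sketch (not part of the original test; the helper name is
# illustrative): instead of calling `.eval()` on each symbolic output, the
# same comparison can be compiled once into reusable Theano functions.
def _compile_comparison_fns(X, Xh, Xc, output_inp_layer, output_nparray):
    # one function takes the sequence input plus the initial hidden/cell
    # states, the other takes only the sequence input
    f_layer_init = theano.function([X, Xh, Xc], output_inp_layer)
    f_array_init = theano.function([X], output_nparray)
    return f_layer_init, f_array_init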
def test_lasagne_ctc():
    import lasagne
    from lasagne.layers import (LSTMLayer, InputLayer, DenseLayer,
                                NonlinearityLayer, ReshapeLayer,
                                EmbeddingLayer, RecurrentLayer)
    import theano
    import theano.tensor as T
    import numpy as np
    import ctc_cost  # CTC cost module under test (assumed importable alongside this file)

    floatX = theano.config.floatX

    num_batch, input_seq_len = 1, 12
    num_classes = 5
    target_seq_len = 3
    num_rnn_units = 50

    def print_pred(y_hat):
        # greedy/best-path decoding: collapse repeats, then drop blanks
        blank_symbol = num_classes
        res = []
        for i, s in enumerate(y_hat):
            if (s != blank_symbol) and (i == 0 or s != y_hat[i - 1]):
                res += [s]
        if len(res) > 0:
            return "".join(map(str, list(res)))
        else:
            return "-" * target_seq_len

    # dummy data (unused below, kept from the original test setup)
    Y_hat = np.asarray(np.random.normal(
        0, 1, (input_seq_len, num_batch, num_classes + 1)), dtype=floatX)
    Y = np.zeros((target_seq_len, num_batch), dtype='int64')
    Y[25:, :] = 1
    Y_hat_mask = np.ones((input_seq_len, num_batch), dtype=floatX)
    Y_hat_mask[-5:] = 0
    # default blank symbol is the highest class index (5 in this case)
    Y_mask = np.asarray(np.ones_like(Y), dtype=floatX)
    X = np.random.random((num_batch, input_seq_len)).astype('int32')

    y = T.imatrix('phonemes')
    x = T.imatrix()  # batchsize, input_seq_len, features
    print("num_batch =", num_batch, "input_seq_len =", input_seq_len)
    print("num_classes =", num_classes)

    # setup Lasagne recurrent network
    # The outputs from the network are:
    #  a) output_lin_ctc: the activation before softmax,
    #     shape (batch_size, input_seq_len, num_classes + 1)
    #  b) output_softmax: the output after softmax,
    #     shape (batch_size, input_seq_len, num_classes + 1)
    l_inp = InputLayer((num_batch, input_seq_len))
    netshape = lasagne.layers.get_output_shape(l_inp)
    print("Layer l_inp shape:")
    print(netshape)
    l_emb = EmbeddingLayer(
        l_inp, input_size=num_classes + 1, output_size=num_classes + 1,
        W=np.identity(num_classes + 1).astype('float32'))
    netshape = lasagne.layers.get_output_shape(l_emb)
    print("Layer l_emb shape:")
    print(netshape)
    l_rnn = LSTMLayer(l_emb, num_units=num_rnn_units)
    netshape = lasagne.layers.get_output_shape(l_rnn)
    print("Layer l_rnn shape:")
    print(netshape)
    l_rnn_shp = ReshapeLayer(l_rnn, (num_batch * input_seq_len, num_rnn_units))
    netshape = lasagne.layers.get_output_shape(l_rnn_shp)
    print("Layer l_rnn_shp shape:")
    print(netshape)
    l_out = DenseLayer(l_rnn_shp, num_units=num_classes + 1,
                       nonlinearity=lasagne.nonlinearities.identity)  # + blank
    netshape = lasagne.layers.get_output_shape(l_out)
    print("Layer l_out shape:")
    print(netshape)
    l_out_shp = ReshapeLayer(l_out, (num_batch, input_seq_len, num_classes + 1))
    netshape = lasagne.layers.get_output_shape(l_out_shp)
    print("Layer l_out_shp shape:")
    print(netshape)

    # dimshuffle to shape format (input_seq_len, batch_size, num_classes + 1)
    # l_out_shp_ctc = lasagne.layers.DimshuffleLayer(l_out_shp, (1, 0, 2))
    l_out_softmax = NonlinearityLayer(
        l_out, nonlinearity=lasagne.nonlinearities.softmax)
    netshape = lasagne.layers.get_output_shape(l_out_softmax)
    print("Layer l_out_softmax shape:")
    print(netshape)
    l_out_softmax_shp = ReshapeLayer(
        l_out_softmax, (num_batch, input_seq_len, num_classes + 1))
    netshape = lasagne.layers.get_output_shape(l_out_softmax_shp)
    print("Layer l_out_softmax_shp shape:")
    print(netshape)

    output_lin_ctc = lasagne.layers.get_output(l_out_shp, x)
    output_softmax = lasagne.layers.get_output(l_out_softmax_shp, x)
    all_params = l_rnn.get_params(trainable=True)  # don't learn the embedding
    print("x type:", type(x))
    print("x shape", x.shape)
    print("y type:", type(y))
    print("y shape", y.shape)

    ###############
    #  GRADIENTS  #
    ###############

    # the CTC cross entropy between y and the linear output of the network,
    # shape (num_batch, t, num_classes + 1); here output_lin_ctc is (1, 12, 6)
    pseudo_cost = ctc_cost.pseudo_cost(y, output_lin_ctc)

    # calculate the gradients of the CTC cost wrt. the linear output of the
    # network; the true (monitored) cost uses the softmax output
    pseudo_cost_grad = T.grad(pseudo_cost.sum() / num_batch, all_params)
    true_cost = ctc_cost.cost(y, output_softmax)
    cost = T.mean(true_cost)

    sh_lr = theano.shared(lasagne.utils.floatX(0.01))
    updates = lasagne.updates.rmsprop(pseudo_cost_grad, all_params,
                                      learning_rate=sh_lr)

    # x shape (1, 12), y shape (1, 3)
    train = theano.function(
        [x, y], [output_lin_ctc, output_softmax, cost, pseudo_cost],
        updates=updates)

    # Create test dataset
    num_samples = 10
    np.random.seed(1234)

    # create simple dataset of format
    #   input   [5,5,5,5,5,2,2,2,2,2,3,3,3,3,3,....,1,1,1,1]
    #   targets [5,2,3,...,1]
    # etc...
    input_lst, output_lst = [], []
    for i in range(num_samples):
        this_input = []
        this_output = []
        for j in range(target_seq_len):
            this_class = np.random.randint(num_classes)
            this_input += [this_class] * 3 + [num_classes]
            this_output += [this_class]
        this_input += (input_seq_len - len(this_input)) * [this_input[-1]]
        input_lst.append(this_input)
        output_lst.append(this_output)
        print(this_input, this_output)
    input_arr = np.concatenate([input_lst]).astype('int32')
    y_arr = np.concatenate([output_lst]).astype('int32')
    print("y_arr shape:", y_arr.shape)

    y_mask_arr = np.ones((num_batch, target_seq_len), dtype='float32')
    input_mask_arr = np.ones((num_batch, input_seq_len), dtype='float32')

    for nn in range(1000):
        cost_lst = []
        shuffle = np.random.permutation(num_samples)
        for i in range(num_samples // num_batch):
            idx = shuffle[i * num_batch:(i + 1) * num_batch]
            _, output_softmax_val, cost, pseudo_cost_val = train(
                input_arr[idx], y_arr[idx])
            print("x =", input_arr[idx])        # x shape (1, 12)
            print("x shape", input_arr[idx].shape)
            print("y =", y_arr[idx])            # y shape (1, 3)
            print("y shape", y_arr[idx].shape)
            output_softmax_lst = output_softmax_val
            labels_lst = y_arr[idx]
            cost_lst += [cost]
            # testing.assert_almost_equal(pseudo_cost, pseudo_cost_old, decimal=4)
            # testing.assert_array_almost_equal(pseudo_cost_val, pseudo_cost_old_val)
        if (nn + 1) % 20 == 0:
            DECAY = 1.5
            new_lr = lasagne.utils.floatX(sh_lr.get_value() / DECAY)
            sh_lr.set_value(new_lr)
            print("----------------------->NEW LR:", new_lr)
        print(nn, "Mean cost:", np.mean(cost_lst))
        if (nn + 1) % 4 == 0:
            for jj in range(num_batch):
                pred = print_pred(np.argmax(output_softmax_val[jj], axis=-1))
                true = "".join(map(str, labels_lst[jj]))
                pred += (target_seq_len - len(pred)) * " "
                print("pred =", pred, "true =", true)
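# Illustrative sketch (not part of the original test; the helper name is
# hypothetical): `print_pred` above performs greedy/best-path CTC decoding --
# collapse consecutive repeats of the frame-wise argmax, then drop the blank
# symbol (blank = num_classes).
def _greedy_ctc_collapse(frame_labels, blank):
    decoded = []
    for i, s in enumerate(frame_labels):
        # keep a label only if it is not blank and not a repeat of the
        # previous frame
        if s != blank and (i == 0 or s != frame_labels[i - 1]):
            decoded.append(s)
    return decoded

# e.g. with num_classes = 5 (so blank = 5):
# [2, 2, 5, 2, 3, 3, 5, 5] -> [2, 2, 3], printed by print_pred as "223"
assert _greedy_ctc_collapse([2, 2, 5, 2, 3, 3, 5, 5], blank=5) == [2, 2, 3]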
def test_lasagne_ctc():
    import lasagne
    from lasagne.layers import (LSTMLayer, InputLayer, DenseLayer,
                                NonlinearityLayer, ReshapeLayer,
                                EmbeddingLayer, RecurrentLayer)
    import theano
    import theano.tensor as T
    import numpy as np
    import ctc_cost  # CTC cost module under test (assumed importable alongside this file)

    floatX = theano.config.floatX

    num_batch, input_seq_len = 10, 15
    num_classes = 10
    target_seq_len = 5
    num_rnn_units = 50
    input_seq_len += target_seq_len

    def print_pred(y_hat):
        # greedy/best-path decoding: collapse repeats, then drop blanks
        blank_symbol = num_classes
        res = []
        for i, s in enumerate(y_hat):
            if (s != blank_symbol) and (i == 0 or s != y_hat[i - 1]):
                res += [s]
        if len(res) > 0:
            return "".join(map(str, list(res)))
        else:
            return "-" * target_seq_len

    # dummy data (unused below, kept from the original test setup)
    Y_hat = np.asarray(np.random.normal(
        0, 1, (input_seq_len, num_batch, num_classes + 1)), dtype=floatX)
    Y = np.zeros((target_seq_len, num_batch), dtype='int64')
    Y[25:, :] = 1
    Y_hat_mask = np.ones((input_seq_len, num_batch), dtype=floatX)
    Y_hat_mask[-5:] = 0
    # default blank symbol is the highest class index (10 in this case)
    Y_mask = np.asarray(np.ones_like(Y), dtype=floatX)
    X = np.random.random((num_batch, input_seq_len)).astype('int32')

    y = T.imatrix('phonemes')
    x = T.imatrix()  # batchsize, input_seq_len, features

    # setup Lasagne recurrent network
    # The outputs from the network are:
    #  a) output_lin_ctc: the activation before softmax,
    #     shape (batch_size, input_seq_len, num_classes + 1)
    #  b) output_softmax: the output after softmax,
    #     shape (batch_size, input_seq_len, num_classes + 1)
    l_inp = InputLayer((num_batch, input_seq_len))
    l_emb = EmbeddingLayer(l_inp, input_size=num_classes + 1,
                           output_size=num_classes + 1,
                           W=np.identity(num_classes + 1).astype('float32'))
    ini = lasagne.init.Uniform(0.1)
    zero = lasagne.init.Constant(0.0)
    cell = lasagne.init.Uniform(0.1)
    l_rnn = LSTMLayer(l_emb, num_units=num_rnn_units, peepholes=True,
                      W_in_to_ingate=ini, W_hid_to_ingate=ini, b_ingate=zero,
                      W_in_to_forgetgate=ini, W_hid_to_forgetgate=ini,
                      b_forgetgate=zero, W_in_to_cell=ini, W_hid_to_cell=ini,
                      b_cell=zero, W_in_to_outgate=ini, W_hid_to_outgate=ini,
                      b_outgate=zero, cell_init=lasagne.init.Constant(0.),
                      hid_init=lasagne.init.Constant(0.),
                      W_cell_to_forgetgate=cell, W_cell_to_ingate=cell,
                      W_cell_to_outgate=cell)
    l_rnn_shp = ReshapeLayer(l_rnn, (num_batch * input_seq_len, num_rnn_units))
    l_out = DenseLayer(l_rnn_shp, num_units=num_classes + 1,
                       nonlinearity=lasagne.nonlinearities.identity)  # + blank
    l_out_shp = ReshapeLayer(l_out, (num_batch, input_seq_len, num_classes + 1))

    # dimshuffle to shape format (input_seq_len, batch_size, num_classes + 1)
    # l_out_shp_ctc = lasagne.layers.DimshuffleLayer(l_out_shp, (1, 0, 2))
    l_out_softmax = NonlinearityLayer(
        l_out, nonlinearity=lasagne.nonlinearities.softmax)
    l_out_softmax_shp = ReshapeLayer(
        l_out_softmax, (num_batch, input_seq_len, num_classes + 1))

    output_lin_ctc = lasagne.layers.get_output(l_out_shp, x)
    output_softmax = lasagne.layers.get_output(l_out_softmax_shp, x)
    all_params = l_rnn.get_params(trainable=True)  # don't learn the embedding
    print(all_params)

    ###############
    #  GRADIENTS  #
    ###############

    # the CTC cross entropy between y and the linear output of the network
    pseudo_cost = ctc_cost.pseudo_cost(y, output_lin_ctc)

    # calculate the gradients of the CTC cost wrt. the linear output of the
    # network; the true (monitored) cost uses the softmax output
    pseudo_cost_grad = T.grad(pseudo_cost.sum() / num_batch, all_params)
    true_cost = ctc_cost.cost(y, output_softmax)
    cost = T.mean(true_cost)

    sh_lr = theano.shared(lasagne.utils.floatX(0.01))
    # updates = lasagne.updates.sgd(pseudo_cost_grad, all_params, learning_rate=sh_lr)
    # updates = lasagne.updates.apply_nesterov_momentum(updates, all_params, momentum=0.9)
    updates = lasagne.updates.rmsprop(pseudo_cost_grad, all_params,
                                      learning_rate=sh_lr)

    train = theano.function(
        [x, y], [output_lin_ctc, output_softmax, cost, pseudo_cost],
        updates=updates)

    # Create test dataset
    num_samples = 1000
    np.random.seed(1234)

    # create simple dataset of format
    #   input   [5,5,5,5,5,2,2,2,2,2,3,3,3,3,3,....,1,1,1,1]
    #   targets [5,2,3,...,1]
    # etc...
    input_lst, output_lst = [], []
    for i in range(num_samples):
        this_input = []
        this_output = []
        for j in range(target_seq_len):
            this_class = np.random.randint(num_classes)
            this_input += [this_class] * 3 + [num_classes]
            this_output += [this_class]
        this_input += (input_seq_len - len(this_input)) * [this_input[-1]]
        input_lst.append(this_input)
        output_lst.append(this_output)
        print(this_input, this_output)
    input_arr = np.concatenate([input_lst]).astype('int32')
    y_arr = np.concatenate([output_lst]).astype('int32')

    y_mask_arr = np.ones((num_batch, target_seq_len), dtype='float32')
    input_mask_arr = np.ones((num_batch, input_seq_len), dtype='float32')

    for nn in range(10000):
        cost_lst = []
        shuffle = np.random.permutation(num_samples)
        for i in range(num_samples // num_batch):
            idx = shuffle[i * num_batch:(i + 1) * num_batch]
            _, output_softmax_val, cost, pseudo_cost_val = train(
                input_arr[idx], y_arr[idx])
            output_softmax_lst = output_softmax_val
            labels_lst = y_arr[idx]
            cost_lst += [cost]
            # testing.assert_almost_equal(pseudo_cost, pseudo_cost_old, decimal=4)
            # testing.assert_array_almost_equal(pseudo_cost_val, pseudo_cost_old_val)
        if (nn + 1) % 200 == 0:
            DECAY = 1.5
            new_lr = lasagne.utils.floatX(sh_lr.get_value() / DECAY)
            sh_lr.set_value(new_lr)
            print("----------------------->NEW LR:", new_lr)
        print(nn, "Mean cost:", np.mean(cost_lst))
        if (nn + 1) % 4 == 0:
            for jj in range(num_batch):
                pred = print_pred(np.argmax(output_softmax_val[jj], axis=-1))
                true = "".join(map(str, labels_lst[jj]))
                pred += (target_seq_len - len(pred)) * " "
                print(pred, true)
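# Illustrative sketch (not part of the original test; the helper name is
# hypothetical): the synthetic data generator above encodes every target class
# as three repeats followed by the blank symbol (blank = num_classes), then
# pads with the last emitted symbol up to input_seq_len.
def _encode_targets(targets, num_classes, input_seq_len):
    seq = []
    for c in targets:
        seq += [c] * 3 + [num_classes]              # e.g. 4 -> 4, 4, 4, blank
    seq += (input_seq_len - len(seq)) * [seq[-1]]   # pad to the input length
    return seq

# e.g. num_classes = 10 (blank = 10) and input_seq_len = 14:
# targets [4, 1, 0] -> [4, 4, 4, 10, 1, 1, 1, 10, 0, 0, 0, 10, 10, 10]
assert _encode_targets([4, 1, 0], 10, 14) == [4, 4, 4, 10, 1, 1, 1, 10,
                                              0, 0, 0, 10, 10, 10]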