def getTrainedRNN():
    """Read the trained parameters from file and set them on the network."""
    # TODO: Refactor so as to do this only once
    input_size = 39
    hidden_size = 50
    num_output_classes = 29
    learning_rate = 0.001
    output_size = num_output_classes + 1
    batch_size = None
    input_seq_length = None
    gradient_clipping = 5

    l_in = InputLayer(shape=(batch_size, input_seq_length, input_size))
    n_batch, n_time_steps, n_features = l_in.input_var.shape
    # Unnecessary in this version. Just collecting the info so that we can
    # reshape the output back to the original shape.
    # h_1 = DenseLayer(l_in, num_units=hidden_size, nonlinearity=clipped_relu)
    l_rec_forward = RecurrentLayer(l_in, num_units=hidden_size,
                                   grad_clipping=gradient_clipping,
                                   nonlinearity=clipped_relu)
    l_rec_backward = RecurrentLayer(l_in, num_units=hidden_size,
                                    grad_clipping=gradient_clipping,
                                    nonlinearity=clipped_relu, backwards=True)
    l_rec_accumulation = ElemwiseSumLayer([l_rec_forward, l_rec_backward])
    l_rec_reshaped = ReshapeLayer(l_rec_accumulation, (-1, hidden_size))
    l_h2 = DenseLayer(l_rec_reshaped, num_units=hidden_size,
                      nonlinearity=clipped_relu)
    l_out = DenseLayer(l_h2, num_units=output_size,
                       nonlinearity=lasagne.nonlinearities.linear)
    l_out_reshaped = ReshapeLayer(l_out, (n_batch, n_time_steps, output_size))  # Reshaping back
    l_out_softmax = NonlinearityLayer(l_out,
                                      nonlinearity=lasagne.nonlinearities.softmax)
    l_out_softmax_reshaped = ReshapeLayer(l_out_softmax,
                                          (n_batch, n_time_steps, output_size))

    with np.load('CTC_model.npz') as f:
        param_values = [f['arr_%d' % i] for i in range(len(f.files))]
    lasagne.layers.set_all_param_values(l_out_softmax_reshaped, param_values,
                                        trainable=True)
    output = lasagne.layers.get_output(l_out_softmax_reshaped)
    return l_in, output
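# Minimal usage sketch (added for illustration, not from the original source):
# getTrainedRNN returns the symbolic input layer and output expression, so a caller
# would typically compile a Theano prediction function from them. The helper name and
# the `features` array (shape (batch, time, 39), float32) are assumptions.
def _predict_with_trained_rnn(features):
    l_in, output = getTrainedRNN()
    predict_fn = theano.function([l_in.input_var], output,
                                 allow_input_downcast=True)
    return predict_fn(features)  # per-frame posteriors over the 30 CTC labels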
def create_blstm_dropout(input_vars, mask_vars, num_inputs, hidden_layer_size,
                         num_outputs, dropout=0.2, noise=0.2):
    network = InputLayer((None, None, num_inputs), input_vars)
    mask = InputLayer((None, None), mask_vars)
    batch_size_theano, seqlen, _ = network.input_var.shape

    network = GaussianNoiseLayer(network, sigma=noise)
    for i in range(4):
        forward = LSTMLayer(network, hidden_layer_size, mask_input=mask,
                            learn_init=True)
        backward = LSTMLayer(network, hidden_layer_size, mask_input=mask,
                             learn_init=True, backwards=True)
        network = DropoutLayer(
            GaussianNoiseLayer(ElemwiseSumLayer([forward, backward]), noise),
            dropout)

    network = ReshapeLayer(network, (-1, hidden_layer_size))
    network = DenseLayer(network, num_outputs, nonlinearity=softmax)
    network = ReshapeLayer(network, (batch_size_theano, seqlen, num_outputs))
    return network
def build_reconst_net(hidden_rep, embedding, n_feats, gamma, softmax_size=None):
    """
    softmax_size : int or None
        If None, a standard rectifier activation function will be used.
        If an int is provided, a group softmax will be applied over the outputs,
        with the size of the groups being given by the value of `softmax_size`.
    """
    # Reconstruct the input using dec_feat_emb
    if gamma > 0:
        if softmax_size is None:
            reconst_net = DenseLayer(hidden_rep, num_units=n_feats,
                                     W=embedding.T, nonlinearity=rectify)
        else:
            """
            # Code for convolution decoder
            reconst_net = ReshapeLayer(hidden_rep, (128, 8, n_feats / softmax_size))
            reconst_net = Conv1DLayer(reconst_net, 3, 9, pad="same", nonlinearity=None)
            reconst_net = DimshuffleLayer(reconst_net, (0, 2, 1))
            """
            reconst_net = DenseLayer(hidden_rep, num_units=n_feats,
                                     W=embedding.T, nonlinearity=linear)
            reconst_net = ReshapeLayer(reconst_net, (-1, softmax_size))
            reconst_net = NonlinearityLayer(reconst_net, softmax)
            reconst_net = ReshapeLayer(reconst_net, (-1, n_feats))
    else:
        reconst_net = None

    return reconst_net
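# Illustrative sketch (added, not from the original source; the helper name is
# hypothetical): the group softmax above works by folding each row of `n_feats`
# outputs into rows of `softmax_size` units, applying softmax within each group,
# and unfolding again. A standalone equivalent of that reshape pattern:
def group_softmax_layers(incoming, n_feats, softmax_size):
    l = ReshapeLayer(incoming, (-1, softmax_size))   # (batch * n_groups, softmax_size)
    l = NonlinearityLayer(l, softmax)                # softmax within each group
    return ReshapeLayer(l, (-1, n_feats))            # back to (batch, n_feats)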
def __init__(self, steps=1, num_layers=2, num_units=32, eps=1e-2):
    self.X, self.Z = T.fvectors('X', 'Z')
    self.P, self.Q, self.R = T.fmatrices('P', 'Q', 'R')
    self.dt = T.scalar('dt')
    self.matrix_inv = T.nlinalg.MatrixInverse()

    self.ar = AutoRegressiveModel(steps=steps, num_layers=num_layers,
                                  num_units=num_units, eps=eps)

    l = InputLayer(input_var=self.X, shape=(steps,))
    l = ReshapeLayer(l, shape=(1, steps))
    l = self.ar.network(l)
    l = ReshapeLayer(l, shape=(1,))
    self.l_ = l
    self.f_ = get_output(self.l_)

    self.X_ = T.concatenate([self.f_, T.dot(T.eye(steps)[:-1], self.X)], axis=0)
    self.fX_ = G.jacobian(self.X_.flatten(), self.X)
    self.P_ = T.dot(T.dot(self.fX_, self.P), T.transpose(self.fX_)) + \
        T.dot(T.dot(T.eye(steps)[:, 0:1], self.dt * self.Q), T.eye(steps)[0:1, :])

    self.h = T.dot(T.eye(steps)[0:1], self.X_)
    self.y = self.Z - self.h
    self.hX_ = G.jacobian(self.h, self.X_)
    self.S = T.dot(T.dot(self.hX_, self.P_), T.transpose(self.hX_)) + self.R
    self.K = T.dot(T.dot(self.P_, T.transpose(self.hX_)), self.matrix_inv(self.S))
    self.X__ = self.X_ + T.dot(self.K, self.y)
    self.P__ = T.dot(T.identity_like(self.P) - T.dot(self.K, self.hX_), self.P_)

    self.prediction = theano.function(
        inputs=[self.X, self.P, self.Q, self.dt],
        outputs=[self.X_, self.P_],
        allow_input_downcast=True)
    self.update = theano.function(
        inputs=[self.X, self.Z, self.P, self.Q, self.R, self.dt],
        outputs=[self.X__, self.P__],
        allow_input_downcast=True)
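# Usage sketch (added for illustration; function and variable names are assumptions):
# the compiled `prediction` / `update` functions appear to implement one extended-
# Kalman-style cycle over a sliding window of `steps` past values, with the learned
# autoregressive network providing the forecast of the next value.
def run_filter(filt, observations, Q, R, dt, steps=1):
    X = np.zeros(steps, dtype='float32')        # state window
    P = np.eye(steps, dtype='float32')          # state covariance
    for z in observations:                      # stream of scalar measurements
        X, P = filt.prediction(X, P, Q, dt)                          # propagate
        X, P = filt.update(X, np.array([z], 'float32'), P, Q, R, dt)  # correct
    return X, P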
def rnn_orig(input_var, seq_len, sz=51):
    def add_shapes(sh1, sh2, axis=2):
        if isinstance(sh2, tuple):
            return sh1[:axis] + (sh1[axis] + sh2[axis],) + sh1[axis + 1:]
        else:
            return sh1[:axis] + (sh1[axis] + sh2,) + sh1[axis + 1:]

    ret = {}
    ret['input'] = in_layer = InputLayer((None, seq_len, 2, sz, sz), input_var)
    ret['in_to_hid'] = in_to_hid = Conv2DLayer(InputLayer((None, 2, sz, sz)),
                                               16, 7, pad=3, nonlinearity=sigmoid)
    ret['post_concat'] = post_concat = Conv2DLayer(
        InputLayer(add_shapes(in_to_hid.output_shape, 32, 1)),
        32, 7, pad=3, nonlinearity=sigmoid)
    ret['hid_to_hid'] = hid_to_hid = NonlinearityLayer(
        InputLayer(post_concat.output_shape), nonlinearity=None)
    ret['rec'] = f = crl.ConcatRecurrentLayer(in_layer, in_to_hid, hid_to_hid,
                                              post_concat)
    ret['rec_resh'] = f = ReshapeLayer(f, (-1, [2], [3], [4]))
    ret['y_pre'] = f = Conv2DLayer(f, 1, 7, pad=3, nonlinearity=sigmoid)
    ret['output'] = f = ReshapeLayer(f, (-1, seq_len, [1], [2], [3]))

    return ret, nn.layers.get_output(ret['output']), nn.layers.get_output(
        ret['output'], deterministic=True)
def __init__(self):
    print("Initialising network...")
    import theano
    import theano.tensor as T
    import lasagne
    from lasagne.layers import (InputLayer, LSTMLayer, ReshapeLayer,
                                ConcatLayer, DenseLayer)
    theano.config.compute_test_value = 'raise'

    # Construct LSTM RNN: One LSTM layer and one dense output layer
    l_in = InputLayer(shape=input_shape)

    # setup fwd and bck LSTM layer.
    l_fwd = LSTMLayer(
        l_in, N_HIDDEN, backwards=False, learn_init=True, peepholes=True)
    l_bck = LSTMLayer(
        l_in, N_HIDDEN, backwards=True, learn_init=True, peepholes=True)

    # concatenate forward and backward LSTM layers
    concat_shape = (N_SEQ_PER_BATCH * SEQ_LENGTH, N_HIDDEN)
    l_fwd_reshape = ReshapeLayer(l_fwd, concat_shape)
    l_bck_reshape = ReshapeLayer(l_bck, concat_shape)
    l_concat = ConcatLayer([l_fwd_reshape, l_bck_reshape], axis=1)

    l_recurrent_out = DenseLayer(l_concat, num_units=N_OUTPUTS,
                                 nonlinearity=None)
    l_out = ReshapeLayer(l_recurrent_out, output_shape)

    input = T.tensor3('input')
    target_output = T.tensor3('target_output')

    # add test values
    input.tag.test_value = rand(*input_shape).astype(theano.config.floatX)
    target_output.tag.test_value = rand(*output_shape).astype(theano.config.floatX)

    print("Compiling Theano functions...")
    # Cost = mean squared error
    cost = T.mean((l_out.get_output(input) - target_output)**2)

    # Use NAG for training
    all_params = lasagne.layers.get_all_params(l_out)
    updates = lasagne.updates.nesterov_momentum(cost, all_params, LEARNING_RATE)

    # Theano functions for training, getting output, and computing cost
    self.train = theano.function(
        [input, target_output], cost,
        updates=updates, on_unused_input='warn', allow_input_downcast=True)
    self.y_pred = theano.function(
        [input], l_out.get_output(input),
        on_unused_input='warn', allow_input_downcast=True)
    self.compute_cost = theano.function(
        [input, target_output], cost,
        on_unused_input='warn', allow_input_downcast=True)

    print("Done initialising network.")
def TransitionalNormalizeLayer(inputs, n_directions):
    """
    Performs a 1x1 convolution followed by a normalization over the direction
    dimension. The output has shape (batch_size, n_directions**2, n_rows, n_cols).
    """
    l = Conv2DLayer(inputs, n_directions**2, filter_size=1, nonlinearity=linear,
                    W=HeUniform(gain='relu'), pad='same', flip_filters=False,
                    stride=1)

    # We perform the normalization in 2 steps:
    # 1. Reshape from (batch_size, n_directions**2, n_rows, n_cols)
    #    to (batch_size * n_directions * n_rows * n_cols, n_directions)
    # 2. Apply the normalization
    batch_size, n_channels, n_rows, n_cols = get_output(l).shape
    l = ReshapeLayer(l, (batch_size, n_directions, n_directions, n_rows, n_cols))
    l = DimshuffleLayer(l, (0, 1, 3, 4, 2))
    l = ReshapeLayer(l, (batch_size * n_directions * n_rows * n_cols, n_directions))
    l = NormalizeLayer(l)
    l = ReshapeLayer(l, (batch_size, n_directions, n_rows, n_cols, n_directions))
    l = DimshuffleLayer(l, (0, 1, 4, 2, 3))
    l = ReshapeLayer(l, (batch_size, n_channels, n_rows, n_cols))
    return l
def SoftmaxLayer(inputs, n_classes):
    """
    Performs 1x1 convolution followed by softmax nonlinearity.
    The output will have the shape (batch_size, n_classes, n_rows, n_cols)
    """
    l = Conv2DLayer(inputs, n_classes, filter_size=1, nonlinearity=linear,
                    W=HeUniform(gain='relu'), pad='same', flip_filters=False,
                    stride=1)

    # We perform the softmax nonlinearity in 2 steps:
    # 1. Reshape from (batch_size, n_classes, n_rows, n_cols)
    #    to (batch_size * n_rows * n_cols, n_classes)
    # 2. Apply softmax
    l = DimshuffleLayer(l, (0, 2, 3, 1))
    batch_size, n_rows, n_cols, _ = get_output(l).shape
    l = ReshapeLayer(l, (batch_size * n_rows * n_cols, n_classes))
    l = NonlinearityLayer(l, softmax)
    l = ReshapeLayer(l, (batch_size, n_rows, n_cols, n_classes))
    l = DimshuffleLayer(l, (0, 3, 1, 2))
    l = ReshapeLayer(l, (batch_size, n_classes, n_rows, n_cols))
    return l
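# Usage sketch (added, not from the original source; the function name is hypothetical
# and `input_layer` is assumed to have been built with an `input_var`): the layer
# returned by SoftmaxLayer carries per-pixel class probabilities, so a segmentation
# map is simply the argmax over the channel axis.
def predict_segmentation(input_layer, n_classes, images):
    probs = lasagne.layers.get_output(SoftmaxLayer(input_layer, n_classes),
                                      deterministic=True)
    pred_fn = theano.function([input_layer.input_var], probs.argmax(axis=1))
    return pred_fn(images)  # (batch, n_rows, n_cols) integer class map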
def model(input_var, batch_size=1, size=1, num_units=100, memory_shape=(128, 20)):
    # Input Layer
    l_input = InputLayer((batch_size, None, size + 1), input_var=input_var)
    _, seqlen, _ = l_input.input_var.shape

    # Neural Turing Machine Layer
    memory = Memory(memory_shape, name='memory',
                    memory_init=lasagne.init.Constant(1e-6), learn_init=False)
    controller = DenseController(l_input, memory_shape=memory_shape,
                                 num_units=num_units, num_reads=1,
                                 nonlinearity=lasagne.nonlinearities.rectify,
                                 name='controller')
    heads = [
        WriteHead(controller, num_shifts=3, memory_shape=memory_shape,
                  name='write', learn_init=False,
                  nonlinearity_key=lasagne.nonlinearities.rectify,
                  nonlinearity_add=lasagne.nonlinearities.rectify),
        ReadHead(controller, num_shifts=3, memory_shape=memory_shape,
                 name='read', learn_init=False,
                 nonlinearity_key=lasagne.nonlinearities.rectify)
    ]
    l_ntm = NTMLayer(l_input, memory=memory, controller=controller, heads=heads)

    # Output Layer
    l_output_reshape = ReshapeLayer(l_ntm, (-1, num_units))
    l_output_dense = DenseLayer(l_output_reshape, num_units=size + 1,
                                nonlinearity=lasagne.nonlinearities.sigmoid,
                                name='dense')
    l_output = ReshapeLayer(l_output_dense, (batch_size, seqlen, size + 1))

    return l_output, l_ntm
def build_res_rnn_network(rnnmodel):
    net = {}
    net['input'] = InputLayer((batch_size, seq_len, feature_size))
    net['rnn0'] = DimshuffleLayer(net['input'], (1, 0, 2))
    for l in range(1, num_layers + 1):
        hidini = 0
        if l == num_layers:
            hidini = U_lowbound

        net['rnn%d' % (l - 1)] = ReshapeLayer(net['rnn%d' % (l - 1)],
                                              (batch_size * seq_len, -1))
        net['rnn%d' % (l - 1)] = DenseLayer(net['rnn%d' % (l - 1)], hidden_units,
                                            W=ini_W,
                                            b=Uniform(range=(0, args.ini_b)),
                                            nonlinearity=None)  # W=Uniform(ini_rernn_in_to_hid),
        net['rnn%d' % (l - 1)] = ReshapeLayer(net['rnn%d' % (l - 1)],
                                              (seq_len, batch_size, -1))
        net['rnn%d' % l] = net['rnn%d' % (l - 1)]

        if not args.use_bn_afterrnn:
            net['rnn%d' % l] = BatchNormLayer(net['rnn%d' % l], axes=(0, 1),
                                              beta=Uniform(range=(0, args.ini_b)))

        net['rnn%d' % l] = rnnmodel(net['rnn%d' % l], hidden_units,
                                    W_hid_to_hid=Uniform(range=(hidini, U_bound)),
                                    nonlinearity=act, only_return_final=False,
                                    grad_clipping=args.gradclipvalue)

        if args.use_bn_afterrnn:
            net['rnn%d' % l] = BatchNormLayer(net['rnn%d' % l], axes=(0, 1))

        if l == num_layers:
            net['rnn%d' % num_layers] = lasagne.layers.SliceLayer(
                net['rnn%d' % num_layers], indices=-1, axis=0)

    net['out'] = DenseLayer(net['rnn%d' % num_layers], outputclass,
                            nonlinearity=softmax)
    return net
def smooth_convolution(prediction, n_classes):
    from lasagne.layers import Conv1DLayer as ConvLayer
    from lasagne.layers import DimshuffleLayer, ReshapeLayer

    prediction = ReshapeLayer(prediction, (-1, 200, n_classes))
    # channels first
    prediction = DimshuffleLayer(prediction, (0, 2, 1))
    input_size = lasagne.layers.get_output(prediction).shape
    # reshape to put each channel in the batch dimensions, to filter each
    # channel independently
    prediction = ReshapeLayer(prediction,
                              (T.prod(input_size[0:2]), 1, input_size[2]))

    trans_filter = np.tile(np.array([0, -1., 1.]).astype('float32'), (1, 1, 1))
    convolved = ConvLayer(prediction, num_filters=1, filter_size=3,
                          stride=1, b=None, nonlinearity=None,
                          W=trans_filter, pad='same')

    # reshape back
    convolved = ReshapeLayer(convolved, input_size)
    return convolved
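# Note (added, not from the original source; the helper name is hypothetical): the
# fixed kernel [0, -1, 1] above is a first-difference filter, so the convolved output
# measures frame-to-frame changes in each class probability track (up to the sign and
# offset conventions of the convolution). A tiny numpy check of that behaviour:
def _first_difference_demo():
    probs = np.array([0.1, 0.4, 0.9, 1.0], dtype='float32')
    kernel = np.array([0., -1., 1.], dtype='float32')
    # each entry is the difference between consecutive probabilities
    return np.array([np.dot(probs[t:t + 3], kernel)
                     for t in range(len(probs) - 2)])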
def inverse_convolution_strided_layer(input_layer, original_layer):
    return ReshapeLayer(
        SliceLayer(
            TransposedConv2DLayer(
                ReshapeLayer(input_layer,
                             (-1, original_layer.output_shape[1], 1,
                              original_layer.output_shape[2])),
                original_layer.input_layer.num_filters,
                (1, original_layer.filter_size[0]),
                stride=(1, original_layer.stride[0]),
                crop=(0, 0),
                flip_filters=original_layer.flip_filters,
                nonlinearity=nonlinearities.leaky_rectify),
            indices=slice(None, -1), axis=-1),
        (-1, original_layer.input_shape[1], original_layer.input_shape[2]))
def build_res_stafg():
    net = collections.OrderedDict()
    # INPUTS----------------------------------------
    net['sent_input'] = InputLayer((None, CFG['SEQUENCE LENGTH']),
                                   input_var=T.imatrix())
    net['word_emb'] = EmbeddingLayer(net['sent_input'],
                                     input_size=CFG['VOCAB SIZE'] + 3,
                                     output_size=CFG['WORD VECTOR SIZE'],
                                     W=np.copy(CFG['wemb']))
    net['vis_input'] = InputLayer((None, CFG['VISUAL LENGTH'], CFG['VIS SIZE']))

    # key words model-------------------------------------
    net['vis_mean_pool'] = FeaturePoolLayer(net['vis_input'], CFG['VISUAL LENGTH'],
                                            pool_function=T.mean)
    net['ctx_vis_reshp'] = ReshapeLayer(net['vis_mean_pool'], (-1, CFG['VIS SIZE']))
    net['global_vis'] = DenseLayer(net['ctx_vis_reshp'],
                                   num_units=CFG['EMBEDDING SIZE'],
                                   nonlinearity=linear)
    net['key_words_prob'] = DenseLayer(DropoutLayer(net['global_vis']),
                                       num_units=CFG['VOCAB SIZE'] + 3,
                                       nonlinearity=sigmoid)

    # gru model--------------------------------------
    net['mask_input'] = InputLayer((None, CFG['SEQUENCE LENGTH']))
    net['sgru'] = GRULayer(net['word_emb'], num_units=CFG['EMBEDDING SIZE'],
                           mask_input=net['mask_input'],
                           hid_init=net['global_vis'])
    net['sta_gru'] = CTXAttentionGRULayer(
        [net['sgru'], net['vis_input'], net['global_vis']],
        num_units=CFG['EMBEDDING SIZE'],
        mask_input=net['mask_input'])
    # fuse the attention GRU stream with the plain GRU stream
    net['fusion'] = DropoutLayer(ConcatLayer([net['sta_gru'], net['sgru']], axis=2),
                                 p=0.5)
    net['fusion_reshp'] = ReshapeLayer(net['fusion'],
                                       (-1, CFG['EMBEDDING SIZE'] * 2))
    net['word_prob'] = DenseLayer(net['fusion_reshp'],
                                  num_units=CFG['VOCAB SIZE'] + 3,
                                  nonlinearity=softmax)
    net['sent_prob'] = ReshapeLayer(net['word_prob'],
                                    (-1, CFG['SEQUENCE LENGTH'], CFG['VOCAB SIZE'] + 3))
    return net
def build_rnn(conv_input_var, seq_input_var, conv_shape, word_dims, n_hid,
              lstm_layers):
    ret = {}
    ret['seq_input'] = seq_layer = InputLayer((None, None, word_dims),
                                              input_var=seq_input_var)
    batchsize, seqlen, _ = seq_layer.input_var.shape
    ret['seq_resh'] = seq_layer = ReshapeLayer(seq_layer, shape=(-1, word_dims))
    ret['seq_proj'] = seq_layer = DenseLayer(seq_layer, num_units=n_hid)
    ret['seq_resh2'] = seq_layer = ReshapeLayer(seq_layer,
                                                shape=(batchsize, seqlen, n_hid))
    ret['conv_input'] = conv_layer = InputLayer(conv_shape,
                                                input_var=conv_input_var)
    ret['conv_proj'] = conv_layer = DenseLayer(conv_layer, num_units=n_hid)
    ret['conv_resh'] = conv_layer = ReshapeLayer(conv_layer, shape=([0], 1, -1))
    ret['input_concat'] = layer = ConcatLayer([conv_layer, seq_layer], axis=1)
    for lstm_layer_idx in xrange(lstm_layers):
        ret['lstm_{}'.format(lstm_layer_idx)] = layer = LSTMLayer(layer, n_hid)
    ret['out_resh'] = layer = ReshapeLayer(layer, shape=(-1, n_hid))
    ret['output_proj'] = layer = DenseLayer(layer, num_units=word_dims,
                                            nonlinearity=log_softmax)
    ret['output'] = layer = ReshapeLayer(layer,
                                         shape=(batchsize, seqlen + 1, word_dims))
    ret['output'] = layer = SliceLayer(layer, indices=slice(None, -1), axis=1)
    return ret
def create_model(dbn, input_shape, input_var, mask_shape, mask_var,
                 lstm_size=250, win=T.iscalar('theta')):
    dbn_layers = dbn.get_all_layers()
    weights = []
    biases = []
    weights.append(dbn_layers[1].W)
    weights.append(dbn_layers[2].W)
    weights.append(dbn_layers[3].W)
    weights.append(dbn_layers[4].W)
    biases.append(dbn_layers[1].b)
    biases.append(dbn_layers[2].b)
    biases.append(dbn_layers[3].b)
    biases.append(dbn_layers[4].b)

    gate_parameters = Gate(
        W_in=las.init.Orthogonal(), W_hid=las.init.Orthogonal(),
        b=las.init.Constant(0.))
    cell_parameters = Gate(
        W_in=las.init.Orthogonal(), W_hid=las.init.Orthogonal(),
        # Setting W_cell to None denotes that no cell connection will be used.
        W_cell=None, b=las.init.Constant(0.),
        # By convention, the cell nonlinearity is tanh in an LSTM.
        nonlinearity=tanh)

    l_in = InputLayer(input_shape, input_var, 'input')
    l_mask = InputLayer(mask_shape, mask_var, 'mask')

    symbolic_batchsize = l_in.input_var.shape[0]
    symbolic_seqlen = l_in.input_var.shape[1]

    l_reshape1 = ReshapeLayer(l_in, (-1, input_shape[-1]), name='reshape1')
    l_encoder = create_pretrained_encoder(weights, biases, l_reshape1)
    encoder_len = las.layers.get_output_shape(l_encoder)[-1]

    l_reshape2 = ReshapeLayer(l_encoder,
                              (symbolic_batchsize, symbolic_seqlen, encoder_len),
                              name='reshape2')
    l_delta = DeltaLayer(l_reshape2, win, name='delta')

    l_lstm, l_lstm_back = create_blstm(l_delta, l_mask, lstm_size,
                                       cell_parameters, gate_parameters, 'lstm1')
    l_sum1 = ElemwiseSumLayer([l_lstm, l_lstm_back], name='sum1')
    l_forward_slice1 = SliceLayer(l_sum1, -1, 1, name='slice1')

    # Now, we can apply feed-forward layers as usual.
    # We want the network to predict a classification for the sequence,
    # so we'll use the number of classes.
    l_out = DenseLayer(l_forward_slice1, num_units=26,
                       nonlinearity=las.nonlinearities.softmax, name='output')

    return l_out
def build_generator(input_noise=None, input_text=None):
    from lasagne.layers import InputLayer, ReshapeLayer, DenseLayer, batch_norm, ConcatLayer
    from lasagne.nonlinearities import sigmoid
    # input: 100dim
    layer = InputLayer(shape=(None, noise_dim), input_var=input_noise)
    layer2 = InputLayer(shape=(None, 1, 300), input_var=input_text)
    layer2 = ReshapeLayer(layer2, ([0], 1 * 300))
    layer = ConcatLayer([layer, layer2], axis=1)

    # increasing order of fc-layer
    for i in range(len(fclayer_list)):
        layer = batch_norm(DenseLayer(layer, fclayer_list[i]))

    newPS = 28
    if stride != 1:
        newPS = 28 / (2**len(layer_list))

    layer = batch_norm(DenseLayer(layer, layer_list[0] * newPS * newPS))
    layer = ReshapeLayer(layer, ([0], layer_list[0], newPS, newPS))
    for i in range(1, len(layer_list)):
        layer = batch_norm(Deconv2DLayer(layer, layer_list[i], filter_sz,
                                         stride=stride, pad=(filter_sz - 1) / 2))
    layer = Deconv2DLayer(layer, 1, filter_sz, stride=stride,
                          pad=(filter_sz - 1) / 2, nonlinearity=sigmoid)

    print("Generator output:", layer.output_shape)
    return layer
def build_lstm_decorer():
    net = collections.OrderedDict()
    net['sent_input'] = InputLayer((None, CFG['SEQUENCE LENGTH'] - 1),
                                   input_var=T.imatrix())
    net['word_emb'] = EmbeddingLayer(net['sent_input'],
                                     input_size=CFG['VOCAB SIZE'],
                                     output_size=CFG['EMBEDDING SIZE'])
    net['vis_input'] = InputLayer((None, CFG['VIS SIZE']), input_var=T.matrix())
    net['vis_emb'] = DenseLayer(net['vis_input'],
                                num_units=CFG['EMBEDDING SIZE'],
                                nonlinearity=lasagne.nonlinearities.identity)
    net['vis_emb_reshp'] = ReshapeLayer(net['vis_emb'],
                                        (-1, 1, CFG['EMBEDDING SIZE']))
    net['decorder_input'] = ConcatLayer([net['vis_emb_reshp'], net['word_emb']])
    net['feat_dropout'] = DropoutLayer(net['decorder_input'], p=0.5)
    net['mask_input'] = InputLayer((None, CFG['SEQUENCE LENGTH']))
    net['lstm'] = LSTMLayer(net['feat_dropout'],
                            num_units=CFG['EMBEDDING SIZE'],
                            mask_input=net['mask_input'], grad_clipping=5.)
    net['lstm_dropout'] = DropoutLayer(net['lstm'], p=0.5)
    net['lstm_reshp'] = ReshapeLayer(net['lstm_dropout'],
                                     (-1, CFG['EMBEDDING SIZE']))
    net['word_prob'] = DenseLayer(net['lstm_reshp'],
                                  num_units=CFG['VOCAB SIZE'] + 2,
                                  nonlinearity=softmax)
    net['sent_prob'] = ReshapeLayer(
        net['word_prob'],
        (-1, CFG['SEQUENCE LENGTH'], CFG['VOCAB SIZE'] + 2))
    return net
def __embedding_layer_TO_similarity_layer__(embedding_layer, tripletInput=True):
    net = {}
    if tripletInput:
        net['reshape'] = ReshapeLayer(embedding_layer, (-1, 3, [1]))
        # in order to keep the dim, use slice(0, 1) == array[0:1, ...]
        net['triplet_anchor'] = SliceLayer(net['reshape'], indices=0, axis=1)
        net['triplet_pos'] = SliceLayer(net['reshape'], indices=1, axis=1)
        net['triplet_neg'] = SliceLayer(net['reshape'], indices=2, axis=1)
        net['euclid_pos'] = DistanceLayer(
            [net['triplet_anchor'], net['triplet_pos']],
            Lp=2, axis=1, keepdims=True)
        net['euclid_neg'] = DistanceLayer(
            [net['triplet_anchor'], net['triplet_neg']],
            Lp=2, axis=1, keepdims=True)
        net['euclid_dist'] = ConcatLayer(
            [net['euclid_pos'], net['euclid_neg']], axis=0)
    else:
        net['reshape'] = ReshapeLayer(embedding_layer, (-1, 2, [1]))
        net['pair_1'] = SliceLayer(net['reshape'], indices=0, axis=1)
        net['pair_2'] = SliceLayer(net['reshape'], indices=1, axis=1)
        net['euclid_dist'] = DistanceLayer([net['pair_1'], net['pair_2']],
                                           Lp=2, axis=1, keepdims=True)

    # input-->output (shape 1-->1), logistic regression
    net['similarity'] = DenseLayer(net['euclid_dist'], num_units=1,
                                   nonlinearity=sigmoid)
    return net
def build_discriminator(input_img=None, input_text=None):
    from lasagne.layers import (InputLayer, Conv2DLayer, ReshapeLayer,
                                DenseLayer, batch_norm, ConcatLayer)
    from lasagne.nonlinearities import LeakyRectify, sigmoid
    lrelu = LeakyRectify(0.1)
    # input: (None, 1, 28, 28)
    layer = InputLayer(shape=(None, 1, 28, 28), input_var=input_img)
    layer2 = InputLayer(shape=(None, 1, 300), input_var=input_text)
    layer2 = ReshapeLayer(layer2, ([0], 1 * 300))

    for i in reversed(range(len(layer_list))):
        layer = batch_norm(Conv2DLayer(layer, layer_list[i], filter_sz,
                                       stride=stride, pad=(filter_sz - 1) / 2,
                                       nonlinearity=lrelu))

    newPS = 28
    if stride != 1:
        newPS = 28 / (2**len(layer_list))

    layer = ReshapeLayer(layer, ([0], layer_list[0] * newPS * newPS))
    layer = ConcatLayer([layer, layer2], axis=1)
    for i in reversed(range(len(fclayer_list))):
        layer = batch_norm(DenseLayer(layer, fclayer_list[i], nonlinearity=lrelu))
    layer = DenseLayer(layer, 1, nonlinearity=None, b=None)

    print("Discriminator output:", layer.output_shape)
    return layer
def bgr_encoder(l_in, tconv_sz, filter_dilation, num_tc_filters, dropout):
    warmup = 16

    batch_size, max_time, _, *crop_size = l_in.output_shape
    crop_size = tuple(crop_size)

    # stack pairs of small images into one batch of images
    l_r1 = ReshapeLayer(l_in, (-1, 1) + crop_size)

    # process through (siamese) CNN
    l_cnnout = wide_resnet(l_r1, d=16, k=1)

    # Concatenate feature vectors from the pairs
    feat_shape = np.asscalar(np.prod(l_cnnout.output_shape[1:]))
    l_feats = ReshapeLayer(l_cnnout, (batch_size, max_time, 2 * feat_shape))

    if dropout > 0:
        l_feats = DropoutLayer(l_feats, p=dropout)

    l_out = TemporalConv(l_feats, num_filters=num_tc_filters,
                         filter_size=tconv_sz,
                         filter_dilation=filter_dilation, pad='same',
                         b=None, nonlinearity=None)
    l_out = BatchNormLayer(l_out, axes=(0, 1))
    l_out = NonlinearityLayer(l_out, leaky_rectify)

    return {
        'l_out': l_out,
        'warmup': warmup
    }
def output_path(net, incoming_layer, n_classes, filter_size, out_nonlin):
    '''
    Build the output path (including the last conv layer, so as to have
    n_classes feature maps). Dimshuffle layers to fit with the softmax
    implementation.

    Parameters
    ----------
    Same as above
    incoming_layer : string, name of last layer from bottleneck layers
    '''

    # Final convolution (n_classes feature maps) with filter_size = 1
    net['final_conv'] = ConvLayer(net[incoming_layer], n_classes, 1)

    # DimshuffleLayer and all this stuff is necessary to fit with the softmax
    # implementation. In training, we specify layer = ['probs'] to have the
    # right layer; the 2 last reshape layers are necessary only to visualize
    # data.
    net['final_dimshuffle'] = DimshuffleLayer(net['final_conv'], (0, 2, 1))

    laySize = lasagne.layers.get_output(net['final_dimshuffle']).shape
    net['final_reshape'] = ReshapeLayer(net['final_dimshuffle'],
                                        (T.prod(laySize[0:2]), laySize[2]))
    net['probs'] = NonlinearityLayer(net['final_reshape'],
                                     nonlinearity=out_nonlin)
    net['probs_reshape'] = ReshapeLayer(net['probs'],
                                        (laySize[0], laySize[1], n_classes))
    net['probs_dimshuffle'] = DimshuffleLayer(net['probs_reshape'], (0, 2, 1))

    return net
def get_UNet(n_input_channels=1, BATCH_SIZE=None, num_output_classes=2, pad='same',
             nonlinearity=L.nonlinearities.leaky_rectify, input_dim=(128, 128),
             base_n_filters=128):
    net = OrderedDict()
    net['input'] = InputLayer((BATCH_SIZE, n_input_channels, input_dim[0], input_dim[1]))

    net['contr_1_1'] = batch_norm(ConvLayer(net['input'], base_n_filters, 3, nonlinearity=nonlinearity, pad=pad))
    net['contr_1_2'] = batch_norm(ConvLayer(net['contr_1_1'], base_n_filters, 3, nonlinearity=nonlinearity, pad=pad))
    net['pool1'] = Pool2DLayer(net['contr_1_2'], 2)

    net['contr_2_1'] = batch_norm(ConvLayer(net['pool1'], base_n_filters * 2, 3, nonlinearity=nonlinearity, pad=pad))
    net['contr_2_2'] = batch_norm(ConvLayer(net['contr_2_1'], base_n_filters * 2, 3, nonlinearity=nonlinearity, pad=pad))
    net['pool2'] = Pool2DLayer(net['contr_2_2'], 2)

    net['contr_3_1'] = batch_norm(ConvLayer(net['pool2'], base_n_filters * 4, 3, nonlinearity=nonlinearity, pad=pad))
    net['contr_3_2'] = batch_norm(ConvLayer(net['contr_3_1'], base_n_filters * 4, 3, nonlinearity=nonlinearity, pad=pad))
    net['pool3'] = Pool2DLayer(net['contr_3_2'], 2)

    net['contr_4_1'] = batch_norm(ConvLayer(net['pool3'], base_n_filters * 8, 3, nonlinearity=nonlinearity, pad=pad))
    net['contr_4_2'] = batch_norm(ConvLayer(net['contr_4_1'], base_n_filters * 8, 3, nonlinearity=nonlinearity, pad=pad))
    l = net['pool4'] = Pool2DLayer(net['contr_4_2'], 2)

    # the paper does not really describe where and how dropout is added. Feel free to try more options
    l = DropoutLayer(l, p=0.4)

    net['encode_1'] = batch_norm(ConvLayer(l, base_n_filters * 16, 3, nonlinearity=nonlinearity, pad=pad))
    net['encode_2'] = batch_norm(ConvLayer(net['encode_1'], base_n_filters * 16, 3, nonlinearity=nonlinearity, pad=pad))

    net['deconv1'] = Upscale2DLayer(net['encode_2'], 2)
    net['concat1'] = ConcatLayer([net['deconv1'], net['contr_4_2']], cropping=(None, None, "center", "center"))
    net['expand_1_1'] = batch_norm(ConvLayer(net['concat1'], base_n_filters * 8, 3, nonlinearity=nonlinearity, pad=pad))
    net['expand_1_2'] = batch_norm(ConvLayer(net['expand_1_1'], base_n_filters * 8, 3, nonlinearity=nonlinearity, pad=pad))

    net['deconv2'] = Upscale2DLayer(net['expand_1_2'], 2)
    net['concat2'] = ConcatLayer([net['deconv2'], net['contr_3_2']], cropping=(None, None, "center", "center"))
    net['expand_2_1'] = batch_norm(ConvLayer(net['concat2'], base_n_filters * 4, 3, nonlinearity=nonlinearity, pad=pad))
    net['expand_2_2'] = batch_norm(ConvLayer(net['expand_2_1'], base_n_filters * 4, 3, nonlinearity=nonlinearity, pad=pad))

    net['deconv3'] = Upscale2DLayer(net['expand_2_2'], 2)
    net['concat3'] = ConcatLayer([net['deconv3'], net['contr_2_2']], cropping=(None, None, "center", "center"))
    net['expand_3_1'] = batch_norm(ConvLayer(net['concat3'], base_n_filters * 2, 3, nonlinearity=nonlinearity, pad=pad))
    net['expand_3_2'] = batch_norm(ConvLayer(net['expand_3_1'], base_n_filters * 2, 3, nonlinearity=nonlinearity, pad=pad))

    net['deconv4'] = Upscale2DLayer(net['expand_3_2'], 2)
    net['concat4'] = ConcatLayer([net['deconv4'], net['contr_1_2']], cropping=(None, None, "center", "center"))
    net['expand_4_1'] = batch_norm(ConvLayer(net['concat4'], base_n_filters, 3, nonlinearity=nonlinearity, pad=pad))
    net['expand_4_2'] = batch_norm(ConvLayer(net['expand_4_1'], base_n_filters, 3, nonlinearity=nonlinearity, pad=pad))

    net['conv_5'] = ConvLayer(net['expand_4_2'], num_output_classes, 1, nonlinearity=None)  # (bs, nrClasses, x, y)

    net['dimshuffle'] = DimshuffleLayer(net['conv_5'], (1, 0, 2, 3))  # (nrClasses, bs, x, y)
    net['reshapeSeg'] = ReshapeLayer(net['dimshuffle'], (num_output_classes, -1))  # (nrClasses, bs*x*y)
    net['dimshuffle2'] = DimshuffleLayer(net['reshapeSeg'], (1, 0))  # (bs*x*y, nrClasses)

    # Watch out: here is another nonlinearity -> do not use layers before this layer!
    net['output_flat'] = NonlinearityLayer(net['dimshuffle2'], nonlinearity=L.nonlinearities.sigmoid)  # (bs*x*y, nrClasses)

    img_shape = net["conv_5"].output_shape
    net['output'] = ReshapeLayer(net['output_flat'], (-1, img_shape[2], img_shape[3], img_shape[1]))  # (bs, x, y, nrClasses)

    return net
def non_flattening_dense_layer(layer, mask, num_units, *args, **kwargs):
    """Lasagne dense layer which is not flattening the outputs."""
    batchsize, seqlen = mask.input_var.shape
    l_flat = ReshapeLayer(layer, (-1, [2]))
    l_dense = DenseLayer(l_flat, num_units, *args, **kwargs)
    return ReshapeLayer(l_dense, (batchsize, seqlen, -1))
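# Usage sketch (added for illustration; the layer sizes and helper name below are
# assumptions): the helper behaves like DenseLayer but keeps the
# (batch, time, features) layout, so it can sit directly between recurrent layers.
def _example_per_step_projection():
    l_in = InputLayer((None, None, 40))
    l_mask = InputLayer((None, None))
    l_rnn = LSTMLayer(l_in, 128, mask_input=l_mask)
    # project every time step from 128 to 64 units while keeping the sequence axis
    return non_flattening_dense_layer(l_rnn, l_mask, 64, nonlinearity=rectify)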
def build_indrnn_network(X_sym):
    net = {}
    net['input0'] = InputLayer((batch_size, seq_len, indim, 3), X_sym)
    net['input'] = ReshapeLayer(net['input0'], (batch_size, seq_len, indim * 3))
    net['rnn0'] = DimshuffleLayer(net['input'], (1, 0, 2))
    for l in range(1, num_layers + 1):
        hidini = 0
        if l == num_layers:
            hidini = U_lowbound

        net['rnn%d' % (l - 1)] = ReshapeLayer(net['rnn%d' % (l - 1)],
                                              (batch_size * seq_len, -1))
        net['rnn%d' % (l - 1)] = DenseLayer(net['rnn%d' % (l - 1)], hidden_units,
                                            W=ini_W,
                                            b=lasagne.init.Constant(args.ini_b),
                                            nonlinearity=None)
        net['rnn%d' % (l - 1)] = ReshapeLayer(net['rnn%d' % (l - 1)],
                                              (seq_len, batch_size, -1))
        if args.conv_drop:
            net['rnn%d' % (l - 1)] = DropoutLayer(net['rnn%d' % (l - 1)],
                                                  p=droprate, shared_axes=(0,))

        net['rnn%d' % l] = net['rnn%d' % (l - 1)]
        if not args.use_bn_afterrnn:
            net['rnn%d' % l] = BatchNormLayer(net['rnn%d' % l],
                                              beta=lasagne.init.Constant(args.ini_b),
                                              axes=(0, 1))

        net['rnn%d' % l] = rnnmodel(net['rnn%d' % l], hidden_units,
                                    W_hid_to_hid=Uniform(range=(hidini, U_bound)),
                                    nonlinearity=act, only_return_final=False,
                                    grad_clipping=gradclipvalue)

        if args.use_bn_afterrnn:
            net['rnn%d' % l] = BatchNormLayer(net['rnn%d' % l], axes=(0, 1))

        if args.use_dropout and l % args.drop_layers == 0:
            net['rnn%d' % l] = DropoutLayer(net['rnn%d' % l], p=droprate,
                                            shared_axes=(0,))

    net['rnn%d' % num_layers] = lasagne.layers.SliceLayer(net['rnn%d' % num_layers],
                                                          indices=-1, axis=0)
    net['out'] = DenseLayer(net['rnn%d' % num_layers], outputclass,
                            nonlinearity=softmax)
    return net
def build_rnn_network(rnnmodel, X_sym, hid_init_sym):
    net = {}

    net['input0'] = InputLayer((batch_size, seq_len), X_sym)
    net['input'] = lasagne.layers.EmbeddingLayer(net['input0'], outputclass,
                                                 units[0])  # W=lasagne.init.Uniform(inial_scale)
    net['rnn0'] = DimshuffleLayer(net['input'], (1, 0, 2))  # change to (time, batch_size, hidden_units)
    if use_bn_embed:
        net['rnn0'] = BatchNorm_step_timefirst_Layer(net['rnn0'], axes=(0, 1),
                                                     epsilon=args.epsilon)

    for l in range(1, num_layers + 1):
        net['hiddeninput%d' % l] = InputLayer((batch_size, units[l - 1]),
                                              hid_init_sym[:, acc_units[l - 1]:acc_units[l]])

        net['rnn%d' % (l - 1)] = ReshapeLayer(net['rnn%d' % (l - 1)],
                                              (batch_size * seq_len, -1))
        net['rnn%d' % (l - 1)] = DenseLayer(net['rnn%d' % (l - 1)], units[l - 1],
                                            W=ini_W,
                                            b=lasagne.init.Constant(args.ini_b),
                                            nonlinearity=None)  # W=Uniform(ini_rernn_in_to_hid),
        net['rnn%d' % (l - 1)] = ReshapeLayer(net['rnn%d' % (l - 1)],
                                              (seq_len, batch_size, -1))

        if args.use_residual and l > args.residual_layers and (l - 1) % args.residual_layers == 0:  # and l != num_layers
            if units[l - 1] != units[l - 1 - args.residual_layers]:
                net['leftbranch%d' % (l - 1)] = ReshapeLayer(net['sum%d' % (l - args.residual_layers)],
                                                             (batch_size * seq_len, -1))
                net['leftbranch%d' % (l - 1)] = DenseLayer(net['leftbranch%d' % (l - 1)],
                                                           units[l - 1], W=ini_W, nonlinearity=None)
                net['leftbranch%d' % (l - 1)] = ReshapeLayer(net['leftbranch%d' % (l - 1)],
                                                             (seq_len, batch_size, -1))
                net['leftbranch%d' % (l - 1)] = BatchNorm_step_timefirst_Layer(net['leftbranch%d' % (l - 1)],
                                                                               axes=(0, 1), epsilon=args.epsilon)
                print('left branch')
            else:
                net['leftbranch%d' % (l - 1)] = net['sum%d' % (l - args.residual_layers)]
            net['sum%d' % l] = ElemwiseSumLayer((net['rnn%d' % (l - 1)],
                                                 net['leftbranch%d' % (l - 1)]))
        else:
            net['sum%d' % l] = net['rnn%d' % (l - 1)]

        net['rnn%d' % l] = net['sum%d' % l]
        if not args.use_bn_afterrnn:
            net['rnn%d' % l] = BatchNorm_step_timefirst_Layer(net['rnn%d' % l], axes=(0, 1),
                                                              beta=lasagne.init.Constant(args.ini_b),
                                                              epsilon=args.epsilon)

        ini_hid_start = 0
        if act == tanh:
            ini_hid_start = -1 * U_bound
        net['rnn%d' % l] = rnnmodel(net['rnn%d' % l], units[l - 1],
                                    hid_init=net['hiddeninput%d' % l],
                                    W_hid_to_hid=Uniform(range=(ini_hid_start, U_bound)),
                                    nonlinearity=act, only_return_final=False,
                                    grad_clipping=args.gradclipvalue)

        net['last_state%d' % l] = SliceLayer(net['rnn%d' % l], -1, axis=0)
        if l == 1:
            net['hid_out'] = net['last_state%d' % l]
        else:
            net['hid_out'] = ConcatLayer([net['hid_out'], net['last_state%d' % l]], axis=1)

        if use_dropout and l % droplayers == 0 and not args.bn_drop:
            net['rnn%d' % l] = lasagne.layers.DropoutLayer(net['rnn%d' % l], p=droprate,
                                                           shared_axes=taxdrop)

        if args.use_bn_afterrnn:
            net['rnn%d' % l] = BatchNorm_step_timefirst_Layer(net['rnn%d' % l], axes=(0, 1),
                                                              epsilon=args.epsilon)

    net['rnn%d' % num_layers] = DimshuffleLayer(net['rnn%d' % num_layers], (1, 0, 2))
    net['reshape_rnn'] = ReshapeLayer(net['rnn%d' % num_layers], (-1, units[num_layers - 1]))
    net['out'] = DenseLayer(net['reshape_rnn'], outputclass,
                            nonlinearity=softmax)  # lasagne.init.HeNormal(gain='relu')  # W=Uniform(inial_scale)
    return net
def create_model(substreams, mask_shape, mask_var, lstm_size=250,
                 output_classes=26, fusiontype='concat',
                 w_init_fn=las.init.Orthogonal(), use_peepholes=True):

    gate_parameters = Gate(
        W_in=w_init_fn, W_hid=w_init_fn, b=las.init.Constant(0.))
    cell_parameters = Gate(
        W_in=w_init_fn, W_hid=w_init_fn,
        # Setting W_cell to None denotes that no cell connection will be used.
        W_cell=None, b=las.init.Constant(0.),
        # By convention, the cell nonlinearity is tanh in an LSTM.
        nonlinearity=tanh)

    l_mask = InputLayer(mask_shape, mask_var, 'mask')
    symbolic_seqlen_raw = l_mask.input_var.shape[1]

    # We'll combine the forward and backward layer output by summing.
    # Merge layers take in lists of layers to merge as input.
    if fusiontype == 'adasum':
        l_fuse = AdaptiveElemwiseSumLayer(substreams, name='adasum1')
    elif fusiontype == 'sum':
        l_fuse = ElemwiseSumLayer(substreams, name='sum1')
    elif fusiontype == 'concat':
        l_fuse = ConcatLayer(substreams, axis=-1, name='concat')

    f_lstm_agg, b_lstm_agg = create_blstm(l_fuse, l_mask, lstm_size,
                                          cell_parameters, gate_parameters,
                                          'lstm_agg')
    l_sum2 = ElemwiseSumLayer([f_lstm_agg, b_lstm_agg], name='sum2')

    # reshape to (num_examples * seq_len, lstm_size)
    l_reshape3 = ReshapeLayer(l_sum2, (-1, lstm_size), name='reshape3')

    # Now, we can apply feed-forward layers as usual.
    # We want the network to predict a classification for the sequence,
    # so we'll use the number of classes.
    l_softmax = DenseLayer(l_reshape3, num_units=output_classes,
                           nonlinearity=las.nonlinearities.softmax, name='softmax')

    l_out = ReshapeLayer(l_softmax, (-1, symbolic_seqlen_raw, output_classes),
                         name='output')

    return l_out, l_fuse
def create_model(dbn, input_shape, input_var, mask_shape, mask_var,
                 lstm_size=250, win=T.iscalar('theta'),
                 output_classes=26, w_init_fn=GlorotUniform,
                 use_peepholes=False, use_blstm=True):
    weights, biases, shapes, nonlinearities = dbn

    gate_parameters = Gate(
        W_in=w_init_fn, W_hid=w_init_fn, b=las.init.Constant(0.))
    cell_parameters = Gate(
        W_in=w_init_fn, W_hid=w_init_fn,
        # Setting W_cell to None denotes that no cell connection will be used.
        W_cell=None, b=las.init.Constant(0.),
        # By convention, the cell nonlinearity is tanh in an LSTM.
        nonlinearity=tanh)

    l_in = InputLayer(input_shape, input_var, 'input')
    l_mask = InputLayer(mask_shape, mask_var, 'mask')

    symbolic_batchsize = l_in.input_var.shape[0]
    symbolic_seqlen = l_in.input_var.shape[1]

    l_reshape1 = ReshapeLayer(l_in, (-1, input_shape[-1]), name='reshape1')
    l_encoder = create_pretrained_encoder(l_reshape1, weights, biases, shapes,
                                          nonlinearities,
                                          ['fc1', 'fc2', 'fc3', 'bottleneck'])
    encoder_len = las.layers.get_output_shape(l_encoder)[-1]

    l_reshape2 = ReshapeLayer(l_encoder,
                              (symbolic_batchsize, symbolic_seqlen, encoder_len),
                              name='reshape2')
    l_delta = DeltaLayer(l_reshape2, win, name='delta')

    if use_blstm:
        l_lstm, l_lstm_back = create_blstm(l_delta, l_mask, lstm_size,
                                           cell_parameters, gate_parameters,
                                           'blstm1', use_peepholes)

        # We'll combine the forward and backward layer output by summing.
        # Merge layers take in lists of layers to merge as input.
        l_sum1 = ElemwiseSumLayer([l_lstm, l_lstm_back], name='sum1')

        # reshape, flatten to 2 dimensions to run softmax on all timesteps
        l_reshape3 = ReshapeLayer(l_sum1, (-1, lstm_size), name='reshape3')
    else:
        l_lstm = create_lstm(l_delta, l_mask, lstm_size, cell_parameters,
                             gate_parameters, 'lstm', use_peepholes)
        l_reshape3 = ReshapeLayer(l_lstm, (-1, lstm_size), name='reshape3')

    # Now, we can apply feed-forward layers as usual.
    # We want the network to predict a classification for the sequence,
    # so we'll use the number of classes.
    l_softmax = DenseLayer(
        l_reshape3, num_units=output_classes,
        nonlinearity=las.nonlinearities.softmax, name='softmax')

    l_out = ReshapeLayer(l_softmax, (-1, symbolic_seqlen, output_classes),
                         name='output')

    return l_out
def GRURecurrent(input_var, mask_var=None, batch_size=1, n_in=100, n_out=1,
                 n_hid=200, diag_val=0.9, offdiag_val=0.01,
                 out_nlin=lasagne.nonlinearities.linear):
    # Input Layer
    l_in = InputLayer((batch_size, None, n_in), input_var=input_var)
    if mask_var is None:
        l_mask = None
    else:
        l_mask = InputLayer((batch_size, None), input_var=mask_var)

    _, seqlen, _ = l_in.input_var.shape

    l_rec = GRULayer(
        l_in, n_hid,
        resetgate=lasagne.layers.Gate(W_in=lasagne.init.GlorotNormal(0.05),
                                      W_hid=lasagne.init.GlorotNormal(0.05),
                                      W_cell=None, b=lasagne.init.Constant(0.)),
        updategate=lasagne.layers.Gate(W_in=lasagne.init.GlorotNormal(0.05),
                                       W_hid=lasagne.init.GlorotNormal(0.05),
                                       W_cell=None),
        hidden_update=lasagne.layers.Gate(
            W_in=lasagne.init.GlorotNormal(0.05),
            W_hid=LeInit(diag_val=diag_val, offdiag_val=offdiag_val),
            W_cell=None,
            nonlinearity=lasagne.nonlinearities.rectify),
        hid_init=lasagne.init.Constant(0.), backwards=False, learn_init=False,
        gradient_steps=-1, grad_clipping=10., unroll_scan=False,
        precompute_input=True, mask_input=l_mask, only_return_final=False)

    # Output Layer
    l_shp = ReshapeLayer(l_rec, (-1, n_hid))
    l_dense = DenseLayer(l_shp, num_units=n_out,
                         W=lasagne.init.GlorotNormal(0.05), nonlinearity=out_nlin)
    # To reshape back to our original shape, we can use the symbolic shape
    # variables we retrieved above.
    l_out = ReshapeLayer(l_dense, (batch_size, seqlen, n_out))

    return l_out, l_rec
def create_lstm(input_vars, num_inputs, hidden_layer_size, num_outputs):
    network = InputLayer((None, None, num_inputs), input_vars)
    batch_size_theano, seqlen, _ = network.input_var.shape

    network = GaussianNoiseLayer(network, sigma=0.01)
    for i in range(1):
        network = LSTMLayer(network, hidden_layer_size, learn_init=True)

    network = ReshapeLayer(network, (-1, hidden_layer_size))
    network = DenseLayer(network, num_outputs, nonlinearity=softmax)
    network = ReshapeLayer(network, (batch_size_theano, seqlen, num_outputs))
    return network
def create_model(input_shape, input_var, mask_shape, mask_var, window,
                 lstm_size=250, output_classes=26,
                 w_init=las.init.GlorotUniform(), use_peepholes=False,
                 use_blstm=True):
    gate_parameters = Gate(
        W_in=w_init, W_hid=w_init, b=las.init.Constant(0.))
    cell_parameters = Gate(
        W_in=w_init, W_hid=w_init,
        # Setting W_cell to None denotes that no cell connection will be used.
        W_cell=None, b=las.init.Constant(0.),
        # By convention, the cell nonlinearity is tanh in an LSTM.
        nonlinearity=tanh)

    l_in = InputLayer(input_shape, input_var, 'input')
    l_mask = InputLayer(mask_shape, mask_var, name='mask')
    symbolic_seqlen = l_in.input_var.shape[1]

    l_delta = DeltaLayer(l_in, window, name='delta')

    if use_blstm:
        f_lstm, b_lstm = create_blstm(l_delta, l_mask, lstm_size, cell_parameters,
                                      gate_parameters, 'lstm', use_peepholes)
        l_sum = ElemwiseSumLayer([f_lstm, b_lstm], name='sum')

        # reshape to (num_examples * seq_len, lstm_size)
        l_reshape = ReshapeLayer(l_sum, (-1, lstm_size), name='reshape')
    else:
        l_lstm = create_lstm(l_delta, l_mask, lstm_size, cell_parameters,
                             gate_parameters, 'lstm', use_peepholes)
        l_reshape = ReshapeLayer(l_lstm, (-1, lstm_size), name='reshape')

    # Now, we can apply feed-forward layers as usual.
    # We want the network to predict a classification for the sequence,
    # so we'll use the number of classes.
    l_softmax = DenseLayer(l_reshape, num_units=output_classes,
                           nonlinearity=las.nonlinearities.softmax, name='softmax')

    l_out = ReshapeLayer(l_softmax, (-1, symbolic_seqlen, output_classes),
                         name='output')

    return l_out
def TanhRecurrent(input_var, mask_var=None, batch_size=1, n_in=100, n_out=1,
                  n_hid=200, wscale=1.0,
                  out_nlin=lasagne.nonlinearities.linear):
    # Input Layer
    l_in = InputLayer((batch_size, None, n_in), input_var=input_var)
    if mask_var is None:
        l_mask = None
    else:
        l_mask = InputLayer((batch_size, None), input_var=mask_var)

    _, seqlen, _ = l_in.input_var.shape

    l_in_hid = DenseLayer(lasagne.layers.InputLayer((None, n_in)), n_hid,
                          W=lasagne.init.HeNormal(0.95),
                          nonlinearity=lasagne.nonlinearities.linear)
    l_hid_hid = DenseLayer(lasagne.layers.InputLayer((None, n_hid)), n_hid,
                           W=lasagne.init.HeNormal(gain=wscale),
                           nonlinearity=lasagne.nonlinearities.linear)
    l_rec = lasagne.layers.CustomRecurrentLayer(
        l_in, l_in_hid, l_hid_hid,
        nonlinearity=lasagne.nonlinearities.tanh,
        mask_input=l_mask, grad_clipping=100)

    l_shp_1 = ReshapeLayer(l_rec, (-1, n_hid))
    l_shp_2 = ReshapeLayer(l_hid_hid, (-1, n_hid))
    l_shp = lasagne.layers.ElemwiseSumLayer(
        (l_shp_1, l_shp_2), coeffs=(np.float32(0.2), np.float32(0.8)))

    # Output Layer
    l_dense = DenseLayer(l_shp, num_units=n_out,
                         W=lasagne.init.HeNormal(0.95), nonlinearity=out_nlin)
    # To reshape back to our original shape, we can use the symbolic shape
    # variables we retrieved above.
    l_out = ReshapeLayer(l_dense, (batch_size, seqlen, n_out))

    return l_out, l_rec