def __embedding_layer_TO_similarity_layer__(embedding_layer, tripletInput=True):
    """Attach a distance-based similarity head to an embedding layer.

    The embedding rows are regrouped into triplets or pairs, the L2 distance
    between the members of each group is computed with ``DistanceLayer`` and
    a single sigmoid unit turns every distance into a similarity score.

    :param embedding_layer: layer producing one embedding per row; rows are
        presumably ordered so that each run of 3 (or 2) consecutive rows
        forms one triplet (or pair) -- confirm against the data pipeline
    :param tripletInput: group rows as (anchor, positive, negative) triplets
        when true, as pairs otherwise
    :return: dict of all created layers keyed by name; ``'reshape'``,
        ``'euclid_dist'`` and ``'similarity'`` exist in both modes
    """
    group_size = 3 if tripletInput else 2
    # (-1, group_size, [1]): fold rows into groups; [1] reuses the size of
    # the incoming layer's axis 1.
    grouped = ReshapeLayer(embedding_layer, (-1, group_size, [1]))
    net = {'reshape': grouped}

    def pick(position):
        # Integer indices are used here; in order to keep the sliced
        # dimension one would use slice(0, 1) (== array[0:1, ...]) instead.
        return SliceLayer(grouped, indices=position, axis=1)

    def l2_distance(left, right):
        return DistanceLayer([left, right], Lp=2, axis=1, keepdims=True)

    if tripletInput:
        members = (('triplet_anchor', 0), ('triplet_pos', 1), ('triplet_neg', 2))
        for key, position in members:
            net[key] = pick(position)
        net['euclid_pos'] = l2_distance(net['triplet_anchor'],
                                        net['triplet_pos'])
        net['euclid_neg'] = l2_distance(net['triplet_anchor'],
                                        net['triplet_neg'])
        # Positive distances first, negative distances after, along axis 0.
        net['euclid_dist'] = ConcatLayer(
            [net['euclid_pos'], net['euclid_neg']], axis=0)
    else:
        net['pair_1'] = pick(0)
        net['pair_2'] = pick(1)
        net['euclid_dist'] = l2_distance(net['pair_1'], net['pair_2'])

    # input-->output (shape 1-->1), logistic regression
    net['similarity'] = DenseLayer(net['euclid_dist'], num_units=1,
                                   nonlinearity=sigmoid)
    return net
def conv_net(input_layer):
    """Build a two-layer strided conv encoder followed by a dense layer.

    Relies on ``self``, ``num_elems`` and ``conv_output_size`` from the
    enclosing scope. When ``self.n_mi_features`` is non-zero, the trailing
    ``n_mi_features`` entries (along SliceLayer's default axis) bypass the
    convolutions and are concatenated to the flattened conv features.

    :param input_layer: layer exposing a ``shape`` attribute (read as
        ``input_layer.shape[1]``), e.g. an InputLayer
    :return: ``(output_layer, conv_layer_output_shapes)`` where the second
        item lists ``output_shape[2]`` of each convolution
    """
    n_mi = self.n_mi_features
    if n_mi == 0:
        image_part, mi_part = input_layer, None
    else:
        image_part = SliceLayer(
            input_layer, indices=slice(0, input_layer.shape[1] - n_mi))
        mi_part = SliceLayer(
            input_layer, indices=slice(input_layer.shape[1] - n_mi, None))

    # One channel, square image of side self.input_size.
    features = ReshapeLayer(
        image_part, (-1, 1, self.input_size, self.input_size))

    conv_layer_output_shapes = []
    for n_filters in (64, 128):
        features = Conv2DLayer(features, n_filters, 5, stride=2, pad='same')
        conv_layer_output_shapes.append(features.output_shape[2])

    # Flatten before the dense layer.
    features = ReshapeLayer(features, (-1, num_elems(features)))
    if mi_part is not None:
        features = ConcatLayer([features, mi_part], axis=1)

    encoded = BatchNormLayer(DenseLayer(features, conv_output_size))
    return encoded, conv_layer_output_shapes
def test_slice_layer():
    """Compare SliceLayer shapes and outputs with equivalent numpy indexing."""
    from lasagne.layers import SliceLayer, InputLayer, get_output_shape,\
        get_output
    from numpy.testing import assert_array_almost_equal as aeq

    in_shp = (3, 5, 2)
    l_inp = InputLayer(in_shp)
    # Integer index on axis 0, slice on axis 1, negative index on last axis.
    sliced_layers = [
        SliceLayer(l_inp, axis=0, indices=0),
        SliceLayer(l_inp, axis=1, indices=slice(3, 5)),
        SliceLayer(l_inp, axis=-1, indices=-1),
    ]

    x = np.arange(np.prod(in_shp)).reshape(in_shp).astype('float32')
    expected = [x[0], x[:, 3:5], x[:, :, -1]]

    # Static shapes first, then the evaluated outputs.
    for layer, reference in zip(sliced_layers, expected):
        assert get_output_shape(layer) == reference.shape
    for layer, reference in zip(sliced_layers, expected):
        aeq(get_output(layer, x).eval(), reference)

    # test slicing None dimension
    l_inp = InputLayer((2, None, 2))
    l_slice_ax1 = SliceLayer(l_inp, axis=1, indices=slice(3, 5))
    assert get_output_shape(l_slice_ax1) == (2, None, 2)
    aeq(get_output(l_slice_ax1, x).eval(), expected[1])
def build_model(self, input_batch):
    """Unroll ``self.predict`` over the input frames and the target frames.

    At every step the first ``self.nInputs`` frames of the running buffer
    are fed to ``self.predict``; the buffer then drops its oldest frame and,
    once the real inputs are exhausted, the prediction is appended in its
    place.

    :param input_batch: symbolic input bound to an InputLayer of shape
        ``(None, self.input_seqlen, self.npx, self.npx)``
    :return: ``(output, outputs, filtersToVisualize)`` -- the last
        prediction, the list of predictions collected from step
        ``input_seqlen - nInputs`` onwards, and the filters returned by
        ``self.predict`` at exactly that step
    """
    ## initialize shared parameters
    n_param_layers = 13
    if self.refinement_network:
        n_param_layers += 4
    Ws = [HeUniform() for _ in range(n_param_layers)]
    bs = [Constant(0.0) for _ in range(n_param_layers)]

    # Floor division: a plain "/" yields floats on Python 3, which are not
    # valid shape entries. For integer npx this equals the Python 2 result.
    half_npx = self.npx // 2
    state_shape = (self.batch_size, 64, half_npx, half_npx)
    hidden_state = InputLayer(
        input_var=np.zeros(state_shape, dtype=np.float32),
        shape=state_shape)

    ## get inputs
    inputs = InputLayer(
        input_var=input_batch,
        shape=(None, self.input_seqlen, self.npx, self.npx))

    outputs = []
    for i in range(self.input_seqlen - self.nInputs + self.target_seqlen):
        window = SliceLayer(inputs, indices=slice(0, self.nInputs), axis=1)
        output, hidden_state, filters = self.predict(
            window, hidden_state, Ws, bs)

        ## FIFO operation.
        inputs = SliceLayer(inputs, indices=slice(1, None), axis=1)

        if i == self.input_seqlen - self.nInputs:
            filtersToVisualize = filters

        if i >= self.input_seqlen - self.nInputs:
            # From here on the network feeds on its own predictions.
            inputs = ConcatLayer([inputs, output], axis=1)
            outputs.append(output)

    return output, outputs, filtersToVisualize
def dnn_sep(M, W1, W2, hh=.0001, ep=5000, d=0, sp=.0001, spb=3, al='rprop'):
    """Fit the input of two fixed dense stacks so their sum approximates ``M``.

    A trainable matrix ``H`` is split column-wise into two parts, each part
    is pushed through its own stack of dense layers with weights ``W1`` /
    ``W2``, and the two stack outputs are summed. Only ``H`` is optimised.

    :param M: 2-D array to approximate (used transposed internally)
    :param W1: list of weight matrices for the first stack
    :param W2: list of weight matrices for the second stack
    :param hh: third argument forwarded to ``downhill_train``
    :param ep: fourth argument forwarded to ``downhill_train``
    :param d: single value placed in the dummy training dataset
    :param sp: weight of the mean-absolute-activation penalty
    :param spb: second argument passed to ``psoftplus``
    :param al: algorithm name handed to ``downhill.build``
    :return: ``(_r, _r1, _r2, er)`` -- transposed summed output (plus
        ``eps``), transposed outputs of each stack, and the value returned
        by ``downhill_train``
    """
    # GPU cached data
    _M = theano.shared(M.T.astype(float64))
    dum = Th.vector('dum')

    # Layer sizes: one [stack-1, stack-2] pair per layer, output size last
    K = [[W1[i].shape[0], W2[i].shape[0]] for i in range(len(W1))]
    K.append([M.T.shape[1], M.T.shape[1]])
    k1, k2 = K[0][0], K[0][1]

    # Weights to discover, scaled by sqrt(2 / (k1 + k2 + M.shape[1]))
    scale = sqrt(2. / (k1 + k2 + M.shape[1]))
    H = theano.shared(
        scale * random.rand(M.T.shape[0], k1 + k2).astype(float64))
    fI = InputLayer(shape=(M.T.shape[0], k1 + k2), input_var=H)

    # Split in two pathways, one per stack
    H1 = [SliceLayer(fI, indices=slice(0, k1), axis=1)]
    H2 = [SliceLayer(fI, indices=slice(k1, k1 + k2), axis=1)]

    # Put the subsequent layers
    for i in range(len(W1)):
        H1.append(DenseLayer(H1[i], num_units=K[i + 1][0],
                             W=W1[i].astype(float64),
                             nonlinearity=lambda x: psoftplus(x, spb),
                             b=None))
        H2.append(DenseLayer(H2[i], num_units=K[i + 1][1],
                             W=W2[i].astype(float64),
                             nonlinearity=lambda x: psoftplus(x, spb),
                             b=None))

    # Add the two approximations
    R = ElemwiseSumLayer([H1[-1], H2[-1]])

    # Cost: mean of M * (log M - log R) - M + R; the zero-weighted dummy
    # term keeps `dum` in the graph.
    Ro = get_output(R) + eps
    cost = Th.mean(_M * (Th.log(_M + eps) - Th.log(Ro)) - _M + Ro) \
        + 0 * Th.mean(dum)
    # Penalty on every layer output except the last of each pathway
    for h1, h2 in zip(H1[:-1], H2[:-1]):
        cost += sp * Th.mean(abs(get_output(h1))) + sp * Th.mean(
            abs(get_output(h2)))

    # Train it using downhill
    opt = downhill.build(al, loss=cost, inputs=[dum], params=[H])
    train = downhill.Dataset(array([d]).astype(float64), batch_size=0)
    er = downhill_train(opt, train, hh, ep, None)

    # Get outputs
    def readout(layer):
        return nget(layer, dum, array([0]).astype(float64)).T

    _r = readout(R) + eps
    _r1 = readout(H1[-1])
    _r2 = readout(H2[-1])
    return _r, _r1, _r2, er
def model(show_model):
    """ Compile net architecture

    Two dense encoders (one per view) each end in a ``dim_latent``-unit
    linear layer; the latents are merged by ``CCALayer`` or
    ``LearnedCCALayer`` (depending on ``use_ccal``), split again and
    length-normalised.

    :param show_model: print both output architectures when true
    :return: ``(l_view1, l_view2, l_v1, l_v2)`` -- the two input layers and
        the two normalised output layers
    """

    def latent_branch(net, n_layers, n_hidden):
        # Dense + batch-norm stack topped by a linear projection.
        for _ in range(n_layers):
            net = dense_bn(net, num_units=n_hidden, nonlinearity=nonlin)
        return DenseLayer(net, num_units=dim_latent, nonlinearity=identity,
                          W=init())

    # --- input layers ---
    l_view1 = lasagne.layers.InputLayer(shape=(None, INPUT_SHAPE_1[0]))
    l_view2 = lasagne.layers.InputLayer(shape=(None, INPUT_SHAPE_2[0]))

    # --- feed forward parts, view 1 first, then view 2 ---
    l_v1latent = latent_branch(l_view1, N_LAYERS_IMG, N_HIDDEN_IMG)
    l_v2latent = latent_branch(l_view2, N_LAYERS_TXT, N_HIDDEN_TXT)

    # --- multi modality part ---
    # merge modalities by cca projection or learned embedding layer
    if use_ccal:
        net = CCALayer([l_v1latent, l_v2latent], r1, r2, rT, alpha=alpha,
                       wl=weight_tno)
    else:
        net = LearnedCCALayer([l_v1latent, l_v2latent], U=init(), V=init(),
                              alpha=alpha)

    # split modalities again: first dim_latent columns, then the next ones
    view1_part = SliceLayer(net, slice(0, dim_latent), axis=1)
    view2_part = SliceLayer(net, slice(dim_latent, 2 * dim_latent), axis=1)

    # normalize (per row) output to length 1.0
    l_v1 = LengthNormLayer(view1_part)
    l_v2 = LengthNormLayer(view2_part)

    # --- print architectures ---
    if show_model:
        print_architecture(l_v1)
        print_architecture(l_v2)

    return l_view1, l_view2, l_v1, l_v2
def cnn(self):
    """Build the second network, summed layer-wise with pre-trained layers.

    New layers are stored in ``self._network``. The following entries must
    already be present there, because they are read here:
    ``'sumwise_layer_pre_training'``, ``'dropout_3_pre_training'``,
    ``'pre_training_fc1_full'``, ``'fc1_pre_training'`` and
    ``'fc2_pre_training'``. Each of the pre-trained layers that is merged is
    wrapped in a ``ScaleLayer`` before the element-wise sum.

    Output shapes are printed while building. ``print(x)`` with a single
    argument is used so the method parses on Python 3 and prints the same
    text on Python 2.
    """
    # NOTE(review): pad= and W= have no obvious meaning for an InputLayer;
    # they are passed exactly as before -- confirm that they are ignored.
    self._network['input'] = pelu(batch_norm(lasagne.layers.InputLayer(
        shape=(None, self._number_of_channel, 8, 14),
        input_var=self._x,
        pad='same',
        W=lasagne.init.HeNormal(gain='relu'))))
    print(self._network['input'].output_shape)

    # Channels 0-1 and 2-3 go through separate convolution branches.
    first_part_input = SliceLayer(self._network['input'],
                                  indices=slice(0, 2), axis=1)
    print(first_part_input.output_shape)
    second_part_input = SliceLayer(self._network['input'],
                                   indices=slice(2, 4), axis=1)
    print(second_part_input.output_shape)

    first_dropout_2 = self.cnn_separate_convolutions(
        first_part_input, first_part=True)
    second_dropout_2 = self.cnn_separate_convolutions(
        second_part_input, first_part=False)

    self._network['sumwise_layer'] = ElemwiseSumLayer(
        [first_dropout_2, second_dropout_2,
         ScaleLayer(self._network['sumwise_layer_pre_training'])])

    self._network['conv3'] = pelu(batch_norm(lasagne.layers.Conv2DLayer(
        self._network['sumwise_layer'], num_filters=48, filter_size=(3, 3),
        W=lasagne.init.HeNormal(gain='relu'))))
    print(self._network['conv3'].output_shape)
    self._network['dropout_3'] = mc_dropout.MCDropout(
        self._network['conv3'], p=self._percentage_dropout_cnn_layers)

    self._network['merge_with_pre_training_dense_layer_1'] = ElemwiseSumLayer(
        [ScaleLayer(self._network['dropout_3_pre_training']),
         self._network['dropout_3']])

    print(np.shape(self._network['pre_training_fc1_full'].W.get_value()))
    self._network['fc1'] = mc_dropout.MCDropout(
        pelu(batch_norm(lasagne.layers.DenseLayer(
            self._network['merge_with_pre_training_dense_layer_1'],
            num_units=100, W=lasagne.init.HeNormal(gain='relu')))),
        p=self._percentage_dropout_dense_layers)
    print(self._network['fc1'].output_shape)

    self._network['merge_with_pre_training_dense_layer_2'] = ElemwiseSumLayer(
        [ScaleLayer(self._network['fc1_pre_training']),
         self._network['fc1']])

    self._network['fc2'] = mc_dropout.MCDropout(
        pelu(batch_norm(lasagne.layers.DenseLayer(
            self._network['merge_with_pre_training_dense_layer_2'],
            num_units=100, W=lasagne.init.HeNormal(gain='relu')))),
        p=self._percentage_dropout_dense_layers)
    print(self._network['fc2'].output_shape)

    self._network['merge_with_pre_training_output'] = ElemwiseSumLayer(
        [ScaleLayer(self._network['fc2_pre_training']),
         self._network['fc2']])

    self._network['output'] = lasagne.layers.DenseLayer(
        self._network['merge_with_pre_training_output'],
        num_units=self._number_of_class,
        nonlinearity=lasagne.nonlinearities.softmax,
        W=lasagne.init.HeNormal(gain='relu'))
    print(self._network['output'].output_shape)
def pre_training_cnn(self):
    """Build the pre-training network and store its layers by name.

    ``self._network`` is reset to an empty dict and then filled with every
    layer created here (input, normalised input, summed conv branches, third
    convolution, two dense layers with MC dropout, softmax output).

    Output shapes are printed while building. ``print(x)`` with a single
    argument is used so the method parses on Python 3 and prints the same
    text on Python 2.
    """
    self._network = {}

    # NOTE(review): pad= and W= have no obvious meaning for an InputLayer;
    # they are passed exactly as before -- confirm that they are ignored.
    self._network['input_pre_training'] = lasagne.layers.InputLayer(
        shape=(None, self._number_of_channel, 8, 14),
        input_var=self._x,
        pad='same',
        W=lasagne.init.HeNormal(gain='relu'))
    self._network['input_normalized'] = prelu(
        batch_norm(self._network['input_pre_training']))
    print(self._network['input_normalized'].output_shape)

    # Channels 0-1 and 2-3 go through separate convolution branches.
    first_part_input = SliceLayer(self._network['input_normalized'],
                                  indices=slice(0, 2), axis=1)
    print(first_part_input.output_shape)
    second_part_input = SliceLayer(self._network['input_normalized'],
                                   indices=slice(2, 4), axis=1)
    print(second_part_input.output_shape)

    first_network = self.cnn_separate_convolutions_pre_training(
        first_part_input, first_part=True)
    second_network = self.cnn_separate_convolutions_pre_training(
        second_part_input, first_part=False)

    self._network['sumwise_layer_pre_training'] = ElemwiseSumLayer(
        [first_network, second_network])

    self._network['conv3_pre_training_cnn'] = lasagne.layers.Conv2DLayer(
        self._network['sumwise_layer_pre_training'], num_filters=48,
        filter_size=(3, 3), W=lasagne.init.HeNormal(gain='relu'))
    self._network['conv3_pre_training'] = prelu(
        batch_norm(self._network['conv3_pre_training_cnn']))
    print(self._network['conv3_pre_training'].output_shape)
    self._network['dropout_3_pre_training'] = mc_dropout.MCDropout(
        self._network['conv3_pre_training'],
        p=self._percentage_dropout_cnn_layers)

    # The bare dense layers are stored separately from their
    # normalised/dropout-wrapped versions.
    self._network['pre_training_fc1_full'] = lasagne.layers.DenseLayer(
        self._network['dropout_3_pre_training'], num_units=100,
        W=lasagne.init.HeNormal(gain='relu'))
    self._network['fc1_pre_training'] = mc_dropout.MCDropout(
        prelu(batch_norm(self._network['pre_training_fc1_full'])),
        p=self._percentage_dropout_dense_layers)
    print(self._network['fc1_pre_training'].output_shape)

    self._network['pre_training_fc2_full'] = lasagne.layers.DenseLayer(
        self._network['fc1_pre_training'], num_units=100,
        W=lasagne.init.HeNormal(gain='relu'))
    self._network['fc2_pre_training'] = mc_dropout.MCDropout(
        prelu(batch_norm(self._network['pre_training_fc2_full'])),
        p=self._percentage_dropout_dense_layers)
    print(self._network['fc2_pre_training'].output_shape)

    self._network['output_gesture_pre_training'] = lasagne.layers.DenseLayer(
        self._network['fc2_pre_training'],
        num_units=self._number_of_class,
        nonlinearity=lasagne.nonlinearities.softmax,
        W=lasagne.init.HeNormal(gain='relu'))
    print(self._network['output_gesture_pre_training'].output_shape)
    print("Pre-Training done printing")
def create_network():
    """Build a two-branch classifier over a ``(None, 1, 4, 2024)`` input.

    The first 1000 positions of the last axis feed three convolution stages
    (three convolutions plus a max-pool each); the remaining 1024 positions
    are flattened and fed to a dense layer. Both branches are concatenated,
    passed through dropout and a dense layer, and end in a 2-unit softmax.

    :return: the final softmax layer
    """
    seq_len = 1000
    extra_len = 1024
    pool_size = 5
    # (number of filters, filter width, height of the stage's first filter)
    stages = (
        (128, 13, 4),
        (128, 7, 1),
        (128, 5, 1),
    )

    inputs = InputLayer(shape=(None, 1, 4, seq_len + extra_len))
    conv_branch = SliceLayer(inputs, indices=slice(0, seq_len), axis=-1)
    extra = SliceLayer(inputs, indices=slice(seq_len, None), axis=-1)
    extra = SliceLayer(extra, indices=slice(0, 4), axis=-2)
    extra_flat = FlattenLayer(extra)

    for n_filters, width, first_height in stages:
        conv_branch = Conv2DLayer(conv_branch, num_filters=n_filters,
                                  filter_size=(first_height, width))
        for _ in range(2):
            conv_branch = Conv2DLayer(conv_branch, num_filters=n_filters,
                                      filter_size=(1, width))
        conv_branch = MaxPool2DLayer(conv_branch, pool_size=(1, pool_size))

    # Dense heads are created conv branch first, extra branch second.
    conv_dense = DenseLayer(conv_branch, num_units=256)
    extra_dense = DenseLayer(extra_flat, num_units=128)

    merged = ConcatLayer([conv_dense, extra_dense])
    merged = DropoutLayer(merged, p=0.5)
    hidden = DenseLayer(merged, num_units=256)
    network = DenseLayer(hidden, num_units=2, nonlinearity=softmax)
    return network
def self_attention(incoming, key_size=None, value_size=None, mask_input=None,
                   name='attn', attn_class=DotAttentionLayer, **kwargs):
    r"""
    A convenience function that applies attention from sequential layer to itself.

              /-> queries -------v
    incoming --> keys ---> attention_probs ---v
              \-> values -------------------> attention response

    :param incoming: input sequence of shape [batch, time, units]
    :param key_size: num units in attention query and key,
        defaults to incoming.output_shape[-1]
    :param value_size: num units in attention values,
        defaults to incoming.output_shape[-1] (NOT to key_size)
    :param mask_input: optional mask layer of shape [batch, time]
    :param name: name prefix; the projection layer is called name + '.qkv'
    :param attn_class: either DotAttentionLayer or AttentionLayer or similar
        layer (incl. multihead attention); its result is indexed with 'attn'
    :param kwargs: also accepts any parameters accepted by attn_class
    :return: attention response layer, [batch, time, value_units]

    Heavily inspired by https://arxiv.org/abs/1706.03762 and http://bit.ly/2vsYX0R
    """
    assert len(incoming.output_shape) == 3, "incoming layer must have shape [batch,time,unit]"
    assert mask_input is None or len(mask_input.output_shape) == 2,"if mask_input is given, it must be [batch,time]"

    # Falsy sizes (None or 0) fall back to the width of the incoming layer.
    key_size = key_size or incoming.output_shape[-1]
    value_size = value_size or incoming.output_shape[-1]

    # One joint projection, split below into queries | keys | values.
    qkv = DenseLayer(incoming, key_size * 2 + value_size, nonlinearity=None,
                     num_leading_axes=2,
                     name=name + '.qkv')  # [batch,time,2*key_units+value_units]
    queries = SliceLayer(qkv, slice(0, key_size), axis=-1)
    keys = SliceLayer(qkv, slice(key_size, 2 * key_size), axis=-1)
    values = SliceLayer(qkv, slice(2 * key_size, qkv.num_units), axis=-1)

    # broadcast each query to every (key,value) pair
    queries_each_tick = bcast = BroadcastLayer(
        queries, broadcasted_axes=(0, 1))  # [batch*time,units]

    # upcast every key and value to match the amount queries
    key_for_each_query = UpcastLayer(
        keys, broadcast_layer=bcast)  # [batch*time, time, units]
    value_for_each_query = UpcastLayer(
        values, broadcast_layer=bcast)  # [batch*time, time, value_units]
    if mask_input is not None:
        mask_input = UpcastLayer(
            mask_input, broadcast_layer=bcast)  # [batch*time, time]

    attn_each_tick = attn_class(
        value_for_each_query, queries_each_tick,
        key_sequence=key_for_each_query, mask_input=mask_input,
        name=name, **kwargs)['attn']  # [batch*time, value_units]

    attn = UnbroadcastLayer(
        attn_each_tick, broadcast_layer=bcast)  # [batch, time, value_units]
    return attn
def nn_fn(self):
    """Build an LSTM that maps (z, shifted x) to one output per time step.

    ``z`` is tiled along axis 1 to ``self.max_length`` steps and concatenated
    with ``x`` after ``x`` has been padded by one step at the front of axis 1
    and had its last step removed (the padding is presumably meant to delay
    the sequence by one step -- confirm PadLayer semantics).

    :return: ``((l_in_z, l_in_x), l_out)`` -- the two input layers and the
        linear output layer with ``self.emb_dim`` units
    """
    l_in_z = InputLayer((None, self.z_dim))
    l_in_x = InputLayer((None, self.max_length, self.emb_dim))

    # (batch, z_dim) -> (batch, 1, z_dim) -> repeated max_length times
    z_per_step = TileLayer(ReshapeLayer(l_in_z, ([0], 1, [1])),
                           (1, self.max_length, 1))

    padded_x = PadLayer(l_in_x, [(1, 0), (0, 0)], batch_ndim=1)
    shifted_x = SliceLayer(padded_x, indices=slice(0, -1), axis=1)
    shifted_x = DropoutLayer(shifted_x, self.nn_word_drop,
                             shared_axes=(-1, ))

    hidden = LSTMLayer(ConcatLayer((z_per_step, shifted_x), axis=-1),
                       num_units=self.nn_hid_units)
    if self.nn_skip:
        # Skip connection: give z to the output layer directly as well.
        hidden = ConcatLayer((hidden, z_per_step), axis=-1)

    l_out = DenseLayer(hidden, num_units=self.emb_dim, num_leading_axes=2,
                       nonlinearity=None)
    return (l_in_z, l_in_x), l_out
def create_model(input_shape, input_var, mask_shape, mask_var, lstm_size=250, output_classes=26, w_init=las.init.Orthogonal()):
    """Build a bidirectional-LSTM sequence classifier.

    The forward and backward outputs from ``create_blstm`` are summed, index
    ``-1`` along axis 1 is selected and a softmax layer produces the class
    probabilities.

    :param input_shape: shape of the input layer
    :param input_var: symbolic variable bound to the input layer
    :param mask_shape: shape of the mask layer
    :param mask_var: symbolic variable bound to the mask layer
    :param lstm_size: size passed to ``create_blstm``
    :param output_classes: number of softmax units
    :param w_init: initialiser for all input and hidden gate weights
    :return: the softmax output layer
    """
    zero_bias = las.init.Constant
    gate_parameters = Gate(W_in=w_init, W_hid=w_init, b=zero_bias(0.))
    # W_cell=None means no cell connection is used; the cell nonlinearity
    # is tanh by LSTM convention.
    cell_parameters = Gate(W_in=w_init, W_hid=w_init, W_cell=None,
                           b=zero_bias(0.), nonlinearity=tanh)

    l_in = InputLayer(input_shape, input_var, 'input')
    l_mask = InputLayer(mask_shape, mask_var, 'mask')

    forward, backward = create_blstm(l_in, l_mask, lstm_size,
                                     cell_parameters, gate_parameters,
                                     'lstm')
    summed = ElemwiseSumLayer([forward, backward], name='sum')

    # Index -1 on axis 1 feeds the classifier.
    last_step = SliceLayer(summed, -1, 1, name='slice1')

    return DenseLayer(last_step, num_units=output_classes,
                      nonlinearity=las.nonlinearities.softmax,
                      name='output')
def build_rnn(conv_input_var, seq_input_var, conv_shape, word_dims, n_hid, lstm_layers):
    """Build an LSTM stack fed with one projected conv vector plus a sequence.

    The conv input is projected to ``n_hid`` units and prepended as one extra
    step (axis 1) to the projected word sequence. After the LSTM stack every
    step is mapped to ``word_dims`` log-softmax outputs and the last step is
    dropped, so the output again has ``seqlen`` steps.

    :param conv_input_var: symbolic variable for the conv features
    :param seq_input_var: symbolic variable for the sequence, bound to an
        input layer of shape ``(None, None, word_dims)``
    :param conv_shape: shape of the conv input layer
    :param word_dims: size of each sequence element and of the output
    :param n_hid: hidden size of projections and LSTM layers
    :param lstm_layers: number of stacked LSTM layers
    :return: dict of layers by name; ``'output'`` is the final layer
    """
    ret = {}

    ret['seq_input'] = seq_layer = InputLayer((None, None, word_dims),
                                              input_var=seq_input_var)
    # Symbolic sizes: both leading axes are unknown when the graph is built.
    batchsize, seqlen, _ = seq_layer.input_var.shape
    ret['seq_resh'] = seq_layer = ReshapeLayer(seq_layer,
                                               shape=(-1, word_dims))
    ret['seq_proj'] = seq_layer = DenseLayer(seq_layer, num_units=n_hid)
    ret['seq_resh2'] = seq_layer = ReshapeLayer(
        seq_layer, shape=(batchsize, seqlen, n_hid))

    ret['conv_input'] = conv_layer = InputLayer(conv_shape,
                                                input_var=conv_input_var)
    ret['conv_proj'] = conv_layer = DenseLayer(conv_layer, num_units=n_hid)
    ret['conv_resh'] = conv_layer = ReshapeLayer(conv_layer,
                                                 shape=([0], 1, -1))

    ret['input_concat'] = layer = ConcatLayer([conv_layer, seq_layer],
                                              axis=1)

    # range() instead of the Python-2-only xrange(); same iteration.
    for lstm_layer_idx in range(lstm_layers):
        ret['lstm_{}'.format(lstm_layer_idx)] = layer = LSTMLayer(layer,
                                                                  n_hid)

    ret['out_resh'] = layer = ReshapeLayer(layer, shape=(-1, n_hid))
    ret['output_proj'] = layer = DenseLayer(layer, num_units=word_dims,
                                            nonlinearity=log_softmax)
    # seqlen + 1 steps: the conv step plus the original sequence.
    ret['output'] = layer = ReshapeLayer(
        layer, shape=(batchsize, seqlen + 1, word_dims))
    # The key 'output' is deliberately overwritten with the trimmed layer.
    ret['output'] = layer = SliceLayer(layer, indices=slice(None, -1),
                                       axis=1)
    return ret
def create_model(dbn, input_shape, input_var, mask_shape, mask_var, lstm_size=250, win=T.iscalar('theta)'), output_classes=26):
    """Build a pretrained-encoder + delta + bidirectional-LSTM classifier.

    Weights and biases of ``dbn`` layers 1-4 initialise the encoder, which is
    applied to every frame; ``DeltaLayer`` and a BLSTM follow, the two
    directions are summed, index ``-1`` on axis 1 is selected and a softmax
    layer produces the class probabilities.

    :param dbn: object whose ``get_all_layers()`` returns at least five
        layers; layers 1-4 must expose ``W`` and ``b``
    :param input_shape: shape of the input layer; its last entry is the
        per-frame feature size
    :param input_var: symbolic variable bound to the input layer
    :param mask_shape: shape of the mask layer
    :param mask_var: symbolic variable bound to the mask layer
    :param lstm_size: size passed to ``create_blstm``
    :param win: symbolic scalar handed to ``DeltaLayer``
    :param output_classes: number of softmax units (default 26, the value
        that used to be hard-coded)
    :return: the softmax output layer
    """
    # NOTE(review): the default variable name 'theta)' looks like a typo;
    # left unchanged because it is a runtime string.
    dbn_layers = dbn.get_all_layers()
    weights = [dbn_layers[i].W for i in range(1, 5)]
    biases = [dbn_layers[i].b for i in range(1, 5)]

    gate_parameters = Gate(W_in=las.init.Orthogonal(),
                           W_hid=las.init.Orthogonal(),
                           b=las.init.Constant(0.))
    cell_parameters = Gate(
        W_in=las.init.Orthogonal(), W_hid=las.init.Orthogonal(),
        # Setting W_cell to None denotes that no cell connection will be used.
        W_cell=None, b=las.init.Constant(0.),
        # By convention, the cell nonlinearity is tanh in an LSTM.
        nonlinearity=tanh)

    l_in = InputLayer(input_shape, input_var, 'input')
    l_mask = InputLayer(mask_shape, mask_var, 'mask')

    symbolic_batchsize = l_in.input_var.shape[0]
    symbolic_seqlen = l_in.input_var.shape[1]

    # Collapse the two leading axes so the encoder sees one frame per row...
    l_reshape1 = ReshapeLayer(l_in, (-1, input_shape[-1]), name='reshape1')
    l_encoder = create_pretrained_encoder(weights, biases, l_reshape1)
    encoder_len = las.layers.get_output_shape(l_encoder)[-1]
    # ...and restore them afterwards.
    l_reshape2 = ReshapeLayer(
        l_encoder, (symbolic_batchsize, symbolic_seqlen, encoder_len),
        name='reshape2')
    l_delta = DeltaLayer(l_reshape2, win, name='delta')

    l_lstm, l_lstm_back = create_blstm(l_delta, l_mask, lstm_size,
                                       cell_parameters, gate_parameters,
                                       'lstm1')
    l_sum1 = ElemwiseSumLayer([l_lstm, l_lstm_back], name='sum1')

    l_forward_slice1 = SliceLayer(l_sum1, -1, 1, name='slice1')

    # Feed-forward classification layer over the selected step.
    l_out = DenseLayer(l_forward_slice1, num_units=output_classes,
                       nonlinearity=las.nonlinearities.softmax,
                       name='output')
    return l_out
def nn_fn(self):
    """Build a masked (optionally bidirectional) LSTM encoder with two heads.

    The outputs of all recurrent levels are summed element-wise, index ``-1``
    along axis 1 is taken, a stack of dense layers follows, and two dense
    heads with ``self.z_dim`` units are produced: a linear one and one using
    ``softplus_safe``.

    :return: ``((l_in, l_mask), (l_mean, l_cov))``
    """
    l_in = InputLayer((None, self.max_length, self.emb_dim))
    l_mask = InputLayer((None, self.max_length))

    def masked_lstm(incoming, **extra):
        return LSTMLayer(incoming, num_units=self.rnn_hid_units,
                         mask_input=l_mask, **extra)

    level_outputs = []
    current = l_in
    for _ in range(self.rnn_depth):
        if self.rnn_bidirectional:
            forward = masked_lstm(current)
            backward = masked_lstm(current, backwards=True)
            current = ConcatLayer((forward, backward), axis=-1)
        else:
            current = masked_lstm(current)
        level_outputs.append(current)

    summed = ElemwiseSumLayer(level_outputs)
    current = SliceLayer(summed, indices=-1, axis=1)

    for _ in range(self.nn_dense_depth):
        current = DenseLayer(current, num_units=self.nn_dense_hid_units)

    l_mean = DenseLayer(current, self.z_dim, nonlinearity=None)
    l_cov = DenseLayer(current, self.z_dim, nonlinearity=softplus_safe)
    return (l_in, l_mask), (l_mean, l_cov)
def build_model(self, input_batch):
    """Build a dynamic-filter network driven by the second input channel.

    Channel 0 of ``input_batch`` is the image; channel 1 goes through three
    1x1 convolutions that emit ``filter_size ** 2`` maps, which
    ``DynamicFilterLayer`` then applies to the image.

    :param input_batch: symbolic 4-D input indexed as
        ``input_batch[:, [c], :, :]`` for channels 0 and 1
    :return: ``(output, [output], filters)``
    """
    filter_size = self.dynamic_filter_size[0]
    plane_shape = (None, 1, self.npx, self.npx)

    ## get inputs
    image_in = InputLayer(input_var=input_batch[:, [0], :, :],
                          shape=plane_shape)
    theta = InputLayer(input_var=input_batch[:, [1], :, :],
                       shape=plane_shape)

    # Filter-generating stack: 64 -> 128 -> filter_size**2 maps, all 1x1.
    conv_specs = (
        (64, leaky_rectify),
        (128, leaky_rectify),
        (filter_size**2, identity),
    )
    filters = theta
    for n_filters, activation in conv_specs:
        filters = ConvLayer(filters, num_filters=n_filters,
                            filter_size=(1, 1), stride=(1, 1), pad='same',
                            nonlinearity=activation)

    image = SliceLayer(image_in, indices=slice(0, 1), axis=1)
    output = DynamicFilterLayer([image, filters],
                                filter_size=(filter_size, filter_size, 1),
                                pad=(filter_size // 2, filter_size // 2))
    return output, [output], filters
def _build(self, forget_bias=5.0, grad_clip=10.0):
    """Build architecture

    ``self.depth`` stacked LSTM layers of ``self.width`` units, index ``-1``
    along axis 1 of the last one, and two dense heads. Sets
    ``self.input_var``, ``self.network``, ``self.loc_var`` and
    ``self.conf_var``.

    :param forget_bias: constant initial bias of each LSTM forget gate
    :param grad_clip: ``grad_clipping`` value of each LSTM layer
    """
    nonlinearities = lasagne.nonlinearities

    input_layer = InputLayer(
        shape=(None, self.seq_length, self.input_size), name='input')
    self.input_var = input_layer.input_var

    # Hidden layers
    hidden = input_layer
    for _ in range(self.depth):
        forget_gate = lasagne.layers.Gate(
            b=lasagne.init.Constant(forget_bias))
        hidden = LSTMLayer(hidden, self.width,
                           nonlinearity=nonlinearities.tanh,
                           grad_clipping=grad_clip,
                           forgetgate=forget_gate)

    # Retain last-output state
    last_state = SliceLayer(hidden, -1, 1)

    # Output layers: 2 * num_outputs default-activation units and
    # num_outputs sigmoid units.
    loc_layer = DenseLayer(last_state, self.num_outputs * 2)
    conf_layer = DenseLayer(last_state, self.num_outputs,
                            nonlinearity=nonlinearities.sigmoid)

    # Grab all layers reachable from both heads
    self.network = get_all_layers([loc_layer, conf_layer])

    # Theano expressions for both heads, built with deterministic=True
    self.loc_var, self.conf_var = get_output([loc_layer, conf_layer],
                                             deterministic=True)
def create_attention(self, gru_con, in_con_mask, condition, batch_size, n_hidden_con, **kwargs):
    """Build an additive attention read-out of ``gru_con`` given ``condition``.

    Both inputs are projected to ``self.n_attention`` units, summed, passed
    through tanh and reduced to one score per step; the scores go through
    ``SequenceSoftmax`` and weight ``gru_con``, which is then summed over
    axis 1.

    NOTE(review): ``in_con_mask``, ``batch_size``, ``n_hidden_con`` and
    ``kwargs`` are not used; the mask is read from ``self.in_con_mask``
    instead of the parameter -- confirm this is intended.

    :param gru_con: sequence layer to attend over
    :param condition: layer conditioning the attention
    :return: ExpressionLayer whose output shape drops axis 1 of the input
    """
    mask = self.in_con_mask

    context_proj = non_flattening_dense_layer(
        gru_con, mask, self.n_attention, nonlinearity=None)
    # presumably (batch_size, n_attention) before the broadcast axis is added
    condition_proj = DenseLayer(condition, self.n_attention,
                                nonlinearity=None)
    condition_proj = dimshuffle(condition_proj, (0, 'x', 1))

    energy = NonlinearityLayer(
        ElemwiseSumLayer([context_proj, condition_proj]), T.tanh)

    # One score per step: single-unit projection, then drop that unit axis.
    scores = SliceLayer(
        non_flattening_dense_layer(energy, mask, 1, nonlinearity=None),
        indices=0, axis=2)
    att_softmax = SequenceSoftmax(scores, mask)

    weights = ForgetSizeLayer(dimshuffle(att_softmax, (0, 1, 'x')))
    rep = ElemwiseMergeLayer([weights, gru_con], T.mul)

    return ExpressionLayer(rep, lambda x: T.sum(x, axis=1),
                           lambda s: (s[0], ) + s[2:])
def inverse_convolution_strided_layer(input_layer, original_layer):
    """Build a transposed-convolution counterpart of ``original_layer``.

    The input is reshaped to 4-D with a singleton third axis, passed through
    a ``TransposedConv2DLayer`` that reuses the original layer's filter size,
    stride and ``flip_filters``, trimmed by one element at the end of the
    last axis, and reshaped to the original layer's input shape.

    :param input_layer: layer to invert from
    :param original_layer: strided convolution layer being mirrored; its
        ``output_shape``, ``input_shape``, ``input_layer.num_filters``,
        ``filter_size``, ``stride`` and ``flip_filters`` are read
    :return: layer of shape ``(-1, input_shape[1], input_shape[2])``
    """
    out_shape = original_layer.output_shape
    as_image = ReshapeLayer(input_layer, (-1, out_shape[1], 1, out_shape[2]))

    deconv = TransposedConv2DLayer(
        as_image,
        original_layer.input_layer.num_filters,
        (1, original_layer.filter_size[0]),
        stride=(1, original_layer.stride[0]),
        crop=(0, 0),
        flip_filters=original_layer.flip_filters,
        nonlinearity=nonlinearities.leaky_rectify)

    # Drop the final element of the last axis.
    trimmed = SliceLayer(deconv, indices=slice(None, -1), axis=-1)

    target_shape = (-1, original_layer.input_shape[1],
                    original_layer.input_shape[2])
    return ReshapeLayer(trimmed, target_shape)
def build_convpool_lstm(input_vars, input_shape=None):
    """
    Builds the complete network with LSTM layer to integrate time from
    sequences of EEG images.

    One CNN per time window is built with ``build_cnn``; the weights returned
    for the first window are passed to all later ones. Uses the outer-scope
    names ``grad_clip`` and ``num_classes``.

    :param input_vars: list of EEG images (one image per time window)
    :param input_shape: shape whose first entry is the number of time
        windows; despite the ``None`` default it must be provided
    :return: a pointer to the output of last layer
    """
    n_windows = input_shape[0]
    convnets = []
    W_init = None
    # Parallel CNNs with shared weights, one per time window
    for window in range(n_windows):
        if window:
            convnet, _ = build_cnn(input_vars[window], input_shape, W_init)
        else:
            convnet, W_init = build_cnn(input_vars[window], input_shape)
        convnets.append(FlattenLayer(convnet))

    # convnets is [n_windows][n_samples, features]; concatenate and regroup
    # to (n_samples, n_windows, features), the layout fed to the LSTM.
    n_features = get_output_shape(convnets[0])[1]
    net = ReshapeLayer(ConcatLayer(convnets), ([0], n_windows, n_features))

    # Two stacked LSTM layers
    for _ in range(2):
        net = LSTMLayer(net, num_units=32, grad_clipping=grad_clip,
                        nonlinearity=lasagne.nonlinearities.sigmoid)

    # After the LSTM you either reshape (keep all predictions) or slice
    # (keep just the last one); here only the last prediction is kept.
    # http://lasagne.readthedocs.org/en/latest/modules/layers/recurrent.html
    # https://github.com/Lasagne/Recipes/blob/master/examples/lstm_text_generation.py
    net = SliceLayer(net, -1, 1)

    # A fully-connected layer of 256 units with 50% dropout on its inputs:
    net = DenseLayer(lasagne.layers.dropout(net, p=.5), num_units=256,
                     nonlinearity=lasagne.nonlinearities.rectify)

    # And, finally, the output layer with 50% dropout on its inputs:
    net = DenseLayer(lasagne.layers.dropout(net, p=.5),
                     num_units=num_classes,
                     nonlinearity=lasagne.nonlinearities.softmax)
    return net
def gru_hidden_readout(column, indices):
    """Slice every layer of ``column`` along axis 1.

    :param column: iterable of layers, each with a ``name`` attribute
    :param indices: indices passed to ``SliceLayer``
    :return: list with one ``SliceLayer`` per input layer, named
        ``<layer.name>/slice`` (joined with ``os.path.join``)
    """
    return [
        SliceLayer(layer, indices, axis=1,
                   name=os.path.join(layer.name, "slice"))
        for layer in column
    ]
def build_convpool_mix(input_vars, nb_classes, grad_clip=110, imsize=32, n_colors=3, n_timewin=7):
    """
    Builds the complete network with LSTM and 1D-conv layers combined

    :param input_vars: list of EEG images (one image per time window)
    :param nb_classes: number of classes
    :param grad_clip: the gradient messages are clipped to the given value during
                        the backward pass.
    :param imsize: size of the input image (assumes a square input)
    :param n_colors: number of color channels in the image
    :param n_timewin: number of time windows in the snippet
    :return: a pointer to the output of last layer
    """
    convnets = []
    w_init = None
    # Parallel CNNs with shared weights, one per time window
    for window in range(n_timewin):
        if window:
            convnet, _ = build_cnn(input_vars[window], w_init=w_init,
                                   imsize=imsize, n_colors=n_colors)
        else:
            convnet, w_init = build_cnn(input_vars[window], imsize=imsize,
                                        n_colors=n_colors)
        convnets.append(FlattenLayer(convnet))

    # convnets is [n_timewin][n_samples, features]; concatenate and regroup
    # to (n_samples, n_timewin, features).
    n_features = get_output_shape(convnets[0])[1]
    sequence = ReshapeLayer(ConcatLayer(convnets),
                            ([0], n_timewin, n_features))

    # 1D-conv branch: input should be
    # (batch_size, num_input_channels, input_length)
    conv_out = Conv1DLayer(DimshuffleLayer(sequence, (0, 2, 1)), 64, 3)
    conv_out = FlattenLayer(conv_out)

    # LSTM branch: input is (batch size, SEQ_LENGTH, num_features); only
    # the last step is kept.
    lstm = LSTMLayer(sequence, num_units=128, grad_clipping=grad_clip,
                     nonlinearity=lasagne.nonlinearities.tanh)
    lstm_out = SliceLayer(lstm, -1, 1)

    # Merge 1D-Conv and LSTM outputs
    dense_input = ConcatLayer([conv_out, lstm_out])

    # A fully-connected layer of 512 units with 50% dropout on its inputs:
    hidden = DenseLayer(lasagne.layers.dropout(dense_input, p=.5),
                        num_units=512,
                        nonlinearity=lasagne.nonlinearities.rectify)

    # And, finally, the nb_classes-unit softmax output layer (no dropout
    # is applied on its inputs here):
    return DenseLayer(hidden, num_units=nb_classes,
                      nonlinearity=lasagne.nonlinearities.softmax)
def gru_stack_readout(column, indices):
    """Wrap every layer in a ``GRUStackReadoutLayer`` and slice it on axis 1.

    :param column: iterable of layers, each with a ``name`` attribute
    :param indices: indices passed to ``SliceLayer``
    :return: list with one ``SliceLayer`` per input layer; the readout layer
        is named ``<layer.name>/stack`` and the slice ``<layer.name>/stack/slice``
        (joined with ``os.path.join``)
    """

    def readout(layer):
        stack_name = os.path.join(layer.name, "stack")
        stacked = GRUStackReadoutLayer(layer, name=stack_name)
        return SliceLayer(stacked, indices, axis=1,
                          name=os.path.join(stack_name, "slice"))

    return [readout(layer) for layer in column]
def util_slice_layer(self, layer, persons_cnt, factor):
    """Cut ``layer`` into ``factor`` equally sized slices along axis 2.

    Each slice spans ``persons_cnt // factor`` entries; entries left over
    when ``persons_cnt`` is not a multiple of ``factor`` are not covered.

    :param layer: layer to slice
    :param persons_cnt: number of entries on axis 2 to distribute
    :param factor: number of slices to produce
    :return: list of ``factor`` SliceLayer instances, in axis order
    """
    group_size = persons_cnt // factor
    # Consecutive boundaries 0, g, 2g, ...; neighbours delimit one group.
    bounds = [k * group_size for k in range(factor + 1)]
    return [
        SliceLayer(layer, indices=slice(start, stop), axis=2)
        for start, stop in zip(bounds, bounds[1:])
    ]
def sliding_window_input(input_layer, window_size=5):
    """Stack ``window_size`` shifted copies of the input on a new last axis.

    Copy ``i`` covers positions ``window_size - 1 - i`` up to (but excluding)
    the last ``i`` positions of the final axis, so every copy has the same
    length ``L - window_size + 1`` for an input of length ``L``.

    The dimshuffle pattern ``(0, 1, 'x')`` names two input axes, so the
    input is assumed to be 2-D -- TODO confirm with callers.

    :param input_layer: layer to build the windows from
    :param window_size: number of shifted copies (default 5, the value that
        used to be hard-coded)
    :return: ConcatLayer joining all copies along the new last axis
    """
    sub_input = []
    # range() instead of the Python-2-only xrange(); same iteration.
    for i in range(window_size):
        # -0 would give an empty slice, hence None for the first copy.
        stop = -i if i > 0 else None
        indices = slice(window_size - i - 1, stop)
        shifted = SliceLayer(input_layer, indices, axis=-1)
        sub_input.append(DimshuffleLayer(shifted, (0, 1, 'x')))
    return ConcatLayer(sub_input, -1)
def build_lstm(input_vars, input_shape=None):
    '''
    1) InputLayer
    2) ReshapeLayer
    3) DimshuffleLayer
    4) LSTM Layer 1
    5) LSTM Layer 2
    6) Slice Layer
    7) Fully Connected Layer 1 w/ dropout rectify
    8) Fully Connected Layer 2 w/ dropout softmax

    Uses the outer-scope names ``num_input_channels``, ``grad_clip`` and
    ``num_classes``. Despite the ``None`` default, ``input_shape`` must be
    provided: entries 0, -3, -2 and -1 are read.

    :param input_vars: symbolic variable bound to the input layer
    :param input_shape: shape from which the input layer shape is derived
    :return: the softmax output layer
    '''
    full_shape = (input_shape[0], None, num_input_channels,
                  input_shape[-3], input_shape[-2], input_shape[-1])
    net = InputLayer(shape=full_shape, input_var=input_vars)

    # Keep the two leading axes, flatten the rest, then swap the leading axes.
    net = ReshapeLayer(net, ([0], [1], -1))
    net = DimshuffleLayer(net, (1, 0, 2))

    # Two stacked LSTM layers; input to LSTM should have the shape
    # (batch size, SEQ_LENGTH, num_features).
    for _ in range(2):
        net = LSTMLayer(net, num_units=128, grad_clipping=grad_clip,
                        nonlinearity=lasagne.nonlinearities.tanh)

    # After the LSTM you either reshape (keep all predictions) or slice
    # (keep just the last one); here only the last prediction is kept.
    # http://lasagne.readthedocs.org/en/latest/modules/layers/recurrent.html
    # https://github.com/Lasagne/Recipes/blob/master/examples/lstm_text_generation.py
    net = SliceLayer(net, -1, 1)

    # A fully-connected layer of 256 units with 50% dropout on its inputs:
    net = DenseLayer(lasagne.layers.dropout(net, p=.5), num_units=256,
                     nonlinearity=lasagne.nonlinearities.rectify)

    # And, finally, the output layer with 50% dropout on its inputs:
    return DenseLayer(lasagne.layers.dropout(net, p=.5),
                      num_units=num_classes,
                      nonlinearity=lasagne.nonlinearities.softmax)
def build_convpool_lstm(input_vars, nb_classes, grad_clip=110, imsize=32, n_colors=3, n_timewin=7):
    """
    Builds the complete network with LSTM layer to integrate time from sequences of EEG images.

    :param input_vars: list of EEG images (one image per time window)
    :param nb_classes: number of classes
    :param grad_clip: the gradient messages are clipped to the given value during
                        the backward pass.
    :param imsize: size of the input image (assumes a square input)
    :param n_colors: number of color channels in the image
    :param n_timewin: number of time windows in the snippet
    :return: a pointer to the output of last layer
    """
    convnets = []
    w_init = None
    # Parallel CNNs with shared weights, one per time window
    for window in range(n_timewin):
        if window:
            convnet, _ = build_cnn(input_vars[window], w_init=w_init,
                                   imsize=imsize, n_colors=n_colors)
        else:
            convnet, w_init = build_cnn(input_vars[window], imsize=imsize,
                                        n_colors=n_colors)
        convnets.append(FlattenLayer(convnet))

    # convnets is [n_timewin][n_samples, features]; concatenate and regroup
    # to (n_samples, n_timewin, features), the layout fed to the LSTM.
    n_features = get_output_shape(convnets[0])[1]
    net = ReshapeLayer(ConcatLayer(convnets), ([0], n_timewin, n_features))

    # Input to LSTM should have the shape as (batch size, SEQ_LENGTH, num_features)
    net = LSTMLayer(net, num_units=128, grad_clipping=grad_clip,
                    nonlinearity=lasagne.nonlinearities.tanh)

    # We only need the final prediction, we isolate that quantity and feed it
    # to the next layer.
    net = SliceLayer(net, -1, 1)

    # A fully-connected layer of 256 units with 50% dropout on its inputs:
    net = DenseLayer(lasagne.layers.dropout(net, p=.5), num_units=256,
                     nonlinearity=lasagne.nonlinearities.rectify)

    # And, finally, the output layer with 50% dropout on its inputs:
    return DenseLayer(lasagne.layers.dropout(net, p=.5),
                      num_units=nb_classes,
                      nonlinearity=lasagne.nonlinearities.softmax)
def build_rnn_network(rnnmodel,X_sym,hid_init_sym):
    """Build a stacked recurrent network and return its layers in a dict.

    Each of the ``num_layers`` stages applies a dense input projection, an
    optional residual sum, optional batch normalisation (before or after the
    recurrent layer depending on ``args.use_bn_afterrnn``), the recurrent
    layer itself and optional dropout. A softmax dense layer over
    ``outputclass`` units is attached at the end.

    Reads these module-level names: ``batch_size``, ``seq_len``,
    ``outputclass``, ``units``, ``num_layers``, ``acc_units``, ``ini_W``,
    ``args``, ``act``, ``U_bound``, ``use_bn_embed``, ``use_dropout``,
    ``droplayers``, ``droprate`` and ``taxdrop``.

    :param rnnmodel: callable used to construct each recurrent layer; it is
        called as ``rnnmodel(incoming, num_units, hid_init=...,
        W_hid_to_hid=..., nonlinearity=..., only_return_final=False,
        grad_clipping=...)``
    :param X_sym: symbolic input variable for the ``(batch_size, seq_len)``
        InputLayer
    :param hid_init_sym: symbolic variable holding the initial hidden states
        of all layers; columns ``acc_units[l-1]:acc_units[l]`` are used for
        layer ``l``
    :return: dict mapping layer names to layers; ``'out'`` is the softmax
        output and ``'hid_out'`` the concatenated last states of all layers
    """
    net = {}
    net['input0'] = InputLayer((batch_size, seq_len),X_sym)
    # Embed the input0 entries into units[0]-dimensional vectors.
    net['input']=lasagne.layers.EmbeddingLayer(net['input0'],outputclass,units[0])
    # Swap the two leading axes: change to (time, batch_size, hidden_units).
    net['rnn0']=DimshuffleLayer(net['input'],(1,0,2))
    if use_bn_embed:
        net['rnn0']=BatchNorm_step_timefirst_Layer(net['rnn0'],axes=(0,1),epsilon=args.epsilon )
    for l in range(1, num_layers+1):
        # Initial hidden state of layer l: its own column slice of hid_init_sym.
        net['hiddeninput%d'%l] = InputLayer((batch_size, units[l-1]),hid_init_sym[:,acc_units[l-1]:acc_units[l]])
        # Input projection: collapse to 2-D, apply a linear dense layer, then
        # restore the (seq_len, batch_size, features) layout. Note that the
        # previous layer's 'rnn%d' entry is overwritten in place.
        net['rnn%d'%(l-1)]=ReshapeLayer(net['rnn%d'%(l-1)], (batch_size* seq_len, -1))
        net['rnn%d'%(l-1)]=DenseLayer(net['rnn%d'%(l-1)],units[l-1],W=ini_W,b=lasagne.init.Constant(args.ini_b),nonlinearity=None)
        net['rnn%d'%(l-1)]=ReshapeLayer(net['rnn%d'%(l-1)], (seq_len, batch_size, -1))
        # Residual connection: active every args.residual_layers layers, once
        # at least args.residual_layers layers precede this one.
        if args.use_residual and l>args.residual_layers and (l-1)%args.residual_layers==0:
            if units[l - 1]!=units[l - 1 - args.residual_layers]:
                # Widths differ: project the skipped 'sum' layer to units[l-1]
                # (linear dense + batch norm) so the element-wise sum is valid.
                net['leftbranch%d' % (l - 1)] = ReshapeLayer(net['sum%d'%(l-args.residual_layers)], (batch_size * seq_len, -1))
                net['leftbranch%d' % (l - 1)] = DenseLayer(net['leftbranch%d' % (l - 1)], units[l - 1], W=ini_W, nonlinearity=None)
                net['leftbranch%d' % (l - 1)] = ReshapeLayer(net['leftbranch%d' % (l - 1)], (seq_len, batch_size, -1))
                net['leftbranch%d' % (l - 1)] = BatchNorm_step_timefirst_Layer(net['leftbranch%d' % (l - 1)], axes=(0, 1), epsilon=args.epsilon)
                print('left branch')
            else:
                # Same width: reuse the earlier 'sum' layer directly.
                net['leftbranch%d' % (l - 1)] = net['sum%d'%(l-args.residual_layers)]
            net['sum%d'%l]=ElemwiseSumLayer((net['rnn%d'%(l-1)],net['leftbranch%d' % (l - 1)]))
        else:
            # No residual at this layer: 'sum' is just the projected input.
            net['sum%d'%l]=net['rnn%d'%(l-1)]
        net['rnn%d'%l]=net['sum%d'%l]
        if not args.use_bn_afterrnn:
            # Batch norm placed before the recurrent layer.
            net['rnn%d'%l]=BatchNorm_step_timefirst_Layer(net['rnn%d'%l],axes= (0,1),beta=lasagne.init.Constant(args.ini_b),epsilon=args.epsilon)
        # Recurrent weights are drawn uniformly from [ini_hid_start, U_bound];
        # the range is made symmetric around zero only when act is tanh.
        ini_hid_start=0
        if act==tanh:
            ini_hid_start=-1*U_bound
        net['rnn%d'%l]=rnnmodel(net['rnn%d'%l],units[l-1],hid_init=net['hiddeninput%d'%l],W_hid_to_hid=Uniform(range=(ini_hid_start,U_bound)),nonlinearity=act,only_return_final=False, grad_clipping=args.gradclipvalue)
        # Last entry along axis 0.
        # NOTE(review): presumably rnnmodel returns a time-first tensor, making
        # this the final time step -- confirm against rnnmodel.
        net['last_state%d'%l]=SliceLayer(net['rnn%d'%l],-1, axis=0)
        # Accumulate every layer's last state side by side along axis 1.
        if l==1:
            net['hid_out']=net['last_state%d'%l]
        else:
            net['hid_out']=ConcatLayer([net['hid_out'], net['last_state%d'%l]],axis=1)
        # Dropout every `droplayers` layers, unless args.bn_drop is set.
        if use_dropout and l%droplayers==0 and not args.bn_drop:
            net['rnn%d'%l]=lasagne.layers.DropoutLayer(net['rnn%d'%l], p=droprate, shared_axes=taxdrop)
        if args.use_bn_afterrnn:
            # Batch norm placed after the recurrent layer (and after dropout).
            net['rnn%d'%l]=BatchNorm_step_timefirst_Layer(net['rnn%d'%l],axes= (0,1),epsilon=args.epsilon)
    # Swap the two leading axes back, then flatten to 2-D rows of
    # units[num_layers-1] features for the softmax output layer.
    net['rnn%d'%num_layers]=DimshuffleLayer(net['rnn%d'%num_layers],(1,0,2))
    net['reshape_rnn']=ReshapeLayer(net['rnn%d'%num_layers],(-1,units[num_layers-1]))
    net['out']=DenseLayer(net['reshape_rnn'],outputclass,nonlinearity=softmax)
    return net
def build_convpool_mix(input_vars, numTimeWin, nb_classes, GRAD_CLIP=100):
    """
    Assemble the network that combines an LSTM branch and a 1D-convolution
    branch to integrate a sequence of EEG images over time.

    :param input_vars: list of EEG images (one image per time window)
    :param numTimeWin: number of time windows
    :param nb_classes: number of classes
    :param GRAD_CLIP: the gradient messages are clipped to the given value during
                        the backward pass.
    :return: a pointer to the output of last layer
    """
    flat_cnns = []
    first_weights = None
    # One CNN per time window. The first call yields the weight init that is
    # handed to every later call (weight sharing, per build_cnn's contract).
    for win in range(numTimeWin):
        if win:
            cnn, _ = build_cnn(input_vars[win], first_weights)
        else:
            cnn, first_weights = build_cnn(input_vars[win])
        flat_cnns.append(FlattenLayer(cnn))

    # flat_cnns is a list of numTimeWin layers, each [n_samples, features].
    # Concatenate them, then fold the result back into
    # [n_samples, numTimeWin, features].
    merged = ConcatLayer(flat_cnns)
    n_features = get_output_shape(flat_cnns[0])[1]
    sequence = ReshapeLayer(merged, ([0], numTimeWin, n_features))

    # 1D-conv branch: the conv layer wants
    # (batch_size, num_input_channels, input_length), so move the feature
    # axis in front of the time axis first.
    channels_first = DimshuffleLayer(sequence, (0, 2, 1))
    conv_branch = Conv1DLayer(channels_first, 64, 3)
    conv_branch = FlattenLayer(conv_branch)

    # LSTM branch: input has the shape (batch size, SEQ_LENGTH, num_features)
    recurrent = LSTMLayer(sequence, num_units=128, grad_clipping=GRAD_CLIP,
                          nonlinearity=lasagne.nonlinearities.tanh)
    # After an LSTM layer you either reshape (keep every prediction) or slice
    # (keep one). Here only index -1 along axis 1 is kept.
    # http://lasagne.readthedocs.org/en/latest/modules/layers/recurrent.html
    # https://github.com/Lasagne/Recipes/blob/master/examples/lstm_text_generation.py
    lstm_branch = SliceLayer(recurrent, -1, 1)

    # Merge 1D-Conv and LSTM outputs
    joined = ConcatLayer([conv_branch, lstm_branch])

    # Fully-connected layer of 512 units with 50% dropout on its inputs.
    hidden = DenseLayer(lasagne.layers.dropout(joined, p=.5),
                        num_units=512,
                        nonlinearity=lasagne.nonlinearities.rectify)
    # Output layer: softmax over nb_classes. No dropout is applied here.
    output = DenseLayer(hidden, num_units=nb_classes,
                        nonlinearity=lasagne.nonlinearities.softmax)
    return output
def _blstm_module(incoming, n_hidden, bl_dropout, bn, mask=None):
    """Stack bidirectional LSTM layers and return their concatenated end slices.

    One ``_blstm_layer`` is built per entry of ``n_hidden``. Between
    consecutive layers (never after the last one) batch normalisation and/or
    dropout are inserted, and each insertion is appended to ``self.log``.

    ``self`` is not a parameter of this function; it is resolved from the
    enclosing scope.

    :param incoming: layer feeding the first bidirectional layer
    :param n_hidden: sequence of hidden sizes, one per bidirectional layer;
        must be non-empty, since the final slices read the layers produced by
        the last loop iteration
    :param bl_dropout: dropout probability applied between layers; no dropout
        layer is added unless it is greater than 0
    :param bn: when truthy, apply ``batch_norm`` between layers
    :param mask: optional mask forwarded to every ``_blstm_layer`` call
    :return: ConcatLayer (axis 1) of the forward slice at index -1 and the
        backward slice at index 0, both taken along axis 1
    """
    l_prev = incoming
    last_idx = len(n_hidden) - 1
    for i, n_hid in enumerate(n_hidden):
        l_prev, l_forward, l_backward = _blstm_layer(l_prev, n_hid, mask=mask)
        if i < last_idx:
            # Regularisation only goes between layers, not after the last.
            if bn:
                self.log += "\nAdding batchnorm"
                l_prev = batch_norm(l_prev)
            if bl_dropout > .0:
                # Log the rate that is actually applied (bl_dropout); the
                # previous code formatted an unrelated name, `dropout`.
                self.log += "\nAdding between layer dropout: %.2f" % bl_dropout
                l_prev = DropoutLayer(l_prev, p=bl_dropout)

    # Slicing out the last units for classification.
    # NOTE(review): index 0 for the backward layer presumably holds its final
    # step because it runs over the sequence in reverse -- confirm against
    # _blstm_layer.
    l_forward_slice = SliceLayer(l_forward, -1, 1)
    l_backward_slice = SliceLayer(l_backward, 0, 1)
    l_prev = ConcatLayer([l_forward_slice, l_backward_slice], axis=1)
    return l_prev