def residual_block(l, increase_dim=False, projection=False):
    input_num_filters = l.output_shape[1]
    if increase_dim:
        first_stride = (2, 2)
        out_num_filters = input_num_filters * 2
    else:
        first_stride = (1, 1)
        out_num_filters = input_num_filters

    stack_1 = batch_norm(ConvLayer(l, num_filters=out_num_filters, filter_size=(3, 3),
                                   stride=first_stride, nonlinearity=rectify, pad='same',
                                   W=lasagne.init.HeNormal(gain='relu'), flip_filters=False))
    stack_2 = batch_norm(ConvLayer(stack_1, num_filters=out_num_filters, filter_size=(3, 3),
                                   stride=(1, 1), nonlinearity=None, pad='same',
                                   W=lasagne.init.HeNormal(gain='relu'), flip_filters=False))

    # add shortcut connections
    if increase_dim:
        if projection:
            # projection shortcut, as option B in paper
            projection = batch_norm(ConvLayer(l, num_filters=out_num_filters, filter_size=(1, 1),
                                              stride=(2, 2), nonlinearity=None, pad='same',
                                              b=None, flip_filters=False))
            block = NonlinearityLayer(ElemwiseSumLayer([stack_2, projection]),
                                      nonlinearity=rectify)
        else:
            # identity shortcut, as option A in paper
            identity = ExpressionLayer(l, lambda X: X[:, :, ::2, ::2],
                                       lambda s: (s[0], s[1], s[2] // 2, s[3] // 2))
            padding = PadLayer(identity, [out_num_filters // 4, 0, 0], batch_ndim=1)
            block = NonlinearityLayer(ElemwiseSumLayer([stack_2, padding]),
                                      nonlinearity=rectify)
    else:
        block = NonlinearityLayer(ElemwiseSumLayer([stack_2, l]), nonlinearity=rectify)

    return block
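# --- Usage sketch (not part of the example above): a hedged illustration of how a block
# like the one defined above is typically stacked into a small CIFAR-10-style ResNet,
# following the Lasagne Recipes layout. `build_small_resnet` and `n` are illustrative
# names, and the imports mirror the aliases the snippet assumes
# (ConvLayer = lasagne.layers.Conv2DLayer, rectify from lasagne.nonlinearities).
import lasagne
from lasagne.layers import InputLayer, GlobalPoolLayer, DenseLayer, batch_norm
from lasagne.layers import Conv2DLayer as ConvLayer
from lasagne.nonlinearities import rectify, softmax

def build_small_resnet(input_var=None, n=3):
    # 32x32 RGB input, as in the CIFAR-10 recipe
    l = InputLayer(shape=(None, 3, 32, 32), input_var=input_var)
    # stem: one 3x3 convolution with batch norm and ReLU
    l = batch_norm(ConvLayer(l, num_filters=16, filter_size=(3, 3), stride=(1, 1),
                             nonlinearity=rectify, pad='same',
                             W=lasagne.init.HeNormal(gain='relu'), flip_filters=False))
    # three stages of n residual blocks; the first block of stages 2 and 3
    # doubles the filter count and halves the spatial resolution
    for _ in range(n):
        l = residual_block(l)
    l = residual_block(l, increase_dim=True)
    for _ in range(n - 1):
        l = residual_block(l)
    l = residual_block(l, increase_dim=True)
    for _ in range(n - 1):
        l = residual_block(l)
    # global average pooling followed by a softmax classifier
    l = GlobalPoolLayer(l)
    return DenseLayer(l, num_units=10, W=lasagne.init.HeNormal(), nonlinearity=softmax)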
def residual_block(l, increase_dim=False, first=False, filters=16):
    if increase_dim:
        first_stride = (2, 2)
    else:
        first_stride = (1, 1)

    if first:
        # hacky solution to keep layers correct
        bn_pre_relu = l
    else:
        # contains the BN -> ReLU portion, steps 1 to 2
        bn_pre_conv = BatchNormLayer(l)
        bn_pre_relu = NonlinearityLayer(bn_pre_conv, rectify)

    # contains the weight -> BN -> ReLU portion, steps 3 to 5
    conv_1 = batch_norm(ConvLayer(bn_pre_relu, num_filters=filters, filter_size=(3, 3),
                                  stride=first_stride, nonlinearity=rectify, pad='same',
                                  W=HeNormal(gain='relu')))

    dropout = DropoutLayer(conv_1, p=0.3)

    # contains the last weight portion, step 6
    conv_2 = ConvLayer(dropout, num_filters=filters, filter_size=(3, 3), stride=(1, 1),
                       nonlinearity=None, pad='same', W=HeNormal(gain='relu'))

    # add shortcut connections
    if increase_dim:
        # projection shortcut, as option B in paper
        projection = ConvLayer(l, num_filters=filters, filter_size=(1, 1), stride=(2, 2),
                               nonlinearity=None, pad='same', b=None)
        block = ElemwiseSumLayer([conv_2, projection])
    elif first:
        # projection shortcut, as option B in paper
        projection = ConvLayer(l, num_filters=filters, filter_size=(1, 1), stride=(1, 1),
                               nonlinearity=None, pad='same', b=None)
        block = ElemwiseSumLayer([conv_2, projection])
    else:
        block = ElemwiseSumLayer([conv_2, l])

    return block
def residual_block(l, increase_dim=False, projection=True, first=False, filters=16):
    if increase_dim:
        first_stride = (2, 2)
    else:
        first_stride = (1, 1)

    if first:
        bn_pre_relu = l
    else:
        bn_pre_conv = BatchNormLayer(l)
        bn_pre_relu = NonlinearityLayer(bn_pre_conv, rectify)

    conv_1 = batch_norm(ConvLayer(bn_pre_relu, num_filters=filters, filter_size=(3, 3),
                                  stride=first_stride, nonlinearity=rectify, pad='same',
                                  W=HeNormal(gain='relu')))

    dropout = DropoutLayer(conv_1, p=0.3)

    conv_2 = ConvLayer(dropout, num_filters=filters, filter_size=(3, 3), stride=(1, 1),
                       nonlinearity=None, pad='same', W=HeNormal(gain='relu'))

    if increase_dim:
        projection = ConvLayer(l, num_filters=filters, filter_size=(1, 1), stride=(2, 2),
                               nonlinearity=None, pad='same', b=None)
        block = ElemwiseSumLayer([conv_2, projection])
    elif first:
        projection = ConvLayer(l, num_filters=filters, filter_size=(1, 1), stride=(1, 1),
                               nonlinearity=None, pad='same', b=None)
        block = ElemwiseSumLayer([conv_2, projection])
    else:
        block = ElemwiseSumLayer([conv_2, l])

    return block
def resnet_block(input_, filter_size, num_filters, activation=relu, downsample=False,
                 no_output_act=True, use_shortcut=False, use_wn=False,
                 W_init=Normal(0.02), **kwargs):
    """
    Resnet block layer.
    """
    normalization = weight_norm if use_wn else batch_norm
    block = []
    _stride = 2 if downsample else 1

    # conv -> BN -> ReLU
    block.append(normalization(conv_layer(input_, filter_size, num_filters, _stride,
                                          'same', nonlinearity=activation, W=W_init)))
    # conv -> BN
    block.append(normalization(conv_layer(block[-1], filter_size, num_filters, 1, 'same',
                                          nonlinearity=None, W=W_init)))

    if downsample or use_shortcut:
        shortcut = conv_layer(input_, 1, num_filters, _stride, 'valid', nonlinearity=None)
        block.append(ElemwiseSumLayer([shortcut, block[-1]]))
    else:
        block.append(ElemwiseSumLayer([input_, block[-1]]))

    if not no_output_act:
        block.append(NonlinearityLayer(block[-1], nonlinearity=activation))

    return block[-1]
def _residual_block_(self, l, increase_dim=False, projection=True, first=False):
    input_num_filters = l.output_shape[1]
    if increase_dim:
        first_stride = (2, 2)
        out_num_filters = input_num_filters * 2
    else:
        first_stride = (1, 1)
        out_num_filters = input_num_filters

    if first:
        # hacky solution to keep layers correct
        bn_pre_relu = l
    else:
        # contains the BN -> ReLU portion, steps 1 to 2
        bn_pre_conv = BatchNormLayer(l)
        bn_pre_relu = NonlinearityLayer(bn_pre_conv, rectify)

    # contains the weight -> BN -> ReLU portion, steps 3 to 5
    conv_1 = batch_norm(ConvLayer(bn_pre_relu, num_filters=out_num_filters,
                                  filter_size=(3, 3), stride=first_stride,
                                  nonlinearity=rectify, pad='same', W=he_norm))

    # contains the last weight portion, step 6
    conv_2 = ConvLayer(conv_1, num_filters=out_num_filters, filter_size=(3, 3),
                       stride=(1, 1), nonlinearity=None, pad='same', W=he_norm)

    # add shortcut connections
    if increase_dim:
        # projection shortcut, as option B in paper
        projection = ConvLayer(bn_pre_relu, num_filters=out_num_filters, filter_size=(1, 1),
                               stride=(2, 2), nonlinearity=None, pad='same', b=None)
        block = ElemwiseSumLayer([conv_2, projection])
    else:
        block = ElemwiseSumLayer([conv_2, l])

    return block
def residual_block(l, increase_dim=False, projection=True, first=False):
    """
    Create a residual learning building block with two stacked 3x3 conv layers, as in
    'Identity Mappings in Deep Residual Networks', Kaiming He et al., 2016
    (https://arxiv.org/abs/1603.05027).
    """
    input_num_filters = l.output_shape[1]
    if increase_dim:
        first_stride = (2, 2)
        out_num_filters = input_num_filters * 2
    else:
        first_stride = (1, 1)
        out_num_filters = input_num_filters

    if first:
        # hacky solution to keep layers correct
        bn_pre_relu = l
    else:
        # contains the BN -> ReLU portion, steps 1 to 2
        bn_pre_conv = BatchNormLayer(l)
        bn_pre_relu = NonlinearityLayer(bn_pre_conv, rectify)

    # contains the weight -> BN -> ReLU portion, steps 3 to 5
    conv_1 = batch_norm(ConvLayer(bn_pre_relu, num_filters=out_num_filters,
                                  filter_size=(3, 3), stride=first_stride,
                                  nonlinearity=rectify, pad='same', W=he_norm))

    # contains the last weight portion, step 6
    conv_2 = ConvLayer(conv_1, num_filters=out_num_filters, filter_size=(3, 3),
                       stride=(1, 1), nonlinearity=None, pad='same', W=he_norm)

    # add shortcut connections
    if increase_dim:
        # projection shortcut, as option B in paper
        projection = ConvLayer(l, num_filters=out_num_filters, filter_size=(1, 1),
                               stride=(2, 2), nonlinearity=None, pad='same', b=None)
        block = ElemwiseSumLayer([conv_2, projection])
    else:
        block = ElemwiseSumLayer([conv_2, l])

    return block
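# --- Usage sketch (hedged, illustrative): how a pre-activation block like the one above
# is typically chained. The first block of the network is built with first=True, since
# the stem convolution already ends in BN -> ReLU, and because each residual branch ends
# without an activation, a final BN -> ReLU is applied after the last block.
# `build_preact_stack` and `n` are illustrative names; the block itself relies on the
# same ConvLayer / he_norm / batch_norm aliases assumed by the snippet above.
from lasagne.layers import BatchNormLayer, NonlinearityLayer, GlobalPoolLayer
from lasagne.nonlinearities import rectify

def build_preact_stack(l, n=3):
    # first stage keeps the input width; skip the duplicate BN/ReLU on block 1
    l = residual_block(l, first=True)
    for _ in range(1, n):
        l = residual_block(l)
    # two further stages, each doubling the filters and halving the resolution
    for _ in range(2):
        l = residual_block(l, increase_dim=True)
        for _ in range(1, n):
            l = residual_block(l)
    # the branches end linear, so finish with BN -> ReLU before pooling
    bn_post = NonlinearityLayer(BatchNormLayer(l), rectify)
    return GlobalPoolLayer(bn_post)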
def cnn(self):
    self._network['input'] = pelu(batch_norm(lasagne.layers.InputLayer(
        shape=(None, self._number_of_channel, 8, 14), input_var=self._x,
        pad='same', W=lasagne.init.HeNormal(gain='relu'))))
    print self._network['input'].output_shape

    first_part_input = SliceLayer(self._network['input'], indices=slice(0, 2), axis=1)
    print first_part_input.output_shape
    second_part_input = SliceLayer(self._network['input'], indices=slice(2, 4), axis=1)
    print second_part_input.output_shape

    first_dropout_2 = self.cnn_separate_convolutions(first_part_input, first_part=True)
    second_dropout_2 = self.cnn_separate_convolutions(second_part_input, first_part=False)

    self._network['sumwise_layer'] = ElemwiseSumLayer(
        [first_dropout_2, second_dropout_2,
         ScaleLayer(self._network['sumwise_layer_pre_training'])])

    self._network['conv3'] = pelu(batch_norm(lasagne.layers.Conv2DLayer(
        self._network['sumwise_layer'], num_filters=48, filter_size=(3, 3),
        W=lasagne.init.HeNormal(gain='relu'))))
    print self._network['conv3'].output_shape

    self._network['dropout_3'] = mc_dropout.MCDropout(
        self._network['conv3'], p=self._percentage_dropout_cnn_layers)

    self._network['merge_with_pre_training_dense_layer_1'] = ElemwiseSumLayer(
        [ScaleLayer(self._network['dropout_3_pre_training']), self._network['dropout_3']])

    print np.shape(self._network['pre_training_fc1_full'].W.get_value())

    self._network['fc1'] = mc_dropout.MCDropout(pelu(batch_norm(lasagne.layers.DenseLayer(
        self._network['merge_with_pre_training_dense_layer_1'], num_units=100,
        W=lasagne.init.HeNormal(gain='relu')))), p=self._percentage_dropout_dense_layers)
    print self._network['fc1'].output_shape

    self._network['merge_with_pre_training_dense_layer_2'] = ElemwiseSumLayer(
        [ScaleLayer(self._network['fc1_pre_training']), self._network['fc1']])

    self._network['fc2'] = mc_dropout.MCDropout(pelu(batch_norm(lasagne.layers.DenseLayer(
        self._network['merge_with_pre_training_dense_layer_2'], num_units=100,
        W=lasagne.init.HeNormal(gain='relu')))), p=self._percentage_dropout_dense_layers)
    print self._network['fc2'].output_shape

    self._network['merge_with_pre_training_output'] = ElemwiseSumLayer(
        [ScaleLayer(self._network['fc2_pre_training']), self._network['fc2']])

    self._network['output'] = lasagne.layers.DenseLayer(
        self._network['merge_with_pre_training_output'], num_units=self._number_of_class,
        nonlinearity=lasagne.nonlinearities.softmax, W=lasagne.init.HeNormal(gain='relu'))
    print self._network['output'].output_shape
def cnn_fn(self):
    l_in = InputLayer((None, self.max_length, self.vocab_size))
    l_in_T = DimshuffleLayer(l_in, (0, 2, 1))

    l_causal_conv = DilatedConv1DLayer(l_in_T, num_filters=self.nn_residual_channels,
                                       dilation=1, nonlinearity=None)

    l_prev = l_causal_conv
    skip_layers = []

    for h in range(len(self.nn_dilations)):
        l_filter = DilatedConv1DLayer(l_prev, num_filters=self.nn_dilation_channels,
                                      dilation=self.nn_dilations[h], nonlinearity=tanh)
        l_gate = DilatedConv1DLayer(l_prev, num_filters=self.nn_dilation_channels,
                                    dilation=self.nn_dilations[h], nonlinearity=sigmoid)
        l_merge = ElemwiseMergeLayer([l_filter, l_gate], merge_function=T.mul)

        l_dense = Conv1DLayer(l_merge, num_filters=self.nn_residual_channels,
                              filter_size=1, nonlinearity=None)
        l_residual = ElemwiseSumLayer([l_prev, l_dense])

        l_skip = Conv1DLayer(l_merge, num_filters=self.nn_residual_channels,
                             filter_size=1, nonlinearity=None)
        skip_layers.append(l_skip)

        l_prev = l_residual

    l_skip_sum = NonlinearityLayer(ElemwiseSumLayer(skip_layers), nonlinearity=elu)
    l_final = DimshuffleLayer(l_skip_sum, (0, 2, 1))

    return l_final
def create_model(substreams, mask_shape, mask_var, lstm_size=250, output_classes=26,
                 fusiontype='concat', w_init_fn=las.init.Orthogonal(), use_peepholes=True):
    gate_parameters = Gate(W_in=w_init_fn, W_hid=w_init_fn, b=las.init.Constant(0.))
    cell_parameters = Gate(
        W_in=w_init_fn, W_hid=w_init_fn,
        # Setting W_cell to None denotes that no cell connection will be used.
        W_cell=None, b=las.init.Constant(0.),
        # By convention, the cell nonlinearity is tanh in an LSTM.
        nonlinearity=tanh)

    l_mask = InputLayer(mask_shape, mask_var, 'mask')
    symbolic_seqlen_raw = l_mask.input_var.shape[1]

    # We'll combine the forward and backward layer output by summing.
    # Merge layers take in lists of layers to merge as input.
    if fusiontype == 'adasum':
        l_fuse = AdaptiveElemwiseSumLayer(substreams, name='adasum1')
    elif fusiontype == 'sum':
        l_fuse = ElemwiseSumLayer(substreams, name='sum1')
    elif fusiontype == 'concat':
        l_fuse = ConcatLayer(substreams, axis=-1, name='concat')

    f_lstm_agg, b_lstm_agg = create_blstm(l_fuse, l_mask, lstm_size, cell_parameters,
                                          gate_parameters, 'lstm_agg')
    l_sum2 = ElemwiseSumLayer([f_lstm_agg, b_lstm_agg], name='sum2')

    # reshape to (num_examples * seq_len, lstm_size)
    l_reshape3 = ReshapeLayer(l_sum2, (-1, lstm_size), name='reshape3')

    # Now, we can apply feed-forward layers as usual.
    # We want the network to predict a classification for the sequence,
    # so we'll use the number of classes.
    l_softmax = DenseLayer(l_reshape3, num_units=output_classes,
                           nonlinearity=las.nonlinearities.softmax, name='softmax')

    l_out = ReshapeLayer(l_softmax, (-1, symbolic_seqlen_raw, output_classes), name='output')

    return l_out, l_fuse
def make_block(self, name, input, units, inputs=[]):
    self.make_layer(name + '-A', input, units, alpha=0.1)
    if len(inputs) > 0:
        result = inputs[0]
        i = 1
        while i < len(inputs):
            result = ElemwiseSumLayer([inputs[i], result])
            i += 1
    # print('input for make_block shape: ', input.shape)
    # print('units for make_block: ', units)
    # self.make_layer(name+'-B', self.last_layer(), units, alpha=1.0)
    return ElemwiseSumLayer([result, self.last_layer()]) \
        if args.generator_residual else self.last_layer()
def nn_fn(self):
    l_in = InputLayer((None, self.max_length, self.emb_dim))
    l_mask = InputLayer((None, self.max_length))

    l_h = l_in
    l_h_all = []

    for h in range(self.rnn_depth):
        if self.rnn_bidirectional:
            l_fwd = LSTMLayer(l_h, num_units=self.rnn_hid_units, mask_input=l_mask)
            l_bwd = LSTMLayer(l_h, num_units=self.rnn_hid_units, mask_input=l_mask,
                              backwards=True)
            l_h = ConcatLayer((l_fwd, l_bwd), axis=-1)
        else:
            l_h = LSTMLayer(l_h, num_units=self.rnn_hid_units, mask_input=l_mask)
        l_h_all.append(l_h)

    l_h = SliceLayer(ElemwiseSumLayer(l_h_all), indices=-1, axis=1)

    for i in range(self.nn_dense_depth):
        l_h = DenseLayer(l_h, num_units=self.nn_dense_hid_units)

    l_mean = DenseLayer(l_h, self.z_dim, nonlinearity=None)
    l_cov = DenseLayer(l_h, self.z_dim, nonlinearity=softplus_safe)

    return (l_in, l_mask), (l_mean, l_cov)
def create_model(input_shape, input_var, mask_shape, mask_var, lstm_size=250,
                 output_classes=26, w_init=las.init.Orthogonal()):
    gate_parameters = Gate(W_in=w_init, W_hid=w_init, b=las.init.Constant(0.))
    cell_parameters = Gate(
        W_in=w_init, W_hid=w_init,
        # Setting W_cell to None denotes that no cell connection will be used.
        W_cell=None, b=las.init.Constant(0.),
        # By convention, the cell nonlinearity is tanh in an LSTM.
        nonlinearity=tanh)

    l_in = InputLayer(input_shape, input_var, 'input')
    l_mask = InputLayer(mask_shape, mask_var, 'mask')

    f_lstm, b_lstm = create_blstm(l_in, l_mask, lstm_size, cell_parameters,
                                  gate_parameters, 'lstm')

    l_sum = ElemwiseSumLayer([f_lstm, b_lstm], name='sum')
    l_forward_slice1 = SliceLayer(l_sum, -1, 1, name='slice1')

    # Now, we can apply feed-forward layers as usual.
    # We want the network to predict a classification for the sequence,
    # so we'll use the number of classes.
    l_out = DenseLayer(l_forward_slice1, num_units=output_classes,
                       nonlinearity=las.nonlinearities.softmax, name='output')

    return l_out
def getTrainedRNN():
    """Read from file and set the params."""
    # TODO: Refactor so as to do this only once
    input_size = 39
    hidden_size = 50
    num_output_classes = 29
    learning_rate = 0.001
    output_size = num_output_classes + 1
    batch_size = None
    input_seq_length = None
    gradient_clipping = 5

    l_in = InputLayer(shape=(batch_size, input_seq_length, input_size))
    # Unnecessary in this version. Just collecting the info so that we can reshape
    # the output back to the original shape.
    n_batch, n_time_steps, n_features = l_in.input_var.shape
    # h_1 = DenseLayer(l_in, num_units=hidden_size, nonlinearity=clipped_relu)
    l_rec_forward = RecurrentLayer(l_in, num_units=hidden_size,
                                   grad_clipping=gradient_clipping,
                                   nonlinearity=clipped_relu)
    l_rec_backward = RecurrentLayer(l_in, num_units=hidden_size,
                                    grad_clipping=gradient_clipping,
                                    nonlinearity=clipped_relu, backwards=True)
    l_rec_accumulation = ElemwiseSumLayer([l_rec_forward, l_rec_backward])
    l_rec_reshaped = ReshapeLayer(l_rec_accumulation, (-1, hidden_size))
    l_h2 = DenseLayer(l_rec_reshaped, num_units=hidden_size, nonlinearity=clipped_relu)
    l_out = DenseLayer(l_h2, num_units=output_size,
                       nonlinearity=lasagne.nonlinearities.linear)
    l_out_reshaped = ReshapeLayer(l_out, (n_batch, n_time_steps, output_size))  # Reshaping back
    l_out_softmax = NonlinearityLayer(l_out, nonlinearity=lasagne.nonlinearities.softmax)
    l_out_softmax_reshaped = ReshapeLayer(l_out_softmax, (n_batch, n_time_steps, output_size))

    with np.load('CTC_model.npz') as f:
        param_values = [f['arr_%d' % i] for i in range(len(f.files))]
    lasagne.layers.set_all_param_values(l_out_softmax_reshaped, param_values, trainable=True)
    output = lasagne.layers.get_output(l_out_softmax_reshaped)
    return l_in, output
def nn_fn(self):
    l_in = InputLayer((None, self.max_length, self.vocab_size))

    l_current = l_in

    for h in range(self.nn_depth):
        l_h_x = DenseLayer(l_in, num_units=self.nn_hid_units, nonlinearity=None, b=None)
        l_h_h = DenseLayer(l_current, num_units=self.nn_hid_units, nonlinearity=None, b=None)
        l_current = NonlinearityLayer(ElemwiseSumLayer([l_h_x, l_h_h]),
                                      nonlinearity=self.nn_hid_nonlinearity)

    mean_nn = DenseLayer(l_current, num_units=self.z_dim, nonlinearity=linear, b=None)
    cov_nn = DenseLayer(l_current, num_units=self.z_dim, nonlinearity=elu_plus_one, b=None)

    return mean_nn, cov_nn
def create_attention(self, gru_con, in_con_mask, condition, batch_size,
                     n_hidden_con, **kwargs):
    # (batch_size, n_attention)
    gru_cond2 = non_flattening_dense_layer(gru_con, self.in_con_mask,
                                           self.n_attention, nonlinearity=None)
    gru_que2 = DenseLayer(condition, self.n_attention, nonlinearity=None)
    gru_que2 = dimshuffle(gru_que2, (0, 'x', 1))

    att = ElemwiseSumLayer([gru_cond2, gru_que2])
    att = NonlinearityLayer(att, T.tanh)
    att = SliceLayer(non_flattening_dense_layer(att, self.in_con_mask, 1,
                                                nonlinearity=None),
                     indices=0, axis=2)

    att_softmax = SequenceSoftmax(att, self.in_con_mask)

    rep = ElemwiseMergeLayer(
        [ForgetSizeLayer(dimshuffle(att_softmax, (0, 1, 'x'))), gru_con],
        T.mul)

    return ExpressionLayer(rep, lambda x: T.sum(x, axis=1),
                           lambda s: (s[0],) + s[2:])
def add_residual_block(self, *args, **kwargs):
    # set the root (if not done before)
    if self.root is None:
        self.root = self.model

    # add a residual block
    super(RoR, self).add_residual_block(*args, **kwargs)

    # get the layer with the element-wise sum
    model = self.model
    last = None
    while not isinstance(model, ElemwiseSumLayer):
        last = model
        model = model.input_layer

    layers = []
    # create shortcuts from all underlying layers
    current = len(self.resblocks)
    for typ, step in zip(self.othertypes, self.steps):
        if not current % step:
            i = current - step - 1
            layer = self.resblocks[i] if i >= 0 else self.root
            layers.append(self.shortcut(layer, model.output_shape, typ))
        else:
            break

    # apply the changes to the network (if any)
    if not layers:
        return

    model = ElemwiseSumLayer(model.input_layers + layers)
    if last is not None:
        last.input_layer = model
    else:
        self.model = model
    self.resblocks[-1] = model
def hypernet(net, hidden_size=512, layers=0, flow='IAF', output_size=None, nlb=nlb,
             copies=1,  # 2 for bias + scale; 1 for scale only
             **kargs):
    if output_size is None:
        all_layers = lasagne.layers.get_all_layers(net)
        output_size = sum([
            layer.output_shape[1] for layer in all_layers
            if not isinstance(layer, InputLayer)
            and not isinstance(layer, ElemwiseSumLayer)
            and nlb(layer)
        ]) * copies

    logdets_layers = []
    layer = InputLayer(shape=(None, output_size))

    layer_temp = LinearFlowLayer(layer)
    layer = IndexLayer(layer_temp, 0)
    logdets_layers.append(IndexLayer(layer_temp, 1))

    if layers > 0:
        if flow == 'RealNVP':
            layer, ld_layers = NVP_dense_layer(layer, hidden_size, layers, **kargs)
        elif flow == 'IAF':
            layer, ld_layers = IAF_dense_layer(layer, hidden_size, layers, **kargs)
        logdets_layers = logdets_layers + ld_layers

    return layer, ElemwiseSumLayer(logdets_layers), output_size
def create_model(dbn, input_shape, input_var, mask_shape, mask_var,
                 lstm_size=250, win=T.iscalar('theta')):
    dbn_layers = dbn.get_all_layers()
    weights = []
    biases = []
    weights.append(dbn_layers[1].W)
    weights.append(dbn_layers[2].W)
    weights.append(dbn_layers[3].W)
    weights.append(dbn_layers[4].W)
    biases.append(dbn_layers[1].b)
    biases.append(dbn_layers[2].b)
    biases.append(dbn_layers[3].b)
    biases.append(dbn_layers[4].b)

    gate_parameters = Gate(W_in=las.init.Orthogonal(), W_hid=las.init.Orthogonal(),
                           b=las.init.Constant(0.))
    cell_parameters = Gate(
        W_in=las.init.Orthogonal(), W_hid=las.init.Orthogonal(),
        # Setting W_cell to None denotes that no cell connection will be used.
        W_cell=None, b=las.init.Constant(0.),
        # By convention, the cell nonlinearity is tanh in an LSTM.
        nonlinearity=tanh)

    l_in = InputLayer(input_shape, input_var, 'input')
    l_mask = InputLayer(mask_shape, mask_var, 'mask')

    symbolic_batchsize = l_in.input_var.shape[0]
    symbolic_seqlen = l_in.input_var.shape[1]

    l_reshape1 = ReshapeLayer(l_in, (-1, input_shape[-1]), name='reshape1')
    l_encoder = create_pretrained_encoder(weights, biases, l_reshape1)
    encoder_len = las.layers.get_output_shape(l_encoder)[-1]

    l_reshape2 = ReshapeLayer(l_encoder,
                              (symbolic_batchsize, symbolic_seqlen, encoder_len),
                              name='reshape2')
    l_delta = DeltaLayer(l_reshape2, win, name='delta')

    l_lstm, l_lstm_back = create_blstm(l_delta, l_mask, lstm_size, cell_parameters,
                                       gate_parameters, 'lstm1')

    l_sum1 = ElemwiseSumLayer([l_lstm, l_lstm_back], name='sum1')
    l_forward_slice1 = SliceLayer(l_sum1, -1, 1, name='slice1')

    # Now, we can apply feed-forward layers as usual.
    # We want the network to predict a classification for the sequence,
    # so we'll use the number of classes.
    l_out = DenseLayer(l_forward_slice1, num_units=26,
                       nonlinearity=las.nonlinearities.softmax, name='output')

    return l_out
def make_block(self, name, input, units):
    self.make_layer(name + '-A', input, units, alpha=0.25)
    self.make_layer(name + '-B', self.last_layer(), units, alpha=1.0)
    if args.generator_residual:
        self.network[name + '-R'] = ElemwiseSumLayer([input, self.last_layer()])
    return self.last_layer()
def vox_res(lin):
    l = nl(bn(lin))
    l = conv3d(l, 32)
    l = nl(bn(l))
    l = conv3d(l, 32)
    l = ElemwiseSumLayer([l, lin])
    return l
def create_blstm_dropout(input_vars, mask_vars, num_inputs, hidden_layer_size,
                         num_outputs, dropout=0.2, noise=0.2):
    network = InputLayer((None, None, num_inputs), input_vars)
    mask = InputLayer((None, None), mask_vars)
    batch_size_theano, seqlen, _ = network.input_var.shape

    network = GaussianNoiseLayer(network, sigma=noise)
    for i in range(4):
        forward = LSTMLayer(network, hidden_layer_size, mask_input=mask, learn_init=True)
        backward = LSTMLayer(network, hidden_layer_size, mask_input=mask,
                             learn_init=True, backwards=True)
        network = DropoutLayer(
            GaussianNoiseLayer(ElemwiseSumLayer([forward, backward]), noise), dropout)

    network = ReshapeLayer(network, (-1, hidden_layer_size))
    network = DenseLayer(network, num_outputs, nonlinearity=softmax)
    network = ReshapeLayer(network, (batch_size_theano, seqlen, num_outputs))

    return network
def normal(ilayer, fmaps, activation, t='enc', ltype='normal'):
    if t == 'enc':
        x = batch_norm(lasagne.layers.Conv2DLayer(
            ilayer, num_filters=fmaps[0], filter_size=(3, 3),
            nonlinearity=None, pad=1, W=initf))
    else:
        x = batch_norm(lasagne.layers.Conv2DLayer(
            ilayer, num_filters=fmaps[0], filter_size=(3, 3),
            nonlinearity=activation, pad=1, W=initf))

    if ltype == 'normal':
        x = batch_norm(lasagne.layers.Conv2DLayer(
            x, num_filters=fmaps[1], filter_size=(3, 3),
            nonlinearity=activation, pad=1, W=initf))
    elif ltype == 'residual':
        x = batch_norm(lasagne.layers.Conv2DLayer(
            x, num_filters=fmaps[1], filter_size=(3, 3),
            nonlinearity=None, pad=1, W=initf))
        y = lasagne.layers.Conv2DLayer(
            ilayer, num_filters=fmaps[1], filter_size=(1, 1),
            nonlinearity=None, pad='same', W=initf)
        x = ElemwiseSumLayer([x, y])
        x = NonlinearityLayer(x, nonlinearity=activation)

    return x
def make_recursive_block(self, name, input, units=128, filter_size=(3, 3),
                         stride=(1, 1), pad=(1, 1), res_blocks=9):
    residual = input
    input = ConvLayer(input, units, filter_size, stride=stride, pad=pad, nonlinearity=None)

    out = input
    for _ in range(res_blocks):  # number of res units per one recursive block
        out = lasagne.layers.rrelu(out)
        out = ConvLayer(out, units, filter_size, stride=stride, pad=pad, nonlinearity=None)
        out = lasagne.layers.rrelu(out)
        out = ConvLayer(out, units, filter_size, stride=stride, pad=pad, nonlinearity=None)
        out = ElemwiseSumLayer([out, input])

    out = lasagne.layers.rrelu(out)
    out = ConvLayer(out, units, filter_size, stride=stride, pad=pad, nonlinearity=None)
    out = ElemwiseSumLayer([out, residual])

    self.network[name + '&'] = out
    return out
def build_residual_block(incoming_layer, ratio_n_filter=1.0, ratio_size=1.0,
                         has_left_branch=False, upscale_factor=4, ix=''):
    simple_block_name_pattern = ['res%s_branch%i%s', 'bn%s_branch%i%s',
                                 'res%s_branch%i%s_relu']

    net = OrderedDict()

    # right branch
    net_tmp, last_layer_name = build_simple_block(
        incoming_layer, map(lambda s: s % (ix, 2, 'a'), simple_block_name_pattern),
        int(layers.get_output_shape(incoming_layer)[1] * ratio_n_filter), 1,
        int(1.0 / ratio_size), 0)
    net.update(net_tmp)

    net_tmp, last_layer_name = build_simple_block(
        net[last_layer_name], map(lambda s: s % (ix, 2, 'b'), simple_block_name_pattern),
        layers.get_output_shape(net[last_layer_name])[1], 3, 1, 1)
    net.update(net_tmp)

    net_tmp, last_layer_name = build_simple_block(
        net[last_layer_name], map(lambda s: s % (ix, 2, 'c'), simple_block_name_pattern),
        layers.get_output_shape(net[last_layer_name])[1] * upscale_factor, 1, 1, 0,
        nonlin=None)
    net.update(net_tmp)

    right_tail = net[last_layer_name]
    left_tail = incoming_layer

    # left branch
    if has_left_branch:
        net_tmp, last_layer_name = build_simple_block(
            incoming_layer, map(lambda s: s % (ix, 1, ''), simple_block_name_pattern),
            int(layers.get_output_shape(incoming_layer)[1] * 4 * ratio_n_filter), 1,
            int(1.0 / ratio_size), 0, nonlin=None)
        net.update(net_tmp)
        left_tail = net[last_layer_name]

    net['res%s' % ix] = ElemwiseSumLayer([left_tail, right_tail], coeffs=1)
    net['res%s_relu' % ix] = NonlinearityLayer(net['res%s' % ix], nonlinearity=rectify)

    return net, 'res%s_relu' % ix
def dnn_sep(M, W1, W2, hh=.0001, ep=5000, d=0, sp=.0001, spb=3, al='rprop'):
    # GPU cached data
    _M = theano.shared(M.T.astype(float64))
    dum = Th.vector('dum')

    # Get layer sizes
    K = []
    for i in range(len(W1)):
        K.append([W1[i].shape[0], W2[i].shape[0]])
    K.append([M.T.shape[1], M.T.shape[1]])

    # We have weights to discover, init = 2/(Nin+Nout)
    H = theano.shared(
        sqrt(2. / (K[0][0] + K[0][1] + M.shape[1])) *
        random.rand(M.T.shape[0], K[0][0] + K[0][1]).astype(float64))
    fI = InputLayer(shape=(M.T.shape[0], K[0][0] + K[0][1]), input_var=H)

    # Split in two pathways, one for each source's autoencoder
    H1 = (len(W1) + 1) * [None]
    H2 = (len(W1) + 1) * [None]
    H1[0] = SliceLayer(fI, indices=slice(0, K[0][0]), axis=1)
    H2[0] = SliceLayer(fI, indices=slice(K[0][0], K[0][0] + K[0][1]), axis=1)

    # Put the subsequent layers
    for i in range(len(W1)):
        H1[i + 1] = DenseLayer(H1[i], num_units=K[i + 1][0], W=W1[i].astype(float64),
                               nonlinearity=lambda x: psoftplus(x, spb), b=None)
        H2[i + 1] = DenseLayer(H2[i], num_units=K[i + 1][1], W=W2[i].astype(float64),
                               nonlinearity=lambda x: psoftplus(x, spb), b=None)

    # Add the two approximations
    R = ElemwiseSumLayer([H1[-1], H2[-1]])

    # Cost function
    Ro = get_output(R) + eps
    cost = Th.mean(_M * (Th.log(_M + eps) - Th.log(Ro)) - _M + Ro) + 0 * Th.mean(dum)
    for i in range(len(H1) - 1):
        cost += sp * Th.mean(abs(get_output(H1[i]))) + sp * Th.mean(abs(get_output(H2[i])))

    # Train it using Lasagne
    opt = downhill.build(al, loss=cost, inputs=[dum], params=[H])
    train = downhill.Dataset(array([d]).astype(float64), batch_size=0)
    er = downhill_train(opt, train, hh, ep, None)

    # Get outputs
    _r = nget(R, dum, array([0]).astype(float64)).T + eps
    _r1 = nget(H1[-1], dum, array([0]).astype(float64)).T
    _r2 = nget(H2[-1], dum, array([0]).astype(float64)).T

    return _r, _r1, _r2, er
def create_deep_rnn(layer, layer_class, depth, layer_mask=None, residual=False,
                    skip_connections=False, bidir=False, dropout=None,
                    init_state_layers=None, name=None, **kwargs):
    """
    (Deep) RNN with possible skip/residual connections, bidirectional, dropout
    """
    if init_state_layers:
        assert len(init_state_layers) == depth
    layers = [layer]
    for i in range(depth):
        if skip_connections and i > 0:
            layer = concat([layers[0], layer], axis=2)

        if init_state_layers:
            hid_init = init_state_layers[i]
        else:
            hid_init = init.Constant(0.)

        new_layer = layer_class(layer, hid_init=hid_init, mask_input=layer_mask,
                                name=name, **kwargs)
        if bidir:
            layer_bw = layer_class(layer, mask_input=layer_mask, backwards=True,
                                   name=name, **kwargs)
            new_layer = concat([new_layer, layer_bw], axis=2)

        if residual:
            layer = ElemwiseSumLayer([layer, new_layer])
        else:
            layer = new_layer

        if skip_connections and i == depth - 1:
            layer = concat([layer] + layers[1:], axis=2)

        if dropout:
            layer = DropoutLayer(layer, p=dropout)

        # We need to apply the mask, otherwise there are problems with multiple layers
        if layer_mask and i < depth - 1:
            layer = apply_mask(layer, layer_mask)

        layers.append(layer)

    return layers[1:]
def fan_module_improved(inp, net, prefix, features, nb_filter, scale,
                        upsampling_strategy="repeat"):
    r"""
    Implementation of a simple LSTM-style gating block for feature-based manipulation.

    Takes input $x$ and features $z$ and performs a pixelwise manipulation of inp:

    $$ y = x \sigma(f(z)) + \tanh(g(z)) (1 - \sigma(f(z))) $$

    $f$ and $g$ are functions implemented by 1x1 convolutions followed by upsampling
    to match the dimension of $x$.
    """
    # Input gate derived directly from the feature representation; sigmoid rescales to 0...1
    input_gate = ConvLayer(features, nb_filter, 1, pad=0, flip_filters=False,
                           nonlinearity=sigmoid, b=nn.init.Constant(0.5))

    # Addition gate uses the inverse activation of the input gate
    addition = ConvLayer(features, nb_filter, 1, pad=0, flip_filters=False,
                         nonlinearity=rectify)

    input_gate_upsampled = upsample(input_gate, scale, mode=upsampling_strategy)
    addition_gate_upsampled = upsample(addition, scale, mode=upsampling_strategy)

    x_forget = ElemwiseProdLayer([inp, input_gate_upsampled],
                                 cropping=(None, None, "center", "center"))
    x_added = ElemwiseSumLayer([x_forget, addition_gate_upsampled],
                               cropping=(None, None, "center", "center"))

    ll = [input_gate, addition, input_gate_upsampled, addition_gate_upsampled,
          x_forget, x_added]
    layers = locals()
    net.update({prefix + "/" + k: layers[k] for k in layers.keys() if layers[k] in ll})

    return x_added
def add_residual_block(self, num_filters=None, dim_inc=False):
    shortcut = self.shortcut(self.model, num_filters=num_filters, dim_inc=dim_inc)
    self.shortcuts.append(shortcut)
    residual = self.residual(shortcut, num_filters, dim_inc=False)
    self.model = ElemwiseSumLayer([residual, shortcut])
    self.resblocks.append(self.model)
def make_block(self, name, input, units):
    # create another layer
    self.make_layer(name + '-A', input, units, alpha=0.1)
    # performs an elementwise sum of the input layers,
    # which all must have the same shape
    return ElemwiseSumLayer([input, self.last_layer()]) \
        if args.generator_residual else self.last_layer()
def make_block(self, name, input, units):
    self.make_res_block(name + '-A', input, units, alpha=0.1)
    # print('input for make_block shape: ', input.shape)
    # print('units for make_block: ', units)
    # self.make_layer(name+'-B', self.last_layer(), units, alpha=1.0)
    return ElemwiseSumLayer([input, self.last_layer()]) \
        if args.generator_residual else self.last_layer()