def test_get_output_for_shared_axes(self, shared_axes):
    """The dropout mask must be identical across all axes in ``shared_axes``."""
    from lasagne.layers.noise import DropoutLayer
    layer = DropoutLayer((2, 4, 7, 9), shared_axes=shared_axes)
    input = theano.shared(numpy.ones((2, 4, 7, 9)))
    result = layer.get_output_for(input)
    result_eval = result.eval()
    # If the mask is shared across the given axes, every slice along them is
    # identical, so the mean over those axes (broadcast back via keepdims)
    # reproduces the full output exactly.
    # Consistency fix: use the ``numpy`` alias like the sibling tests do,
    # instead of mixing in ``np``.
    assert numpy.allclose(result_eval.mean(axis=shared_axes, keepdims=True),
                          result_eval)
def test_specified_rng(self, input_layer):
    """Re-seeding lasagne's global RNG must make dropout reproducible.

    Fix: remember and restore the original RNG (as the other variant of this
    test does) so the seeded RandomState does not leak into later tests.
    """
    from lasagne.layers.noise import DropoutLayer
    input = theano.shared(numpy.ones((100, 100)))
    seed = 123456789
    rng = get_rng()  # remember the current global RNG ...
    set_rng(RandomState(seed))
    result = DropoutLayer(input_layer).get_output_for(input)
    result_eval1 = result.eval()
    set_rng(RandomState(seed))
    result = DropoutLayer(input_layer).get_output_for(input)
    result_eval2 = result.eval()
    set_rng(rng)  # ... and restore it so other tests are unaffected
    assert numpy.allclose(result_eval1, result_eval2)
def conv_pool_block(l, num_filters, filter_length, i_block):
    """One dropout -> conv (-> conv) -> (BN | nonlin) -> pool stage.

    Closure: reads hyperparameters from the enclosing object's ``self``.
    Returns the top layer of the stage.
    """
    block_name = 'combined_conv_{:d}'.format(i_block)
    l = DropoutLayer(l, p=self.drop_prob)
    l = Conv2DLayer(l, num_filters=num_filters,
                    filter_size=[filter_length, 1],
                    nonlinearity=identity, name=block_name)
    if self.double_time_convs:
        # Optional second time-convolution with the same shape and name.
        l = Conv2DLayer(l, num_filters=num_filters,
                        filter_size=[filter_length, 1],
                        nonlinearity=identity, name=block_name)
    if self.batch_norm:
        l = BatchNormLayer(l, epsilon=1e-4, alpha=self.batch_norm_alpha,
                           nonlinearity=self.later_nonlin)
    else:
        l = NonlinearityLayer(l, nonlinearity=self.later_nonlin)
    l = Pool2DLayer(l, pool_size=[self.pool_time_length, 1],
                    stride=[1, 1], mode=self.later_pool_mode)
    l = StrideReshapeLayer(l, n_stride=self.pool_time_stride)
    l = NonlinearityLayer(l, self.later_pool_nonlin)
    return l
def get_layers(self):
    """Build the shallow conv net and return all of its layers in order."""
    l = InputLayer([None, self.in_chans, self.input_time_length, 1])
    if self.split_first_layer:
        # Move channels into the last dim so the spatial conv can span them.
        l = DimshuffleLayer(l, pattern=[0, 3, 2, 1])
        l = Conv2DLayer(l, num_filters=self.n_filters_time,
                        filter_size=[self.filter_time_length, 1],
                        nonlinearity=identity, name='time_conv')
        l = Conv2DAllColsLayer(l, num_filters=self.n_filters_spat,
                               filter_size=[1, -1], nonlinearity=identity,
                               name='spat_conv')
    else:
        # Keep channel dim in first dim, so it will also be convolved over.
        # NOTE(review): this branch reads ``self.num_filters_time`` while the
        # branch above reads ``self.n_filters_time`` -- confirm both
        # attributes actually exist on this class.
        l = Conv2DLayer(l, num_filters=self.num_filters_time,
                        filter_size=[self.filter_time_length, 1],
                        nonlinearity=identity, name='time_conv')
    if self.batch_norm:
        l = BatchNormLayer(l, epsilon=1e-4, alpha=self.batch_norm_alpha,
                           nonlinearity=self.conv_nonlin)
    else:
        l = NonlinearityLayer(l, nonlinearity=self.conv_nonlin)
    l = Pool2DLayer(l, pool_size=[self.pool_time_length, 1],
                    stride=[1, 1], mode=self.pool_mode)
    l = NonlinearityLayer(l, self.pool_nonlin)
    l = StrideReshapeLayer(l, n_stride=self.pool_time_stride)
    l = DropoutLayer(l, p=self.drop_prob)
    # Final "dense" layer expressed as a convolution over the remaining time.
    l = Conv2DLayer(l, num_filters=self.n_classes,
                    filter_size=[self.final_dense_length, 1],
                    nonlinearity=identity, name='final_dense')
    l = FinalReshapeLayer(l)
    l = NonlinearityLayer(l, softmax)
    return lasagne.layers.get_all_layers(l)
def test_specified_rng(self, input_layer):
    """Dropout output must be reproducible when the global RNG is re-seeded."""
    from lasagne.layers.noise import DropoutLayer
    data = theano.shared(numpy.ones((100, 100)))
    seed = 123456789
    previous_rng = get_rng()
    evaluated = []
    for _ in range(2):
        set_rng(RandomState(seed))
        out = DropoutLayer(input_layer).get_output_for(data)
        evaluated.append(out.eval())
    # Reset to original RNG so other tests are unaffected.
    set_rng(previous_rng)
    assert numpy.allclose(evaluated[0], evaluated[1])
def test_get_output_for_p_float16(self, input_layer):
    """A float16 drop probability must not upcast float16 input."""
    from lasagne.layers.noise import DropoutLayer
    drop = DropoutLayer(input_layer, p=numpy.float16(0.5))
    data = theano.shared(numpy.ones((100, 100), dtype=numpy.float16))
    assert drop.get_output_for(data).dtype == data.dtype
def layer_p_02(self, input_layer):
    """Fixture: a dropout layer with drop probability 0.2."""
    from lasagne.layers.noise import DropoutLayer
    return DropoutLayer(input_layer, p=0.2)
def layer_no_rescale(self, input_layer):
    """Fixture: a dropout layer with activation rescaling turned off."""
    from lasagne.layers.noise import DropoutLayer
    return DropoutLayer(input_layer, rescale=False)
def test_get_output_for_p_float32(self, input_layer):
    """A float32 drop probability must not upcast float32 input."""
    from lasagne.layers.noise import DropoutLayer
    drop = DropoutLayer(input_layer, p=numpy.float32(0.5))
    data = theano.shared(numpy.ones((100, 100), dtype=numpy.float32))
    assert drop.get_output_for(data).dtype == data.dtype
def get_layers(self):
    """Build the deep conv net and return all of its layers in order."""
    l = InputLayer([None, self.in_chans, self.input_time_length, 1])
    if self.split_first_layer:
        # Move channels into the last dim so the spatial conv can span them.
        l = DimshuffleLayer(l, pattern=[0, 3, 2, 1])
        l = DropoutLayer(l, p=self.drop_in_prob)
        l = Conv2DLayer(l, num_filters=self.num_filters_time,
                        filter_size=[self.filter_time_length, 1],
                        nonlinearity=identity, name='time_conv')
        if self.double_time_convs:
            l = Conv2DLayer(l, num_filters=self.num_filters_time,
                            filter_size=[self.filter_time_length, 1],
                            nonlinearity=identity, name='time_conv')
        l = Conv2DAllColsLayer(l, num_filters=self.num_filters_spat,
                               filter_size=[1, -1], nonlinearity=identity,
                               name='spat_conv')
    else:
        # Keep channel dim in first dim, so it will also be convolved over.
        l = DropoutLayer(l, p=self.drop_in_prob)
        l = Conv2DLayer(l, num_filters=self.num_filters_time,
                        filter_size=[self.filter_time_length, 1],
                        nonlinearity=identity, name='time_conv')
        if self.double_time_convs:
            l = Conv2DLayer(l, num_filters=self.num_filters_time,
                            filter_size=[self.filter_time_length, 1],
                            nonlinearity=identity, name='time_conv')
    if self.batch_norm:
        l = BatchNormLayer(l, epsilon=1e-4, alpha=self.batch_norm_alpha,
                           nonlinearity=self.first_nonlin)
    else:
        l = NonlinearityLayer(l, nonlinearity=self.first_nonlin)
    l = Pool2DLayer(l, pool_size=[self.pool_time_length, 1],
                    stride=[1, 1], mode=self.first_pool_mode)
    l = StrideReshapeLayer(l, n_stride=self.pool_time_stride)
    l = NonlinearityLayer(l, self.first_pool_nonlin)

    def conv_pool_block(l, num_filters, filter_length, i_block):
        """One dropout -> conv (-> conv) -> (BN | nonlin) -> pool stage."""
        l = DropoutLayer(l, p=self.drop_prob)
        l = Conv2DLayer(l, num_filters=num_filters,
                        filter_size=[filter_length, 1],
                        nonlinearity=identity,
                        name='combined_conv_{:d}'.format(i_block))
        if self.double_time_convs:
            l = Conv2DLayer(l, num_filters=num_filters,
                            filter_size=[filter_length, 1],
                            nonlinearity=identity,
                            name='combined_conv_{:d}'.format(i_block))
        if self.batch_norm:
            l = BatchNormLayer(l, epsilon=1e-4,
                               alpha=self.batch_norm_alpha,
                               nonlinearity=self.later_nonlin)
        else:
            l = NonlinearityLayer(l, nonlinearity=self.later_nonlin)
        l = Pool2DLayer(l, pool_size=[self.pool_time_length, 1],
                        stride=[1, 1], mode=self.later_pool_mode)
        l = StrideReshapeLayer(l, n_stride=self.pool_time_stride)
        l = NonlinearityLayer(l, self.later_pool_nonlin)
        return l

    # Stages 2-4 share the same structure; only width/filter length differ.
    stage_params = [(self.num_filters_2, self.filter_length_2),
                    (self.num_filters_3, self.filter_length_3),
                    (self.num_filters_4, self.filter_length_4)]
    for i_block, (n_filt, filt_len) in enumerate(stage_params, start=2):
        l = conv_pool_block(l, n_filt, filt_len, i_block)

    # Final part: transformed dense layer expressed as a convolution.
    l = DropoutLayer(l, p=self.drop_prob)
    l = Conv2DLayer(l, num_filters=self.n_classes,
                    filter_size=[self.final_dense_length, 1],
                    nonlinearity=identity, name='final_dense')
    l = FinalReshapeLayer(l)
    l = NonlinearityLayer(l, self.final_nonlin)
    return lasagne.layers.get_all_layers(l)
def get_layers(self):
    """Build the residual network and return all of its layers in order."""

    def resnet_residual_block(model, increase_units_factor=None,
                              half_time=False):
        """Call ``residual_block`` with this object's hyperparameters.

        Parameters
        ----------
        model : layer the residual block is stacked onto.
        increase_units_factor : widening factor (Default value = None).
        half_time : whether to halve the time dimension
            (Default value = False).

        Returns
        -------
        Final layer of the created residual block.
        """
        return residual_block(
            model,
            batch_norm_epsilon=self.batch_norm_epsilon,
            batch_norm_alpha=self.batch_norm_alpha,
            increase_units_factor=increase_units_factor,
            half_time=half_time,
            nonlinearity=self.nonlinearity,
            projection=self.projection,
            survival_prob=self.survival_prob,
            add_after_nonlin=self.add_after_nonlin,
            reduction_method=self.reduction_method,
            reduction_pool_mode=self.reduction_pool_mode)

    model = InputLayer([None, self.in_chans, self.input_time_length, 1])
    if self.split_first_layer:
        # Shift channel dim out, then conv over time, then over channels.
        model = DimshuffleLayer(model, (0, 3, 2, 1))
        model = Conv2DLayer(model,
                            num_filters=self.n_first_filters,
                            filter_size=(self.first_filter_length, 1),
                            stride=(1, 1), nonlinearity=identity,
                            pad='same',
                            W=lasagne.init.HeNormal(gain='relu'))
        model = batch_norm(
            Conv2DLayer(model,
                        num_filters=self.n_first_filters,
                        filter_size=(1, self.in_chans), stride=(1, 1),
                        nonlinearity=self.nonlinearity, pad=0,
                        W=lasagne.init.HeNormal(gain='relu')),
            epsilon=self.batch_norm_epsilon,
            alpha=self.batch_norm_alpha)
    else:
        model = batch_norm(
            Conv2DLayer(model,
                        num_filters=self.n_first_filters,
                        filter_size=(self.first_filter_length, 1),
                        stride=(1, 1), nonlinearity=self.nonlinearity,
                        pad='same',
                        W=lasagne.init.HeNormal(gain='relu')),
            epsilon=self.batch_norm_epsilon,
            alpha=self.batch_norm_alpha)

    # Stage 1: plain blocks, then a transition that doubles the width.
    for _ in range(self.n_layers_per_block):
        model = resnet_residual_block(model)
    model = resnet_residual_block(model, increase_units_factor=2,
                                  half_time=True)
    # Stage 2: ends with a transition widening by 1.5x.
    for _ in range(1, self.n_layers_per_block):
        model = resnet_residual_block(model)
    model = resnet_residual_block(model, increase_units_factor=1.5,
                                  half_time=True)
    # Four more identical stages that only halve the time dimension.
    for _ in range(4):
        for _ in range(1, self.n_layers_per_block):
            model = resnet_residual_block(model)
        model = resnet_residual_block(model, half_time=True)

    if self.drop_before_pool:
        model = DropoutLayer(model, p=0.5)
    # Replacement for global mean pooling.
    if self.final_aggregator == 'pool':
        model = Pool2DLayer(model,
                            pool_size=(self.final_pool_length, 1),
                            stride=(1, 1), mode='average_exc_pad')
        model = Conv2DLayer(model, filter_size=(1, 1), num_filters=4,
                            W=lasagne.init.HeNormal(),
                            nonlinearity=identity)
    elif self.final_aggregator == 'conv':
        model = Conv2DLayer(model,
                            filter_size=(self.final_pool_length, 1),
                            num_filters=4,
                            W=lasagne.init.HeNormal(),
                            nonlinearity=identity)
    else:
        raise ValueError("Unknown final aggregator {:s}".format(
            self.final_aggregator))
    model = FinalReshapeLayer(model)
    model = NonlinearityLayer(model, nonlinearity=self.final_nonlin)
    model = set_survival_probs_to_linear_decay(model, self.survival_prob)
    return lasagne.layers.get_all_layers(model)
def build_nmt_encoder_decoder(dim_word=1, n_embd=100, n_units=500,
                              n_proj=200, state=None, rev_state=None,
                              context_type=None, attention=False,
                              drop_p=None):
    """Build an encoder-decoder (seq2seq) lasagne graph for NMT.

    Parameters
    ----------
    dim_word : vocabulary size fed to the embedding layers.
    n_embd : embedding dimensionality.
    n_units : number of RNN units (also context / init-state size).
    n_proj : dimensionality of the pre-output projections.
    state, rev_state : externally supplied decoder initial states; when
        ``state`` is None the initial state is derived from the encoder.
    context_type : 'last' or 'mean' -- how encoder states are reduced to
        the decoder's initial context.
    attention : if True the decoder attends over all encoder states (3D
        context); otherwise a single 2D context vector is used.
    drop_p : dropout probability applied before the projections
        (None disables dropout).

    Returns
    -------
    OrderedDict mapping layer names to lasagne layers. Insertion order is
    significant: several places below read ``enc.values()[-1]``.

    Bugfix: ``DropoutLayer`` takes ``p`` (drop probability); the previous
    ``sigma=drop_p`` keywords (left over from ``GaussianNoiseLayer``) raised
    a TypeError in lasagne's Layer base class whenever ``drop_p`` was set.
    """
    # NOTE(review): ``enc.values()[-1]`` indexing assumes Python 2, where
    # OrderedDict.values() returns a list.
    enc = OrderedDict()
    enc['input'] = InputLayer((None, None), name='input')
    enc_mask = enc['mask'] = InputLayer((None, None), name='mask')
    enc_rev_mask = enc['rev_mask'] = InputLayer((None, None),
                                                name='rev_mask')
    enc['input_emb'] = EmbeddingLayer(enc.values()[-1],
                                      input_size=dim_word,
                                      output_size=n_embd,
                                      name='input_emb')

    # --- ENCODER PART ---
    # RNN encoder unit.
    hid_init = Constant(0.)
    hid_init_rev = Constant(0.)
    encoder_unit = get_rnn_unit(enc.values()[-1], enc_mask, enc_rev_mask,
                                hid_init, hid_init_rev, n_units,
                                prefix='encoder_')
    enc.update(encoder_unit)

    # Context layer = decoder's initial state of shape (batch_size, n_units).
    context = enc.values()[-1]
    if context_type == 'last':
        enc['context2init'] = SliceLayer(context, indices=-1, axis=1,
                                         name='last_encoder_context')
    elif context_type == 'mean':
        enc['context2init'] = ExpressionLayer(context, mean_over_1_axis,
                                              output_shape='auto',
                                              name='mean_encoder_context')

    # --- DECODER PART ---
    W_init2proj, b_init2proj = GlorotUniform(), Constant(0.)
    enc['init_state'] = DenseLayer(enc['context2init'], num_units=n_units,
                                   W=W_init2proj, b=b_init2proj,
                                   nonlinearity=tanh,
                                   name='decoder_init_state')
    if state is None:
        init_state = enc['init_state']
        init_state_rev = None  # if rev_state is None else init_state
        if not attention:
            # Without attention the context is the single 2D vector.
            context = enc['context2init']
    else:
        init_state = state
        init_state_rev = rev_state
        context = enc['context_input'] = \
            InputLayer((None, n_units), name='ctx_input')

    # (batch_size, nfeats) / (batch_size, valid ntsteps)
    enc['target'] = InputLayer((None, None), name='target')
    dec_mask = enc['target_mask'] = InputLayer((None, None),
                                               name='target_mask')
    enc['target_emb'] = EmbeddingLayer(enc.values()[-1],
                                       input_size=dim_word,
                                       output_size=n_embd,
                                       name='target_emb')
    prevdim = n_embd
    prev2rnn = enc.values()[-1]  # it's either emb or prev2rnn/noise

    decoder_unit = get_rnn_unit(prev2rnn, dec_mask, None, init_state, None,
                                n_units, prefix='decoder_',
                                context=context, attention=attention)
    enc.update(decoder_unit)

    if attention:
        ctxs = enc.values()[-1]
        ctxs_shape = ctxs.output_shape

        def get_ctx(x):
            # Pull the attention contexts off the decoder unit.
            return ctxs.ctx
        context = enc['context'] = ExpressionLayer(ctxs, function=get_ctx,
                                                   output_shape=ctxs_shape,
                                                   name='context')

    # Reshape for feed-forward layers:
    # 2D shapes of (batch_size * num_steps, num_units/num_feats).
    enc['rnn2proj'] = rnn2proj = ReshapeLayer(enc.values()[-1],
                                              (-1, n_units),
                                              name='flatten_rnn2proj')
    enc['prev2proj'] = prev2proj = ReshapeLayer(prev2rnn, (-1, prevdim),
                                                name='flatten_prev')
    if isinstance(context, ExpressionLayer):
        ctx2proj = enc['ctx2proj'] = ReshapeLayer(context,
                                                  (-1, ctxs_shape[-1]),
                                                  name='flatten_ctxs')
    else:
        ctx2proj = context

    # Shared projection parameters.
    W_rnn2proj, b_rnn2proj = GlorotUniform(), Constant(0.)
    W_prev2proj, b_prev2proj = GlorotUniform(), Constant(0.)
    W_ctx2proj, b_ctx2proj = GlorotUniform(), Constant(0.)

    # Perturb the projection inputs by dropout.
    # FIX: was ``sigma=drop_p`` on all three layers, which DropoutLayer
    # does not accept -- ``p`` is the drop-probability keyword.
    if drop_p is not None:
        rnn2proj = enc['noise_rnn2proj'] = DropoutLayer(
            rnn2proj, p=drop_p, name='noise_rnn2proj')
        prev2proj = enc['drop_prev2proj'] = DropoutLayer(
            prev2proj, p=drop_p, name='drop_prev2proj')
        ctx2proj = enc['noise_ctx2proj'] = DropoutLayer(
            ctx2proj, p=drop_p, name='noise_ctx2proj')

    # Project rnn states.
    enc['rnn_proj'] = DenseLayer(rnn2proj, num_units=n_proj,
                                 nonlinearity=linear,
                                 W=W_rnn2proj, b=b_rnn2proj,
                                 name='rnn_proj')
    # Project raw targets.
    enc['prev_proj'] = DenseLayer(prev2proj, num_units=n_proj,
                                  nonlinearity=linear,
                                  W=W_prev2proj, b=b_prev2proj,
                                  name='prev_proj')
    # Project context.
    enc['ctx_proj'] = DenseLayer(ctx2proj, num_units=n_proj,
                                 nonlinearity=linear,
                                 W=W_ctx2proj, b=b_ctx2proj,
                                 name='ctx_proj')

    # Reshape back for merging.
    n_batch = enc['input'].input_var.shape[0]
    rnn2merge = enc['rnn2merge'] = ReshapeLayer(enc['rnn_proj'],
                                                (n_batch, -1, n_proj),
                                                name='reshaped_rnn2proj')
    prev2merge = enc['prev2merge'] = ReshapeLayer(enc['prev_proj'],
                                                  (n_batch, -1, n_proj),
                                                  name='reshaped_prev')
    if isinstance(context, ExpressionLayer):
        ctx2merge = ReshapeLayer(enc['ctx_proj'], (n_batch, -1, n_proj),
                                 name='reshaped_prev')
    else:
        ctx2merge = enc['ctx2merge'] = DimshuffleLayer(
            enc['ctx_proj'], pattern=(0, 'x', 1), name='reshaped_context')

    # Combine projections into shape (batch_size, n_steps, n_proj).
    enc['proj_merge'] = ElemwiseMergeLayer(
        [rnn2merge, prev2merge, ctx2merge],
        merge_function=tanh_add, name='proj_merge')
    # Reshape for the output regression projection.
    enc['merge2proj'] = ReshapeLayer(enc.values()[-1], (-1, n_proj),
                                     name='flatten_proj_merge')

    # Perturb the merged regressors by dropout.
    if drop_p is not None:
        enc['noise_output'] = DropoutLayer(enc.values()[-1], p=drop_p,
                                           name='noise_output')

    # Regress on combined (perturbed) projections and update the graph.
    out = get_output_unit(enc['target'], enc.values()[-1], dim_word)
    enc.update(out)
    return enc
def conv_layer(incoming, num_filters):
    """3x3 valid conv + batch norm (+ optional dropout from the enclosing
    scope's ``dropout`` flag) + default nonlinearity."""
    out = Conv2DLayer(incoming, num_filters, 3, pad='valid')
    out = BatchNormLayer(out)
    if dropout:
        out = DropoutLayer(out, 0.3)
    return NonlinearityLayer(out)
def init_from_string(self, arch_string):
    """Build a convolutional autoencoder from an architecture string.

    Tokens (whitespace-separated): ``c<filters>.<size><nonlin>`` for a
    convolution, ``p<size>`` for max-pooling, ``d<units>.<noise><nonlin>``
    for a dropout+dense layer. The decoder mirrors the encoder in reverse,
    reusing shapes (and dense weights) from the encoder layers.
    """
    self.check_arch_string(arch_string)
    self.set_output_dir(self.name)
    self.input_layer = InputLayer(shape=(None,) + self.input_shape,
                                  name='input')
    net = self.input_layer
    net = DropoutLayer(net, p=self.denoising)

    # --- Encoder ---
    self._layers = []
    self.conv_layers = []
    self.encode_layers = []
    self.middle_pool_size = self.pixel_size
    for token in arch_string.split():
        if token.startswith("c"):
            non_lin = token[-1]
            num_filter, filter_size = map(int, token[1:-1].split("."))
            net = Conv2DLayerFast(net, num_filters=num_filter,
                                  filter_size=filter_size,
                                  nonlinearity=act(non_lin), pad='same')
            self.conv_layers.append(net)
        elif token.startswith("p"):
            pool_size = int(token[1])
            net = MaxPool2DLayerFast(net,
                                     pool_size=(pool_size, pool_size))
            # NOTE(review): always halves, which assumes pool_size == 2 --
            # confirm against allowed architecture strings.
            self.middle_pool_size /= 2
        elif token.startswith("d"):
            non_lin = token[-1]
            num_units, noise = map(int, token[1:-1].split("."))
            net = DenseLayer(lasagne.layers.dropout(net, p=noise * 0.1),
                             num_units=num_units,
                             nonlinearity=act(non_lin))
            self.encode_layers.append(net)
        self._layers.append(net)

    # --- Decoder: mirror every encoder layer except the first ---
    for lyr in self._layers[::-1][:-1]:
        if isinstance(lyr, Conv2DLayerFast):
            net = Conv2DLayerFast(
                net, num_filters=lyr.input_layer.output_shape[1],
                filter_size=lyr.filter_size,
                nonlinearity=lyr.nonlinearity, pad='same')
        elif isinstance(lyr, MaxPool2DLayerFast):
            if len(net.output_shape) == 2:
                # Coming out of a dense layer: restore the 4D shape first.
                net = ReshapeLayer(net,
                                   shape=([0],
                                          lyr.input_layer.num_filters,
                                          self.middle_pool_size,
                                          self.middle_pool_size))
            net = Upscale2DLayer(net, scale_factor=lyr.pool_size)
        elif isinstance(lyr, DenseLayer):
            net = TransposedDenseLayer(
                net,
                num_units=numpy.prod(lyr.input_layer.input_shape[1:]),
                W=lyr.W, nonlinearity=lyr.nonlinearity)

    # Mirror the very first encoder layer, mapping back to input channels.
    lyr = self._layers[0]
    if isinstance(lyr, Conv2DLayerFast):
        net = Conv2DLayer(net, num_filters=self.n_channel,
                          filter_size=lyr.filter_size,
                          nonlinearity=lyr.nonlinearity, pad='same')
    elif isinstance(lyr, MaxPool2DLayerFast):
        channels = 1
        if len(net.output_shape) == 2 and isinstance(
                lyr.input_layer, (Conv2DLayerFast, Conv2DLayer)):
            channels = lyr.input_layer.num_filters
        net = ReshapeLayer(net, shape=([0], channels,
                                       self.middle_pool_size,
                                       self.middle_pool_size))
        net = Upscale2DLayer(net, scale_factor=lyr.pool_size)
    elif isinstance(lyr, DenseLayer):
        net = TransposedDenseLayer(
            net, num_units=numpy.prod(lyr.input_layer.input_shape[1:]),
            W=lyr.W, nonlinearity=lyr.nonlinearity)

    net = ReshapeLayer(net, name='output', shape=([0], -1))
    pprint_layers(net)
    self._nn = NeuralNet(net, verbose=self.verbose, regression=True,
                         objective_loss_function=squared_error)