def __init__(self, incoming, num_units, hidden_nonlinearity,
             gate_nonlinearity=LN.sigmoid, name=None,
             W_init=LI.GlorotUniform(), b_init=LI.Constant(0.),
             hidden_init=LI.Constant(0.), hidden_init_trainable=True):
    if hidden_nonlinearity is None:
        hidden_nonlinearity = LN.identity
    if gate_nonlinearity is None:
        gate_nonlinearity = LN.identity
    super(GRULayer, self).__init__(incoming, name=name)
    input_shape = self.input_shape[2:]
    input_dim = ext.flatten_shape_dim(input_shape)
    # self._name = name
    # Weights for the initial hidden state
    self.h0 = self.add_param(hidden_init, (num_units,), name="h0",
                             trainable=hidden_init_trainable,
                             regularizable=False)
    # Weights for the reset gate
    self.W_xr = self.add_param(W_init, (input_dim, num_units), name="W_xr")
    self.W_hr = self.add_param(W_init, (num_units, num_units), name="W_hr")
    self.b_r = self.add_param(b_init, (num_units,), name="b_r",
                              regularizable=False)
    # Weights for the update gate
    self.W_xu = self.add_param(W_init, (input_dim, num_units), name="W_xu")
    self.W_hu = self.add_param(W_init, (num_units, num_units), name="W_hu")
    self.b_u = self.add_param(b_init, (num_units,), name="b_u",
                              regularizable=False)
    # Weights for the cell gate
    self.W_xc = self.add_param(W_init, (input_dim, num_units), name="W_xc")
    self.W_hc = self.add_param(W_init, (num_units, num_units), name="W_hc")
    self.b_c = self.add_param(b_init, (num_units,), name="b_c",
                              regularizable=False)
    self.gate_nonlinearity = gate_nonlinearity
    self.num_units = num_units
    self.nonlinearity = hidden_nonlinearity
def __init__(self, incoming, H, verbose=False, axes='auto', epsilon=1e-4,
             alpha=0.1, beta=init.Constant(0), gamma=init.Constant(1),
             mean=init.Constant(0), inv_std=init.Constant(1), **kwargs):
    super(BatchNormLayer, self).__init__(incoming, **kwargs)
    self.verbose = verbose
    self.H = H
def enc_net(_incoming, output_channels, drop_rate=0.3, nonlinearity=None):
    # _noise = L.GaussianNoiseLayer(_incoming, sigma=0.1)
    _drop1 = L.DropoutLayer(_incoming, p=drop_rate, rescale=True)
    _fc1 = L.DenseLayer(_drop1, 4 * output_channels, W=I.Normal(0.02),
                        b=I.Constant(0.1), nonlinearity=NL.rectify)
    _drop2 = L.DropoutLayer(_fc1, p=drop_rate, rescale=True)
    _fc2 = L.DenseLayer(_drop2, output_channels, W=I.Normal(0.02),
                        b=I.Constant(0.1), nonlinearity=nonlinearity)
    return _fc2
def __init__(self, incoming, perc=99.9, alpha=0.1, beta=init.Constant(5.0),
             tight=20.0, bias=0.0, **kwargs):
    super(SoftThresPerc, self).__init__(incoming, **kwargs)
    self.perc = perc
    self.alpha = alpha
    self.tight = tight
    self.bias = bias
    self.beta = self.add_param(beta, (1,), 'beta', trainable=False,
                               regularizable=False)
def ptb_lstm(input_var, vocabulary_size, hidden_size, seq_len, num_layers,
             dropout, batch_size):
    l_input = L.InputLayer(shape=(batch_size, seq_len), input_var=input_var)
    l_embed = L.EmbeddingLayer(l_input, vocabulary_size, hidden_size,
                               W=init.Uniform(1.0))
    l_lstms = []
    for i in range(num_layers):
        l_lstm = L.LSTMLayer(
            l_embed if i == 0 else l_lstms[-1], hidden_size,
            ingate=L.Gate(W_in=init.GlorotUniform(),
                          W_hid=init.Orthogonal()),
            forgetgate=L.Gate(W_in=init.GlorotUniform(),
                              W_hid=init.Orthogonal(),
                              b=init.Constant(1.0)),
            cell=L.Gate(W_in=init.GlorotUniform(),
                        W_hid=init.Orthogonal(),
                        W_cell=None,
                        nonlinearity=lasagne.nonlinearities.tanh),
            outgate=L.Gate(W_in=init.GlorotUniform(),
                           W_hid=init.Orthogonal()))
        l_lstms.append(l_lstm)
    l_drop = L.DropoutLayer(l_lstms[-1], dropout)
    l_out = L.DenseLayer(l_drop, num_units=vocabulary_size,
                         num_leading_axes=2)
    l_out = L.ReshapeLayer(l_out,
                           (l_out.output_shape[0] * l_out.output_shape[1],
                            l_out.output_shape[2]))
    l_out = L.NonlinearityLayer(l_out,
                                nonlinearity=lasagne.nonlinearities.softmax)
    return l_out
def __call__(self, layer, spec, shape, name=None, **tags):
    # case when user uses default init specs
    assert tags.get('variational', False), \
        "Please declare param as variational to avoid confusion"
    if not isinstance(spec, dict):
        initial_rho = np.log(np.expm1(self.prior_std))  # std to rho
        assert np.isfinite(initial_rho), \
            "too small std to initialize correctly. Please pass explicit " \
            "initializer (dict with {'mu': mu_init, 'rho': rho_init})."
        spec = {'mu': spec, 'rho': init.Constant(initial_rho)}
    mu_spec, rho_spec = spec['mu'], spec['rho']

    rho = layer.add_param(rho_spec, shape, name=(name or 'unk') + '.rho',
                          **tags)
    mean = layer.add_param(mu_spec, shape, name=(name or 'unk') + '.mu',
                           **tags)

    # Reparameterization trick
    e = self.srng.normal(shape, std=1)
    W = mean + T.log1p(T.exp(rho)) * e

    # KL divergence KL(q, p) = E_(w~q(w|x)) [log q(w|x) - log P(w)],
    # aka the variational cost
    q_p = T.sum(self.log_posterior_approx(W, mean, rho) - self.log_prior(W))

    # accumulate variational cost
    layer._bbwrap_var_cost += q_p
    return W
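# A minimal sketch (an assumption, not the original implementation) of what
# log_posterior_approx could compute for the fully factorized Gaussian
# posterior implied by the reparameterization above:
#   W = mean + softplus(rho) * e,  e ~ N(0, 1)
# so q(W) = prod_i N(W_i; mean_i, softplus(rho_i)^2), and the elementwise
# log-density is -0.5*log(2*pi) - log(sigma) - 0.5*((W - mean) / sigma)**2.
import numpy as np
import theano.tensor as T


def log_posterior_approx_sketch(W, mean, rho):
    # softplus(rho) is the posterior standard deviation
    sigma = T.log1p(T.exp(rho))
    return (-0.5 * np.log(2 * np.pi)
            - T.log(sigma)
            - 0.5 * ((W - mean) / sigma) ** 2)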
def __init__(self, incoming, num_units, W=init.GlorotUniform(),
             b=init.Constant(0.), nonlinearity=nonlinearities.rectify,
             num_leading_axes=1, p=0.5, logit_p=None, temp=0.1,
             shared_axes=(), noise_samples=None, **kwargs):
    super(DenseConcreteDropoutLayer, self).__init__(
        incoming, num_units, W, b, nonlinearity, num_leading_axes, p,
        shared_axes=(), noise_samples=None, **kwargs)
    self.temp = temp
    self.logit_p = logit_p
    self.init_params()
def __init__(self, incoming, num_units, W=init.GlorotUniform(),
             b=init.Constant(0.), nonlinearity=nonlinearities.rectify,
             num_leading_axes=1, p=0.5, log_sigma2=None,
             shared_axes=(), noise_samples=None, **kwargs):
    super(DenseGaussianDropoutLayer, self).__init__(
        incoming, num_units, W, b, nonlinearity, num_leading_axes, p,
        shared_axes=(), noise_samples=None, **kwargs)
    self.p = p
    self.log_sigma2 = log_sigma2
    self.init_params()
def __init__(self, W_x=init.Normal(1.), b=init.Constant(0.),
             nonlinearity=nonlinearities.sigmoid):
    self.W_x = W_x
    self.b = b
    self.nonlinearity = nonlinearity
def __init__(self, incoming_vertex, incoming_edge, num_filters, filter_size,
             W=init.GlorotUniform(), b=init.Constant(0.),
             nonlinearity=nonlinearities.rectify, **kwargs):
    self.vertex_shape = incoming_vertex.output_shape
    self.edge_shape = incoming_edge.output_shape
    self.input_shape = incoming_vertex.output_shape

    incomings = [incoming_vertex, incoming_edge]
    self.vertex_incoming_index = 0
    self.edge_incoming_index = 1
    super(GraphConvLayer, self).__init__(incomings, **kwargs)

    if nonlinearity is None:
        self.nonlinearity = nonlinearities.identity
    else:
        self.nonlinearity = nonlinearity

    self.num_filters = num_filters
    self.filter_size = filter_size

    self.W = self.add_param(W, self.get_W_shape(), name="W")
    if b is None:
        self.b = None
    else:
        self.b = self.add_param(b, (num_filters,), name="b",
                                regularizable=False)
def conv_params(num_filters, filter_size=(3, 3), pad=1,  # border_mode='same',
                nonlinearity=leaky_rectify, W=init.Orthogonal(gain=1.0),
                b=init.Constant(0.05), untie_biases=True, **kwargs):
    args = {
        'num_filters': num_filters,
        'filter_size': filter_size,
        # 'border_mode': border_mode,
        'pad': pad,
        'nonlinearity': nonlinearity,
        'W': W,
        'b': b,
        'untie_biases': untie_biases,
    }
    args.update(kwargs)
    if CC:
        args['dimshuffle'] = False
    else:
        args.pop('partial_sum', None)
    return args
def __init__(self, incoming, num_filters, filter_size, group=1, stride=(1, 1),
             border_mode="valid", untie_biases=False, W=init.Uniform(),
             b=init.Constant(0.), nonlinearity=nonlinearities.rectify,
             convolution=T.nnet.conv2d, **kwargs):
    self.group = group
    # actually border_mode is useless in lasagne so I removed it
    super(CaffeConv2DLayer, self).__init__(incoming, num_filters, filter_size,
                                           stride=stride, pad=border_mode,
                                           untie_biases=untie_biases,
                                           W=W, b=b, nonlinearity=nonlinearity,
                                           convolution=convolution, **kwargs)
    self.border_mode = border_mode
def __init__(self, incoming, num_filters, filter_size, stride=(1, 1, 1),
             crop=0, untie_biases=False, W=init.GlorotUniform(),
             b=init.Constant(0.), nonlinearity=nonlinearities.rectify,
             flip_filters=False, convolution=T.nnet.ConvTransp3D,
             output_size=None, **kwargs):
    super(Conv3DLayerTransposed, self).__init__(
        incoming, num_filters, filter_size, stride, crop, untie_biases,
        W, b, nonlinearity, flip_filters, n=3, **kwargs)
    self.crop = self.pad
    del self.pad
    self.convolution = convolution
    self.output_size = output_size
def __init__(self, incoming, alpha=init.Constant(0.25), shared_axes='auto',
             **kwargs):
    super(ParametricRectifierLayer, self).__init__(incoming, **kwargs)
    if shared_axes == 'auto':
        self.shared_axes = (0,) + tuple(range(2, len(self.input_shape)))
    elif shared_axes == 'all':
        self.shared_axes = tuple(range(len(self.input_shape)))
    elif isinstance(shared_axes, int):
        self.shared_axes = (shared_axes,)
    else:
        self.shared_axes = shared_axes

    shape = [size for axis, size in enumerate(self.input_shape)
             if axis not in self.shared_axes]
    if any(size is None for size in shape):
        raise ValueError("ParametricRectifierLayer needs input sizes for "
                         "all axes that alpha's are not shared over.")
    self.alpha = self.add_param(alpha, shape, name="alpha",
                                regularizable=False)
def __init__(self, incoming, scales=init.Constant(1), shared_axes='auto',
             **kwargs):
    super(ScaleLayer, self).__init__(incoming, **kwargs)

    if shared_axes == 'auto':
        # default: share scales over all but the second axis
        shared_axes = (0,) + tuple(range(2, len(self.input_shape)))
    elif isinstance(shared_axes, int):
        shared_axes = (shared_axes,)
    self.shared_axes = shared_axes

    # create scales parameter, ignoring all dimensions in shared_axes
    shape = [size for axis, size in enumerate(self.input_shape)
             if axis not in self.shared_axes]
    if any(size is None for size in shape):
        raise ValueError("ScaleLayer needs specified input sizes for "
                         "all axes that scales are not shared over.")
    self.scales = self.add_param(scales, shape, 'scales', regularizable=False)
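# Hypothetical usage sketch (shapes are illustrative assumptions, not from the
# original code): with the default shared_axes='auto', scales are shared over
# the batch and spatial axes, so a (None, 16, 28, 28) input gives a `scales`
# parameter of shape (16,), one scale per channel.
import lasagne.layers as L

l_in = L.InputLayer(shape=(None, 16, 28, 28))
l_scale = ScaleLayer(l_in)  # l_scale.scales has shape (16,)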
def transition(self, args, layers, dropout, name_prefix):
    # a transition 1x1 convolution followed by avg-pooling
    self.affine_relu_conv(args, layers, channels=layers[-1].output_shape[1],
                          filter_size=1, dropout=dropout,
                          name_prefix=name_prefix)
    layers.append(Pool2DLayer(layers[-1], 2, mode='average_inc_pad',
                              name=name_prefix + '_pool'))
    # TODO: treat initialization as hyperparameter, but don't regularize
    # parameters?
    layers.append(BatchNormLayer(layers[-1], name=name_prefix + '_bn',
                                 beta=None, gamma=None))
    # TODO: add Gaussian noise
    if args.addActivationNoise:
        layers.append(GaussianNoiseLayer(
            layers[-1], name=name_prefix + '_Gn',
            sigma=init.Constant(args.invSigmoidActivationNoiseMagnitude),
            shared_axes='auto'))
        self.params_noise.append(layers[-1].sigma)
    # self.add_params_to_self(args, layers[-1])  # no parameters, beta=gamma=None
    return layers[-1]
def __init__(self, incoming, num_labels, mask_input=None,
             W=init.GlorotUniform(), b=init.Constant(0.), **kwargs):
    # This layer inherits from a MergeLayer, because it can have two
    # inputs - the layer input, and the mask.
    # We will just provide the layer input as incomings, unless a mask
    # input was provided.
    self.input_shape = incoming.output_shape
    incomings = [incoming]
    self.mask_incoming_index = -1
    if mask_input is not None:
        incomings.append(mask_input)
        self.mask_incoming_index = 1
    super(CRFLayer, self).__init__(incomings, **kwargs)
    self.num_labels = num_labels + 1
    self.pad_label_index = num_labels

    num_inputs = self.input_shape[2]
    self.W = self.add_param(W, (num_inputs, self.num_labels, self.num_labels),
                            name="W")
    if b is None:
        self.b = None
    else:
        self.b = self.add_param(b, (self.num_labels, self.num_labels),
                                name="b", regularizable=False)
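# Hypothetical usage sketch (layer shapes and label count are illustrative
# assumptions, not from the original code): wiring CRFLayer to a sequence
# feature layer together with an optional mask layer, as described in the
# comments above.
import lasagne.layers as L

l_feat = L.InputLayer(shape=(None, 50, 100))  # (batch, time, features)
l_mask = L.InputLayer(shape=(None, 50))       # 1 for real tokens, 0 for padding
l_crf = CRFLayer(l_feat, num_labels=10, mask_input=l_mask)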
def __init__(self, incoming, num_units, untie_biases=False,
             W=init.GlorotUniform(), b=init.Constant(0.),
             nonlinearity=nonlinearities.rectify, **kwargs):
    super(NINLayer, self).__init__(incoming, **kwargs)
    self.nonlinearity = (nonlinearities.identity if nonlinearity is None
                         else nonlinearity)

    self.num_units = num_units
    self.untie_biases = untie_biases

    num_input_channels = self.input_shape[1]

    self.W = self.add_param(W, (num_input_channels, num_units), name="W")
    if b is None:
        self.b = None
    else:
        if self.untie_biases:
            biases_shape = (num_units,) + self.output_shape[2:]
        else:
            biases_shape = (num_units,)
        self.b = self.add_param(b, biases_shape, name="b",
                                regularizable=False)
def __init__(self, incoming, num_units_per_var,
             nonlinearity=nonlinearities.softmax, **kwargs):
    super(MultivariateDenseLayer, self).__init__(incoming, **kwargs)
    self.nonlinearity = (nonlinearities.identity if nonlinearity is None
                         else nonlinearity)
    self.num_units_per_var = num_units_per_var
    self.num_vars = len(num_units_per_var)
    num_inputs = int(np.prod(self.input_shape[1:]))

    # generate Wi and bi for each output variable
    for i in range(self.num_vars):
        # W
        mem_str = "W%d" % (i)
        if mem_str not in kwargs:
            # default values
            kwargs[mem_str] = init.GlorotUniform()
        self.__dict__[mem_str] = self.add_param(
            kwargs[mem_str], (num_inputs, num_units_per_var[i]), name=mem_str)
        # b
        mem_str = "b%d" % (i)
        if mem_str not in kwargs:
            # default values
            kwargs[mem_str] = init.Constant(0.)
        self.__dict__[mem_str] = self.add_param(
            kwargs[mem_str], (num_units_per_var[i],), name=mem_str,
            regularizable=False)
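# Hypothetical usage sketch (unit counts are illustrative assumptions, not
# from the original code): a MultivariateDenseLayer predicting two categorical
# variables at once; the per-variable weights generated in the loop above are
# exposed as W0/b0 and W1/b1.
import lasagne.layers as L

l_in = L.InputLayer(shape=(None, 128))
l_multi = MultivariateDenseLayer(l_in, num_units_per_var=[3, 5])
# l_multi.W0 has shape (128, 3); l_multi.W1 has shape (128, 5)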
def __init__(self, incoming, num_units, W=init.GlorotUniform(),
             b=init.Constant(0.), nonlinearity=nonlinearities.rectify,
             **kwargs):
    """A convenience DenseLayer that cooperates with recurrent layers.

    Recurrent layers work on 3-dimensional data (batch size x time x
    number of units). By default, Lasagne's DenseLayer flattens the data
    to 2 dimensions. We could reshape the data, or we can simply use this
    RNNDenseLayer, which is more convenient. For documentation, refer to
    Lasagne's DenseLayer documentation.
    """
    super(RNNDenseLayer, self).__init__(incoming, **kwargs)
    self.nonlinearity = (nonlinearities.identity if nonlinearity is None
                         else nonlinearity)
    self.num_units = num_units
    num_inputs = self.input_shape[2]

    self.W = self.add_param(W, (num_inputs, num_units), name="W")
    if b is None:
        self.b = None
    else:
        self.b = self.add_param(b, (num_units,), name="b",
                                regularizable=False)
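# Hypothetical usage sketch (batch size, sequence length and unit counts are
# illustrative assumptions, not from the original code): RNNDenseLayer applied
# to the 3-dimensional output of a recurrent layer, keeping the time axis
# intact instead of flattening it.
import lasagne.layers as L
from lasagne import nonlinearities

l_in = L.InputLayer(shape=(32, 20, 100))  # (batch, time, features)
l_rnn = L.LSTMLayer(l_in, num_units=64)   # -> (32, 20, 64)
l_proj = RNNDenseLayer(l_rnn, num_units=10,
                       nonlinearity=nonlinearities.softmax)  # -> (32, 20, 10)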
def __init__(self, incoming, num_units, W=init.GlorotUniform(),
             b=init.Constant(0.), nonlinearity=nonlinearities.rectify,
             num_leading_axes=1, logit_posterior_mean=None,
             logit_posterior_std=None, interval=[-10.0, 0.1],
             shared_axes=(), noise_samples=None, **kwargs):
    super(DenseLogNormalDropoutLayer, self).__init__(
        incoming, num_units, W, b, nonlinearity, num_leading_axes,
        shared_axes=(), noise_samples=None, **kwargs)
    self.logit_posterior_mean = logit_posterior_mean
    self.logit_posterior_std = logit_posterior_std
    self.interval = interval
    self.init_params()
def __init__(self, incoming, b=init.Constant(0), shared_axes='auto',
             shape='auto', **kwargs):
    super(CustomBiasLayer, self).__init__(incoming, **kwargs)

    if shared_axes == 'auto':
        # default: share biases over all but the second axis
        shared_axes = (0,) + tuple(range(2, len(self.input_shape)))
    elif isinstance(shared_axes, int):
        shared_axes = (shared_axes,)
    self.shared_axes = shared_axes

    if b is None:
        self.b = None
    else:
        if shape == 'auto':
            # create bias parameter, ignoring all dimensions in shared_axes
            shape = [size for axis, size in enumerate(self.input_shape)
                     if axis not in self.shared_axes]
            if any(size is None for size in shape):
                raise ValueError("if shape is automatically computed, the "
                                 "CustomBiasLayer needs specified input "
                                 "sizes for all axes that biases "
                                 "are not shared over.")
        self.b = self.add_param(b, shape, 'b', regularizable=False)
def __init__(self, Period=init.Uniform((10, 100)),
             Shift=init.Uniform((0., 1000.)),
             On_End=init.Constant(0.05)):
    self.Period = Period
    self.Shift = Shift
    self.On_End = On_End
def __init__(self, incoming, epsilon=1e-4, beta=init.Constant(0),
             gamma=init.Constant(1), **kwargs):
    super(LayerNorm, self).__init__(incoming, **kwargs)
    self.epsilon = epsilon
    n_features = self.input_shape[1]

    if beta is None:
        self.beta = None
    else:
        self.beta = self.add_param(beta, (n_features,), 'beta',
                                   trainable=True, regularizable=False)
    if gamma is None:
        self.gamma = None
    else:
        self.gamma = self.add_param(gamma, (n_features,), 'gamma',
                                    trainable=True, regularizable=True)
def __init__(self, incoming, num_labels, mask_input=None,
             W=init.GlorotUniform(), b=init.Constant(0.), **kwargs):
    self.input_shape = incoming.output_shape
    incomings = [incoming]
    self.mask_incoming_index = -1
    if mask_input is not None:
        incomings.append(mask_input)
        self.mask_incoming_index = 1
    super(CRFLayer, self).__init__(incomings, **kwargs)
    self.num_labels = num_labels + 1
    self.pad_label_index = num_labels

    num_inputs = self.input_shape[2]
    self.W = self.add_param(W, (num_inputs, self.num_labels, self.num_labels),
                            name="W")
    if b is None:
        self.b = None
    else:
        self.b = self.add_param(b, (self.num_labels, self.num_labels),
                                name="b", regularizable=False)
def __init__(self, incoming, num_units, W=init.Normal(0.0001),
             b=init.Constant(0.), nonlinearity=nonlinearities.rectify,
             **kwargs):
    super(CoupledDenseLayer, self).__init__(incoming, **kwargs)
    self.nonlinearity = (nonlinearities.identity if nonlinearity is None
                         else nonlinearity)
    self.num_units = num_units

    num_inputs1 = int(self.input_shape[1] / 2)
    num_inputs2 = self.input_shape[1] - num_inputs1

    self.W1 = self.add_param(W, (num_inputs1, num_units), name="cpds_W1")
    self.W21 = self.add_param(W, (num_units, num_inputs2), name="cpds_W21")
    self.W22 = self.add_param(W, (num_units, num_inputs2), name="cpds_W22")
    if b is None:
        self.b1 = None
        self.b21 = None
        self.b22 = None
    else:
        self.b1 = self.add_param(b, (num_units,), name="cpds_b1",
                                 regularizable=False)
        self.b21 = self.add_param(b, (num_inputs2,), name="cpds_b21",
                                  regularizable=False)
        self.b22 = self.add_param(b, (num_inputs2,), name="cpds_b22",
                                  regularizable=False)
def __init__(self, W_in=init.Normal(0.1), W_hid=init.Normal(0.1),
             # W_cell=init.Normal(0.1),
             b=init.Constant(0.), a_g=init.Uniform(0.1),
             b_g_hid_to_hid=init.Uniform(0.1),
             b_g_in_to_hid=init.Uniform(0.1),
             nonlinearity=nonlinearities.sigmoid,
             learn_a_g=True, learn_b_g_in_to_hid=True,
             learn_b_g_hid_to_hid=True):
    # TODO: Make formulation with peepholes and W_cell
    self.W_in = W_in
    self.W_hid = W_hid
    # if W_cell is not None:
    #     self.W_cell = W_cell
    self.a_g = a_g
    if a_g is not None:
        self.learn_a_g = learn_a_g
    self.b_g_in_to_hid = b_g_in_to_hid
    if b_g_in_to_hid is not None:
        self.learn_b_g_in_to_hid = learn_b_g_in_to_hid
    self.b_g_hid_to_hid = b_g_hid_to_hid
    if b_g_hid_to_hid is not None:
        self.learn_b_g_hid_to_hid = learn_b_g_hid_to_hid
    self.b = b
    # For the nonlinearity, if None is supplied, use identity
    if nonlinearity is None:
        self.nonlinearity = nonlinearities.identity
    else:
        self.nonlinearity = nonlinearity
def NVP_dense_layer(incoming, num_units=200, L=2, W=init.Normal(0.0001),
                    r=init.Normal(0.0001), b=init.Constant(0.),
                    nonlinearity=nonlinearities.rectify):
    layer = incoming
    shape = layer.output_shape[1]
    logdets_layers = list()
    for c in range(L):
        layer = PermuteLayer(layer, shape)
        layer_temp = CoupledWNDenseLayer(layer, num_units, W=W, r=r, b=b,
                                         nonlinearity=nonlinearity)
        layer = IndexLayer(layer_temp, 0)
        logdets_layers.append(IndexLayer(layer_temp, 1))
    return layer, logdets_layers
def __init__(self, Period=init.Uniform((10, 100)),
             Shift=init.Uniform((0., 1000.)), On_End=init.Constant(0.05),
             Event_W=init.GlorotUniform(), Event_b=init.Constant(0.),
             out_W=init.GlorotUniform(), out_b=init.Constant(0.)):
    self.Period = Period
    self.Shift = Shift
    self.On_End = On_End
    self.Event_W = Event_W
    self.Event_b = Event_b
    self.out_W = out_W
    self.out_b = out_b
def dense_block(self, args, layers, num_layers, growth_rate, dropout,
                name_prefix):
    # concatenated 3x3 convolutions
    for n in range(num_layers):
        network = layers[-1]
        conv = self.affine_relu_conv(
            args, layers, channels=growth_rate, filter_size=3,
            dropout=dropout, name_prefix=name_prefix + '_l%02d' % (n + 1))
        # TODO: treat initialization as hyperparameter, but don't regularize
        # parameters?
        conv = BatchNormLayer(conv, name=name_prefix + '_l%02dbn' % (n + 1),
                              beta=None, gamma=None)
        # TODO: add Gaussian noise?
        layers.append(conv)  # redundant?
        if args.addActivationNoise:
            conv = GaussianNoiseLayer(
                layers[-1], name=name_prefix + '_l%02dGn' % (n + 1),
                sigma=init.Constant(args.invSigmoidActivationNoiseMagnitude),
                shared_axes='auto')
            self.params_noise.append(conv.sigma)
            layers.append(conv)
        # self.add_params_to_self(args, conv)  # no parameters, beta=gamma=None
        layers.append(ConcatLayer([network, conv], axis=1,
                                  name=name_prefix + '_l%02d_join' % (n + 1)))
    return layers[-1]