def __init__(self, incoming, p=0.5, rescale=True, noise_layer=None, **kwargs):
    super(TiedDropoutLayer, self).__init__(incoming, **kwargs)
    self._srng = RandomStreams(get_rng().randint(1, 2147462579))
    self.p = p
    self.rescale = rescale
    self._master = noise_layer
    self._mask = None

def __init__(self):
    """
    A simple class for accumulating any cost.
    Used in layers with BayesianMeta.
    """
    self.srng = RandomStreams(get_rng().randint(1, 2147462579))
    self.total = []

def __init__(self, incoming, num_filters, filter_size, stride=(1, 1), pad=0,
             untie_biases=False, Wconv=GlorotUniform(), b=Constant(0.),
             nonlinearity=nonlinearities.rectify, flip_filters=False,
             convolution=T.nnet.conv2d, ard_init=-10, **kwargs):
    super(Conv2DVarDropOutARD, self).__init__(incoming, num_filters,
                                              filter_size, stride, pad,
                                              untie_biases, Wconv, b,
                                              nonlinearity, flip_filters)
    self.convolution = convolution
    self.reg = True
    self.shape = self.get_W_shape()
    self.log_sigma2 = self.add_param(Constant(ard_init), self.shape, name="ls2")
    self._srng = RandomStreams(get_rng().randint(1, 2147462579))

def __init__(self, incoming, input_feature_num, input_atom_num, input_bond_num,
             hidden_units_num, max_atom_len, p_dropout=0.0,
             W_neighbors=lasagne.init.GlorotUniform(),
             b_neighbors=lasagne.init.Constant(0.),
             W_atoms=lasagne.init.GlorotUniform(),
             b_atoms=lasagne.init.Constant(0.),
             nonlinearity=nonlinearities.rectify,
             batch_normalization=True, **kwargs):
    super(FingerprintHiddensLayer, self).__init__(incoming, **kwargs)
    self.nonlinearity = (lasagne.nonlinearities.identity
                         if nonlinearity is None else nonlinearity)

    # initialize the weight matrices used for the transformations
    self.W_neighbors = self.add_param(
        W_neighbors, (input_feature_num + input_bond_num, hidden_units_num),
        name="W_neighbors")
    self.b_neighbors = self.add_param(b_neighbors, (hidden_units_num,),
                                      name="b_neighbors", regularizable=False)
    self.W_atoms = self.add_param(W_atoms, (input_atom_num, hidden_units_num),
                                  name="W_atoms")
    self.b_atoms = self.add_param(b_atoms, (hidden_units_num,),
                                  name="b_atoms", regularizable=False)

    self.num_units = hidden_units_num
    self.atom_num = input_atom_num
    self.input_feature_num = input_feature_num
    self.p_dropout = p_dropout
    self._srng = RandomStreams(get_rng().randint(1, 2147462579))
    self.length = max_atom_len

def __init__(self, t=0.1, eps=1e-20):
    assert t != 0
    self.temperature = t
    self.eps = eps
    self._srng = RandomStreams(get_rng().randint(1, 2147462579))

def __init__(self, incoming, p=0.5, rescale=True, shared_axes=(), **kwargs):
    super(DropoutLayer, self).__init__(incoming, **kwargs)
    self._srng = RandomStreams(get_rng().randint(1, 2147462579))
    self.p = p
    self.rescale = rescale
    self.shared_axes = tuple(shared_axes)

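# The constructor above only stores the dropout settings. Below is a minimal
# sketch of the training-time forward pass that typically pairs with it
# (standard inverted dropout; shared_axes handling is omitted for brevity, and
# this is an illustrative reconstruction, not the snippet's own code).
def get_output_for(self, input, deterministic=False, **kwargs):
    if deterministic or self.p == 0:
        return input
    retain_prob = 1 - self.p
    if self.rescale:
        # inverted dropout: rescale at training time so no scaling is
        # needed at test time
        input /= retain_prob
    # sample a Bernoulli mask with the same (symbolic) shape as the input
    mask = self._srng.binomial(input.shape, p=retain_prob, dtype=input.dtype)
    return input * mask
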
def __init__(self, incoming, alternative, survival_prob, **kwargs):
    super(RandomSwitchLayer, self).__init__([incoming, alternative], **kwargs)
    self._survival_prob = survival_prob
    # ensure different layers are not using the same seeded random
    # generators; otherwise all layers would always take the same option
    self._rng = RandomStreams(get_rng().randint(1, 2147462579))

def __init__(self, incoming, num_in_sum, num_in_max, max_strength=-1,
             alpha=lasagne.init.Normal(0.05), beta=lasagne.init.Constant(0.),
             noise_sigma=0.0, shared_axes='auto', **kwargs):
    super(GHHActivationLayer, self).__init__(incoming, **kwargs)
    if shared_axes == 'auto':
        self.shared_axes = (0,) + tuple(range(2, len(self.input_shape)))
    elif shared_axes == 'all':
        self.shared_axes = tuple(range(len(self.input_shape)))
    elif isinstance(shared_axes, int):
        self.shared_axes = (shared_axes,)
    else:
        self.shared_axes = shared_axes

    self._srng = RandomStreams(get_rng().randint(1, 2147462579))

    self.num_in_sum = num_in_sum
    self.num_in_max = num_in_max
    self.max_strength = max_strength
    self.noise_sigma = noise_sigma

    # Shape of a single parameter
    single_shape = [size for axis, size in enumerate(self.input_shape)
                    if axis not in self.shared_axes]
    if any(size is None for size in single_shape):
        raise ValueError("GHHActivationLayer needs input sizes for "
                         "all axes that alpha's are not shared over.")

    # Shape of the entire alpha and beta.
    # shape = single_shape + [self.num_in_sum, self.num_in_max - 1]
    # We use the original output in max to avoid diminishing gradients.
    shape = single_shape + [self.num_in_sum, self.num_in_max]

    # dimshuffle pattern for the input (list() keeps this valid on Python 3)
    self.input_pattern = ['x', 'x'] + list(range(len(self.input_shape)))

    # dimshuffle pattern for alpha and beta
    axes = iter(range(len(single_shape)))
    single_pattern = ['x' if input_axis in self.shared_axes else next(axes)
                      for input_axis in six.moves.xrange(len(self.input_shape))]
    self.param_pattern = [1, 2] + single_pattern

    # alpha stays regularizable as it directly affects the output range
    self.alpha = self.add_param(alpha, shape, name="alpha", regularizable=True)
    self.beta = self.add_param(beta, shape, name="beta", regularizable=False)

def __init__(self, incoming, w_freq, alpha, shared_axes=(), **kwargs):
    super(WordDropoutLayer, self).__init__(incoming, **kwargs)
    self._srng = RandomStreams(get_rng().randint(1, 2147462579))
    self.w_frew = w_freq
    self.alpha = alpha
    # self.retain = lo(alpha)/(lo(p)+lo(alpha))
    self.retain = T.constant(1.) - (T.constant(alpha) /
                                    (lo(w_freq) + T.constant(alpha)))
    self.shared_axes = tuple(shared_axes)

def __init__(self, incoming, previous_mask, p=0.5, rescale=False,
             shared_axes=(), **kwargs):
    super(ConditionedWordDropoutLayer, self).__init__(incoming, **kwargs)
    self._srng = RandomStreams(get_rng().randint(1, 2147462579))
    self.p = p
    self.rescale = rescale
    self.retain = T.constant(1) - p
    # flip the previous mask: previous_mask -> 1 - previous_mask
    self.previous_mask = -(lo(previous_mask) - T.constant(1))
    self.shared_axes = tuple(shared_axes)

def sample(self, shape):
    W = self.initializer.sample(shape)
    if self.density < 1.0:
        # zero out a random subset of entries to reach the target density
        N = np.prod(W.shape)
        drop_ix = get_rng().choice(N, size=int((1.0 - self.density) * N),
                                   replace=False)
        W.reshape(-1)[drop_ix] = 0.0
    # rescale so the spectral radius of W equals self.radius
    lmax = np.max(np.abs(np.linalg.eig(W)[0]))
    return self.radius * W / lmax

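# Hypothetical sanity check for the sampler above (assumes an instance named
# `sampler` and a square shape, which the eigenvalue computation requires):
# after the rescaling step, the spectral radius equals `sampler.radius`.
import numpy as np

W = sampler.sample((200, 200))
rho = np.max(np.abs(np.linalg.eigvals(W)))
assert np.isclose(rho, sampler.radius, rtol=1e-4)
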
def __init__(self, mu=0, std=np.exp(-3), seed=None):
    """
    Approximation that samples network weights from a factorized normal
    distribution.

    :param mu: prior mean for gaussian weights
    :param std: prior std for gaussian weights
    :param seed: random seed
    """
    self.prior_mu = mu
    self.prior_std = std
    self.srng = RandomStreams(seed or get_rng().randint(1, 2147462579))

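# A hedged sketch of the reparameterised sampling step such an approximation
# usually exposes. The method name `sample_weights` and the `rho`
# parameterisation of the posterior std are assumptions for illustration, not
# part of the snippet above.
import theano.tensor as T

def sample_weights(self, mean, rho):
    std = T.nnet.softplus(rho)                  # keep the posterior std positive
    eps = self.srng.normal(mean.shape, avg=0.0, std=1.0)
    return mean + std * eps                     # w = mu + sigma * eps
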
def tied_losses(preds, n_sample_preds, n_classes, n_pairs):
    preds_per_trial_row = preds.reshape((-1, n_sample_preds, n_classes))
    _srng = RandomStreams(get_rng().randint(1, 2147462579))
    # draw 2 * n_pairs distinct prediction indices per trial
    rand_inds = _srng.choice([n_pairs * 2], n_sample_preds, replace=False)
    part_1 = preds_per_trial_row[:, rand_inds[:n_pairs]]
    part_2 = preds_per_trial_row[:, rand_inds[n_pairs:]]
    # clamp the first part away from zero for numerical stability
    eps = 1e-4
    part_1 = T.maximum(part_1, eps)
    loss = categorical_crossentropy(part_1, part_2)
    return loss

def rrelu(tensorin, shape, lower=0.3, upper=0.8, train=True):
    if not train or upper == lower:
        # deterministic leaky ReLU with the mean slope
        tensorout = T.nnet.relu(tensorin, (upper + lower) / 2.0)
    else:
        # sample one random slope per feature map, shared over the batch and
        # spatial axes
        shape = list(shape)
        shared_axes = (0,) + tuple(range(2, len(shape)))
        for ax in shared_axes:
            shape[ax] = 1
        srng = RandomStreams(get_rng().randint(1, 2147462579))
        rnd = srng.uniform(tuple(shape), low=lower, high=upper,
                           dtype=theano.config.floatX)
        rnd = T.addbroadcast(rnd, *shared_axes)
        tensorout = T.nnet.relu(tensorin, rnd)
    return tensorout

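# Illustrative usage of the helper above (assumes the same Theano imports it
# relies on): compile train- and test-time graphs and check the output shape.
import numpy as np
import theano
import theano.tensor as T

x = T.tensor4('x')
shape = (None, 16, 8, 8)        # batch size may be unknown; axis 0 is shared anyway
y_train = rrelu(x, shape, lower=0.3, upper=0.8, train=True)
y_test = rrelu(x, shape, lower=0.3, upper=0.8, train=False)
f_train = theano.function([x], y_train)
f_test = theano.function([x], y_test)

data = np.random.randn(2, 16, 8, 8).astype(theano.config.floatX)
assert f_train(data).shape == data.shape
assert f_test(data).shape == data.shape
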
def __init__(self, incoming, p=lasagne.init.Constant(-10), log_alpha=None,
             mask=None, n_samples=None, shared_axes=(), **kwargs):
    super(GaussianDropoutLayer, self).__init__(incoming, **kwargs)
    self._srng = RandomStreams(get_rng().randint(1, 2147462579))
    self.shared_axes = tuple(shared_axes)

    if log_alpha is None:
        # derive log_alpha from a dropout rate p: alpha = p / (1 - p)
        if isinstance(p, Number):
            p = np.atleast_1d(p)
        if callable(p):
            p_shape = self.input_shape[1:]
        else:
            p_shape = p.shape
        p = lasagne.utils.create_param(p, p_shape, name='p')
        p = p.get_value()
        log_alpha = np.log(p / (1 - p))

    # add log_alpha as a trainable parameter
    if isinstance(log_alpha, Number):
        log_alpha = np.atleast_1d(log_alpha)
    if callable(log_alpha):
        log_alpha_shape = self.input_shape[1:]
    elif isinstance(log_alpha, tt.sharedvar.SharedVariable):
        log_alpha_shape = log_alpha.get_value().shape
    else:
        log_alpha_shape = log_alpha.shape
    self.log_alpha = self.add_param(log_alpha, log_alpha_shape,
                                    name='log_alpha', regularizable=False)

    # init mask to a shape compatible with log_alpha;
    # the mask should be drawn from a normal (1, alpha) distribution
    mask_shape = [2] + list(self.input_shape[1:])
    sq_alpha = np.exp(0.5 * self.log_alpha.get_value())
    mask = sq_alpha * np.random.normal(1, 1, mask_shape).astype(floatX)
    self.mask = self.add_param(mask, mask_shape, name='mask',
                               trainable=False, regularizable=False)
    self.mask_updates = None

def __init__(self, incoming, word_input, space, p=0.5, rescale=True, **kwargs):
    incomings = [incoming, word_input]
    super(WordDropoutLayer, self).__init__(incomings, **kwargs)
    self._srng = RandomStreams(get_rng().randint(1, 2147462579))
    self.p = p
    self.rescale = rescale
    self.space = space

def __init__(self, incoming, num_units, Wfc=init.Normal(), nonlinearity=rectify,
             mnc=False, b=init.Constant(0.), **kwargs):
    super(DenseLayer, self).__init__(incoming)
    self.num_units = num_units
    self.nonlinearity = nonlinearity
    self.num_inputs = int(np.prod(self.input_shape[1:]))
    # per-layer Theano RandomStreams, seeded from Lasagne's module-level RNG
    self._srng = RandomStreams(get_rng().randint(1, 2147462579))
    self.W = self.add_param(Wfc, (self.num_inputs, self.num_units), name="W")
    # optional max-norm constraint on the weights
    if mnc:
        self.W = updates.norm_constraint(self.W, mnc)
    self.b = self.add_param(b, (num_units,), name="b", regularizable=False)

def __init__(self, incoming, input_size, output_size, W=init.Normal(),
             dropout=0., **kwargs):
    super(DropoutEmbeddingLayer, self).__init__(incoming, **kwargs)
    self.input_size = input_size
    self.output_size = output_size
    self.dropout = dropout
    self._srng = RandomStreams(get_rng().randint(1, 2147462579))
    self.W = self.add_param(W, (input_size, output_size), name="W")

def __init__(self, incoming, num_units, rng, factorized=True,
             common_noise=False, sigma_0=0.4, use_mu_init=True, **kwargs):
    super(NoisyDenseLayer, self).__init__(incoming, num_units, **kwargs)
    if not common_noise and self.num_leading_axes != 1:
        raise NotImplementedError("Test use of theano.tensor.batched_dot")
    num_inputs = int(np.prod(self.input_shape[self.num_leading_axes:]))

    if use_mu_init:
        # override earlier W and b values, using num_inputs
        val = np.sqrt(1. / num_inputs) if factorized else \
            np.sqrt(3. / num_inputs)
        for param in [self.W, self.b]:
            param.set_value(floatX(get_rng().uniform(
                -val, val, param.get_value(borrow=True).shape)))

    # NOTE: the paper quotes sigma_0 = 0.017 for the non-factorized case
    sigma_0 = sigma_0 / np.sqrt(num_inputs) if factorized else sigma_0
    W_sigma = b_sigma = Constant(sigma_0)
    self.W_sigma = self.add_param(W_sigma, (num_inputs, num_units),
                                  name="W_sigma")
    if self.b is None:
        self.b_sigma = None
    else:
        self.b_sigma = self.add_param(b_sigma, (num_units,), name="b_sigma",
                                      regularizable=False)

    if common_noise:
        if factorized:
            self.eps_i = eps_i = rng.normal((num_inputs,))
            self.eps_j = eps_j = rng.normal((num_units,))
            self.W_epsilon = T.outer(f(eps_i), f(eps_j))
            self.b_epsilon = f(eps_j)
        else:
            self.W_epsilon = rng.normal((num_inputs, num_units))
            self.b_epsilon = rng.normal((num_units,))
    else:
        self.num_inputs = num_inputs
        self.num_units = num_units
        self.W_epsilon = None  # must be built later, once the input length is known
        self.b_epsilon = None
        self.eps_is, self.eps_js = list(), list()
        self.W_epsilons, self.b_epsilons = list(), list()

    self.rng = rng
    self.common_noise = common_noise
    self.factorized = factorized
    self.use_mu_init = use_mu_init

def test_specified_rng(self, input_layer):
    from lasagne.layers.noise import DropoutLayer
    input = theano.shared(numpy.ones((100, 100)))
    seed = 123456789
    rng = get_rng()

    set_rng(RandomState(seed))
    result = DropoutLayer(input_layer).get_output_for(input)
    result_eval1 = result.eval()

    set_rng(RandomState(seed))
    result = DropoutLayer(input_layer).get_output_for(input)
    result_eval2 = result.eval()

    set_rng(rng)  # reset to original RNG for other tests
    assert numpy.allclose(result_eval1, result_eval2)

def __init__(self, incoming, num_units, W=init.GlorotUniform(),
             b=init.Constant(0.), nonlinearity=nonlinearities.rectify,
             num_leading_axes=1, p=0.5, shared_axes=(), noise_samples=None,
             **kwargs):
    super(DenseDropoutLayer, self).__init__(
        incoming, num_units, W, b, nonlinearity, num_leading_axes, **kwargs)
    self.p = p
    self.shared_axes = shared_axes

    # init random number generator
    self._srng = RandomStreams(get_rng().randint(1, 2147462579))

    # initialize noise samples
    self.noise = self.init_noise(noise_samples)

def __init__(self, incoming, sigma=init.Constant(-1), shared_axes='all',
             **kwargs):
    super(GaussianNoiseLayer, self).__init__(incoming, **kwargs)
    if shared_axes == 'auto':
        # share sigma over all but the second axis
        shared_axes = (0,) + tuple(range(2, len(self.input_shape)))
    elif shared_axes == 'all':
        # share sigma over all axes (e.g. for input)
        shared_axes = tuple(range(0, len(self.input_shape)))
    elif isinstance(shared_axes, int):
        shared_axes = (shared_axes,)
    else:
        shared_axes = ()
    self.shared_axes = shared_axes

    shape = [size for axis, size in enumerate(self.input_shape)
             if axis not in self.shared_axes]
    if any(size is None for size in shape):
        raise ValueError("GaussianNoiseLayer needs specified input sizes for "
                         "all axes that sigmas are not shared over.")

    # sigma is stored on a log10 scale
    self.sigma = self.add_param(sigma, shape, 'sigma', regularizable=False)
    self._srng = RandomStreams(get_rng().randint(1, 2147462579))

def test_specified_rng():
    from lasagne.random import get_rng, set_rng
    from lasagne.init import (Normal, Uniform, GlorotNormal, GlorotUniform,
                              Sparse, Orthogonal)
    from numpy.random import RandomState
    from numpy import allclose

    seed = 123456789
    rng = get_rng()
    for init_class in [Normal, Uniform, GlorotNormal, GlorotUniform,
                       Sparse, Orthogonal]:
        set_rng(RandomState(seed))
        sample1 = init_class().sample((100, 100))
        set_rng(RandomState(seed))
        sample2 = init_class().sample((100, 100))
        set_rng(rng)  # reset to original RNG for other tests
        assert allclose(sample1, sample2), \
            ("random initialization was inconsistent for {}"
             .format(init_class.__name__))

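# The two tests above exercise the same pattern: every stochastic layer and
# initializer seeds itself from lasagne.random.get_rng(), so re-seeding the
# module-level RNG with set_rng() before building a network reproduces all of
# its random draws. A minimal sketch of that user-facing pattern, using only
# standard Lasagne/Theano calls:
import numpy as np
import theano
import lasagne
from lasagne.random import set_rng

def dropout_output(seed):
    set_rng(np.random.RandomState(seed))        # seed Lasagne's shared RNG
    l_in = lasagne.layers.InputLayer((10, 20))
    l_drop = lasagne.layers.DropoutLayer(l_in, p=0.5)
    x = theano.shared(np.ones((10, 20), dtype=theano.config.floatX))
    # keep dropout active (deterministic=False) so the masks can be compared
    out = lasagne.layers.get_output(l_drop, x, deterministic=False)
    return out.eval()

assert np.allclose(dropout_output(123), dropout_output(123))
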
def __init__(self, incoming, num_units,
             W=lasagne.init.GlorotUniform(
                 gain=network.GlorotUniformGain[lasagne.nonlinearities.sigmoid]),
             b=lasagne.init.Constant(0.),
             alpha=1,
             rescale=True,
             # beta=0,
             **kwargs):
    super(AdaptiveDropoutLayer, self).__init__(incoming, **kwargs)
    self._srng = RandomStreams(get_rng().randint(1, 2147462579))

    self.num_units = num_units
    num_inputs = int(numpy.prod(self.input_shape[1:]))
    self.alpha = alpha

    # self.W = self.add_param(W, (num_inputs, num_units), name="W",
    #                         adaptable=True)
    self.W = self.add_param(W, (num_inputs, num_units), name="W",
                            trainable=False, adaptable=True)
    self.b = self.add_param(b, (num_units,), name="b", regularizable=False,
                            trainable=False, adaptable=True)
    self.rescale = rescale

def __init__(self, incoming, activation_probability, rescale=True, **kwargs):
    super(GeneralizedDropoutLayer, self).__init__(incoming, **kwargs)
    self._srng = RandomStreams(get_rng().randint(1, 2147462579))
    '''
    self._alpha_alpha = alpha

    assert len(self.input_shape) == 2
    dimensionality = self.input_shape[1]
    # dimensionality = np.prod(self.input_shape[1:])

    shape_alpha = self._alpha_alpha / numpy.arange(1, dimensionality + 1)
    shape_beta = 1.0

    activation_probability = numpy.zeros(dimensionality)
    for index in xrange(dimensionality):
        activation_probability[index] = numpy.random.beta(shape_alpha[index],
                                                          shape_beta)
    '''
    self.activation_probability = activation_probability
    self.rescale = rescale

def __init__(self, architecture, hyperparameter={}):
    self.archi = architecture
    self.hyperp = hyperparameter
    self._srng = RandomStreams(get_rng().randint(1, 2147462579))  # for adaptive noise
    self._srng2 = rStream(2147462579)

    # Create nolearn ModifiedNeuralNet object
    self.classifier = ModifiedNeuralNet(
        layers=self.archi,
        max_epochs=self.hyperp.setdefault('epochs', 100),
        update=self.hyperp.setdefault('optimizer', lasagne.updates.adam),
        update_learning_rate=self.hyperp.setdefault('learningRate', 0.001),
        objective=modifiedObjective,
        objective_logitSens=self.hyperp.setdefault('logitSens', 0.),
        objective_probSens=self.hyperp.setdefault('probSens', 0.),
        objective_lossSens=self.hyperp.setdefault('lossSens', 0.),
        objective_std=self.hyperp.setdefault('trainingDataStd', None),
        objective_loss_function=categorical_crossentropy,
        verbose=0,
        batch_iterator_train=DataAugmentationBatchIterator(
            self.hyperp.setdefault('batchSize', 64),
            disturbLabelRate=self.hyperp.setdefault('disturbLabelRate', 0),
            sdWidth=self.hyperp.setdefault('sdWidth', 0),
            sdNumber=self.hyperp.setdefault('sdNumber', 0),
            shuffle=True),
        batch_iterator_test=nolearn.lasagne.BatchIterator(
            self.hyperp.setdefault('batchSize', 64), shuffle=False),
        train_split=TrainSplit(
            eval_size=self.hyperp.setdefault('validationSetRatio', .1)),
        objective_l1=self.hyperp.setdefault('l1', 0.),
        objective_l2=self.hyperp.setdefault('l2', 0.01),
        on_training_started=[nolearn.lasagne.PrintLayerInfo()],
        on_epoch_finished=[getIndividualLosses, printError,
                           addEndTimeToHistory, printAdaptiveNoise,
                           saveBestValidNet])
    self.classifier.initialize()

def __init__(self, incoming, num_units, Wfc=init.Normal(), nonlinearity=rectify,
             mnc=False, g=init.Constant(1.), b=init.Constant(0.), **kwargs):
    super(WeightNormLayer, self).__init__(incoming)
    self.num_units = num_units
    self.nonlinearity = nonlinearity
    self.num_inputs = int(np.prod(self.input_shape[1:]))
    self._srng = RandomStreams(get_rng().randint(1, 2147462579))

    self.W_norm = self.add_param(Wfc, (self.num_inputs, self.num_units),
                                 name="W_norm")
    self.g = self.add_param(g, (self.num_units,), name="g")
    self.b = self.add_param(b, (self.num_units,), name="b",
                            regularizable=False)

    # weight normalisation: rescale each column of W_norm to have norm g
    W_axes_to_sum = 0
    W_dimshuffle_args = ['x', 0]
    self.W = self.W_norm * (
        self.g / T.sqrt(T.sum(T.square(self.W_norm), axis=W_axes_to_sum)))

    # optional max-norm constraint
    if mnc:
        self.W = updates.norm_constraint(self.W, mnc)

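# Hypothetical check of the reparameterisation above, assuming WeightNormLayer
# is importable in scope: each output unit's effective weight vector should
# have norm g (which init.Constant(1.) sets to 1 here).
import numpy as np
import lasagne

l_in = lasagne.layers.InputLayer((None, 50))
l_wn = WeightNormLayer(l_in, num_units=20)
W_eff = l_wn.W.eval()                      # symbolic W, built from W_norm and g
g_val = l_wn.g.get_value()
assert np.allclose(np.sqrt((W_eff ** 2).sum(axis=0)), g_val, atol=1e-4)
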
def __init__(self, incoming, lower=0.3, upper=0.8, shared_axes='auto',
             **kwargs):
    super(RandomizedRectifierLayer, self).__init__(incoming, **kwargs)
    self._srng = RandomStreams(get_rng().randint(1, 2147462579))
    self.lower = lower
    self.upper = upper

    if not isinstance(lower > upper, theano.Variable) and lower > upper:
        raise ValueError("Upper bound for RandomizedRectifierLayer needs "
                         "to be higher than lower bound.")

    if shared_axes == 'auto':
        self.shared_axes = (0,) + tuple(range(2, len(self.input_shape)))
    elif shared_axes == 'all':
        self.shared_axes = tuple(range(len(self.input_shape)))
    elif isinstance(shared_axes, int):
        self.shared_axes = (shared_axes,)
    else:
        self.shared_axes = shared_axes

def __init__(self, incoming, sigma=0.1, noise_layer=None, **kwargs):
    super(TiedGaussianNoiseLayer, self).__init__(incoming)
    self._srng = RandomStreams(get_rng().randint(1, 2147462579))
    self.sigma = sigma
    self._master = noise_layer
    self._noise = None

def sample(self, shape):
    # log-uniform draw: sample uniformly in log-space, then exponentiate
    return floatX(np.exp(get_rng().uniform(low=self.range[0],
                                           high=self.range[1], size=shape)))

def __init__(self, incoming, num_units, ingate=Gate(), forgetgate=Gate(),
             cell=Gate(W_cell=None, nonlinearity=nonlinearities.tanh),
             outgate=Gate(), nonlinearity=nonlinearities.tanh,
             cell_init=init.Constant(0.), hid_init=init.Constant(0.),
             backwards=False, learn_init=False, peepholes=True,
             gradient_steps=-1, grad_clipping=0, unroll_scan=False,
             precompute_input=True, mask_input=None, only_return_final=False,
             dropout=0., **kwargs):
    # This layer inherits from a MergeLayer, because it can have four
    # inputs - the layer input, the mask, the initial hidden state and the
    # initial cell state. We will just provide the layer input as incomings,
    # unless a mask input, initial hidden state or initial cell state was
    # provided.
    incomings = [incoming]
    self.mask_incoming_index = -1
    self.hid_init_incoming_index = -1
    self.cell_init_incoming_index = -1
    if mask_input is not None:
        incomings.append(mask_input)
        self.mask_incoming_index = len(incomings) - 1
    if isinstance(hid_init, Layer):
        incomings.append(hid_init)
        self.hid_init_incoming_index = len(incomings) - 1
    if isinstance(cell_init, Layer):
        incomings.append(cell_init)
        self.cell_init_incoming_index = len(incomings) - 1

    # Initialize parent layer
    super(DropoutLSTMLayer, self).__init__(incomings, **kwargs)

    # If the provided nonlinearity is None, make it linear
    if nonlinearity is None:
        self.nonlinearity = nonlinearities.identity
    else:
        self.nonlinearity = nonlinearity

    self.learn_init = learn_init
    self.num_units = num_units
    self.backwards = backwards
    self.peepholes = peepholes
    self.gradient_steps = gradient_steps
    self.grad_clipping = grad_clipping
    self.unroll_scan = unroll_scan
    self.precompute_input = precompute_input
    self.only_return_final = only_return_final
    self._srng = RandomStreams(get_rng().randint(1, 2147462579))

    if 0. <= dropout < 1.:
        self.dropout = dropout
    else:
        raise ValueError("dropout must be between 0 and 1.")

    if unroll_scan and gradient_steps != -1:
        raise ValueError(
            "Gradient steps must be -1 when unroll_scan is true.")

    # Retrieve the dimensionality of the incoming layer
    input_shape = self.input_shapes[0]

    if unroll_scan and input_shape[1] is None:
        raise ValueError("Input sequence length cannot be specified as "
                         "None when unroll_scan is True")

    num_inputs = np.prod(input_shape[2:])

    def add_gate_params(gate, gate_name):
        """Convenience function for adding layer parameters from a Gate
        instance."""
        return (self.add_param(gate.W_in, (num_inputs, num_units),
                               name="W_in_to_{}".format(gate_name)),
                self.add_param(gate.W_hid, (num_units, num_units),
                               name="W_hid_to_{}".format(gate_name)),
                self.add_param(gate.b, (num_units,),
                               name="b_{}".format(gate_name),
                               regularizable=False),
                gate.nonlinearity)

    # Add in parameters from the supplied Gate instances
    (self.W_in_to_ingate, self.W_hid_to_ingate, self.b_ingate,
     self.nonlinearity_ingate) = add_gate_params(ingate, 'ingate')

    (self.W_in_to_forgetgate, self.W_hid_to_forgetgate, self.b_forgetgate,
     self.nonlinearity_forgetgate) = add_gate_params(forgetgate,
                                                     'forgetgate')

    (self.W_in_to_cell, self.W_hid_to_cell, self.b_cell,
     self.nonlinearity_cell) = add_gate_params(cell, 'cell')

    (self.W_in_to_outgate, self.W_hid_to_outgate, self.b_outgate,
     self.nonlinearity_outgate) = add_gate_params(outgate, 'outgate')

    # If peephole (cell to gate) connections were enabled, initialize
    # peephole connections. These are elementwise products with the cell
    # state, so they are represented as vectors.
    if self.peepholes:
        self.W_cell_to_ingate = self.add_param(
            ingate.W_cell, (num_units,), name="W_cell_to_ingate")
        self.W_cell_to_forgetgate = self.add_param(
            forgetgate.W_cell, (num_units,), name="W_cell_to_forgetgate")
        self.W_cell_to_outgate = self.add_param(
            outgate.W_cell, (num_units,), name="W_cell_to_outgate")

    # Set up initial values for the cell and the hidden units
    if isinstance(cell_init, Layer):
        self.cell_init = cell_init
    else:
        self.cell_init = self.add_param(
            cell_init, (1, num_units), name="cell_init",
            trainable=learn_init, regularizable=False)

    if isinstance(hid_init, Layer):
        self.hid_init = hid_init
    else:
        self.hid_init = self.add_param(
            hid_init, (1, self.num_units), name="hid_init",
            trainable=learn_init, regularizable=False)

def __init__(self, incoming, p=0.5, rescale=True, **kwargs):
    super(SparseDropoutLayer, self).__init__(incoming, **kwargs)
    self._srng = RandomStreams(get_rng().randint(1, 2147462579))
    self.p = p
    self.rescale = rescale

def sample(self, shape):
    assert len(shape) == 2
    # Glorot-style uniform bound, scaled by a user-supplied factor
    scale = self.factor * np.sqrt(6. / (shape[0] + shape[1]))
    return floatX(get_rng().uniform(low=-scale, high=scale, size=shape))

def __init__(self, incomings, **kwargs):
    super(Q_Layer, self).__init__(incomings, **kwargs)
    self._srng = RandomStreams(get_rng().randint(1, 2147462579))

def sample(self, shape):
    nelem = shape[0]
    return floatX(get_rng().uniform(low=0, high=nelem, size=shape))

def __init__(self, idims, odims, n_samples=100, heteroscedastic=False,
             name='BNN', filename=None, network_spec=None, **kwargs):
    self.D = idims
    self.E = odims
    self.name = name
    self.should_recompile = False
    self.trained = False

    sn = (np.ones((self.E,)) * 1e-3).astype(floatX)
    sn = np.log(np.exp(sn) - 1)
    self.unconstrained_sn = theano.shared(sn, name='%s_sn' % (self.name))
    eps = np.finfo(np.__dict__[floatX]).eps
    self.sn = tt.nnet.softplus(self.unconstrained_sn) + eps

    self.network = None
    if type(network_spec) is list:
        self.network_spec = network_spec
    elif type(network_spec) is dict:
        network_spec['input_dims'] = idims
        network_spec['output_dims'] = odims
        build_fn = network_spec.pop('build_fn', dropout_mlp)
        self.network_spec = build_fn(**network_spec)
    else:
        self.network_spec = None
    self.network_params = None

    samples = np.array(n_samples).astype('int32')
    samples_name = "%s>n_samples" % (self.name)
    self.n_samples = theano.shared(samples, name=samples_name)
    self.m_rng = RandomStreams(get_rng().randint(1, 2147462579))

    self.X = None
    self.Y = None
    self.Xm = None
    self.iXs = None
    self.Ym = None
    self.Ys = None

    self.heteroscedastic = heteroscedastic

    # filename for saving
    fname = '%s_%d_%d_%s_%s' % (self.name, self.D, self.E,
                                theano.config.device, theano.config.floatX)
    self.filename = fname if filename is None else filename
    BaseRegressor.__init__(self, name=name, filename=self.filename)
    if filename is not None:
        self.load()

    # optimizer options
    max_evals = kwargs['max_evals'] if 'max_evals' in kwargs else 2000
    conv_thr = kwargs['conv_thr'] if 'conv_thr' in kwargs else 1e-12
    min_method = kwargs['min_method'] if 'min_method' in kwargs else 'ADAM'
    self.optimizer = SGDOptimizer(min_method, max_evals, conv_thr,
                                  name=self.name + '_opt')

    # register theano shared variables for saving
    self.register_types([tt.sharedvar.SharedVariable])
    self.register(['sn', 'network_params', 'network_spec'])

def __init__(self, incoming, sigma=1.0, **kwargs):
    super(GaussianDropoutLayer, self).__init__(incoming, **kwargs)
    self._srng = RandomStreams(get_rng().randint(1, 2147462579))
    self.sigma = sigma

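# A hedged sketch of the multiplicative-noise forward pass such a constructor
# typically pairs with (noise ~ N(1, sigma^2) at training time; this is an
# illustrative reconstruction, not the snippet's own code).
def get_output_for(self, input, deterministic=False, **kwargs):
    if deterministic or self.sigma == 0:
        return input
    noise = self._srng.normal(input.shape, avg=1.0, std=self.sigma,
                              dtype=input.dtype)
    return input * noise
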
def __init__(self,
             # input data
             input_data_layer,
             input_mask_layer,
             # model size
             num_units,
             # initialize
             cell_init=init.Constant(0.),
             hid_init=init.Constant(0.),
             learn_init=False,
             # options
             stochastic=False,
             skip_scale=T.ones(shape=(1,), dtype=floatX),
             backwards=False,
             gradient_steps=-1,
             grad_clipping=0,
             only_return_final=False,
             **kwargs):
    # input
    incomings = [input_data_layer, input_mask_layer]

    # init input
    input_init = init.Constant(0.)
    self.input_init_incoming_index = -1
    if isinstance(input_init, Layer):
        incomings.append(input_init)
        self.input_init_incoming_index = len(incomings) - 1

    # init hidden
    self.hid_init_incoming_index = -1
    if isinstance(hid_init, Layer):
        incomings.append(hid_init)
        self.hid_init_incoming_index = len(incomings) - 1

    # init cell
    self.cell_init_incoming_index = -1
    if isinstance(cell_init, Layer):
        incomings.append(cell_init)
        self.cell_init_incoming_index = len(incomings) - 1

    # init class
    super(DiffSkipLSTMLayer, self).__init__(incomings, **kwargs)

    # set options
    self.stochastic = stochastic
    self.skip_scale = skip_scale
    self.learn_init = learn_init
    self.num_units = num_units
    self.backwards = backwards
    self.gradient_steps = gradient_steps
    self.grad_clipping = grad_clipping
    self.only_return_final = only_return_final

    # set sampler
    self.uniform = RandomStreams(get_rng().randint(1, 2147462579)).uniform

    # get input size
    input_shape = self.input_shapes[0]
    num_inputs = np.prod(input_shape[2:])

    ###################
    # gate parameters #
    ###################
    def add_gate_params(gate_name):
        return (self.add_param(spec=init.Orthogonal(0.1),
                               shape=(num_inputs, num_units),
                               name="W_in_to_{}".format(gate_name)),
                self.add_param(spec=init.Orthogonal(0.1),
                               shape=(num_units, num_units),
                               name="W_hid_to_{}".format(gate_name)),
                self.add_param(spec=init.Constant(0.0),
                               shape=(num_units,),
                               name="b_{}".format(gate_name),
                               regularizable=False))

    ##### in gate #####
    (self.W_in_to_ingate,
     self.W_hid_to_ingate,
     self.b_ingate) = add_gate_params('ingate')
    self.W_cell_to_ingate = self.add_param(spec=init.Uniform(0.1),
                                           shape=(num_units,),
                                           name="W_cell_to_ingate")

    ##### forget gate #####
    (self.W_in_to_forgetgate,
     self.W_hid_to_forgetgate,
     self.b_forgetgate) = add_gate_params('forgetgate')
    self.W_cell_to_forgetgate = self.add_param(spec=init.Uniform(0.1),
                                               shape=(num_units,),
                                               name="W_cell_to_forgetgate")

    ##### cell #####
    (self.W_in_to_cell,
     self.W_hid_to_cell,
     self.b_cell) = add_gate_params('cell')

    ##### out gate #####
    (self.W_in_to_outgate,
     self.W_hid_to_outgate,
     self.b_outgate) = add_gate_params('outgate')
    self.W_cell_to_outgate = self.add_param(spec=init.Uniform(0.1),
                                            shape=(num_units,),
                                            name="W_cell_to_outgate")

    ###################
    # skip parameters #
    ###################
    self.W_cell_to_skip = self.add_param(spec=init.Orthogonal(0.1),
                                         shape=(num_units, num_units),
                                         name="W_cell_to_skip")
    self.b_cell_to_skip = self.add_param(spec=init.Constant(1.0),
                                         shape=(num_units,),
                                         name="b_cell_to_skip",
                                         regularizable=False)
    self.W_hid_to_skip = self.add_param(spec=init.Orthogonal(0.1),
                                        shape=(num_units, num_units),
                                        name="W_hid_to_skip")
    self.b_hid_to_skip = self.add_param(spec=init.Constant(1.0),
                                        shape=(num_units,),
                                        name="b_hid_to_skip",
                                        regularizable=False)
    self.W_in_to_skip = self.add_param(spec=init.Orthogonal(0.1),
                                       shape=(num_inputs, num_units),
                                       name="W_in_to_skip")
    self.b_in_to_skip = self.add_param(spec=init.Constant(1.0),
                                       shape=(num_units,),
                                       name="b_in_to_skip",
                                       regularizable=False)
    self.W_skip = self.add_param(spec=init.Orthogonal(0.1),
                                 shape=(num_units, 1),
                                 name="W_skip")
    self.b_skip = self.add_param(spec=init.Constant(0.0),
                                 shape=(1,),
                                 name="b_skip",
                                 regularizable=False)
    self.W_diff_to_skip = self.add_param(spec=init.Orthogonal(0.1),
                                         shape=(num_inputs, num_units),
                                         name="W_diff_to_skip")
    self.b_diff_to_skip = self.add_param(spec=init.Constant(0.0),
                                         shape=(num_units,),
                                         name="b_diff_to_skip",
                                         regularizable=False)

    if isinstance(input_init, Layer):
        self.input_init = input_init
    else:
        self.input_init = self.add_param(spec=input_init,
                                         shape=(1, num_inputs),
                                         name="input_init",
                                         trainable=learn_init,
                                         regularizable=False)

    if isinstance(cell_init, Layer):
        self.cell_init = cell_init
    else:
        self.cell_init = self.add_param(spec=cell_init,
                                        shape=(1, num_units),
                                        name="cell_init",
                                        trainable=learn_init,
                                        regularizable=False)

    if isinstance(hid_init, Layer):
        self.hid_init = hid_init
    else:
        self.hid_init = self.add_param(spec=hid_init,
                                       shape=(1, num_units),
                                       name="hid_init",
                                       trainable=learn_init,
                                       regularizable=False)

def __init__(self, incoming, p=0.5, **kwargs):
    super(IfElseDropLayer, self).__init__(incoming, **kwargs)
    self._srng = RandomStreams(get_rng().randint(1, 2147462579))
    self.p = p