def test_binomial_vector(self):
    random = RandomStreams(utt.fetch_seed())
    n = tensor.lvector()
    prob = tensor.vector()
    out = random.binomial(n=n, p=prob)
    assert out.ndim == 1
    f = function([n, prob], out)

    n_val = [1, 2, 3]
    prob_val = numpy.asarray([.1, .2, .3], dtype=config.floatX)
    seed_gen = numpy.random.RandomState(utt.fetch_seed())
    numpy_rng = numpy.random.RandomState(int(seed_gen.randint(2**30)))

    # Arguments of size (3,)
    val0 = f(n_val, prob_val)
    numpy_val0 = numpy_rng.binomial(n=n_val, p=prob_val)
    assert numpy.all(val0 == numpy_val0)

    # Arguments of size (2,)
    val1 = f(n_val[:-1], prob_val[:-1])
    numpy_val1 = numpy_rng.binomial(n=n_val[:-1], p=prob_val[:-1])
    assert numpy.all(val1 == numpy_val1)

    # Specifying the size explicitly
    g = function([n, prob], random.binomial(n=n, p=prob, size=(3,)))
    val2 = g(n_val, prob_val)
    numpy_rng = numpy.random.RandomState(int(seed_gen.randint(2**30)))
    numpy_val2 = numpy_rng.binomial(n=n_val, p=prob_val, size=(3,))
    assert numpy.all(val2 == numpy_val2)
    self.assertRaises(ValueError, g, n_val[:-1], prob_val[:-1])
def test_binomial_vector(self):
    random = RandomStreams(utt.fetch_seed())
    n = tensor.lvector()
    prob = tensor.vector()
    out = random.binomial(n=n, p=prob)
    assert out.ndim == 1
    f = function([n, prob], out)

    n_val = [1, 2, 3]
    prob_val = np.asarray([0.1, 0.2, 0.3], dtype=config.floatX)
    seed_gen = np.random.RandomState(utt.fetch_seed())
    numpy_rng = np.random.RandomState(int(seed_gen.randint(2**30)))

    # Arguments of size (3,)
    val0 = f(n_val, prob_val)
    numpy_val0 = numpy_rng.binomial(n=n_val, p=prob_val)
    assert np.all(val0 == numpy_val0)

    # Arguments of size (2,)
    val1 = f(n_val[:-1], prob_val[:-1])
    numpy_val1 = numpy_rng.binomial(n=n_val[:-1], p=prob_val[:-1])
    assert np.all(val1 == numpy_val1)

    # Specifying the size explicitly
    g = function([n, prob], random.binomial(n=n, p=prob, size=(3,)))
    val2 = g(n_val, prob_val)
    numpy_rng = np.random.RandomState(int(seed_gen.randint(2**30)))
    numpy_val2 = numpy_rng.binomial(n=n_val, p=prob_val, size=(3,))
    assert np.all(val2 == numpy_val2)
    with pytest.raises(ValueError):
        g(n_val[:-1], prob_val[:-1])
def randdrop(x, level, noise_shape=None, seed=None):
    '''Sets entries in `x` to zero at random, while scaling the entire tensor.

    # Arguments
        x: tensor
        level: fraction of the entries in the tensor that will be set to 0.
        noise_shape: shape for randomly generated keep/drop flags,
            must be broadcastable to the shape of `x`
        seed: random seed to ensure determinism.
    '''
    # if level < 0. or level >= 1:
    #     raise Exception('Dropout level must be in interval [0, 1[.')
    if seed is None:
        seed = np.random.randint(1337)
    rng = RandomStreams(seed=seed)
    retain_prob = 1 - level

    if noise_shape is None:
        random_tensor = rng.binomial(x.shape, p=retain_prob, dtype=x.dtype)
    else:
        random_tensor = rng.binomial(noise_shape, p=retain_prob, dtype=x.dtype)
        random_tensor = T.patternbroadcast(random_tensor,
                                           [dim == 1 for dim in noise_shape])
    x *= random_tensor
    x /= retain_prob
    return x
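# Usage sketch for the helper above (a minimal, illustrative example: it assumes
# the same imports the helper itself relies on; the variable names are not part
# of the original snippet).
import numpy as np
import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams

x = T.matrix('x')
dropped = randdrop(x, level=0.3, seed=42)       # keep ~70% of entries, rescale by 1/0.7
f = theano.function([x], dropped)
print(f(np.ones((2, 4), dtype=theano.config.floatX)))   # surviving entries come out as ~1.43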
def theano_sentence_prediction(self, Sentence, Chars, WordLengths):
    input_lstm_res_f = self.input_lstm_forward_layer.function(Sentence, Chars, WordLengths)
    input_lstm_res_b = self.input_lstm_backward_layer.function(Sentence, Chars, WordLengths)
    input_combined = T.concatenate((input_lstm_res_f, input_lstm_res_b), axis=1)

    # Make pairwise features. This is really just "tensor product with concatenation
    # instead of multiplication". Is there a command for that?
    full_matrix, _ = theano.scan(fn=self.__pairwise_features,
                                 outputs_info=None,
                                 sequences=input_combined,
                                 non_sequences=[input_combined, Sentence.shape[0]])

    if len(self.lstm_layers) > 0 and self.lstm_layers[0].training:
        srng = RandomStreams(seed=12345)
        full_matrix = T.switch(srng.binomial(size=(Sentence.shape[0], Sentence.shape[0] + 1, self.hidden_dimension * 4), p=0.5),
                               full_matrix, 0)
    else:
        full_matrix = 0.5 * full_matrix

    full_matrix = self.transition_layer.function(full_matrix)

    for layer in self.lstm_layers:
        if layer.training:
            print("hah-train")
            full_matrix = T.switch(srng.binomial(size=(Sentence.shape[0], Sentence.shape[0] + 1, self.hidden_dimension * 4), p=0.5),
                                   full_matrix, 0)
        else:
            print("heh-notrain")
            full_matrix = 0.5 * full_matrix

        full_matrix = layer.function(full_matrix)

    final_matrix = self.output_convolution.function(full_matrix)
    return T.nnet.softmax(final_matrix)
def theano_sentence_prediction(self, Vs):
    # Make pairwise features. This is really just "tensor product with concatenation
    # instead of multiplication". Is there a command for that?
    pairwise_vs, _ = theano.scan(fn=self.__pairwise_features,
                                 outputs_info=None,
                                 sequences=Vs,
                                 non_sequences=[Vs, Vs.shape[0]])

    if self.input_lstm_layer.training:
        srng = RandomStreams(seed=12345)

    full_matrix = self.input_lstm_layer.function(pairwise_vs)

    for layer in self.lstm_layers:
        if self.input_lstm_layer.training:
            print("hah-train")
            full_matrix = T.switch(srng.binomial(size=(Vs.shape[0], Vs.shape[0] + 1, self.hidden_dimension * 4), p=0.5),
                                   full_matrix, 0)
        else:
            print("heh-notrain")
            full_matrix = 0.5 * full_matrix

        full_matrix = layer.function(full_matrix)

    if self.input_lstm_layer.training:
        print("hah-train")
        full_matrix = T.switch(srng.binomial(size=(Vs.shape[0], Vs.shape[0] + 1, self.hidden_dimension * 4), p=0.5),
                               full_matrix, 0)
    else:
        print("heh-notrain")
        full_matrix = 0.5 * full_matrix

    final_matrix = self.output_convolution.function(full_matrix)
    return T.nnet.softmax(final_matrix)
class RBMLayer(BaseLayer): def __init__(self, input_shape, Weight_shape, layername='', core_name='sigmoid'): self.rng = np.random.RandomState(2500) self.rng2 = RandomStreams(3000) self.input_shape = input_shape self.Weight_shape = Weight_shape self.Weight = theano.shared(np.asarray(self.rng.uniform( -1, 1, size=self.Weight_shape), dtype=theano.config.floatX), name=layername + '_Weight') self.bias = theano.shared(np.asarray(self.rng.uniform( -0, 0, size=self.Weight_shape[1]), dtype=theano.config.floatX), name=layername + '_bias') self.bias_vis = theano.shared(np.asarray(self.rng.uniform( -0, 0, size=self.Weight_shape[0]), dtype=theano.config.floatX), name=layername + '_bias_vis') self.layername = layername self.core_name = core_name self.params = [self.Weight, self.bias, self.bias_vis] def vis_to_hid(self, input_x): out = self.core( input_x.dot(self.Weight) + self.bias.dimshuffle('x', 0)) out = self.rng2.binomial(n=1, size=out.shape, p=out) return out def hid_to_vis(self, hid): out = self.core( hid.dot(self.Weight.T) + self.bias_vis.dimshuffle('x', 0)) out = self.rng2.binomial(n=1, size=out.shape, p=out) return out def free_energy(self, vis): ''' Function to compute the free energy ''' wx_b = T.dot(vis, self.Weight) + self.bias vbias_term = vis.dot(self.bias_vis) hidden_term = T.sum(T.log(1 + T.exp(wx_b)), axis=1) return -hidden_term - vbias_term def hv(self, input_x): hid = self.vis_to_hid(input_x) vis = self.hid_to_vis(hid) return vis
def test_binomial_from_uniform_cpu(self):
    # Test using numpy
    rng = np.random.RandomState(42)
    probs = rng.rand(10)
    seed = 1337
    nb_samples = 1000000

    rng = np.random.RandomState(seed)
    success1 = np.zeros(len(probs))
    for i in range(nb_samples):
        success1 += rng.binomial(n=1, p=probs)

    rng = np.random.RandomState(seed)
    success2 = np.zeros(len(probs))
    for i in range(nb_samples):
        success2 += (rng.rand(len(probs)) < probs).astype('int')

    success1 = success1 / nb_samples
    success2 = success2 / nb_samples
    assert_array_almost_equal(success1, success2)

    # Test using Theano's default RandomStreams
    theano_rng = RandomStreams(1337)
    rng_bin = theano_rng.binomial(size=probs.shape, n=1, p=probs, dtype=theano.config.floatX)
    success1 = np.zeros(len(probs))
    for i in range(nb_samples):
        success1 += rng_bin.eval()

    theano_rng = RandomStreams(1337)
    rng_bin = theano_rng.uniform(size=probs.shape, dtype=theano.config.floatX) < probs
    success2 = np.zeros(len(probs))
    for i in range(nb_samples):
        success2 += rng_bin.eval()

    assert_array_almost_equal(success1 / nb_samples, success2 / nb_samples)

    # Test using Theano's sandbox MRG RandomStreams
    theano_rng = MRG_RandomStreams(1337)
    success1 = theano_rng.binomial(size=probs.shape, n=1, p=probs, dtype=theano.config.floatX)
    theano_rng = MRG_RandomStreams(1337)
    success2 = theano_rng.uniform(size=probs.shape, dtype=theano.config.floatX) < probs
    assert_array_equal(success1.eval(), success2.eval())
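# Standalone sketch of the equivalence the test above checks for the MRG generator:
# binomial(n=1, p) and (uniform < p) yield the same draws when seeded identically.
# This only assumes numpy and Theano; the names below are illustrative.
import numpy as np
import theano
from theano.sandbox.rng_mrg import MRG_RandomStreams

probs = np.random.RandomState(0).rand(5).astype(theano.config.floatX)

rng_a = MRG_RandomStreams(1337)
bern = rng_a.binomial(size=probs.shape, n=1, p=probs, dtype=theano.config.floatX)

rng_b = MRG_RandomStreams(1337)
thresh = rng_b.uniform(size=probs.shape, dtype=theano.config.floatX) < probs

print(bern.eval(), thresh.eval())   # element-wise identical, as asserted above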
class DropoutLayer(object):
    """
    Randomly set to 0 values of the input with probability p.
    """
    def __init__(self, p=0.5, name='dropout_layer'):
        """
        p has to be between 0 and 1 (1 excluded).
        p is the probability of dropping out a unit.

        :param p:
        :param name:
        """
        assert 0. <= p < 1.
        self.p = p
        self.rng = RandomStreams(seed=123456)
        self.name = name

    def link(self, input):
        """
        Dropout link: apply mask to the input.

        :param input:
        :return:
        """
        if self.p > 0:
            mask = self.rng.binomial(n=1, p=1 - self.p, size=input.shape,
                                     dtype=theano.config.floatX)
            self.output = input * mask
        else:
            self.output = input

        return self.output
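# Minimal usage sketch for the layer above (illustrative only; it assumes the
# usual Theano imports, and note the layer masks without rescaling, so units
# are kept with probability 1 - p).
import numpy as np
import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams

x = T.matrix('x')
layer = DropoutLayer(p=0.5, name='drop1')
y = layer.link(x)
f = theano.function([x], y)
print(f(np.ones((2, 3), dtype=theano.config.floatX)))   # roughly half the entries zeroed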
class LayerDropout(Layer):
    """
    Dropout layer that drops part of the input according to the dropout rate.

    Attributes:
        dropout_rate: Dropout rate
    """
    def __init__(self, dropout_rate):
        Layer.__init__(self)
        self.dropout_rate = dropout_rate
        numpy_rng = np.random.RandomState(123)
        self.theano_rng = RandomStreams(numpy_rng.randint(2**30))

    def forward(self, ls_inputs, batch_size, run_time):
        ls_outputs = []
        # # p=1-dropout_rate because 1's indicate keep and dropout_rate is prob of dropping
        # mask = self.theano_rng.binomial(n=1, p=1-dropout_rate, size=ls_inputs.shape)
        # # The cast is important because
        # # int * float32 = float64 which pulls things off the gpu
        # ls_outputs = ls_inputs * T.cast(mask, theano.config.floatX)
        input = ls_inputs[0]
        mask = self.theano_rng.binomial(n=1, p=self.dropout_rate, size=input.shape)
        if not run_time:
            ls_outputs = [T.switch(mask, input, 0)]
        else:
            ls_outputs = [self.dropout_rate * input]
        return ls_outputs

    def __str__(self):
        return "Dropout layer (Dropout rate: " + str(self.dropout_rate) + ")\n"
def __init__(self, rng, input, input_shape, active, rate=0.5):
    rstream = RandomStreams(seed=rng.randint(9999))
    mask = T.cast(rstream.binomial(n=1, p=rate, size=input_shape),
                  theano.config.floatX)

    self.output = T.switch(active, mask * input / rate, input)
    self.output_shape = input_shape
class DA:
    def __init__(self, input, n_input, n_hidden, W=None, bhid=None, bout=None):
        self.input = input
        self.n_input = n_input
        self.n_output = n_input
        self.n_hidden = n_hidden

        if W is None:
            initial_W = numpy.random.uniform(
                low=-4 * numpy.sqrt(6. / (n_hidden + n_input)),
                high=4 * numpy.sqrt(6. / (n_hidden + n_input)),
                size=(n_input, n_hidden)).astype(theano.config.floatX)
            W = theano.shared(value=initial_W, name='W')
        self.W = W

        if bhid is None:
            initial_bhid = numpy.zeros(shape=(n_hidden, )).astype(theano.config.floatX)
            bhid = theano.shared(value=initial_bhid, name='bhid')
        self.bhid = bhid

        if bout is None:
            initial_bout = numpy.zeros(shape=(n_input, )).astype(theano.config.floatX)
            bout = theano.shared(value=initial_bout, name='bout')
        self.bout = bout

        # The autoencoder's input and output layers are the same size,
        # so the decoder reuses the transposed encoder weights.
        self.W_pi = self.W.T

        self.params = [self.W, self.bhid, self.bout]
        self.hidden = self.get_hidden_value(self.input)
        self.output = self.get_reconstructed_value(self.hidden)
        self.theano_rng = RandomStreams(12345)

    def get_corrupted_input(self, input, corrupted_level):
        return self.theano_rng.binomial(size=input.shape, n=1,
                                        p=1 - corrupted_level,
                                        dtype=theano.config.floatX) * input

    def get_hidden_value(self, x):
        return T.nnet.sigmoid(T.dot(x, self.W) + self.bhid)

    def get_reconstructed_value(self, x):
        out = T.nnet.sigmoid(T.dot(x, self.W_pi) + self.bout)
        return out

    def get_cost_update(self, lr, reg, corrupted_level):
        x = self.get_corrupted_input(self.input, corrupted_level)
        # x = self.input
        y = self.get_hidden_value(x)
        z = self.get_reconstructed_value(y)
        w, h = self.input.shape
        cost = -T.sum(self.input * T.log(z) + (1 - self.input) * T.log(1 - z), axis=1)
        cost = T.mean(cost)
        gparams = T.grad(cost, self.params)
        updates = [(p, p - lr * gp) for p, gp in zip(self.params, gparams)]
        return (cost, updates)
class LinearDropoutLayer(LinearLayer):
    def __init__(self, rng, n_in, n_out, activation, dropout_rate, use_bias,
                 W=None, b=None):
        LinearLayer.__init__(self, rng=rng, n_in=n_in, n_out=n_out, W=W, b=b,
                             activation=activation, use_bias=use_bias)
        self.dropout_rate = dropout_rate
        self.srng = RandomStreams(rng.randint(1e6))

    def output(self, input):
        mask = self.srng.binomial(n=1, p=1 - self.dropout_rate, size=input.shape,
                                  dtype=theano.config.floatX)
        return input * mask
class DropoutConvLayer(ConvLayer):
    def __init__(self, numpy_rng, input, input_shape, filter_shape, poolsize,
                 activation, W=None, b=None, border_mode='valid',
                 use_fast=False, dropout_factor=0.5):
        super(DropoutConvLayer, self).__init__(numpy_rng=numpy_rng, input=input,
                                               input_shape=input_shape,
                                               filter_shape=filter_shape,
                                               poolsize=poolsize,
                                               activation=activation, W=W, b=b,
                                               border_mode=border_mode,
                                               use_fast=use_fast)

        self.theano_rng = RandomStreams(numpy_rng.randint(2**30))

        dropout_prob = self.theano_rng.binomial(n=1, p=1 - dropout_factor,
                                                size=self.output.shape,
                                                dtype=theano.config.floatX)
        self.dropout_output = dropout_prob * self.output
class SampledMeanSquaredReconstructionError(MeanSquaredReconstructionError):
    def __init__(self):
        self.random_stream = RandomStreams(seed=1)

    def __call__(self, model, X):
        # X is theano sparse
        X_dense = theano.sparse.dense_from_sparse(X)
        noise = self.random_stream.binomial(size=X_dense.shape, n=1,
                                            prob=0.5, ndim=None)

        # a random pattern that indicates to reconstruct all the 1s and some of the 0s in X
        P = noise + X_dense
        P = theano.tensor.switch(P > 0, 1, 0)

        # penalty on activations
        L1_units = theano.tensor.abs_(model.encode(X)).sum()

        # penalty on weights
        params = model.get_params()
        W = params[2]
        L1_weights = theano.tensor.abs_(W).sum()

        cost = (((model.reconstruct(X, P) - X_dense * P) ** 2).sum(axis=1).mean()
                + 0.001 * (L1_weights + L1_units))

        return cost
class Dropout(Layer):
    def __init__(self, p):
        self.name = self.__class__.__name__
        self.train_flag = True
        self.p = p
        self.srng = RandomStreams(seed=np.random.randint(920927))

    def f(self, inputs, is_train):
        out = []
        if isinstance(self.p, list):
            if len(self.p) != len(inputs):
                raise ValueError(self.name + ': The number of inputs should equal '
                                 'the number of p values when p is a list.')
        for i in xrange(len(inputs)):
            x = inputs[i]
            # use the per-input rate when p is a list
            p = self.p[i] if isinstance(self.p, list) else self.p
            if p > 0.:
                retain_p = 1. - p
                if is_train:
                    x *= self.srng.binomial(x.shape, p=retain_p,
                                            dtype=theano.config.floatX)
                else:
                    x *= retain_p
                out.append(x)
            else:
                out.append(x)
        return out
def dropout(x, p, training=True, seed=1234):
    srng = RandomStreams(seed)
    if training:
        y = T.switch(srng.binomial(size=x.shape, p=p), x, 0)
    else:
        y = p * x
    return y
def _dropout_from_layer(numpy_rng, input, p):
    srng = RandomStreams(numpy_rng.randint(2**30))
    mask = srng.binomial(size=input.shape, n=1, p=1 - p,
                         dtype=theano.config.floatX)
    return input * mask
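# Usage sketch for the helper above (illustrative; `p` is the drop probability,
# and since the helper only masks, callers would typically pair it with a
# (1 - p) scaling of the corresponding weights or activations at test time,
# as several other snippets on this page do).
import numpy as np
import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams

numpy_rng = np.random.RandomState(1234)
x = T.matrix('x')
x_dropped = _dropout_from_layer(numpy_rng, x, p=0.5)

f = theano.function([x], x_dropped)
print(f(np.ones((2, 4), dtype=theano.config.floatX)))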
def CTC_train(self): CTC_LOSSs = T.cast(T.mean(self.CTC_LOSS(), axis=0), "float32") train_data_d = [] train_data_m = [] train_data_m_s = [] learning_rate = T.scalar() decay = T.scalar() seed = np.random.randint(10e6) rng = RandomStreams(seed=seed) grad_rate = 0.8 for data in self.train_data: data_d = rng.binomial((1,), p=grad_rate, dtype="float32")[0]*T.grad(CTC_LOSSs, data) train_data_d.append(data_d) data_m_s = theano.shared(np.zeros(data.get_value().shape).astype(np.float32)) train_data_m_s.append(data_m_s) data_m = data_m_s*decay + (1-decay)*data_d**2 train_data_m.append(data_m) #self.grad_test = theano.function([self.X, self.Y], train_data_d[-4]) #self.data_d_print = theano.function([self.X,self.Y],train_data_d[0][0]) #upd = [(d,d-learning_rate*d_d)for d,d_d in zip(self.train_data,train_data_d)] upd = [(d, d-learning_rate*d_d/T.sqrt(d_m+1e-4))for d,d_d,d_m in zip(self.train_data,train_data_d,train_data_m)] upd1 = [(d_m_s, decay*d_m_s+(1-decay)*d_d**2) for d_m_s,d_d in zip(train_data_m_s,train_data_d)] upd +=upd1 #self.test = theano.function([self.X,self.Y],train_data_d[0]) self.sgd_train = theano.function([self.X, self.Y, learning_rate, decay], [], updates = upd )
class DropoutHiddenLayer(HiddenLayer):
    def __init__(self, rng, input, n_in, n_out, W=None, b=None,
                 activation=T.tanh, adv_activation_method=None, pool_size=1,
                 pnorm_order=1, dropout_factor=0.5):
        super(DropoutHiddenLayer, self).__init__(rng=rng, input=input,
                                                 n_in=n_in, n_out=n_out,
                                                 W=W, b=b, activation=activation,
                                                 adv_activation_method=adv_activation_method,
                                                 pool_size=pool_size,
                                                 pnorm_order=pnorm_order)

        self.theano_rng = RandomStreams(rng.randint(2**30))

        dropout_prob = self.theano_rng.binomial(n=1, p=1 - dropout_factor,
                                                size=self.output.shape,
                                                dtype=theano.config.floatX)
        self.dropout_output = dropout_prob * self.output
def _dropout_from_layer(self, layer):
    stream = RandomStreams(self.numpy_range.randint(999999))
    mask = stream.binomial(size=layer.shape, n=1, p=(1 - self._p),
                           dtype=theano.config.floatX)
    return layer * Tensor.cast(mask, theano.config.floatX)
class HiddenLayer(object): """ Hidden layer class """ def __init__(self, rng, input, n_in, n_out, W=None, b=None, dropout_p=0.5): self.srng = RandomStreams(seed=234) self.dropout_p = dropout_p self.input = input # Initialize weights name = "W_hidden" if W is None: W_values = np.asarray(rng.uniform( low=-np.sqrt(2. / (n_in + n_out)), high=np.sqrt(2. / (n_in + n_out)), size=(n_in, n_out)), dtype=theano.config.floatX) W = theano.shared(value=W_values, name=name, borrow=True) # Initialize bias name = "b_hidden" if b is None: b_values = np.zeros((n_out, ), dtype=theano.config.floatX) b = theano.shared(value=b_values, name=name, borrow=True) self.W = W self.b = b self.output = self.ReLU(T.dot(input, self.W) + self.b) # parameters of the model self.params = [self.W, self.b] def ReLU(self, X): ''' Rectified linear unit ''' return T.maximum(X, 0) def dropout(self, X): ''' Dropout with probability p ''' if self.dropout_p > 0: retain_prob = 1 - self.dropout_p X *= self.srng.binomial(X.shape, p=retain_prob, dtype=theano.config.floatX) X /= retain_prob return X def TestVersion(self, rng, input, n_in, n_out): return HiddenLayer(rng, input, n_in, n_out, W=self.W, b=self.b, dropout_p=0.0)
def __init__(self, input, filter_shape, corruption_level = 0.1, shared_W = None, shared_b = None, image_shape = None, poolsize = (2,2)): theano_rng = RandomStreams() fan_in = numpy.prod(filter_shape[1:]) fan_out = filter_shape[0] * numpy.prod(filter_shape[2:]) center = theano.shared(value = 1, name="center") scale = theano.shared(value = 2, name="scale") if shared_W != None and shared_b != None : self.W = shared_W self.b = shared_b else: initial_W = numpy.asarray( numpy.random.uniform( low = -numpy.sqrt(6./(fan_in+fan_out)), high = numpy.sqrt(6./(fan_in+fan_out)), size = filter_shape), dtype = theano.config.floatX) initial_b = numpy.zeros((filter_shape[0],), dtype=theano.config.floatX) self.W = theano.shared(value = initial_W, name = "W") self.b = theano.shared(value = initial_b, name = "b") initial_b_prime= numpy.zeros((filter_shape[1],),dtype=theano.config.floatX) self.b_prime = theano.shared(value = initial_b_prime, name = "b_prime") self.x = input self.tilde_x = theano_rng.binomial( self.x.shape, 1, 1 - corruption_level,dtype=theano.config.floatX) * self.x conv1_out = conv.conv2d(self.tilde_x, self.W, filter_shape=filter_shape, image_shape=image_shape, border_mode='valid') self.y = T.tanh(conv1_out + self.b.dimshuffle('x', 0, 'x', 'x')) da_filter_shape = [ filter_shape[1], filter_shape[0], filter_shape[2], filter_shape[3] ] initial_W_prime = numpy.asarray( numpy.random.uniform( \ low = -numpy.sqrt(6./(fan_in+fan_out)), \ high = numpy.sqrt(6./(fan_in+fan_out)), \ size = da_filter_shape), dtype = theano.config.floatX) self.W_prime = theano.shared(value = initial_W_prime, name = "W_prime") conv2_out = conv.conv2d(self.y, self.W_prime, filter_shape = da_filter_shape, border_mode='full') self.z = (T.tanh(conv2_out + self.b_prime.dimshuffle('x', 0, 'x', 'x'))+center) / scale scaled_x = (self.x + center) / scale self.L = - T.sum( scaled_x*T.log(self.z) + (1-scaled_x)*T.log(1-self.z), axis=1 ) self.cost = T.mean(self.L) self.params = [ self.W, self.b, self.b_prime ]
def __init__(self,X,mask,shape,is_train=1,p=0.5,state_pre=None): prefix="GRU" self.in_size,self.hidden_size=shape self.W_xr=theano.shared(value=np.asarray((np.random.randn(self.in_size,self.hidden_size) * 0.1),dtype=theano.config.floatX), name=prefix+'_W_xr') self.W_hr=theano.shared(value=np.asarray((np.random.randn(self.hidden_size,self.hidden_size) * 0.1),dtype=theano.config.floatX), name=prefix+'_W_hr') self.b_r=theano.shared(value=np.asarray(np.zeros(self.hidden_size),dtype=theano.config.floatX), name=prefix+'_b_r') self.W_xz=theano.shared(value=np.asarray((np.random.randn(self.in_size,self.hidden_size) * 0.1),dtype=theano.config.floatX), name=prefix+'_W_xz') self.W_hz=theano.shared(value=np.asarray((np.random.randn(self.hidden_size,self.hidden_size) * 0.1),dtype=theano.config.floatX), name=prefix+'_W_hz') self.b_z=theano.shared(value=np.asarray(np.zeros(self.hidden_size),dtype=theano.config.floatX), name=prefix+'_b_z') self.W_xh=theano.shared(value=np.asarray((np.random.randn(self.in_size,self.hidden_size) * 0.1),dtype=theano.config.floatX), name=prefix+'_W_xh') self.W_hh=theano.shared(value=np.asarray((np.random.randn(self.hidden_size,self.hidden_size) * 0.1),dtype=theano.config.floatX), name=prefix+'_W_hh') self.b_h=theano.shared(value=np.asarray(np.zeros(self.hidden_size),dtype=theano.config.floatX), name=prefix+'_b_h') self.X=X self.mask=mask batch_size=self.X.shape[1] if state_pre==None: state_pre=T.zeros((batch_size,self.hidden_size),dtype=theano.config.floatX) def _step(x,m,h_tm1): r=T.nnet.sigmoid(T.dot(x,self.W_xr) + T.dot(h_tm1,self.W_hr) +self.b_r) z=T.nnet.sigmoid(T.dot(x,self.W_xz) + T.dot(h_tm1,self.W_hz) +self.b_z) gh=T.tanh(T.dot(x , self.W_xh) + T.dot(r * h_tm1 , self.W_hh) + self.b_h) h_t=z * h_tm1 + (T.ones_like(z) - z) * gh h_t = h_t * m[:,None] return h_t h,_=theano.scan(fn=_step, sequences=[self.X,self.mask], outputs_info=state_pre) self.h=h if p>0: trng=RandomStreams(12345) drop_mask=trng.binomial(n=1,p=1-p,size=h.shape,dtype=theano.config.floatX) self.activation=T.switch(T.eq(is_train,1),h*drop_mask,h*(1-p)) else: self.activation=T.switch(T.eq(is_train,1),h,h) self.params=[self.W_xr,self.W_hr,self.b_r, self.W_xz,self.W_hz,self.b_z, self.W_xh,self.W_hh,self.b_h]
class OldGibbsRegressor(object):
    def __init__(self, n_dim_in, n_dim_out, sample_y=False, n_alpha=1, seed=None):
        self._w = theano.shared(np.zeros((n_dim_in, n_dim_out), dtype='int'), name='w')
        self._rng = RandomStreams(seed)
        if n_alpha == 'all':
            n_alpha = n_dim_in
        self._n_alpha = n_alpha
        self._alpha = theano.shared(np.arange(n_alpha))  # scalar
        self._sample_y = sample_y

    @staticmethod
    def compute_p_wa(w, x, y, alpha):
        """
        Compute the probability of the weights at index alpha taking on value 1.
        """
        w_0 = tt.set_subtensor(w[alpha], 0)  # (n_dim_in, n_dim_out)
        w_1 = tt.set_subtensor(w[alpha], 1)  # (n_dim_in, n_dim_out)
        z_0 = tt.nnet.sigmoid(x.dot(w_0))  # (n_samples, n_dim_out)
        z_1 = tt.nnet.sigmoid(x.dot(w_1))  # (n_samples, n_dim_out)
        log_likelihood_ratio = tt.sum(tt.log(bernoulli(y, z_1)) - tt.log(bernoulli(y, z_0)), axis=0)  # (n_dim_out, )
        p_wa = tt.nnet.sigmoid(log_likelihood_ratio)  # (n_dim_out, )
        return p_wa

    @symbolic_updater
    def train(self, x, y):
        p_wa = self.compute_p_wa(self._w, x, y, self._alpha)
        w_sample = self._rng.binomial(p=p_wa)  # (n_dim_out, )
        w_new = tt.set_subtensor(self._w[self._alpha], w_sample)  # (n_dim_in, n_dim_out)
        return [(self._w, w_new),
                (self._alpha, (self._alpha + self._n_alpha) % self._w.shape[0])]

    @symbolic_stateless
    def predict(self, x):
        p_y = tt.nnet.sigmoid(x.dot(self._w))
        return self._rng.binomial(p=p_y) if self._sample_y else p_y
def drop(x, rng=rng, p=p):
    """p is the probability of NOT dropping out a unit"""
    srng = RandomStreams(rng.randint(999999))
    mask = srng.binomial(n=1, p=p, size=x.shape, dtype=theano.config.floatX)
    return x * mask
def dropout(random_state, X, keep_prob=0.5):
    if keep_prob > 0. and keep_prob < 1.:
        seed = random_state.randint(2 ** 30)
        srng = RandomStreams(seed)
        mask = srng.binomial(n=1, p=keep_prob, size=X.shape,
                             dtype=theano.config.floatX)
        return X * mask
    return X
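# Usage sketch (illustrative; here `keep_prob` is the probability of keeping a
# unit and no rescaling is applied, so a keep_prob of 1.0 simply returns the
# input unchanged and can serve as the evaluation-time graph).
import numpy as np
import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams

random_state = np.random.RandomState(0)
X = T.matrix('X')
X_train = dropout(random_state, X, keep_prob=0.8)   # masked graph for training
X_eval = dropout(random_state, X, keep_prob=1.0)    # identity graph for evaluation

f = theano.function([X], [X_train, X_eval])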
def __init__(self, _input, noiseRate, seed):
    super(DropoutLayer, self).__init__(_input)
    theano_rng = RandomStreams(seed)

    inp = self.getInput()
    self.__output = theano_rng.binomial(size=inp.shape, n=1, p=1 - noiseRate,
                                        dtype=theano.config.floatX) * inp
def dropout(rng, x, p=0.5):
    """Zero out random values in x with probability p using rng."""
    if p > 0. and p < 1.:
        seed = rng.randint(2 ** 30)
        srng = RandomStreams(seed)
        mask = srng.binomial(n=1, p=1. - p, size=x.shape,
                             dtype=theano.config.floatX)
        return x * mask
    return x
def drop(filters, rng=rng, p=(1 - 4 * float(gap) / filter_shape[-1])):
    """p is the probability of NOT dropping out a unit"""
    srng = RandomStreams(rng.randint(999999))
    # keep each unit with probability p and rescale by 1/p (inverted dropout)
    mask = srng.binomial(n=1, p=p, size=filters.shape, dtype=theano.config.floatX)
    return (1. / p) * filters * mask
def __init__(self, rng, x, n_in, n_h, p, training): """ This is to initialise a standard RNN hidden unit :param rng: random state, fixed value for randome state for reproducible objective results :param x: input data to current layer :param n_in: dimension of input data :param n_h: number of hidden units/blocks :param p: the probability of dropout :param training: a binary value to indicate training or testing (for dropout training) """ self.input = x if p > 0.0: if training == 1: srng = RandomStreams(seed=123456) self.input = T.switch(srng.binomial(size=x.shape, p=p), x, 0) else: self.input = (1 - p) * x #(1-p) * self.n_in = int(n_in) self.n_h = int(n_h) # random initialisation Wx_value = np.asarray(rng.normal(0.0, 1.0 / np.sqrt(n_in), size=(n_in, n_h)), dtype=config.floatX) Wh_value = np.asarray(rng.normal(0.0, 1.0 / np.sqrt(n_h), size=(n_h, n_h)), dtype=config.floatX) # Input gate weights self.W_xi = theano.shared(value=Wx_value, name='W_xi') self.W_hi = theano.shared(value=Wh_value, name='W_hi') # bias self.b_i = theano.shared(value=np.zeros((n_h, ), dtype=config.floatX), name='b_i') # initial value of hidden and cell state self.h0 = theano.shared(value=np.zeros((n_h, ), dtype=config.floatX), name='h0') self.c0 = theano.shared(value=np.zeros((n_h, ), dtype=config.floatX), name='c0') self.Wix = T.dot(self.input, self.W_xi) [self.h, self.c], _ = theano.scan(self.recurrent_as_activation_function, sequences=[self.Wix], outputs_info=[self.h0, self.c0]) self.output = self.h self.params = [self.W_xi, self.W_hi, self.b_i] self.L2_cost = (self.W_xi**2).sum() + (self.W_hi**2).sum()
def __init__(self, rng, x, n_in, n_h, p, training, rnn_batch_training=False): """ This is to initialise a standard RNN hidden unit :param rng: random state, fixed value for randome state for reproducible objective results :param x: input data to current layer :param n_in: dimension of input data :param n_h: number of hidden units/blocks :param p: the probability of dropout :param training: a binary value to indicate training or testing (for dropout training) """ self.input = x if p > 0.0: if training==1: srng = RandomStreams(seed=123456) self.input = T.switch(srng.binomial(size=x.shape,p=p), x, 0) else: self.input = (1-p) * x #(1-p) * self.n_in = int(n_in) self.n_h = int(n_h) self.rnn_batch_training = rnn_batch_training # random initialisation Wx_value = np.asarray(rng.normal(0.0, 1.0/np.sqrt(n_in), size=(n_in, n_h)), dtype=config.floatX) Wh_value = np.asarray(rng.normal(0.0, 1.0/np.sqrt(n_h), size=(n_h, n_h)), dtype=config.floatX) # Input gate weights self.W_xi = theano.shared(value=Wx_value, name='W_xi') self.W_hi = theano.shared(value=Wh_value, name='W_hi') # bias self.b_i = theano.shared(value=np.zeros((n_h, ), dtype=config.floatX), name='b_i') # initial value of hidden and cell state if self.rnn_batch_training: self.h0 = theano.shared(value=np.zeros((1, n_h), dtype = config.floatX), name = 'h0') self.c0 = theano.shared(value=np.zeros((1, n_h), dtype = config.floatX), name = 'c0') self.h0 = T.repeat(self.h0, x.shape[1], 0) self.c0 = T.repeat(self.c0, x.shape[1], 0) else: self.h0 = theano.shared(value=np.zeros((n_h, ), dtype = config.floatX), name = 'h0') self.c0 = theano.shared(value=np.zeros((n_h, ), dtype = config.floatX), name = 'c0') self.Wix = T.dot(self.input, self.W_xi) [self.h, self.c], _ = theano.scan(self.recurrent_as_activation_function, sequences = [self.Wix], outputs_info = [self.h0, self.c0]) self.output = self.h self.params = [self.W_xi, self.W_hi, self.b_i] self.L2_cost = (self.W_xi ** 2).sum() + (self.W_hi ** 2).sum()
def dropout(x, p, training=True, seed=1234):
    p = 1. - p
    srng = RandomStreams(seed)
    if training:
        x *= srng.binomial(size=x.shape, p=p, dtype=x.dtype)
        x /= p
        return x
    else:
        return x
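# Usage sketch for the inverted-dropout variant above (illustrative; because the
# surviving activations are divided by the keep probability during training, the
# non-training branch returns x unchanged).
import numpy as np
import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams

x = T.matrix('x')
train_out = dropout(x, p=0.25, training=True, seed=7)   # ~75% kept, scaled by 1/0.75
test_out = dropout(x, p=0.25, training=False)           # identity at test time

f = theano.function([x], [train_out, test_out])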
def dropout(dropout_rate: float, rng: RandomStreams, parameter, use_dropout: bool):
    if use_dropout:
        mask = rng.binomial(parameter.shape, p=1. - dropout_rate,
                            dtype=parameter.dtype)
        return parameter * mask / (1. - dropout_rate)
    else:
        return parameter
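# Usage sketch (illustrative; it assumes a single RandomStreams object is shared
# across the model, which is what the explicit `rng` argument above suggests,
# and applies the helper to a shared weight matrix).
import numpy as np
import theano
from theano.tensor.shared_randomstreams import RandomStreams

rng = RandomStreams(seed=2468)
W = theano.shared(np.ones((4, 3), dtype=theano.config.floatX), name='W')

W_train = dropout(0.5, rng, W, use_dropout=True)    # masked and rescaled by 1/(1 - 0.5)
W_eval = dropout(0.5, rng, W, use_dropout=False)    # returned unchanged

f = theano.function([], [W_train, W_eval])
print(f())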
class DenoisingAutoEncoder(object): def __init__(self, n_visible, n_hidden, weights=None, hidden_bias=None, visible_bias=None, random_on_gpu=False, seed=69, activation=T.nnet.sigmoid): self.n_visible = n_visible self.n_hidden = n_hidden if random_on_gpu: self.t_rng = GPU_RandomStreams(seed) else: self.t_rng = RandomStreams(seed) if not weights: weights = np.asarray( np.random.normal( scale=0.01, size=(self.n_visible, self.n_hidden)), dtype=theano.config.floatX) self.ts_weights = theano.shared(value=weights, name='W', borrow=True) if not hidden_bias: hidden_bias = np.zeros(n_hidden, dtype=theano.config.floatX) self.ts_hidden_bias = theano.shared(value=hidden_bias, name='hb', borrow=True) if not visible_bias: visible_bias = np.zeros(n_visible, dtype=theano.config.floatX) self.ts_visible_bias = theano.shared(value=visible_bias, name='vb', borrow=True) self.x = T.matrix(name='x') self.activation = activation self.params = [self.ts_weights, self.ts_hidden_bias, self.ts_visible_bias] def get_corrupted_input(self, x, corruption_level): return self.t_rng.binomial(size=x.shape, n=1, p=1 - corruption_level) * x def hidden_values(self, x): return self.activation(T.dot(x, self.ts_weights) + self.ts_hidden_bias) def reconstruction(self, hidden): return self.activation(T.dot(hidden, self.ts_weights.T) + self.ts_visible_bias) def get_cost_updates(self, corruption_level, learning_rate): corrupted_input = self.get_corrupted_input(self.x, corruption_level) hidden = self.hidden_values(corrupted_input) reconstruction = self.reconstruction(hidden) loss = -T.sum(self.x * T.log(reconstruction) + (1 - self.x) * T.log(1 - reconstruction), axis=1) cost = T.mean(loss) gparams = T.grad(cost, self.params) updates = [] for param, gparam in zip(self.params, gparams): updates.append((param, param - learning_rate * gparam)) return cost, updates
def maxout(Z, stop_dropout, archi, dropout_rate, seed=5432):
    th.config.floatX = 'float32'
    Z_out = T.maximum(Z[:, :int(archi / 2)], Z[:, int(archi / 2):])
    prob = (1 - dropout_rate)
    srng = RandomStreams(seed=seed)

    return ifelse(T.lt(stop_dropout, 1.05),
                  Z_out * srng.binomial(size=T.shape(Z_out),
                                        p=prob).astype('float32'),
                  Z_out)
class Dropout:
    def __init__(self, dim_out):
        self.dim_out = dim_out
        self.rng = RandomStreams()
        self.inputs = T.matrix()
        self.cmp = theano.function([self.inputs], self.apply(self.inputs))

    def apply(self, inputs):
        mask = self.rng.binomial(n=1, p=0.5, size=(inputs.shape[0], self.dim_out))
        # multiply by 2 to keep the expected value of the norm the same
        return 2 * inputs * T.cast(mask, theano.config.floatX)
class DropoutLayer(object):
    def __init__(self, prob=0.5):
        rng = numpy.random.RandomState(23455)
        self.theano_rng = RandomStreams(rng.randint(2**30))
        self.params = None
        self.prob = prob

    def get_output(self, mode='train'):
        if mode == 'train':
            self.output = self.theano_rng.binomial(size=self.input.shape, n=1,
                                                   p=1 - self.prob,
                                                   dtype=theano.config.floatX) * self.input
        else:
            self.output = self.input * (1 - self.prob)
        return self.output
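# Usage sketch (an assumption: the enclosing network assigns `layer.input`
# before calling get_output(), since the class above never sets it itself;
# imports are the usual Theano ones used throughout this page).
import numpy
import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams

x = T.matrix('x')
layer = DropoutLayer(prob=0.5)
layer.input = x
y_train = layer.get_output(mode='train')   # stochastic mask, keep prob 1 - prob
y_test = layer.get_output(mode='test')     # deterministic scaling by 1 - prob

f = theano.function([x], [y_train, y_test])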
class SampledMeanSquaredReconstructionError(MeanSquaredReconstructionError): """ mse cost that goes with sparse autoencoder with L1 regularization on activations For theory: Y. Dauphin, X. Glorot, Y. Bengio. ICML2011 Large-Scale Learning of Embeddings with Reconstruction Sampling Parameters ---------- L1 : WRITEME ratio : WRITEME """ def __init__(self, L1, ratio): self.random_stream = RandomStreams(seed=1) self.L1 = L1 self.ratio = ratio def expr(self, model, data, ** kwargs): """ .. todo:: WRITEME """ self.get_data_specs(model)[0].validate(data) X = data # X is theano sparse X_dense = theano.sparse.dense_from_sparse(X) noise = self.random_stream.binomial(size=X_dense.shape, n=1, prob=self.ratio, ndim=None) # a random pattern that indicates to reconstruct all the 1s and some of # the 0s in X P = noise + X_dense P = theano.tensor.switch(P > 0, 1, 0) P = tensor.cast(P, theano.config.floatX) # L1 penalty on activations L1_units = theano.tensor.abs_(model.encode(X)).sum(axis=1).mean() # penalty on weights, optional # params = model.get_params() # W = params[2] # L1_weights = theano.tensor.abs_(W).sum() cost = ((model.reconstruct(X, P) - X_dense) ** 2) cost = (cost * P).sum(axis=1).mean() cost = cost + self.L1 * L1_units return cost
class DropoutLayer(Layer):
    def __init__(self, rng, dropout_rate):
        super(DropoutLayer, self).__init__()
        self.dropout_rate = dropout_rate
        self.srng = RandomStreams(rng.randint(1e6))

    def output(self, input):
        mask = self.srng.binomial(n=1, p=1 - self.dropout_rate, size=input.shape,
                                  dtype=theano.config.floatX)
        return input * mask
def __init__(self, input, rng, p=0.5):
    """
    p is the probability of dropping a unit
    """
    srng = RandomStreams(rng.randint(999999))

    # p=1-p because 1's indicate keep and p is prob of dropping
    mask = srng.binomial(n=1, p=1 - p, size=input.shape, dtype=floatX)
    self.output = ifelse(T.lt(p, 1e-5),
                         input,
                         input * mask / (1 - p))
class AutoEncodeLayer(object): def __init__(self,n_in,n_out,hid_activation='none',vis_activation='none',cost='squre',weight_init='Guassian',gauss_std=None,level=0.2): rng=numpy.random.RandomState(23455) if weight_init == 'Gaussian': assert gauss_std!=None,"Gaussian Distribution must have a std(Standard Deviation)" self.W=theano.shared(Gauss(gauss_std,(n_in,n_out),rng),borrow=True) self.WT=self.W.T print "AutoEncoder layer for FC created input:%d output:%d activation:%s Gauss_Std:%f\n"%(n_in,n_out,hid_activation,gauss_std), elif weight_init == 'Xavier': bound=numpy.sqrt(6. / (n_in + n_out)) if hid_activation == 'logistic':bound=bound*4 value = numpy.asarray(rng.uniform(low=-bound,high=bound,size=(n_in, n_out)),dtype=theano.config.floatX) self.W=theano.shared(value,borrow=True) self.WT=self.W.T print "AutoEncoder layer for FC created input:%d output:%d activation:%s\n"%(n_in,n_out,weight_init), self.bhid=theano.shared(numpy.zeros((n_out,),dtype=theano.config.floatX),borrow=True) self.bvis=theano.shared(numpy.zeros((n_in,),dtype=theano.config.floatX),borrow=True) self.params=[self.W,self.bhid] self.pre_params=[self.W,self.bhid,self.bvis] self.input=T.matrix('x') self.hid_activation=hid_activation self.vis_activation=vis_activation self.level=level self.theano_rng=RandomStreams(rng.randint(2**30)) self.cost=cost def get_output(self): output=T.dot(self.input,self.W)+self.bhid if self.hid_activation == 'none':return output if self.hid_activation == 'tanh':return T.tanh(output) if self.hid_activation == 'logistic':return T.nnet.sigmoid(output) if self.hid_activation == 'relu':return T.maximum(0,output) def corrupte(self,level): if(level>0.0):return self.theano_rng.binomial(size=self.input.shape,n=1,p=1-self.level,dtype=theano.config.floatX)*self.input else:return self.input def reconstruct(self): corrupted_x=self.corrupte(self.level) hidden=T.dot(corrupted_x,self.W)+self.bhid #### input --> hidden #### if self.hid_activation=='relu':y=T.maximum(0,hidden) elif self.hid_activation=='logistic':y=T.nnet.sigmoid(hidden) elif self.hid_activation=='tanh':y=T.tanh(hidden) elif self.hid_activation=='softplus':y=T.nnet.softplus(hidden) #### hideen --> visble #### visble=T.dot(y,self.WT)+self.bvis if self.vis_activation=='softplus':z=T.nnet.softplus(visble) elif self.vis_activation=='logistic':z=T.nnet.sigmoid(visble) elif self.vis_activation=='tanh':z=T.tanh(visble) elif self.vis_activation=='relu':z=T.maximum(0,visble) #### reconstruction #### if self.cost=='squre':L=T.sum((self.input-z)**2,axis=1) #quadratic cost else:L=-T.sum(self.input * T.log(z) + (1 - self.input) * T.log(1 - z), axis=1) #cross entropy cost=T.mean(L) return cost
class SampledMeanBinaryCrossEntropy(Cost): """ CE cost that goes with sparse autoencoder with L1 regularization on activations For theory: Y. Dauphin, X. Glorot, Y. Bengio. ICML2011 Large-Scale Learning of Embeddings with Reconstruction Sampling """ def __init__(self, L1, ratio): self.random_stream = RandomStreams(seed=1) self.L1 = L1 self.one_ratio = ratio def expr(self, model, data, **kwargs): self.get_data_specs(model)[0].validate(data) X = data # X is theano sparse X_dense = theano.sparse.dense_from_sparse(X) noise = self.random_stream.binomial(size=X_dense.shape, n=1, prob=self.one_ratio, ndim=None) # a random pattern that indicates to reconstruct all the 1s and some of the 0s in X P = noise + X_dense P = theano.tensor.switch(P > 0, 1, 0) P = tensor.cast(P, theano.config.floatX) # L1 penalty on activations reg_units = theano.tensor.abs_(model.encode(X)).sum(axis=1).mean() # penalty on weights, optional # params = model.get_params() # W = params[2] # there is a numerical problem when using # tensor.log(1 - model.reconstruct(X, P)) # Pascal fixed it. before_activation = model.reconstruct_without_dec_acti(X, P) cost = ( 1 * X_dense * tensor.log(tensor.log(1 + tensor.exp(-1 * before_activation))) + (1 - X_dense) * tensor.log(1 + tensor.log(1 + tensor.exp(before_activation)))) cost = (cost * P).sum(axis=1).mean() cost = cost + self.L1 * reg_units return cost def get_data_specs(self, model): return (model.get_input_space(), model.get_input_source())
def __init__(self, rng=None,theano_rng=None, input=None, n_in=None , n_out=None, W=None, b=None, activation=None, decoder=False, first_layer_corrup=False): self.input = input if not rng: rng = numpy.random.RandomState(123) if not theano_rng: theano_rng = RandomStreams(rng.randint(2 ** 30)) if W is None: W_values = numpy.asarray( rng.uniform( low=-numpy.sqrt(6. / (n_in + n_out)), high=numpy.sqrt(6. / (n_in + n_out)), size=(n_in, n_out) ), dtype=theano.config.floatX ) if activation == T.nnet.sigmoid: W_values *= 4 W = theano.shared(value=W_values, name='W', borrow=True) if b is None: b_values = numpy.zeros((n_out,), dtype=theano.config.floatX) b = theano.shared(value=b_values, name='b', borrow=True) self.W = W self.b = b if first_layer_corrup: corruption_level = 0.1 input = theano_rng.binomial(size=input.shape, n=1, p=1 - corruption_level, dtype=theano.config.floatX) * input if decoder: lin_output=T.dot(input, self.W.T) + self.b else: lin_output = T.dot(input, self.W) + self.b self.output = ( lin_output if activation is None else activation(lin_output) ) self.params = [self.W, self.b]
class Dropout(Layer):
    def __init__(self, rate):
        self.p = numpy.array(1 - rate).astype(theano.config.floatX)
        self.rng = RandomStreams(numpy.random.randint(1234))

    def forward(self, x):
        print "Layer/Dropout"
        mask = T.cast((1 / self.p) * self.rng.binomial(n=1, p=self.p, size=x.shape),
                      dtype=theano.config.floatX)
        return mask * x
class FDAE(object): """ Formula for Denoising Auto Encoder """ def __init__(self, n_visible, n_hidden, corruption_level, X = None, W = None, bvis = None, bhid = None): rng = np.random.RandomState(0) self.theano_rng = RandomStreams(rng.randint(2 ** 30)) self.corruption_level = corruption_level ## model inputs self.X = X or T.matrix(name = 'X') ## model params if not W: W_bound = 4. * np.sqrt(6. / (n_hidden + n_visible)) W = theano.shared(value = np.asarray(rng.uniform( low = -W_bound, high = W_bound, size = (n_visible, n_hidden)), dtype = theano.config.floatX), name = 'DAE_W', borrow = True) if not bvis: bvis = theano.shared(value = np.zeros(n_visible, dtype = theano.config.floatX), name = 'DAE_bvis', borrow = True) if not bhid: bhid = theano.shared(value = np.zeros(n_hidden, dtype = theano.config.floatX), name = 'DAE_bhid', borrow = True) self.W = W self.W_prime = self.W.T self.b = bhid self.b_prime = bvis self.params = (self.W, self.b, self.b_prime) ## model prediction - no corruption version self.prediction = self.hidden_value(self.X) ## model cost and error tilde_X = self.corrupted_input(self.X) y = self.hidden_value(tilde_X) z = self.reconstructed_input(y) L = -T.sum(self.X * T.log(z) + (1-self.X)*T.log(1-z), axis = 1) self.cost = T.mean(L) ## self.error = Not relevant def corrupted_input(self, X): return self.theano_rng.binomial(size = X.shape, n = 1, p = 1 - self.corruption_level, dtype = theano.config.floatX) * X def hidden_value(self, X): return T.nnet.sigmoid(T.dot(X, self.W) + self.b) def reconstructed_input(self, hidden): return T.nnet.sigmoid(T.dot(hidden, self.W_prime) + self.b_prime)
def __theano__noise(self, inp, noisetype, p=None, n=None, sigma=None, thicken=True, mode=None, srng=None): # Local imports from theano.tensor.shared_randomstreams import RandomStreams # Parse noise type and check arguments if noisetype in ['binomial', 'dropout']: noisetype = 'binomial' assert None not in [n, p], "n and p must be provided for binomial noise." mode = 'mul' if mode is None else mode elif noisetype in ['gaussian', 'normal']: noisetype = 'normal' assert sigma is not None, "sigma must be provided for normal noise." mode = 'add' if mode is None else mode else: raise NotImplementedError("Unknown noisetype: {}".format(noisetype)) # Parse mode if mode in ['add', 'additive', 'addition']: mode = 'add' elif mode in ['mul', 'multiplicative', 'multiplication', 'multiply']: mode = 'mul' else: raise NotImplementedError("Mode {} is not implemented.".format(mode)) # Default rng if srng is None: srng = RandomStreams(seed=42) elif isinstance(srng, int): srng = RandomStreams(seed=srng) # Make noise kernel if noisetype == 'normal': noisekernel = T.cast(srng.normal(size=inp.shape, std=sigma), dtype='floatX') elif noisetype == 'binomial': noisekernel = T.cast(srng.binomial(size=inp.shape, n=n, p=p), dtype='floatX') else: raise NotImplementedError # Couple with input if mode == 'add': y = inp + noisekernel elif mode == 'mul': y = inp * noisekernel else: raise NotImplementedError if thicken and noisetype is 'binomial': y = y / getattr(np, th.config.floatX)(p) # Return return y
class Sigmoid(ActivationFunction):

    def __init__(self):
        self.theanoGenerator = RandomStreams(seed=np.random.randint(1, 1000))

    def nonDeterminstic(self, x):
        val = self.deterministic(x)
        return self.theanoGenerator.binomial(size=val.shape, n=1, p=val,
                                             dtype=theanoFloat)

    def deterministic(self, x):
        return T.nnet.sigmoid(x)

    def activationProbablity(self, x):
        return T.nnet.sigmoid(x)
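# Usage sketch (assumptions: `ActivationFunction` is a plain base class and
# `theanoFloat` stands for theano.config.floatX, as the naming above suggests;
# the sampler draws one Bernoulli value per unit with probability sigmoid(x)).
import numpy as np
import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams

theanoFloat = theano.config.floatX

x = T.matrix('x')
act = Sigmoid()
sampled = act.nonDeterminstic(x)    # stochastic binary activations
expected = act.deterministic(x)     # their expected value, sigmoid(x)

f = theano.function([x], [sampled, expected])
print(f(np.zeros((1, 5), dtype=theanoFloat)))   # each sampled unit ~ Bernoulli(0.5)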
class GibbsRegressor(ISymbolicPredictor): def __init__(self, n_dim_in, n_dim_out, sample_y = False, n_alpha = 1, possible_ws = [0, 1], alpha_update_policy = 'sequential', seed = None): self._w = theano.shared(np.zeros((n_dim_in, n_dim_out), dtype = theano.config.floatX), name = 'w') self._rng = RandomStreams(seed) if n_alpha == 'all': n_alpha = n_dim_in self._n_alpha = n_alpha self._alpha = theano.shared(np.arange(n_alpha)) # scalar self._sample_y = sample_y self._possible_ws = theano.shared(np.array(possible_ws), name = 'possible_ws') assert alpha_update_policy in ('sequential', 'random') self._alpha_update_policy = alpha_update_policy def _add_alpha_update(self): new_alpha = (self._alpha+self._n_alpha) % self._w.shape[0] \ if self._alpha_update_policy == 'sequential' else \ self._rng.choice(a=self._w.shape[0], size = (self._n_alpha, ), replace = False).reshape([-1]) # Reshape is for some reason necessary when n_alpha=1 add_update(self._alpha, new_alpha) @staticmethod def compute_p_wa(w, x, y, alpha, possible_ws = np.array([0, 1])): """ Compute the probability the weights at index alpha taking on each of the values in possible_ws """ assert x.tag.test_value.ndim == y.tag.test_value.ndim == 2 assert x.tag.test_value.shape[0] == y.tag.test_value.shape[0] assert w.get_value().shape[1] == y.tag.test_value.shape[1] v_current = x.dot(w) # (n_samples, n_dim_out) v_0 = v_current[None, :, :] - w[alpha, None, :]*x.T[alpha, :, None] # (n_alpha, n_samples, n_dim_out) possible_vs = v_0[:, :, :, None] + possible_ws[None, None, None, :]*x.T[alpha, :, None, None] # (n_alpha, n_samples, n_dim_out, n_possible_ws) all_zs = tt.nnet.sigmoid(possible_vs) # (n_alpha, n_samples, n_dim_out, n_possible_ws) log_likelihoods = tt.sum(tt.log(bernoulli(y[None, :, :, None], all_zs[:, :, :, :])), axis = 1) # (n_alpha, n_dim_out, n_possible_ws) # Question: Need to shift for stability here or will Theano take care of that? # Stupid theano didn't implement softmax very nicely so we have to do some reshaping. return tt.nnet.softmax(log_likelihoods.reshape([alpha.shape[0]*w.shape[1], possible_ws.shape[0]]))\ .reshape([alpha.shape[0], w.shape[1], possible_ws.shape[0]]) # (n_alpha, n_dim_out, n_possible_ws) @symbolic_updater def train(self, x, y): p_wa = self.compute_p_wa(self._w, x, y, self._alpha, self._possible_ws) # (n_alpha, n_dim_out, n_possible_ws) w_sample = sample_categorical(self._rng, p_wa, values = self._possible_ws) w_new = tt.set_subtensor(self._w[self._alpha], w_sample) # (n_dim_in, n_dim_out) add_update(self._w, w_new) self._add_alpha_update() @symbolic_simple def predict(self, x): p_y = tt.nnet.sigmoid(x.dot(self._w)) return self._rng.binomial(p = p_y) if self._sample_y else p_y