def test_normal_vector(self):
    random = RandomStreams(utt.fetch_seed())
    avg = tensor.dvector()
    std = tensor.dvector()
    out = random.normal(avg=avg, std=std)
    assert out.ndim == 1
    f = function([avg, std], out)

    avg_val = [1, 2, 3]
    std_val = [0.1, 0.2, 0.3]
    seed_gen = np.random.RandomState(utt.fetch_seed())
    numpy_rng = np.random.RandomState(int(seed_gen.randint(2**30)))

    # Arguments of size (3,)
    val0 = f(avg_val, std_val)
    numpy_val0 = numpy_rng.normal(loc=avg_val, scale=std_val)
    assert np.allclose(val0, numpy_val0)

    # Arguments of size (2,)
    val1 = f(avg_val[:-1], std_val[:-1])
    numpy_val1 = numpy_rng.normal(loc=avg_val[:-1], scale=std_val[:-1])
    assert np.allclose(val1, numpy_val1)

    # Specifying the size explicitly
    g = function([avg, std], random.normal(avg=avg, std=std, size=(3,)))
    val2 = g(avg_val, std_val)
    numpy_rng = np.random.RandomState(int(seed_gen.randint(2**30)))
    numpy_val2 = numpy_rng.normal(loc=avg_val, scale=std_val, size=(3,))
    assert np.allclose(val2, numpy_val2)
    with pytest.raises(ValueError):
        g(avg_val[:-1], std_val[:-1])
class NoisyModel(object):
    def __init__(self, obs_noise=0.0, obs_loc=0.0, state_noise=0.0,
                 state_loc=0.0, state_dim=0, rng=None):
        self._srng = RandomStreams(seed=rng.seed())
        self.rng = rng
        self._obs_loc = obs_loc
        self._state_loc = state_loc
        self._obs_std = obs_noise
        self._state_std = state_noise
        self._state_dim = state_dim
        # The state noise draw should use the state noise level; the original
        # passed obs_noise here, which looks like a copy-paste slip.
        self._state_noise = self._srng.normal(size=[self._state_dim],
                                              std=state_noise, avg=state_loc)

    def _noisy_state(self, state):
        if self._state_std > 0:
            state = state + self._state_noise
        return state

    def _noisy_obs(self, obs):
        noise = 0.0
        if self._obs_std > 0:
            noise = self.rng.normal(loc=self._obs_loc, scale=self._obs_std,
                                    size=obs.shape)
        return obs + noise
def prediction(self, h, bias):
    srng = RandomStreams(seed=42)

    prop, mean_x, mean_y, std_x, std_y, rho, bernoulli = \
        self.compute_parameters(h, bias)

    mode = T.argmax(srng.multinomial(pvals=prop, dtype=prop.dtype), axis=1)

    v = T.arange(0, mean_x.shape[0])
    m_x = mean_x[v, mode]
    m_y = mean_y[v, mode]
    s_x = std_x[v, mode]
    s_y = std_y[v, mode]
    r = rho[v, mode]
    # cov = r * (s_x * s_y)

    normal = srng.normal((h.shape[0], 2))
    x = normal[:, 0]
    y = normal[:, 1]

    # x_n = T.shape_padright(s_x * x + cov * y + m_x)
    # y_n = T.shape_padright(s_y * y + cov * x + m_y)
    x_n = T.shape_padright(m_x + s_x * x)
    y_n = T.shape_padright(m_y + s_y * (x * r + y * T.sqrt(1. - r**2)))

    uniform = srng.uniform((h.shape[0],))
    pin = T.shape_padright(T.cast(bernoulli > uniform, floatX))

    return T.concatenate([x_n, y_n, pin], axis=1)
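# Aside (a NumPy sketch, not part of the original source): checking the
# sampling identity used above. With independent x, z ~ N(0, 1), the
# combination y = r * x + sqrt(1 - r^2) * z has unit variance and
# correlation r with x, which is how the snippet draws the correlated
# coordinate of the bivariate Gaussian.
import numpy as np

rng = np.random.RandomState(0)
r = 0.7
x = rng.normal(size=1000000)
z = rng.normal(size=1000000)
y = r * x + np.sqrt(1.0 - r**2) * z
print(np.corrcoef(x, y)[0, 1])  # approximately 0.7
print(y.std())                  # approximately 1.0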
class SampleGaussian(Layer):
    def __init__(self, initial_std=1.0, **kwargs):
        super(SampleGaussian, self).__init__(**kwargs)
        self.initial_std = initial_std
        if K.backend() == 'theano':
            from theano.tensor.shared_randomstreams import RandomStreams
            self.random = RandomStreams()
        elif K.backend() == 'tensorflow':
            import tensorflow as tf
        else:
            raise NotImplementedError

    def build(self, input_shape):
        shape = input_shape[-1:]

        def my_init(shape, dtype=None):
            return K.zeros(shape, dtype=dtype) + K.log(self.initial_std)

        self._logstd = self.add_weight('logstd', shape, initializer=my_init,
                                       trainable=True)
        super(SampleGaussian, self).build(input_shape)

    def call(self, x, mask=None):
        if K.backend() == 'theano':
            return self.random.normal(x.shape, x, self.std())
        elif K.backend() == 'tensorflow':
            import tensorflow as tf
            return tf.random_normal(tf.shape(x), x, self.std())
        else:
            raise NotImplementedError

    def std(self):
        return K.exp(self._logstd)
def __init__(self, latent_dim, hidden_dim, exploration_probability, clip_value,
             value_decay, data, batch_size, exploration_decay_rate):
    self.latent_dim = latent_dim
    self.words = data["words"]
    self.depth = 1 + max(len(w) for w in self.words)
    depth = self.depth
    self.hidden_dim = hidden_dim
    self.characters = data["characters"]
    self.charset = data["charset"]
    self.charmap = data["charmap"]
    self.wordcount = len(self.words)
    self.charcount = len(self.charset)
    self.generator = Generator("generator", latent_dim, depth, self.charcount,
                               hidden_dim, exploration_probability,
                               exploration_decay_rate)
    self.discriminator = Discriminator("discriminator", depth, self.charcount,
                                       hidden_dim)
    self.clip_value = np.float32(clip_value)
    self.value_decay = theano.shared(np.float32(value_decay), "value_decay")
    self.batch_size = batch_size
    self.word_vectors = np.vstack(
        [self.word_to_vector(word).reshape((1, -1))
         for word in self.words]).astype(np.int32)

    xreal = Input((depth,), name="xreal", dtype="int32")
    batch_n = T.iscalar("batch_n")
    srng = RandomStreams(seed=234)
    z = srng.normal(size=(batch_n, latent_dim))
    e = srng.uniform(size=(batch_n, depth), low=0, high=1)
    ex = srng.random_integers(size=(batch_n, latent_dim), low=0,
                              high=self.charcount)
    # z = Input((latent_dim,), name="z", dtype="float32")
    # e = Input((depth,), name="e", dtype="float32")
    # ex = Input((depth,), name="ex", dtype="int32")
    # xreal = T.imatrix("xreal")
    # z = T.fmatrix("z")
    # e = T.fmatrix("e")
    # ex = T.imatrix("ex")
    _, xfake = self.generator.policy(z, e, ex)
    xfake = theano.gradient.zero_grad(xfake)
    # print("xfake: {}, {}".format(xfake, xfake.type))
    # print("xreal: {}, {}".format(xreal, xreal.type))
    _, yfake = self.discriminator.discriminator(xfake)
    _, yreal = self.discriminator.discriminator(xreal)

    dloss = T.mean(yfake, axis=None) - T.mean(yreal, axis=None)
    dconstraints = {p: ClipConstraint(self.clip_value)
                    for p in self.discriminator.clip_params}
    dopt = Adam(1e-4)
    dupdates = dopt.get_updates(self.discriminator.params, dconstraints, dloss)

    n = z.shape[0]
    outputs_info = [T.zeros((n,), dtype='float32')]
    yfaker = T.transpose(yfake[:, ::-1], (1, 0))
    vtarget, _ = theano.scan(reward_function, outputs_info=outputs_info,
                             sequences=yfaker, non_sequences=self.value_decay)
    vtarget = T.transpose(vtarget, (1, 0))[:, ::-1]
    # print("vtarget: {}, {}, {}".format(vtarget, vtarget.ndim, vtarget.type))
    _, vpred = self.generator.value(z, xfake)
    gloss = T.mean(T.abs_(vtarget - vpred), axis=None)
    gopt = Adam(1e-5)
    gupdates = gopt.get_updates(self.generator.params, {}, gloss)

    self.discriminator_train_function = theano.function([xreal, batch_n],
                                                        [dloss],
                                                        updates=dupdates)
    self.generator_train_function = theano.function([batch_n], [gloss],
                                                    updates=gupdates)
    self.generator_sample_function = theano.function([batch_n], [xfake])
    self.test_function = theano.function([xreal, batch_n], [dloss, gloss])
class VariationalLayer(object):
    def __init__(self, rng, sample=True):
        self.theano_rng = RandomStreams(rng.randint(2**30))
        self.sample = sample
        self.params = []

    def __call__(self, input):
        if self.sample:
            mu, sg = input[:, 0::2, :], input[:, 1::2, :]
            eps = self.theano_rng.normal(mu.shape, std=0.01,
                                         dtype=theano.config.floatX)
            eta = 1
            return mu + T.exp(sg * eta) * eps
        else:
            return input[:, 0::2, :]

    def inv(self, output):
        pass

    def load(self, filename):
        pass

    def save(self, filename):
        pass

    def reset(self):
        pass
class SampleGaussianFixedVariance(Layer):
    def __init__(self, std=1.0, **kwargs):
        super(SampleGaussianFixedVariance, self).__init__(**kwargs)
        self._std = std
        if K.backend() == 'theano':
            from theano.tensor.shared_randomstreams import RandomStreams
            self.random = RandomStreams()
        elif K.backend() == 'tensorflow':
            import tensorflow as tf
        else:
            raise NotImplementedError

    def build(self, input_shape):
        self.stdshape = input_shape[-1:]
        super(SampleGaussianFixedVariance, self).build(input_shape)

    def call(self, x, mask=None):
        if K.backend() == 'theano':
            return self.random.normal(x.shape, x, self._std)
        elif K.backend() == 'tensorflow':
            import tensorflow as tf
            return tf.random_normal(tf.shape(x), x, self._std)
        else:
            raise NotImplementedError

    def std(self):
        return np.tile(self._std, self.stdshape)
class KmeansMiniBatch(object):
    def __init__(self, batch_size, data=None, K=300, epsilon_whitening=0.015):
        if data is None:
            self.X = T.matrix('X_train')
        else:
            self.X = data

        ########################
        # Normalize the inputs #
        ########################

        # A constant added to the variance to avoid division by zero
        self.epsilon_norm = 10
        self.epsilon_whitening = epsilon_whitening

        # We subtract from each training sample (each column in X_train) its
        # mean and divide by the standard deviation. Parentheses added: the
        # original subtracted mean / std instead of normalizing.
        self.X = (self.X - T.mean(self.X, axis=0)) / T.sqrt(
            T.var(self.X, axis=0) + self.epsilon_norm)

        #####################
        # Whiten the inputs #
        #####################

        sigma = T.dot(self.X, T.transpose(self.X)) / self.X.shape[1]
        U, s, V = linalg.svd(sigma, full_matrices=False)
        tmp = T.dot(U, T.diag(1 / T.sqrt(s + self.epsilon_whitening)))
        tmp = T.dot(tmp, T.transpose(U))
        self.X = T.dot(tmp, self.X)

        ##################
        # Initialization #
        ##################

        self.K = K  # The number of clusters
        self.dimensions = self.X.shape[0]
        self.samples = batch_size
        self.srng = RandomStreams(seed=234)

        # We initialize the centroids by sampling them from a normal
        # distribution, and then normalizing them to unit length
        # D \in R^{n \times k}
        self.D = self.srng.normal(size=(self.dimensions, self.K))
        self.D = self.D / T.sqrt(T.sum(T.sqr(self.D), axis=0))

    def fit_once(self):
        # Initialize new point representations
        # for every pass of the algorithm
        S = T.zeros((self.K, self.samples))

        tmp = T.dot(self.D.T, self.X)
        res = T.argmax(tmp, axis=0)
        max_values = tmp[res, T.arange(self.samples)]
        S = T.set_subtensor(S[res, T.arange(self.samples)], max_values)

        self.D = T.dot(self.X, T.transpose(S))
        self.D = self.D / T.sqrt(T.sum(T.sqr(self.D), axis=0))

        return self.D
def kmeans(train_set_x):
    if train_set_x is None:
        train_set_x = T.matrix('train_set_x')

    ########################
    # Normalize the inputs #
    ########################

    epsilon_norm = 10
    epsilon_zca = 0.015
    K = 500

    # Subtract the mean and divide by the standard deviation. Parentheses
    # added: the original subtracted mean / std instead of normalizing.
    train_set_x = (train_set_x - T.mean(train_set_x, axis=0)) / T.sqrt(
        T.var(train_set_x, axis=0) + epsilon_norm)

    #####################
    # Whiten the inputs #
    #####################

    # A simple choice of whitening transform is the ZCA whitening transform.
    # epsilon_zca is a small constant; for contrast-normalized data, setting
    # epsilon_zca to 0.01 for 16-by-16 pixel patches, or to 0.1 for 8-by-8
    # pixel patches, is a good starting point.
    cov = T.dot(train_set_x, T.transpose(train_set_x)) / train_set_x.shape[1]
    U, S, V = linalg.svd(cov)
    tmp = T.dot(U, T.diag(1 / T.sqrt(S + epsilon_zca)))
    tmp = T.dot(tmp, T.transpose(U))
    whitened_x = T.dot(tmp, train_set_x)

    ######################
    # Training the Model #
    ######################

    # Initialization
    dimension_size = whitened_x.shape[0]
    num_samples = whitened_x.shape[1]
    srng = RandomStreams(seed=234)
    D = srng.normal(size=(dimension_size, K))
    D = D / T.sqrt(T.sum(T.sqr(D), axis=0))

    # typically 10 iterations is enough
    num_iteration = 15

    # compute new centroids, D_new
    for i in range(num_iteration):
        dx = T.dot(D.T, whitened_x)
        arg_max_dx = T.argmax(dx, axis=0)
        s = dx[arg_max_dx, T.arange(num_samples)]

        S = T.zeros((K, num_samples))
        S = T.set_subtensor(S[arg_max_dx, T.arange(num_samples)], s)

        D = T.dot(whitened_x, T.transpose(S)) + D
        D = D / T.sqrt(T.sum(T.sqr(D), axis=0))

    return D
def Kmeans(X_train=None, K=300, epsilon_whitening=0.015):
    if X_train is None:
        X_train = T.matrix('X_train')

    ########################
    # Normalize the inputs #
    ########################

    # A constant added to the variance to avoid division by zero
    epsilon_norm = 10

    # We subtract from each training sample (each column in X_train) its mean
    # and divide by the standard deviation. Parentheses added: the original
    # subtracted mean / std instead of normalizing.
    X_train = (X_train - T.mean(X_train, axis=0)) / T.sqrt(
        T.var(X_train, axis=0) + epsilon_norm)

    #####################
    # Whiten the inputs #
    #####################

    sigma = T.dot(X_train, T.transpose(X_train)) / X_train.shape[1]
    U, s, V = linalg.svd(sigma, full_matrices=False)
    tmp = T.dot(U, T.diag(1 / T.sqrt(s + epsilon_whitening)))
    tmp = T.dot(tmp, T.transpose(U))
    X_Whitened = T.dot(tmp, X_train)

    ######################
    # Training the Model #
    ######################

    # Initialization
    dimensions = X_Whitened.shape[0]
    samples = X_Whitened.shape[1]
    srng = RandomStreams(seed=234)

    # We initialize the centroids by sampling them from a normal
    # distribution, and then normalizing them to unit length
    # D \in R^{n \times k}
    D = srng.normal(size=(dimensions, K))
    D = D / T.sqrt(T.sum(T.sqr(D), axis=0))

    iterations = 30

    for i in range(iterations):
        # Initialize new point representations
        # for every pass of the algorithm
        S = T.zeros((K, samples))

        tmp = T.dot(D.T, X_Whitened)
        res = T.argmax(tmp, axis=0)
        max_values = tmp[res, T.arange(samples)]
        S = T.set_subtensor(S[res, T.arange(samples)], max_values)

        D = T.dot(X_Whitened, T.transpose(S))
        D = D / T.sqrt(T.sum(T.sqr(D), axis=0))

    return D
class GaussianNoise(Layer):
    def __init__(self, std):
        self.std = numpy.array(std).astype(theano.config.floatX)
        self.rng = RandomStreams(numpy.random.randint(1234))

    def forward(self, x):
        # print("Layer/GaussianNoise")
        noise = self.rng.normal(std=self.std, size=x.shape)
        return x + noise
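# Aside (illustrative usage, assuming the Layer base class and imports from
# the snippet above): compile the forward pass once and call it on concrete
# data. Each call draws fresh Gaussian noise because the compiled function
# carries the RandomStreams state updates.
import numpy
import theano
import theano.tensor as T

x = T.matrix('x')
layer = GaussianNoise(std=0.1)
f = theano.function([x], layer.forward(x))
zeros = numpy.zeros((2, 3), dtype=theano.config.floatX)
print(f(zeros))  # pure N(0, 0.1) noise, different on every call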
class Noise:
    def __init__(self, dim_out, std=1e-2):
        self.dim_out = dim_out
        self.std = theano.shared(std)
        self.rng = RandomStreams()
        self.inputs = T.matrix()
        self.cmp = theano.function([self.inputs], self.apply(self.inputs))

    def apply(self, inputs):
        return inputs + self.rng.normal(std=self.std,
                                        size=(inputs.shape[0], self.dim_out),
                                        dtype=config.floatX)
class Dropout():
    def __init__(self, input, p, drop_switch):
        self.input = input
        self.srng = RandomStreams(seed=234)
        # Note: classic inverted dropout thresholds a uniform draw so the keep
        # rate equals p; this implementation thresholds a standard normal
        # draw, so the actual keep rate is Phi(p), not p.
        self.rv_n = self.srng.normal(self.input.shape)
        # First dropout mask, scaled with /p so we do not have to perform
        # test-time scaling (source: cs231n)
        self.mask = T.cast(self.rv_n < p, dtype=theano.config.floatX) / p
        # Only drop if drop_switch == 1.0
        self.output = ifelse(drop_switch > 0.5, self.input * self.mask,
                             self.input)
def virtual_adversarial_training_finite_diff(
        x,
        t,
        forward_func,
        main_obj_type,
        epsilon,
        lamb=numpy.asarray(1.0, theano.config.floatX),
        norm_constraint='L2',
        num_power_iter=1,
        unchain_y=True,
        xi=1e-6,
        x_for_generating_adversarial_examples=None,
        forward_func_for_generating_adversarial_examples=None,
):
    print("costs/virtual_adversarial_training_finite_diff")
    print("### HyperParameters ###")
    print("epsilon:", str(epsilon))
    print("lambda:", str(lamb))
    print("norm_constraint:", str(norm_constraint))
    print("num_power_iter:", str(num_power_iter))
    print("unchain_y:", str(unchain_y))
    print("xi:", str(xi))
    print("#######################")
    ret = 0
    y = forward_func(x)
    ret += get_main_obj(y, t, main_obj_type)
    if x_for_generating_adversarial_examples is not None:
        x = x_for_generating_adversarial_examples
        y = forward_func(x)
    if forward_func_for_generating_adversarial_examples is not None:
        forward_func = forward_func_for_generating_adversarial_examples
        y = forward_func(x)
    rng = RandomStreams(seed=numpy.random.randint(1234))
    d = rng.normal(size=x.shape, dtype=theano.config.floatX)
    # power iteration
    for power_iter in range(num_power_iter):
        d = xi * get_normalized_vector(d)
        y_d = forward_func(x + d)
        Hd = T.grad(get_kl(y_d, y, main_obj_type).mean(), wrt=d) / xi
        Hd = theano.gradient.disconnected_grad(Hd)
        d = Hd
    r_vadv = get_perturbation(d, epsilon, norm_constraint)
    if unchain_y:
        y_hat = theano.gradient.disconnected_grad(y)
        vadv_cost = get_kl(forward_func(x + r_vadv), y_hat,
                           main_obj_type).mean()
    else:
        vadv_cost = get_kl(forward_func(x + r_vadv), y, main_obj_type,
                           include_ent_term=True).mean()
    ret += lamb * vadv_cost
    return ret
def random_stuff():
    rng = RandomStreams(seed=None)
    a = rng.uniform((10, 10))
    b = rng.normal((10, 1))
    tdbplot(a, 'a', ImagePlot(cmap='jet'))
    tdbplot(b, 'b', HistogramPlot(edges=np.linspace(-5, 5, 20)))
    c = a + b
    return c
def MDN_output_layer(x, h, y, in_size, out_size, hidden_size, pred):
    if connect_h_to_o:
        hiddens = T.concatenate([hidden for hidden in h], axis=2)
        hidden_out_size = hidden_size * len(h)
    else:
        hiddens = h[-1]
        hidden_out_size = hidden_size

    mu_linear = Linear(name='mu_linear' + str(pred),
                       input_dim=hidden_out_size,
                       output_dim=out_size * components_size[network_mode])
    sigma_linear = Linear(name='sigma_linear' + str(pred),
                          input_dim=hidden_out_size,
                          output_dim=components_size[network_mode])
    mixing_linear = Linear(name='mixing_linear' + str(pred),
                           input_dim=hidden_out_size,
                           output_dim=components_size[network_mode])
    initialize([mu_linear, sigma_linear, mixing_linear])

    mu = mu_linear.apply(hiddens)
    mu = mu.reshape((mu.shape[0], mu.shape[1], out_size,
                     components_size[network_mode]))
    sigma_orig = sigma_linear.apply(hiddens)
    sigma = T.nnet.softplus(sigma_orig)
    mixing_orig = mixing_linear.apply(hiddens)
    e_x = T.exp(mixing_orig - mixing_orig.max(axis=2, keepdims=True))
    mixing = e_x / e_x.sum(axis=2, keepdims=True)

    exponent = -0.5 * T.inv(sigma) * T.sum(
        (y.dimshuffle(0, 1, 2, 'x') - mu)**2, axis=2)
    normalizer = (2 * np.pi * sigma)
    exponent = exponent + T.log(mixing) - (out_size * .5) * T.log(normalizer)

    # LogSumExp(x)
    max_exponent = T.max(exponent, axis=2, keepdims=True)
    mod_exponent = exponent - max_exponent
    gauss_mix = T.sum(T.exp(mod_exponent), axis=2, keepdims=True)
    log_gauss = T.log(gauss_mix) + max_exponent
    cost = -T.mean(log_gauss)

    srng = RandomStreams(seed=seed)
    mixing = mixing_orig * (1 + sampling_bias)
    sigma = T.nnet.softplus(sigma_orig - sampling_bias)
    e_x = T.exp(mixing - mixing.max(axis=2, keepdims=True))
    mixing = e_x / e_x.sum(axis=2, keepdims=True)
    component = srng.multinomial(pvals=mixing)
    component_mean = T.sum(mu * component.dimshuffle(0, 1, 'x', 2), axis=3)
    component_std = T.sum(sigma * component, axis=2, keepdims=True)
    linear_output = srng.normal(avg=component_mean, std=component_std)
    linear_output.name = 'linear_output'

    return linear_output, cost
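# Aside (a NumPy sketch of the sampling step above, not part of the original
# code): pick one mixture component per row from the mixing weights, then
# draw from that component's Gaussian, mirroring the multinomial + normal
# combination used in the layer.
import numpy as np

def mdn_sample(mu, sigma, mixing, rng=np.random):
    # mu, sigma, mixing: arrays of shape (batch, components)
    comp = np.array([rng.choice(len(p), p=p) for p in mixing])
    rows = np.arange(mu.shape[0])
    return rng.normal(loc=mu[rows, comp], scale=sigma[rows, comp])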
class KernelDensityEstimateDistribution(Distribution):
    """Randomly samples from a kernel density estimate yielded by a set
    of training points.

    Simple sampling procedure [1]:

    1. With training points $x_1, ... x_n$, sample a point $x_i$ uniformly
    2. From original KDE, we have a kernel defined at point $x_i$; sample
       randomly from this kernel

    [1]: http://www.stat.cmu.edu/~cshalizi/350/lectures/28/lecture-28.pdf
    """

    def __init__(self, X, bandwidth=1, space=None, rng=None):
        """
        Parameters
        ----------
        X : ndarray of shape (num_examples, num_features)
            Training examples from which to generate a kernel density
            estimate

        bandwidth : float
            Bandwidth (or h, or sigma) of the generated kernels
        """
        assert X.ndim == 2

        if space is None:
            space = VectorSpace(dim=X.shape[1], dtype=X.dtype)

        # super(KernelDensityEstimateDistribution, self).__init__(space)

        self.X = sharedX(X, name='KDE_X')
        self.bandwidth = sharedX(bandwidth, name='bandwidth')
        self.rng = RandomStreams() if rng is None else rng

    def sample(self, n):
        # Sample $n$ training examples
        training_samples = self.X[self.rng.choice(size=(n,),
                                                  a=self.X.shape[0],
                                                  replace=True)]

        # Sample individually from each selected associated kernel
        #
        # (not well documented within NumPy / Theano, but rng.normal
        # call samples from a multivariate normal with diagonal
        # covariance matrix)
        ret = self.rng.normal(size=(n, self.X.shape[1]),
                              avg=training_samples,
                              std=self.bandwidth,
                              dtype=theano.config.floatX)
        return ret
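# Aside (a NumPy sketch of the same two-step KDE procedure, illustration
# only): choose a training point uniformly at random, then sample from the
# Gaussian kernel centred on it, exactly as the class above does
# symbolically.
import numpy as np

def kde_sample(X, bandwidth, n, rng=np.random):
    idx = rng.randint(0, X.shape[0], size=n)        # step 1: pick kernels
    return rng.normal(loc=X[idx], scale=bandwidth)  # step 2: sample kernels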
def load_data(self):
    srng = RandomStreams(seed=234)
    rv_q = srng.uniform((10 * 10, 15000)).eval()
    rv_t1 = srng.normal((10 * 100, 15000)).eval()
    rv_t2 = srng.normal((10 * 40, 15000)).eval()
    shared_q = theano.shared(np.asarray(rv_q, dtype=theano.config.floatX),
                             name='q', borrow=True)
    shared_t1 = theano.shared(np.asarray(rv_t1, dtype=theano.config.floatX),
                              name='t1', borrow=True)
    shared_t2 = theano.shared(np.asarray(rv_t2, dtype=theano.config.floatX),
                              name='t2', borrow=True)
    return shared_q, shared_t1, shared_t2
def test4_9():
    ## 4-9 Random Streams
    from theano.tensor.shared_randomstreams import RandomStreams

    random = RandomStreams(seed=42)
    a = random.normal((1, 3))
    b = T.dmatrix('b')  # renamed from 'a' so the name matches the variable
    f1 = a * b
    g1 = function([b], f1)
    print("Invocation 1:", g1(numpy.ones((1, 3))))
    print("Invocation 2:", g1(numpy.ones((1, 3))))
    print("Invocation 3:", g1(numpy.ones((1, 3))))
def __theano__noise(self, inp, noisetype, p=None, n=None, sigma=None,
                    thicken=True, mode=None, srng=None):
    # Local imports
    from theano.tensor.shared_randomstreams import RandomStreams

    # Parse noise type and check arguments
    if noisetype in ['binomial', 'dropout']:
        noisetype = 'binomial'
        assert None not in [n, p], "n and p must be provided for binomial noise."
        mode = 'mul' if mode is None else mode
    elif noisetype in ['gaussian', 'normal']:
        noisetype = 'normal'
        assert sigma is not None, "sigma must be provided for normal noise."
        mode = 'add' if mode is None else mode
    else:
        raise NotImplementedError("Unknown noisetype: {}".format(noisetype))

    # Parse mode
    if mode in ['add', 'additive', 'addition']:
        mode = 'add'
    elif mode in ['mul', 'multiplicative', 'multiplication', 'multiply']:
        mode = 'mul'
    else:
        raise NotImplementedError("Mode {} is not implemented.".format(mode))

    # Default rng
    if srng is None:
        srng = RandomStreams(seed=42)
    elif isinstance(srng, int):
        srng = RandomStreams(seed=srng)

    # Make noise kernel
    if noisetype == 'normal':
        noisekernel = T.cast(srng.normal(size=inp.shape, std=sigma),
                             dtype='floatX')
    elif noisetype == 'binomial':
        noisekernel = T.cast(srng.binomial(size=inp.shape, n=n, p=p),
                             dtype='floatX')
    else:
        raise NotImplementedError

    # Couple with input
    if mode == 'add':
        y = inp + noisekernel
    elif mode == 'mul':
        y = inp * noisekernel
    else:
        raise NotImplementedError

    # == instead of 'is': identity comparison of strings is unreliable
    if thicken and noisetype == 'binomial':
        y = y / getattr(np, th.config.floatX)(p)

    # Return
    return y
def test_tutorial(self):
    srng = RandomStreams(seed=234)
    rv_u = srng.uniform((2, 2))
    rv_n = srng.normal((2, 2))
    f = function([], rv_u)
    g = function([], rv_n, no_default_updates=True)  # Not updating rv_n.rng
    nearly_zeros = function([], rv_u + rv_u - 2 * rv_u)

    assert numpy.all(f() != f())
    assert numpy.all(g() == g())
    assert numpy.all(abs(nearly_zeros()) < 1e-5)
    assert isinstance(rv_u.rng.get_value(borrow=True),
                      numpy.random.RandomState)
class FastDropoutLayer(Layer):
    """
    Fast dropout layer: multiplies the input by Gaussian noise with mean 1.
    (Docstring corrected; the original described a linear layer.)
    """

    def __init__(self, rng):
        super(FastDropoutLayer, self).__init__()
        seed = rng.randint(2**30)
        self.srng = RandomStreams(seed)

    def output_func(self, input):
        mask = self.srng.normal(size=input.shape, avg=1.,
                                dtype=theano.config.floatX)
        return input * mask

    def __repr__(self):
        return "{}".format(self.__class__.__name__)
class RectifiedNoisyVar1(ActivationFunction):
    def __init__(self):
        self.theanoGenerator = RandomStreams(seed=np.random.randint(1, 1000))

    def nonDeterminstic(self, x):
        x += self.theanoGenerator.normal(avg=0.0, std=1.0)
        return x * (x > 0.0)

    def deterministic(self, x):
        return expectedValueRectified(x, 1.0)

    def activationProbablity(self, x):
        return 1.0 - cdf(0, miu=x, variance=1.0)
class ApproximatedRectifiedNoisy(ActivationFunction):
    def __init__(self):
        self.theanoGenerator = RandomStreams(seed=np.random.randint(1, 1000))

    def nonDeterminstic(self, x):
        x += self.theanoGenerator.normal(
            avg=0.0, std=(T.sqrt(T.nnet.sigmoid(x)) + 1e-8))
        return x * (x > 0.0)

    def deterministic(self, x):
        return expectedValueRectified(x, T.nnet.sigmoid(x) + 1e-08)

    def activationProbablity(self, x):
        return 1.0 - cdf(0, miu=x, variance=T.nnet.sigmoid(x))
def unroll_layers(self, cost_method, noise_std_dev):
    inner_code_length = self.inner_code_length
    hidden_layer_sizes = self.mid_layer_sizes + [inner_code_length]
    num_hidden = len(hidden_layer_sizes)

    # create a new random stream for generating gaussian noise
    srng = RandomStreams(numpy.random.RandomState(234).randint(2**30))

    for i in range(num_hidden):
        reverse_indx = num_hidden - i - 1
        mirror_layer = self.sigmoid_layers[reverse_indx]

        # add gaussian noise to codes (the middle layer) for fine tuning
        if i == 0 and noise_std_dev > 0:
            layer_input = self.sigmoid_layers[-1].output + srng.normal(
                self.sigmoid_layers[-1].output.shape,
                avg=0.0, std=noise_std_dev)
        else:
            layer_input = self.sigmoid_layers[-1].output

        # create the relevant layer (last layer is a softmax layer which we
        # calculate the cross entropy error of during fine tuning)
        # NB: i ranges over 0..num_hidden-1, so this condition is never true
        # as written; the softmax branch appears to have been meant to fire
        # on i == num_hidden - 1.
        if i == num_hidden and cost_method == 'cross_entropy':
            self.logRegressionLayer = HiddenLayer(
                input=layer_input,
                n_in=mirror_layer.n_out,
                n_out=mirror_layer.n_in,
                init_W=mirror_layer.W.get_value().T,
                activation=T.nnet.softmax,
                mirroring=True)
        else:
            sigmoid_layer = HiddenLayer(
                rng=self.numpy_rng,
                input=layer_input,
                n_in=mirror_layer.n_out,
                n_out=mirror_layer.n_in,
                init_W=mirror_layer.W.get_value().T,
                # init_b = mirror_layer.b.get_value().reshape(
                #     mirror_layer.b.get_value().shape[0], 1),
                # can't for the life of me think of a good default for this
                activation=T.nnet.sigmoid,
                mirroring=True)

        # print('created layer(n_in:%d n_out:%d)' % (sigmoid_layer.n_in,
        #                                            sigmoid_layer.n_out))
        self.sigmoid_layers.append(sigmoid_layer)
        self.params.extend(sigmoid_layer.params)
        # NB NN training gradients are computed with respect to self.params

    self.y = self.sigmoid_layers[-1].output
    self.n_sigmoid_layers = len(self.sigmoid_layers)

    # compute the cost (cross entropy) for second phase of training
    # can't get nll to work so just use squared diff to get something
    # working! (MKT)
    if cost_method == 'cross_entropy':
        self.finetune_cost = self.cross_entropy_error()
    else:
        self.finetune_cost = self.squared_diff_cost()
def gaussian(shape, std):
    """
    Draw random samples from a normal distribution.

    Parameters
    ----------
    shape : output shape
    std : standard deviation

    Returns
    -------
    Drawn samples from the parameterized normal distribution
    """
    rng = RandomStreams(seed=1234)
    return rng.normal(std=std, size=shape)
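# Aside (illustrative usage, assuming theano is importable as in the other
# snippets): the helper returns a symbolic variable, so it has to be compiled
# before concrete samples can be drawn; each call advances the stream.
import theano

samples = gaussian((2, 3), std=0.5)
f = theano.function([], samples)
print(f())  # a fresh (2, 3) draw from N(0, 0.5**2) on every call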
class NormalSampler(Layer):
    def __init__(self, axis_to_split, **kwargs):
        self.axis = axis_to_split
        self.rng = RandomStreams()
        super(NormalSampler, self).__init__(**kwargs)

    def get_output(self, train=False):
        inp = self.get_input(train)
        if self.axis == 2:
            # Floor division keeps the symbolic size integer-valued
            new_axis_size = inp.shape[2] // 2
            new_size = (inp.shape[0], inp.shape[1], new_axis_size)
            mu, sigma = T.split(inp, [new_axis_size, new_axis_size], 2, axis=2)
            return mu + self.rng.normal(size=new_size) * sigma
        else:
            raise Exception('Other axes not implemented.')
class VariationalSampleLayer(layers.MergeLayer):
    def __init__(self, incoming_mu, incoming_logsigma, **kwargs):
        super(VariationalSampleLayer, self).__init__(
            incomings=[incoming_mu, incoming_logsigma], **kwargs)
        self.srng = RandomStreams(seed=234)

    def get_output_shape_for(self, input_shapes):
        return input_shapes[0]

    def get_output_for(self, inputs, deterministic=False, **kwargs):
        mu, logsigma = inputs
        shape = (self.input_shapes[0][0] or inputs[0].shape[0],
                 self.input_shapes[0][1] or inputs[0].shape[1])
        if deterministic:
            return mu
        return mu + T.exp(logsigma) * self.srng.normal(
            shape, avg=0.0, std=1).astype(theano.config.floatX)
def corrupt(input_var, nkwargs):
    rng = np.random.RandomState(498)
    theano_rng = RandomStreams(rng.randint(2**30))
    if 'corruption' not in nkwargs:
        corrupted_input = input_var
    elif nkwargs['corruption'] == 'binomial':
        # Typo fixed: the original assigned to 'corruped_input', leaving
        # corrupted_input undefined on this branch
        corrupted_input = theano_rng.binomial(
            size=input_var.shape, n=1, p=1 - corruption_p) * input_var
    elif nkwargs['corruption'] in ['gaussian', 'normal']:
        corrupted_input = theano_rng.normal(
            size=input_var.shape, avg=0.0, std=1.0) * input_var
    else:
        corrupted_input = input_var
    return corrupted_input
def prediction(self, h, bias):
    srng = RandomStreams(seed=42)

    prop, mean, std = self.compute_parameters(h, bias)
    mode = T.argmax(srng.multinomial(pvals=prop, dtype=prop.dtype), axis=1)

    bs = mean.shape[0]
    v = T.arange(0, bs)
    m = mean[v, mode]  # (bs, d)
    s = std[v, mode]   # (bs, d)

    normal = srng.normal((bs, self.n_dim))  # (bs, d)
    normal_n = m + s * normal
    return normal_n
def test_normal(self):
    # Test that RandomStreams.normal generates the same results as numpy
    # Check over two calls to see if the random state is correctly updated.
    random = RandomStreams(utt.fetch_seed())
    fn = function([], random.normal((2, 2), -1, 2))
    fn_val0 = fn()
    fn_val1 = fn()

    rng_seed = np.random.RandomState(utt.fetch_seed()).randint(2**30)
    rng = np.random.RandomState(int(rng_seed))  # int() is for 32bit
    numpy_val0 = rng.normal(-1, 2, size=(2, 2))
    numpy_val1 = rng.normal(-1, 2, size=(2, 2))

    assert np.allclose(fn_val0, numpy_val0)
    assert np.allclose(fn_val1, numpy_val1)
def __init__(self, param_length, param_scale, X, Y, ind, builder, **kwargs):
    self.multistart = kwargs.get('multistart', 1)
    self.lr = theano.shared(kwargs.get('lr', 0.001))
    self.prior_scale = kwargs.get('prior_scale', param_scale)
    self.param_length = param_length
    models = []
    updates = []
    srng = RandomStreams(42)
    self.entropy_change = []
    priors = []
    self.params = []
    for i in range(0, self.multistart):
        params = theano.shared(
            (np.random.randn(param_length) *
             np.array(param_scale)).astype(np.float32))
        neg_prior = (0.5 * param_length * np.log(2 * np.pi) +
                     np.sum(np.log(self.prior_scale)) +
                     0.5 * T.dot(params / self.prior_scale,
                                 params / self.prior_scale))
        rvn = srng.normal((param_length,))
        model = builder(params, X, Y)
        cost = model + neg_prior
        grad = T.grad(cost, params)

        def jvp(vector):
            hvp = T.grad(T.dot(grad, vector), params)
            jvp = vector - self.lr * hvp
            return jvp

        r = T.vector()
        r1 = jvp(r)
        e_change = theano.function([r, ind], r1)
        self.entropy_change.append(e_change)
        self.params.append(params)
        self._debug = grad
        # Langevin-style update: gradient step plus Gaussian noise
        updates.append((params,
                        params - self.lr * grad + rvn * T.sqrt(2 * self.lr)))
        models.append(model)
        priors.append(-neg_prior)
    self._update = theano.function([ind], T.mean(models), updates=updates)
    self.neg_likelihood = theano.function([ind], models)
    self.entropy = (0.5 * param_length * (1 + np.log(2 * np.pi)) +
                    np.sum(np.log(param_scale)))
    self.prior = theano.function([], priors)
class VAE:
    def __init__(self, q, p, random=1234):
        self.q = q
        self.p = p
        self.srng = RandomStreams(seed=random)

    def q_f_prop(self, x):
        params = []
        layer_out = x
        for i, layer in enumerate(self.q[:-2]):
            params += layer.params
            layer_out = layer.f_prop(layer_out)
        params += self.q[-2].params
        mean = self.q[-2].f_prop(layer_out)
        params += self.q[-1].params
        var = self.q[-1].f_prop(layer_out)
        return mean, var, params

    def p_f_prop(self, x):
        params = []
        layer_out = x
        for i, layer in enumerate(self.p):
            params += layer.params
            layer_out = layer.f_prop(layer_out)
        mean = layer_out
        return mean, params

    def lower_bound(self, x):
        mean, var, q_params = self.q_f_prop(x)
        KL = -1. / 2 * T.mean(T.sum(1 + T.log(var) - mean**2 - var, axis=1))

        # Reparameterization trick: z = mean + eps * sqrt(var), eps ~ N(0, I)
        epsilon = self.srng.normal(mean.shape)
        z = mean + epsilon * T.sqrt(var)

        _x, p_params = self.p_f_prop(z)
        log_likelihood = T.mean(
            T.sum(x * T.log(_x) + (1 - x) * T.log(1 - _x), axis=1))

        params = q_params + p_params
        lower_bound = [-KL, log_likelihood]
        return lower_bound, params
class NormalDistribution(Distribution):
    def __init__(self, seed=1010):
        self.srng = RandomStreams(seed=seed)

    @property
    def n_statistics(self):
        return 2

    @property
    def fixed_bias(self):
        # fixed_bias[statistic]
        return [False, True]

    @property
    def fixed_bias_value(self):
        # fixed_bias_value[statistic]
        return [0, -1. / 2.]

    def f(self, x):
        # x[node, sample] -> f[node, sample, statistic]
        # Theano variables do not support item assignment, so the statistics
        # are stacked rather than written into a zeros tensor as originally.
        return T.stack([x, T.sqr(x)], axis=2)

    def lp(self, fac):
        # fac[node, sample, statistic] -> lpv[node, sample]
        return 1. / 2. * T.log(2. * pi) + fac[:, :, 0]

    def dlp(self, fac):
        # fac[node, sample, statistic] -> dlp[node, sample, statistic]
        # set_subtensor replaces the original clone() + item assignment,
        # which Theano variables do not support
        return T.set_subtensor(fac[:, :, 1], 0)

    def sampler(self, fac, final_gibbs_sample):
        # fac[node, sample, statistic] -> sample[node, sample]
        mean = fac[:, :, 0]
        if final_gibbs_sample:
            return mean
        else:
            return self.srng.normal(size=mean.shape, avg=mean, std=1.0,
                                    dtype=theano.config.floatX)
def main(n, m, l, d):
    # The seed just initializes the stream's internal RNG state
    srng = RandomStreams(seed=random.randint(0, 1000))
    X = T.matrix('X', dtype=floatX)
    M = T.scalar('M', dtype='int64')
    N = T.scalar('N', dtype='int64')
    L = T.scalar('L', dtype='int64')
    rv_a = srng.normal((N, M), avg=0, std=1).astype(floatX)
    rv_b = srng.normal((N, L), avg=0, std=1).astype(floatX)
    Y = T.dot(rv_a, X) + rv_b
    f = theano.function([X, M, N, L], Y)
    x = np.zeros((m, l)).astype(floatX) + d
    print(f(x, m, n, l))
def main(n, l, n_it, d):
    srng = RandomStreams(seed=random.randint(0, 1000))
    X = T.matrix('X', dtype=floatX)
    N_IT = T.scalar('N_IT', dtype='int64')
    N = T.scalar('N', dtype='int64')
    L = T.scalar('L', dtype='int64')
    rv_a = srng.normal((N, N), avg=0, std=1).astype(floatX)
    rv_b = srng.normal((N, L), avg=0, std=1).astype(floatX)
    # import pdb; pdb.set_trace()
    Y = X
    for i in range(n_it):
        Y = T.dot(rv_a, Y) + rv_b
    f = theano.function([X, N, L], Y)
    x = np.zeros((n, l)).astype(floatX) + d
    y = f(x, n, l)
    print(y)
class GaussianNoiseLayer(object):
    def __init__(self, rng, sigma=1.0):
        self.sigma = sigma
        self.theano_rng = RandomStreams(rng.randint(2**30))
        self.params = []

    def __call__(self, input):
        if self.sigma > 0.0:
            # Pass sigma through to the noise draw; the original left the
            # default std=1.0, so self.sigma was never actually used.
            return input + self.theano_rng.normal(
                size=input.shape, std=self.sigma,
                dtype=theano.config.floatX)
        else:
            return input

    def inv(self, output):
        return output

    def load(self, filename):
        pass

    def save(self, filename):
        pass
def LDS_finite_diff(x, forward_func, main_obj_type, epsilon,
                    norm_constraint='L2', num_power_iter=1, xi=1e-6):
    rng = RandomStreams(seed=numpy.random.randint(1234))
    y = forward_func(x)
    d = rng.normal(size=x.shape, dtype=theano.config.floatX)

    # power iteration
    for power_iter in range(num_power_iter):
        d = xi * get_normalized_vector(d)
        y_d = forward_func(x + d)
        Hd = T.grad(get_kl(y_d, y, main_obj_type).mean(), wrt=d) / xi
        Hd = theano.gradient.disconnected_grad(Hd)
        d = Hd

    r_vadv = get_perturbation(d, epsilon, norm_constraint)
    return -get_kl(forward_func(x + r_vadv), y, main_obj_type,
                   include_ent_term=True)
class VariationalLayer(object):
    def __init__(self, rng, sample=True):
        self.theano_rng = RandomStreams(rng.randint(2**30))
        self.sample = sample
        self.params = []

    def __call__(self, input):
        if self.sample:
            # Even columns hold the means, odd columns the log-variances
            mu, sg = input[:, 0::2], input[:, 1::2]
            eps = self.theano_rng.normal(mu.shape,
                                         dtype=theano.config.floatX)
            return mu + T.sqrt(T.exp(sg)) * eps
        else:
            return input[:, 0::2]

    def inv(self, output):
        pass

    def load(self, filename):
        pass

    def save(self, filename):
        pass
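# Aside (a NumPy check of the reparameterisation above, not original code):
# with sg holding the log-variance, sqrt(exp(sg)) equals exp(sg / 2), so
# mu + sqrt(exp(sg)) * eps is a draw from N(mu, exp(sg)).
import numpy as np

rng = np.random.RandomState(0)
mu, log_var = 1.5, -2.0
eps = rng.normal(size=100000)
z = mu + np.sqrt(np.exp(log_var)) * eps
print(z.mean(), z.var())  # close to 1.5 and exp(-2.0) ~= 0.135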
def noise_injection(train_set, std=0.01, noise_distribution='normal'):
    '''Used for regularization. Noise is injected in the input'''
    srng = RandomStreams()
    number_of_examples = len(train_set)
    number_of_features = len(train_set[0])
    if noise_distribution == 'normal':
        print(' Normal noise added to the inputs.')
        noise_matrix = srng.normal(
            size=(number_of_examples, number_of_features),
            avg=0.0, std=std)
    elif noise_distribution == 'uniform':
        print(' Uniform noise added to the inputs. ')
        noise_matrix = srng.uniform(
            size=(number_of_examples, number_of_features),
            low=0.0, high=std)
    else:
        noise_matrix = numpy.zeros((number_of_examples, number_of_features))
    if noise_distribution in ('normal', 'uniform'):
        # Evaluate the symbolic noise matrix; the fallback branch above is
        # already a NumPy array and must not be passed to theano.function
        f = theano.function([], noise_matrix)
        noise_matrix = f()
    train_set = train_set + noise_matrix
    return train_set
def LDS(x, forward_func, main_obj_type, epsilon, norm_constraint='L2',
        num_power_iter=1):
    rng = RandomStreams(seed=numpy.random.randint(1234))
    y = forward_func(x)
    d = rng.normal(size=x.shape, dtype=theano.config.floatX)
    y_hat = theano.gradient.disconnected_grad(y)
    grad = T.grad(get_kl(y, y_hat, main_obj_type).mean(), wrt=x)

    # power iteration
    for power_iter in range(num_power_iter):
        d = get_normalized_vector(d)
        Hd = T.grad(T.sum(grad * d), wrt=x)
        Hd = theano.gradient.disconnected_grad(Hd)
        d = Hd

    r_vadv = get_perturbation(d, epsilon, norm_constraint)
    return -get_kl(forward_func(x + r_vadv), y_hat, main_obj_type,
                   include_ent_term=True)
def test_examples_9(self):
    from theano.tensor.shared_randomstreams import RandomStreams

    srng = RandomStreams(seed=234)
    rv_u = srng.uniform((2, 2))
    rv_n = srng.normal((2, 2))
    f = function([], rv_u)
    g = function([], rv_n, no_default_updates=True)  # Not updating rv_n.rng
    nearly_zeros = function([], rv_u + rv_u - 2 * rv_u)

    f_val0 = f()
    f_val1 = f()  # different numbers from f_val0
    assert numpy.all(f_val0 != f_val1)

    g_val0 = g()  # different numbers from f_val0 and f_val1
    g_val1 = g()  # same numbers as g_val0 !!!
    assert numpy.all(g_val0 == g_val1)
    assert numpy.all(g_val0 != f_val0)
    assert numpy.all(g_val0 != f_val1)

    nearly_zeros = function([], rv_u + rv_u - 2 * rv_u)
    assert numpy.allclose(nearly_zeros(), [[0., 0.], [0., 0.]])

    rng_val = rv_u.rng.get_value(borrow=True)  # Get the rng for rv_u
    rng_val.seed(89234)  # seeds the generator
    rv_u.rng.set_value(rng_val, borrow=True)  # Assign back seeded rng

    srng.seed(902340)  # seeds rv_u and rv_n with different seeds each

    state_after_v0 = rv_u.rng.get_value().get_state()
    nearly_zeros()  # this affects rv_u's generator
    v1 = f()
    rng = rv_u.rng.get_value(borrow=True)
    rng.set_state(state_after_v0)
    rv_u.rng.set_value(rng, borrow=True)
    v2 = f()  # v2 != v1
    v3 = f()  # v3 == v1
    assert numpy.all(v1 != v2)
    assert numpy.all(v1 == v3)
def test_basics(self):
    random = RandomStreams(utt.fetch_seed())
    fn = function([], random.uniform((2, 2)), updates=random.updates())
    gn = function([], random.normal((2, 2)), updates=random.updates())

    fn_val0 = fn()
    fn_val1 = fn()
    gn_val0 = gn()

    rng_seed = numpy.random.RandomState(utt.fetch_seed()).randint(2**30)
    rng = numpy.random.RandomState(int(rng_seed))  # int() is for 32bit

    # print fn_val0
    numpy_val0 = rng.uniform(size=(2, 2))
    numpy_val1 = rng.uniform(size=(2, 2))
    # print numpy_val0

    assert numpy.allclose(fn_val0, numpy_val0)
    print(fn_val0)
    print(numpy_val0)
    print(fn_val1)
    print(numpy_val1)
    assert numpy.allclose(fn_val1, numpy_val1)