def __init__(self, rng, W=None, m=1.0, n_samples=50, shape=None,
             batch_size=1000):
    # If no embedding matrix is given, initialize one with the
    # Glorot-style uniform scheme based on fan-in plus fan-out.
    if W is None:
        W = numpy.asarray(rng.uniform(
            low=-numpy.sqrt(6. / (shape[0] + shape[1])),
            high=numpy.sqrt(6. / (shape[0] + shape[1])),
            size=(shape[0], shape[1])), dtype=theano.config.floatX)
    self.W = theano.shared(value=W, name='Hashtag_emb', borrow=True)
    self.batch_size = batch_size
    self.n_ht = W.shape[0]
    self.m = m
    self.n_samples = n_samples
    # GPU-side random stream used to draw a random ordering of samples
    self.csrng = CURAND_RandomStreams(123)
    mask = self.csrng.uniform(size=(self.n_samples, 1), low=0.0, high=1.0,
                              dtype=theano.config.floatX)
    self.rfun = theano.function([], mask.argsort(axis=0))
    # rank-based weights: alpha[k] = 1 / (k + 1)
    self.alpha = T.constant(
        1.0 / numpy.arange(start=1, stop=self.n_ht + 1, step=1))
    self.weights = [self.W]
    self.biases = []
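# A minimal usage sketch for the constructor above. The class name
# `HashtagEmbedding` is an assumption for illustration, as are the
# vocabulary/embedding sizes; only `rng` and `shape` are required
# when no W is passed in.
import numpy
rng = numpy.random.RandomState(42)
emb = HashtagEmbedding(rng, shape=(10000, 128), m=1.0,
                       n_samples=50, batch_size=1000)
perm = emb.rfun()  # a random permutation of 0..n_samples-1, as a column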
def compare_speed():
    # To run this speed comparison
    #   cd <directory of this file>
    #   THEANO_FLAGS=device=gpu \
    #     python -c 'import test_rng_curand; test_rng_curand.compare_speed()'
    mrg = MRG_RandomStreams()
    crn = CURAND_RandomStreams(234)
    N = 1000 * 100
    dest = theano.shared(numpy.zeros(N, dtype=theano.config.floatX))
    mrg_u = theano.function([], [], updates={dest: mrg.uniform((N,))},
                            profile='mrg uniform')
    crn_u = theano.function([], [], updates={dest: crn.uniform((N,))},
                            profile='crn uniform')
    mrg_n = theano.function([], [], updates={dest: mrg.normal((N,))},
                            profile='mrg normal')
    crn_n = theano.function([], [], updates={dest: crn.normal((N,))},
                            profile='crn normal')
    for f in mrg_u, crn_u, mrg_n, crn_n:
        print('DEBUGPRINT')
        print('----------')
        theano.printing.debugprint(f)
    for i in range(100):
        for f in mrg_u, crn_u, mrg_n, crn_n:
            # don't time the first call, it has some startup cost
            f.fn.time_thunks = (i > 0)
            f()
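# A minimal wall-clock alternative to the thunk-level profiling above,
# assuming the compiled functions from compare_speed(); it reports the
# mean call time over `reps` runs after one warm-up call.
import time

def time_fn(f, reps=100):
    f()  # warm-up call: excludes compilation/startup cost
    t0 = time.time()
    for _ in range(reps):
        f()
    return (time.time() - t0) / reps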
def sampler(self, mu, log_sigma):
    # Use the GPU-side CURAND generator when running on the GPU,
    # otherwise fall back to the host random streams. `seed` is
    # assumed to be defined in the enclosing scope.
    if "gpu" in theano.config.device:
        from theano.sandbox.cuda.rng_curand import CURAND_RandomStreams
        srng = CURAND_RandomStreams(seed=seed)
        # srng = T.shared_randomstreams.RandomStreams(seed=seed)
    else:
        srng = T.shared_randomstreams.RandomStreams(seed=seed)
    eps = srng.normal(mu.shape)
    # Reparametrize (note: this is a scaled/shifted variant, not the
    # textbook z = mu + exp(0.5 * log_sigma) * eps)
    z = mu + (T.exp(0.5 * log_sigma) - 1) * eps * 5e-1
    return z
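# For reference, a minimal sketch of the standard VAE reparametrization
# trick, assuming `srng` is one of the random streams built above; this
# is what the "Reparametrize" comment conventionally denotes.
def standard_sampler(mu, log_sigma, srng):
    eps = srng.normal(mu.shape)     # eps ~ N(0, I)
    sigma = T.exp(0.5 * log_sigma)  # log_sigma stores log(variance)
    return mu + sigma * eps         # z ~ N(mu, sigma^2)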
def __init__(self, rng, clean_input=None, fuzzy_input=None,
             in_dim=0, out_dim=0, activation=None, input_noise=0.,
             W=None, b_h=None, b_v=None):
    # Setup a shared random generator for this layer
    #self.rng = theano.tensor.shared_randomstreams.RandomStreams(
    #    rng.randint(100000))
    self.rng = CURAND_RandomStreams(rng.randint(1000000))
    # Grab the layer input and perturb it with some sort of noise. This
    # is, after all, a _denoising_ autoencoder...
    self.clean_input = clean_input
    self.noisy_input = self._get_noisy_input(fuzzy_input, input_noise)
    # Set some basic layer properties
    self.activation = activation
    self.in_dim = in_dim
    self.out_dim = out_dim
    # Get some random initial weights and biases, if not given
    if W is None:
        W_init = np.asarray(0.01 * rng.standard_normal(
            size=(in_dim, out_dim)), dtype=theano.config.floatX)
        W = theano.shared(value=W_init, name='W')
    if b_h is None:
        b_init = np.zeros((out_dim,), dtype=theano.config.floatX)
        b_h = theano.shared(value=b_init, name='b_h')
    if b_v is None:
        b_init = np.zeros((in_dim,), dtype=theano.config.floatX)
        b_v = theano.shared(value=b_init, name='b_v')
    # Grab pointers to the now-initialized weights and biases
    self.W = W
    self.b_h = b_h
    self.b_v = b_v
    # Put the learnable/optimizable parameters into a list
    self.params = [self.W, self.b_h, self.b_v]
    # Beep boop... layer construction complete...
    return
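# `_get_noisy_input` is referenced above but not shown. A minimal sketch
# of what a masking-noise implementation might look like, using the
# layer's CURAND stream; the name and rescaling choice are assumptions,
# not the original helper.
def _get_noisy_input(self, x, noise_rate):
    if noise_rate > 1e-4:
        # keep each input value with probability (1 - noise_rate)
        keep_mask = self.rng.uniform(size=x.shape, low=0.0, high=1.0,
                                     dtype=theano.config.floatX) > noise_rate
        return x * keep_mask
    return x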
def __init__(self, rng, input=None, filt_def=None, pool_def=(2, 2),
             activation=None, drop_rate=0., input_noise=0., bias_noise=0.,
             W=None, b=None, name="", W_scale=1.0):
    # Setup a shared random generator for this layer
    #self.rng = theano.tensor.shared_randomstreams.RandomStreams(
    #    rng.randint(100000))
    self.rng = CURAND_RandomStreams(rng.randint(1000000))
    self.clean_input = input
    # Add gaussian noise to the input (if desired)
    if (input_noise > 1e-4):
        self.fuzzy_input = input + self.rng.normal(
            size=input.shape, avg=0.0, std=input_noise,
            dtype=theano.config.floatX)
    else:
        self.fuzzy_input = input
    # Apply masking noise to the input (if desired)
    if (drop_rate > 1e-4):
        self.noisy_input = self._drop_from_input(self.fuzzy_input, drop_rate)
    else:
        self.noisy_input = self.fuzzy_input
    # Set the activation function for the conv filters
    if activation:
        self.activation = activation
    else:
        self.activation = lambda x: relu_actfun(x)
    # Get some random initial filters and biases, if not given
    if W is None:
        W_init = 0.01 * np.asarray(rng.normal(size=filt_def),
                                   dtype=theano.config.floatX)
        W = theano.shared(value=(W_scale * W_init),
                          name="{0:s}_W".format(name))
    if b is None:
        # the bias is a 1D tensor -- one bias per output feature map
        b_init = np.zeros((filt_def[0],), dtype=theano.config.floatX) + 0.1
        b = theano.shared(value=b_init, name="{0:s}_b".format(name))
    self.W = W
    self.b = b
    # convolve input feature maps with filters
    input_c01b = self.noisy_input.dimshuffle(1, 2, 3, 0)  # bc01 to c01b
    filters_c01b = self.W.dimshuffle(1, 2, 3, 0)  # bc01 to c01b
    conv_op = FilterActs(stride=1, partial_sum=1)
    contig_input = gpu_contiguous(input_c01b)
    contig_filters = gpu_contiguous(filters_c01b)
    conv_out_c01b = conv_op(contig_input, contig_filters)
    # Add noise to the conv outputs (if desired)
    if (bias_noise > 1e-4):
        noisy_conv_out_c01b = conv_out_c01b + self.rng.normal(
            size=conv_out_c01b.shape, avg=0.0, std=bias_noise,
            dtype=theano.config.floatX)
    else:
        noisy_conv_out_c01b = conv_out_c01b
    # downsample each feature map individually, using maxpooling
    pool_op = MaxPool(ds=pool_def[0], stride=pool_def[1])
    mp_out_c01b = pool_op(noisy_conv_out_c01b)
    mp_out_bc01 = mp_out_c01b.dimshuffle(3, 0, 1, 2)  # c01b to bc01
    # add the bias term. Since the bias is a vector (1D array), we first
    # reshape it to a tensor of shape (1, n_filters, 1, 1). Each bias will
    # thus be broadcasted across mini-batches and feature map
    # width & height
    self.noisy_linear_output = mp_out_bc01 + self.b.dimshuffle(
        'x', 0, 'x', 'x')
    self.linear_output = self.noisy_linear_output
    self.output = self.activation(self.noisy_linear_output)
    # store parameters of this layer
    self.params = [self.W, self.b]
    return
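# `_drop_from_input` is referenced above but not shown. A minimal sketch
# of a masking-noise (dropout-style) helper using the layer's CURAND
# stream; the name and the rescaling choice are assumptions for
# illustration, not the original implementation.
def _drop_from_input(self, x, drop_rate):
    # drop each value with probability drop_rate, then rescale the
    # survivors so the expected activation magnitude is unchanged
    keep_prob = 1.0 - drop_rate
    keep_mask = self.rng.uniform(size=x.shape, low=0.0, high=1.0,
                                 dtype=theano.config.floatX) < keep_prob
    return (x * keep_mask) / keep_prob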
def __init__(self, rng, input, in_dim, out_dim,
             activation=None, pool_size=0,
             drop_rate=0., input_noise=0., bias_noise=0.,
             W=None, b=None, name="", W_scale=1.0):
    # Setup a shared random generator for this layer
    #self.rng = theano.tensor.shared_randomstreams.RandomStreams(
    #    rng.randint(100000))
    self.rng = CURAND_RandomStreams(rng.randint(1000000))
    self.clean_input = input
    # Add gaussian noise to the input (if desired)
    if (input_noise > 1e-4):
        self.fuzzy_input = input + self.rng.normal(
            size=input.shape, avg=0.0, std=input_noise,
            dtype=theano.config.floatX)
    else:
        self.fuzzy_input = input
    # Apply masking noise to the input (if desired)
    if (drop_rate > 1e-4):
        self.noisy_input = self._drop_from_input(self.fuzzy_input, drop_rate)
    else:
        self.noisy_input = self.fuzzy_input
    # Set some basic layer properties
    self.pool_size = pool_size
    self.in_dim = in_dim
    self.out_dim = out_dim
    if self.pool_size <= 1:
        self.filt_count = self.out_dim
    else:
        self.filt_count = self.out_dim * self.pool_size
    self.pool_count = self.filt_count // max(self.pool_size, 1)
    if activation:
        self.activation = activation
    else:
        if self.pool_size <= 1:
            self.activation = lambda x: relu_actfun(x)
        else:
            self.activation = lambda x: \
                maxout_actfun(x, self.pool_size, self.filt_count)
    # Get some random initial weights and biases, if not given
    if W is None:
        if self.pool_size <= 1:
            # Generate random initial filters in a typical way
            W_init = 0.01 * np.asarray(rng.normal(
                size=(self.in_dim, self.filt_count)),
                dtype=theano.config.floatX)
        else:
            # Generate groups of random filters to pool over such that
            # intra-group correlations are stronger than inter-group
            # correlations, to encourage pooling over similar filters...
            filters = []
            f_size = (self.in_dim, 1)
            for g_num in range(self.pool_count):
                g_filt = 0.01 * rng.normal(size=f_size)
                for f_num in range(self.pool_size):
                    f_filt = g_filt + 0.003 * rng.normal(size=f_size)
                    filters.append(f_filt)
            W_init = np.hstack(filters).astype(theano.config.floatX)
        W = theano.shared(value=(W_scale * W_init),
                          name="{0:s}_W".format(name))
    if b is None:
        b_init = np.zeros((self.filt_count,), dtype=theano.config.floatX)
        b = theano.shared(value=b_init, name="{0:s}_b".format(name))
    # Set layer weights and biases
    self.W = W
    self.b = b
    # Compute linear "pre-activation" for this layer
    self.linear_output = T.dot(self.noisy_input, self.W) + self.b
    # Add noise to the pre-activation features (if desired)
    if bias_noise > 1e-3:
        self.noisy_linear = self.linear_output + \
            self.rng.normal(size=self.linear_output.shape,
                            avg=0.0, std=bias_noise,
                            dtype=theano.config.floatX)
    else:
        self.noisy_linear = self.linear_output
    # Apply activation function
    self.output = self.activation(self.noisy_linear)
    # Compute some properties of the activations, probably to regularize
    self.act_l2_sum = T.sum(self.output**2.) / self.output.size
    self.row_l1_sum = T.sum(abs(row_normalize(self.output))) / \
        self.output.shape[0]
    self.col_l1_sum = T.sum(abs(col_normalize(self.output))) / \
        self.output.shape[1]
    # Conveniently package layer parameters
    self.params = [self.W, self.b]
    # Layer construction complete...
    return
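# `maxout_actfun` is referenced above but not shown. A minimal sketch of
# a maxout activation under the layout implied by the initialization
# code (filt_count = out_dim * pool_size, with each pool group occupying
# pool_size contiguous feature columns); an assumption, not the original.
def maxout_actfun(x, pool_size, filt_count):
    # reshape (batch, filt_count) -> (batch, pool_count, pool_size),
    # then take the max within each group of pooled filters
    pool_count = filt_count // pool_size
    return T.max(x.reshape((x.shape[0], pool_count, pool_size)), axis=2)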