def theano_print_shape(var, msg): """ Helper function for printing the shape of a Theano expression during run time of the Theano graph. Parameters ---------- var : Theano expression The variable whose shape to be printed at runtime. msg : str The message to be printed together with the shape. Returns ------- A Theano expression which should be used instead of the original expression in order the printing to happen. """ if var.ndim == 0: pr = Print(msg + "(SCALAR)")(var) return T.switch(T.lt(0, 1), var, pr) else: pr = Print(msg)(T.shape(var)) return T.switch(T.lt(0, 1), var, T.cast(pr[0], var.dtype))
def theano_print_value(var, msg):
    # Same trick as theano_print_shape, but prints the value of `var` itself.
    if var.ndim == 0:
        pr = Print(msg + "(SCALAR)")(var)
        return T.switch(T.lt(0, 1), var, pr)
    else:
        pr = Print(msg)(var)
        return T.switch(T.lt(0, 1), var, T.cast(pr[0], var.dtype))
def dropout_theano(x, level, seed=None):
    if level < 0. or level >= 1:
        raise Exception('Dropout level must be in interval [0, 1[.')
    if seed is None:
        seed = np.random.randint(1, 10e6)
    rng = RandomStreams(seed=seed)
    print("-----Random stream-------", rng)
    # Print the input values at graph run time; the Print op must be applied
    # to the variable, constructing it alone prints nothing.
    x = Print("-----Input before dropout-------")(x)
    retain_prob = 1. - level
    x *= rng.binomial(x.shape, p=retain_prob, dtype=x.dtype)
    x /= retain_prob
    x = Print("-----Input after dropout-------")(x)
    return x
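# Illustrative usage sketch, assuming the helper above plus these imports; a
# fresh dropout mask is sampled on every call of the compiled function.
import numpy as np
import theano
import theano.tensor as T

h = T.matrix('h')
h_drop = dropout_theano(h, level=0.5, seed=42)
f_drop = theano.function([h], h_drop)
print(f_drop(np.ones((2, 4), dtype=theano.config.floatX)))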
def dynamic_scale(self, state_below): """ .. todo:: WRITEME """ self.input_space.validate(state_below) if self.requires_reformat: state_below = self.input_space.format_as(state_below, self.desired_space) z = self.transformer.lmul(state_below) + self.b if not hasattr(self, 'randomize_pools'): self.randomize_pools = False if not hasattr(self, 'pool_stride'): self.pool_stride = self.pool_size if self.randomize_pools: z = T.dot(z, self.permute) if not hasattr(self, 'min_zero'): self.min_zero = False if self.min_zero: p = 0. else: p = None last_start = self.detector_layer_dim - self.pool_size for i in xrange(self.pool_size): cur = z[:, i:last_start + i + 1:self.pool_stride] if p is None: p = cur else: p = T.maximum(cur, p) cost = p.sum() mask = T.grad(cost, z) counts = mask.sum(axis=0) reweight = T.cast(mask.shape[0], config.floatX) / T.clip( counts, 1.0, 1e6) reweight = Print('reweight', attrs=['min', 'mean', 'max'])(reweight) params = self.get_params() rval = OrderedDict() for param in params: rval[param] = reweight return rval
def cosine_dist(tensor, matrix): """ Along axis 1 for both inputs. Assumes dimensions 0 and 1 are equal """ matrix_norm = T.shape_padright(matrix.norm(2, axis=1)) tensor_norm = tensor.norm(2, axis=1) norm_ = (matrix_norm * tensor_norm) norm_ = Print('norm_')(norm_) return T.batched_dot(matrix, tensor) / norm_
def apply(self, source):
    x_linear = self.x_to_h.apply(
        source.reshape((source.shape[1], source.shape[0], source.shape[2])))
    x_linear.name = 'x_linear'
    if self.print_intermediate:
        x_linear = Print(message='x_linear info',
                         attrs=self.print_attrs)(x_linear)
    h, c = self.lstm.apply(x_linear)
    if self.print_intermediate:
        h = Print(message="hidden states info", attrs=self.print_attrs)(h)
    y_hat = self.h_to_o.apply(h)
    y_hat.name = 'y_hat'
    if self.print_intermediate:
        y_hat = Print(message="y_hat info", attrs=self.print_attrs)(y_hat)
    return y_hat
def get_reconstruction_func():
    V = model.get_input_space().make_theano_batch(name="V")
    assert V.dtype == 'float32'
    if hasattr(model, 'e_step'):
        # S3C
        mf = model.e_step.variational_inference(V)
        H = mf['H_hat']
        S = mf['S_hat']
        Z = H * S
        recons = T.dot(Z, model.W.T)
    elif hasattr(model, 's3c'):
        # PDDBM
        mf = model.inference_procedure.infer(V)
        H = mf['H_hat']
        S = mf['S_hat']
        Z = H * S
        recons = T.dot(Z, model.s3c.W.T)
    else:
        # RBM
        if corrupt > -1.:
            from pylearn2.corruption import GaussianCorruptor
            c = GaussianCorruptor(stdev=corrupt)
            corrupted_V = c(V)
            H = model.mean_h_given_v(corrupted_V)
        from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams
        theano_rng = RandomStreams(42)
        H_sample = theano_rng.binomial(size=H.shape, p=H)
        from theano.printing import Print
        H_sample = Print('H_sample', attrs=['mean'])(H_sample)
        H_sample = T.cast(H, 'float32')
        recons = model.mean_v_given_h(H_sample)
        recons = Print('recons', attrs=['min', 'mean', 'max'])(recons)
    rval = function([V], recons)
    return rval
def clip_gradients_norm(gradients, threshold, parameters, fix_nan=False):
    gradient_sqr_vec = T.concatenate([T.sqr(g.flatten()) for g in gradients])
    gradient_norm = T.sqrt(gradient_sqr_vec.sum())
    rescale = T.maximum(gradient_norm, threshold)
    if fix_nan:
        isnan = T.or_(T.isnan(gradient_norm), T.isinf(gradient_norm))
    else:
        isnan = None
    rv = []
    for i, g in enumerate(gradients):
        if fix_nan:
            alt_g = 0.1 * parameters[i]
            print_alt_g = Print(
                "NaN detected! Fixing with pseudogradient with mean:",
                ["mean"])(alt_g)
            new_g = T.switch(isnan, print_alt_g, g / rescale)
        else:
            new_g = g / rescale
        rv.append(new_g)
    return rv
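# A small usage sketch, assuming the helper above and these imports; the names
# `w`, `x` and the learning rate are illustrative only. The clipped gradients
# are used to build plain SGD updates.
import numpy as np
import theano
import theano.tensor as T

w = theano.shared(np.ones(3, dtype=theano.config.floatX), name='w')
x = T.vector('x')
cost = T.sqr(T.dot(x, w)).sum()
grads = T.grad(cost, [w])
clipped = clip_gradients_norm(grads, threshold=1.0, parameters=[w], fix_nan=True)
updates = [(w, w - 0.1 * g) for g in clipped]
train = theano.function([x], cost, updates=updates)
train(np.ones(3, dtype=theano.config.floatX))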
def multivariate_normal_nohypers(datasets, weights, hyperparams, residuals):
    """
    Calculate the posterior likelihood of a multivariate normal distribution.
    Uses the plain inverse of the covariances.

    DEPRECATED! Is currently not being used in beat.

    Can only be executed in a `with model context`.

    Parameters
    ----------
    datasets : list of :class:`heart.SeismicDataset` or
        :class:`heart.GeodeticDataset`
    weights : list of :class:`theano.shared`
        Square matrix of the inverse of the covariance matrix as weights
    hyperparams : dict of :class:`theano.`
    residuals : list or array of model residuals

    Returns
    -------
    array_like
    """
    n_t = len(datasets)

    logpts = tt.zeros((n_t), 'float64')

    for l, data in enumerate(datasets):
        M = tt.cast(shared(
            data.samples, name='nsamples', borrow=True), 'int16')
        maha = residuals[l].dot(weights[l]).dot(residuals[l].T)
        slogpdet = Print('theano logpdet')(data.covariance.slog_pdet)

        logpts = tt.set_subtensor(
            logpts[l:l + 1],
            (-0.5) * (M * log_2pi + slogpdet + maha))

    return logpts
from pylearn2.utils import serial
model = serial.load('rectifier_7.pkl')

import theano.tensor as T
X = T.matrix()
state = model.fprop(X)
target = T.matrix()

right_cost = model.layers[-1].kl(Y=target, Y_hat=state)
wrong_cost = model.layers[-1].kl(Y=target[::-1, :], Y_hat=state)

from theano.printing import Print
right_cost = Print('right_cost')(right_cost)

acc = (wrong_cost > right_cost).mean()

from theano import function
f = function([X, target], acc)

acc = f(dataset.X, dataset.y)
print acc
def print_tensor(x, message=''):
    '''Print the message and the tensor when evaluated and return the same
    tensor.
    '''
    p_op = Print(message)
    return p_op(x)
def grad_dir_func(pdf):
    grad = T.grad(pdf.sum(), X)
    grad = Print('before', attrs=['min', 'max'])(grad)
    grad /= T.sqrt(1e-15 + T.sum(T.sqr(grad), axis=1).dimshuffle(0, 'x'))
    grad = Print('after', attrs=['min', 'max'])(grad)
    return FuckYouTheano(function([X], grad))
# First, we inspect the compiled graph to verify that it does not use the
# softmax op.
# Second, we run the same functionality without the Print op. We verify that
# the softmax op appears in the compiled graph, and we verify that the new
# graph gets the correct output.
from ex_03_detect_op_soln import contains_softmax
import numpy as np
from theano import function
from theano.printing import Print
import theano.tensor as T

X = T.matrix()
p_tilde = T.exp(X)
p_tilde = Print('p_tilde', attrs=['min', 'max'])(p_tilde)
denom = p_tilde.sum(axis=1, keepdims=True)
p = p_tilde / denom
f = function([X], p)
assert not contains_softmax(f)

X = -1000. * np.ones((2, 2)).astype(X.dtype)
output = f(X)
assert np.all(np.isnan(output))

X = T.matrix()
p_tilde = T.exp(X)
def apply_nan_suppression(updates, print_mode='all'):
    """Return a modified update dictionary in which updates containing
    non-finite values are replaced with no-op updates.

    If any NaN or infinity values are found in the new_expression (second)
    half of an update, the update is replaced with the do-nothing update
    (shared_variable, shared_variable).

    This can be used to patch over the most intransigent, slippery instances
    of NaNs creeping into training, if they appear rarely and one is
    reasonably sure that the problem is not fundamental to the model.

    Parameters
    ----------
    updates : OrderedDict
        A dictionary mapping parameters to update expressions
    print_mode : str
        If ``'all'``, print a debugging message containing the name of the
        shared variable and its suppressed update value whenever a non-finite
        value is detected. If ``'shape'``, print only the name of the variable
        and the shape of the update value. If ``'none'``, suppress NaNs
        silently without printing anything.

    Returns
    -------
    OrderedDict
        A copy of `updates` with expressions containing non-finite values
        replaced by the original value.

    Examples
    --------
    >>> param = theano.shared(np.array([0., 0.], dtype=np.float32),
    ...                       name='param')
    >>> inc = T.fvector('inc')
    >>> updates = OrderedDict([(param, param + inc)])
    >>> safe_updates = apply_nan_suppression(updates)
    >>> func = theano.function([inc], safe_updates[param],
    ...                        updates=safe_updates)
    >>> func([1., 2.])
    array([ 1.,  2.], dtype=float32)
    >>> func([2., float('nan')])
    Warning: non-finite update suppressed for param: __str__ = [  3.  nan]
    array([ 1.,  2.], dtype=float32)
    """
    new_updates = OrderedDict([])

    for shared_variable, new_expression in updates.iteritems():
        isnan = T.isnan(new_expression).any() | T.isinf(new_expression).any()
        warning_msg = 'Warning: non-finite update suppressed for %s'

        if print_mode == 'all':
            suppressed = T.zeros_like(
                Print((warning_msg + ':') % shared_variable.name)(
                    new_expression))
        elif print_mode == 'shape':
            suppressed = T.zeros_like(
                Print((warning_msg + ':') % shared_variable.name,
                      attrs=('shape',))(new_expression))
        elif print_mode == 'none' or print_mode is None:
            suppressed = T.zeros_like(new_expression)
        else:
            raise ValueError(
                "print_mode must be one of 'all', 'shape', or 'none'")

        # For some reason, the ifelse needs to be used in a calculation, or
        # the Print gets optimized away. So we can't do
        #     suppressed = (zeros_like(Print('warning')(new_expression)) +
        #                   shared_variable)
        #     ifelse(isnan, suppressed, new_expression)
        new_updates[shared_variable] = shared_variable + ifelse(
            isnan, suppressed, new_expression - shared_variable)

    return new_updates
if t not in [ENERGY, SCORE, SCORED]:
    g.components.append(HeatMap(
        f=function([X], model.free_energy(X)),
        normalizer=None))
    offset = g.render().mean()
#
if t == ENERGY:
    df = dataset.free_energy_func
    mfe = model.free_energy(X)
    mf = function([X], mfe)
    normalizer = energy_normalizer
elif t == PDF:
    df = dataset.pdf_func
    mfe = model.free_energy(X)
    mfe = Print('model free energy', attrs=['min', 'max'])(mfe)
    mf = function([X], T.exp(-mfe + offset))
    normalizer = pdf_normalizer
elif t == GPDF:
    df = grad_func(dataset.pdf(X))
    mf = grad_func(T.exp(-model.free_energy(X) + offset))
    normalizer = gpdf_normalizer
elif t == GDPDF:
    df = grad_dir_func(dataset.pdf(X))
    mf = grad_dir_func(T.exp(-model.free_energy(X) + offset))
    normalizer = gdpdf_normalizer
elif t == SCORE:
    df = grad_func(- dataset.free_energy(X))
def theano_print_min_max_vals(var, msg):
    pr = Print(msg)(T.stack((T.min(var), T.max(var))))
    return T.switch(T.lt(0, 1), var, T.cast(pr[0], var.dtype))
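# Minimal usage sketch, assuming Theano is installed and the helper above is
# in scope; the stacked [min, max] pair is expected to be printed when the
# compiled function is evaluated, while the returned values are unchanged.
import numpy as np
import theano
import theano.tensor as T

v = T.vector('v')
v_dbg = theano_print_min_max_vals(v, 'min/max of v = ')
f_dbg = theano.function([v], v_dbg * 2)
f_dbg(np.arange(5).astype(theano.config.floatX))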
def theano_print_vals(var, msg):
    return Print(msg)(var)
def recurrence(i, h_tm1, w_a, M_a, *args, **kwargs):
    """
    notes
    Headers from paper in all caps
    mem = n_article_slots if is_article else n_title_slots

    :param i: center index of sliding window
    :param h_tm1: h_{t-1} (hidden state)
    :param w_a: attention weights for article memory
    :param M_a: article memory
    :param args: gru_weights, maybe w_t, maybe M_t
        gru_weights: weights with which to initialize GRULayer on each time step
        w_t: attention weights for titles memory
        M_t: titles memory
    :param kwargs: is_training, is_article
        is_training:
        is_article: we use different parts of memory when working with an article
    :return: [y = model outputs, i + 1 = increment index, h, w, M (see above)]
    """
    is_training = kwargs['is_training']
    is_article = kwargs['is_article']
    gru_weights = args[:depth]
    if len(args) > depth:
        w_t = args[depth]
        M_t = args[depth + 1]
    i_type = T.iscalar if is_article or is_training else T.ivector
    assert i.type == i_type
    if not is_article:
        assert w_t is not None and M_t is not None

    word_idxs = i
    if is_article or is_training:
        # get representation of word window
        document = articles if is_article else titles  # [instances, bucket_width]
        word_idxs = document[:, i:i + 1]  # [instances, 1]
    # x_i = self.emb[word_idxs].flatten(ndim=2)  # [instances, embedding_dim]
    input = InputLayer(shape=(None, 1), input_var=word_idxs)
    embed = EmbeddingLayer(input, num_embeddings, embedding_dim)
    gru = GRULayer(incoming=embed, num_units=embedding_dim, hid_init=self.gru0)
    for weight in gru_weights:
        gru = GRULayer(incoming=gru, num_units=embedding_dim, hid_init=weight)
    x_i = get_output(gru).flatten(ndim=2)
    x_i = Print('x_i')(x_i)  # [instances, embedding_dim]
    gru_weights = []

    if is_article:
        M_read = M_a  # [instances, memory_size, n_article_slots]
        w_read = w_a  # [instances, n_article_slots]
    else:
        M_read = T.concatenate([M_a, M_t], axis=2)  # [instances, memory_size, n_title_slots]
        w_read = T.concatenate([w_a, w_t], axis=1)  # [instances, n_title_slots]

    # eqn 15
    c = T.batched_dot(M_read, w_read)  # [instances, memory_size]

    # EXTERNAL MEMORY READ
    def get_attention(Wg, bg, M, w):
        g = T.nnet.sigmoid(T.dot(x_i, Wg) + bg)  # [instances, mem]

        # eqn 11
        k = T.dot(h_tm1, self.Wk) + self.bk  # [instances, memory_size]

        # eqn 13
        beta = T.dot(h_tm1, self.Wb) + self.bb
        beta = T.nnet.softplus(beta)
        beta = T.addbroadcast(beta, 1)  # [instances, 1]

        # eqn 12
        w_hat = T.nnet.softmax(beta * cosine_dist(M, k))

        # eqn 14
        return (1 - g) * w + g * w_hat  # [instances, mem]

    w_a = get_attention(self.Wg_a, self.bg_a, M_a, w_a)  # [instances, n_article_slots]
    if not is_article:
        w_t = get_attention(self.Wg_t, self.bg_t, M_t, w_t)  # [instances, n_title_slots]

    # MODEL INPUT AND OUTPUT
    # eqn 9
    h = T.dot(c, self.Wh) + T.dot(x_i, self.Wx) + self.bh  # [instances, hidden_size]

    # eqn 10
    y = T.nnet.softmax(T.dot(h, self.W) + self.b)  # [instances, nclasses]

    # EXTERNAL MEMORY UPDATE
    def update_memory(We, be, w_update, M_update):
        # eqn 17
        e = T.nnet.sigmoid(T.dot(h_tm1, We) + be)  # [instances, mem]
        f = 1. - w_update * e  # [instances, mem]

        # eqn 16
        v = T.tanh(T.dot(h, self.Wv) + self.bv)  # [instances, memory_size]

        # need to add broadcast layers for memory update
        f = f.dimshuffle(0, 'x', 1)  # [instances, 1, mem]
        u = w_update.dimshuffle(0, 'x', 1)  # [instances, 1, mem]
        v = v.dimshuffle(0, 1, 'x')  # [instances, memory_size, 1]

        # eqn 19
        return M_update * f + T.batched_dot(v, u) * (1 - f)  # [instances, memory_size, mem]

    M_a = update_memory(self.We_a, self.be_a, w_a, M_a)
    attention_and_memory = [w_a, M_a]
    if not is_article:
        M_t = update_memory(self.We_t, self.be_t, w_t, M_t)
        attention_and_memory += [w_t, M_t]

    y_max = y.argmax(axis=1).astype(int32)
    next_idxs = i + 1 if is_training or is_article else y_max
    return [y, y_max, next_idxs, h] + attention_and_memory
def theano_print_shape(var, msg):
    pr = Print(msg)(T.shape(var))
    return T.switch(T.lt(0, 1), var, T.cast(pr[0], var.dtype))
def print_(x, message=''):
    return Print(message)(x)