def normalize_mst_data(__mst_data, avg, std):
    _sAvg = theano.shared(avg.T.astype(fpX)[np.newaxis, :, :, np.newaxis])
    _sStd = theano.shared(std.T.astype(fpX)[np.newaxis, :, :, np.newaxis])
    ### set the broadcastability of the sample axis
    _sAvg = T.patternbroadcast(_sAvg, (True, False, False, False))
    _sStd = T.patternbroadcast(_sStd, (True, False, False, False))
    return (__mst_data - _sAvg) / _sStd, [_sAvg, _sStd]
def grad(self, inp, grads):
    bottom, weights = inp
    top, = grads
    d_bottom = AbstractConv2d_gradInputs(self.imshp, self.kshp,
                                         self.border_mode, self.subsample,
                                         self.filter_flip)(
        weights, top, bottom.shape[-2:])
    d_weights = AbstractConv2d_gradWeights(self.imshp, self.kshp,
                                           self.border_mode, self.subsample,
                                           self.filter_flip)(
        bottom, top, weights.shape[-2:])
    # Make sure that the broadcastable pattern of the inputs is used
    # for the gradients, even if the grad opts are not able to infer
    # that the dimensions are broadcastable.
    # Also make sure that the gradient lives on the same device as
    # the corresponding input.
    d_bottom = patternbroadcast(d_bottom, bottom.broadcastable)
    d_bottom = bottom.type.filter_variable(d_bottom)
    d_weights = patternbroadcast(d_weights, weights.broadcastable)
    d_weights = weights.type.filter_variable(d_weights)
    return d_bottom, d_weights
def apply(self, input_):
    aggregate_axes = [0] + [1 + i for i, b in enumerate(self.broadcastable) if b]
    # NOTE: don't put batch_stats on self because apply may be
    # called multiple times
    batch_stats = dict(
        (stat, getattr(input_, stat)(axis=aggregate_axes, keepdims=True))
        for stat in self.stats)
    for stat, role in self.roles.items():
        graph.add_transform(
            [batch_stats[stat]],
            graph.ConstantTransform(
                # adding zero to ensure it's a TensorType(float32, row)
                # just like the corresponding batch_stat, rather than a
                # CudaNdarray(float32, row). -__-
                0 + T.patternbroadcast(
                    self.population_stats[stat],
                    [True] + self.broadcastable)),
            reason="population_normalization")
        # make the batch statistics identifiable to get_updates() below
        add_role(batch_stats[stat], self.roles[stat])
        batch_stats[stat] = self.annotated_statistic(batch_stats[stat])
    gamma = T.patternbroadcast(self.gamma, [True] + self.broadcastable)
    beta = T.patternbroadcast(self.beta, [True] + self.broadcastable)
    return theano.tensor.nnet.bn.batch_normalization(
        inputs=input_, gamma=gamma, beta=beta,
        mean=batch_stats["mean"],
        std=T.sqrt(batch_stats["var"] + self.epsilon))
def make_normalize_mst_data(_mst_data, nf, nv):
    _sAvg = theano.shared(np.zeros(shape=(1, nf, nv, 1), dtype=fpX))
    _sStd = theano.shared(np.zeros(shape=(1, nf, nv, 1), dtype=fpX))
    return ((_mst_data - T.patternbroadcast(_sAvg, (True, False, False, False)))
            / T.patternbroadcast(_sStd, (True, False, False, False)),
            [_sAvg, _sStd])
def filterbank(center, width, logsigma2, shape):
    assert len(shape) == 3
    batch_size, window_size, image_size = shape
    w = T.patternbroadcast(
        T.arange(window_size, dtype='float32').reshape((1, window_size, 1)),
        [True, False, True],
    )
    i = T.patternbroadcast(
        T.arange(image_size, dtype='float32').reshape((1, 1, image_size)),
        [True, True, False],
    )
    center = T.patternbroadcast(center.reshape((batch_size, 1, 1)),
                                [False, True, True])
    width = T.patternbroadcast(width.reshape((batch_size, 1, 1)),
                               [False, True, True])
    logsigma2 = T.patternbroadcast(logsigma2.reshape((batch_size, 1, 1)),
                                   [False, True, True])
    mu = (image_size - 1) * \
        ((1 + center) / 2 + width * (w / (window_size - 1) - 0.5))
    F = T.exp(-(mu - i)**2 / (2 * T.exp(logsigma2 / 2)))
    F = F / T.maximum(T.sum(F, 2, keepdims=True), 1e-7)
    return F
def grad(self, inp, grads):
    bottom, top = inp[:2]
    weights, = grads
    d_bottom = AbstractConv2d_gradInputs(self.imshp, self.kshp,
                                         self.border_mode, self.subsample,
                                         self.filter_flip)(
        weights, top, bottom.shape[-2:])
    d_top = AbstractConv2d(self.imshp, self.kshp,
                           self.border_mode, self.subsample,
                           self.filter_flip)(bottom, weights)
    # Make sure that the broadcastable pattern of the inputs is used
    # for the gradients, even if the grad opts are not able to infer
    # that the dimensions are broadcastable.
    # Also make sure that the gradient lives on the same device as
    # the corresponding input.
    d_bottom = patternbroadcast(d_bottom, bottom.broadcastable)
    d_bottom = bottom.type.filter_variable(d_bottom)
    d_top = patternbroadcast(d_top, top.broadcastable)
    d_top = top.type.filter_variable(d_top)
    d_height_width = (theano.gradient.DisconnectedType()(),)
    return (d_bottom, d_top) + d_height_width
def squeeze(x, axis):
    '''Remove a 1-dimension from the tensor at index "axis".
    '''
    broadcastable = x.broadcastable[:axis] + x.broadcastable[axis + 1:]
    x = T.patternbroadcast(x, [i == axis for i in range(x.type.ndim)])
    x = T.squeeze(x)
    x = T.patternbroadcast(x, broadcastable)
    return x
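A minimal usage sketch for the helper above; it assumes the function is in scope, and the shapes below are illustrative, not from the original source:

import numpy as np
import theano
import theano.tensor as T

x = T.tensor3('x')      # e.g. (batch, 1, features)
y = squeeze(x, axis=1)  # mark axis 1 as broadcastable, then drop it
f = theano.function([x], y)
assert f(np.zeros((2, 1, 3), dtype=theano.config.floatX)).shape == (2, 3)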
def get_output_for(self, input, **kwargs):
    if input.ndim > 2:
        input = input.flatten(2)
    inputData = input * 10
    inputData.name = 'inputData'
    inputData_reshape = inputData.dimshuffle(0, 'x', 'x', 1)
    inputData_reshape.name = 'inputData_reshape'
    inputData_reshape = T.patternbroadcast(inputData_reshape,
                                           (False, True, True, False))
    # mean_reshape has dimension: (1, NumofClass, NumofComponent, p)
    mean_reshape = self._means.dimshuffle('x', 0, 1, 2)
    mean_reshape = T.patternbroadcast(mean_reshape, (True, False, False, False))
    mean_reshape.name = 'mean_reshape'
    # self.sigma = nonlinearities.rectify(self.sigma) + T.ones_like(self.sigma)
    sigma = T.exp(self.sigma)
    sigma_reshape = sigma.dimshuffle('x', 0, 1, 2)
    sigma_reshape = T.patternbroadcast(sigma_reshape, (True, False, False, False))
    sigma_reshape.name = 'sigma_reshape'
    # self.weights = nonlinearities.rectify(self.weights) + 1e-16
    weights = T.exp(self.weights)
    weights_sum = T.sum(weights, axis=1)
    weights_sum = T.patternbroadcast(weights_sum.dimshuffle(0, 'x'), (False, True))
    weights = weights / weights_sum
    weights_reshape = weights.dimshuffle('x', 0, 1)
    weights_reshape = T.patternbroadcast(weights_reshape, (True, False, False))
    weights_reshape.name = 'weights_reshape'
    sigma_inverse_sqrt = T.sqrt(1.0 / sigma_reshape)
    sigma_inverse_sqrt.name = 'sigma_inverse_sqrt'
    # positive:
    sqrtTemp = T.sqr((inputData_reshape - mean_reshape) * sigma_inverse_sqrt).sum(axis=3)
    # negative: 784 * log(sigma) ? sigma = 0.1 -> -1805, else positive.
    sigmaTemp = T.log(sigma_reshape).sum(axis=3)
    # positive: 28x28 dimension, then we have 784 * log(2*pi) = 1440
    dimTemp = T.ones((self.num_models, self.num_components), 'float32') * self.dim * T.log(2.0 * np.pi)
    logComponentOutput = -1.0 / 2 * (sqrtTemp + sigmaTemp + dimTemp)
    # logComponentOutput = -1.0 / 2 * sqrtTemp
    logComponentOutput.name = 'logComponentOutput'
    logComponentSum = logComponentOutput + T.log(weights_reshape)
    logComponentSum.name = 'logComponentSum'
    logComponentSum_max = logComponentSum.max(axis=2)
    logComponentSum_max_reshape = logComponentSum_max.dimshuffle(0, 1, 'x')
    componentSum_before = T.exp(logComponentSum - logComponentSum_max_reshape)
    componentSum_before_sum = componentSum_before.sum(axis=2)
    addLog = T.log(componentSum_before_sum + T.ones_like(componentSum_before_sum)) + logComponentSum_max
    # return logComponentOutput, sqrtTemp, sigmaTemp, dimTemp, logComponentSum,
    #        logComponentSum_max_reshape, componentSum_before, addLog
    return addLog
def _train_fprop(self, state_below):
    if self.layer_type == "fc":
        miu = state_below.mean(axis=0)
        var = T.mean((state_below - miu) ** 2, axis=0)
    elif self.layer_type == "conv":
        miu = state_below.mean(axis=(0, 2, 3), keepdims=True)
        var = T.mean((state_below - miu) ** 2, axis=(0, 2, 3), keepdims=True)
    self.moving_mean = self.mem * miu + (1 - self.mem) * self.moving_mean
    self.moving_var = self.mem * var + (1 - self.mem) * self.moving_var
    Z = (state_below - self.moving_mean) / T.sqrt(self.moving_var + self.epsilon)
    gamma = T.patternbroadcast(self.gamma, self.broadcastable)
    beta = T.patternbroadcast(self.beta, self.broadcastable)
    return gamma * Z + beta
def f(W_0, W_1):
    index = 0
    d = {
        b_layers[0].W: T.patternbroadcast(W_0, (False, False, False, False)),
        b_layers[1].W: T.patternbroadcast(W_1, (False, False, False, False)),
        b_x: train_set_x_b[index * batch_size:(index + 1) * batch_size],
        y: train_set_y[index * batch_size:(index + 1) * batch_size]
    }
    return theano.clone(b_cost, d)
def compute_output(self, network, in_vw):
    # gather hyperparameters
    initial_alpha = network.find_hyperparameter(["initial_alpha"], 0.25)
    # calculate shape
    ndim = in_vw.ndim
    parameter_axes = treeano.utils.find_axes(
        network,
        ndim,
        positive_keys=["parameter_axes"],
        negative_keys=["non_parameter_axes"],
        positive_default=[treeano.utils.nth_non_batch_axis(network, 0)])
    broadcastable = tuple([i not in parameter_axes for i in range(ndim)])
    shape = tuple([1 if b else s for b, s in zip(broadcastable, in_vw.shape)])
    # create state
    alpha_vw = network.create_vw(
        "alpha",
        is_shared=True,
        shape=shape,
        tags={"parameter", "bias"},
        default_inits=[treeano.inits.ConstantInit(initial_alpha)],
    )
    alpha = T.patternbroadcast(alpha_vw.variable, broadcastable)
    # return output
    network.create_vw(
        "default",
        variable=treeano.utils.rectify(in_vw.variable,
                                       negative_coefficient=alpha),
        shape=in_vw.shape,
        tags={"output"},
    )
def random_node(old):
    """Creates random node with shared params

    Parameters
    ----------
    old : pm.FreeRV

    Returns
    -------
    tuple : (new node, shared mu, shared rho)
    """
    if len(old.broadcastable) > 0:
        rho = theano.shared(
            np.ones(old.tag.test_value.shape),
            name='{}_rho_shared'.format(old.name),
            broadcastable=old.broadcastable)
        mu = theano.shared(
            old.tag.test_value,
            name='{}_mu_shared'.format(old.name),
            broadcastable=old.broadcastable)
        e = tt.patternbroadcast(
            tt_rng().normal(rho.shape),
            old.broadcastable)
    else:
        rho = theano.shared(
            np.ones(old.tag.test_value.shape),
            name='{}_rho_shared'.format(old.name))
        mu = theano.shared(
            old.tag.test_value,
            name='{}_mu_shared'.format(old.name))
        e = tt_rng().normal(rho.shape)
    return mu + rho2sd(rho) * e, mu, rho
def local_gpualloc(node):
    replace = False
    if node.op == tensor.alloc:
        if node.inputs[0].owner and node.inputs[0].owner.op == host_from_gpu:
            replace = True
        elif all([c != 'output' and c.op == gpu_from_host
                  for c, idx in node.outputs[0].clients]):
            replace = True
        elif all([c != 'output' and c.op == tensor.join and
                  all([i.owner and i.owner.op in [host_from_gpu, tensor.alloc]
                       for i in c.inputs[1:]])
                  for c, idx in node.outputs[0].clients]):
            replace = True
    if replace:
        val = node.inputs[0]
        shp = node.inputs[1:]
        old_out = node.outputs[0]
        val2 = tensor.shape_padleft(val, len(shp) - val.ndim)
        new_out = host_from_gpu(gpu_alloc(val, *shp))
        if new_out.type != old_out.type:
            assert new_out.type.ndim == old_out.type.ndim
            assert new_out.type.dtype == old_out.type.dtype
            for b_old, b_new in zip(old_out.type.broadcastable,
                                    new_out.type.broadcastable):
                assert b_new or (not b_old)
            new_out = tensor.patternbroadcast(new_out, old_out.broadcastable)
        return [new_out]
def apply_dropout(self, input, const=0):
    # Using theano constant to prevent upcasting
    one = T.constant(1)
    if self.rescale:
        input /= self.q
    # use nonsymbolic shape for dropout mask if possible
    mask_shape = self.input_shape
    if any(s is None for s in mask_shape):
        mask_shape = input.shape
    # apply dropout, respecting shared axes
    if self.shared_axes:
        shared_axes = tuple(a if a >= 0 else a + input.ndim
                            for a in self.shared_axes)
        mask_shape = tuple(1 if a in shared_axes else s
                           for a, s in enumerate(mask_shape))
    mask = self._srng.binomial(mask_shape, p=self.q, dtype=input.dtype)
    if self.shared_axes:
        bcast = tuple(bool(s == 1) for s in mask_shape)
        mask = T.patternbroadcast(mask, bcast)
    if const != 0:
        return (input * mask) + (const * (T.constant(1) - mask))
    else:
        return input * mask
def __init__(
    self,
    data,
    batch_size=128,
    dtype=None,
    broadcastable=None,
    name="Minibatch",
    random_seed=42,
    update_shared_f=None,
    in_memory_size=None,
):
    if dtype is None:
        data = pm.smartfloatX(np.asarray(data))
    else:
        data = np.asarray(data, dtype)
    in_memory_slc = self.make_static_slices(in_memory_size)
    self.shared = theano.shared(data[in_memory_slc])
    self.update_shared_f = update_shared_f
    self.random_slc = self.make_random_slices(self.shared.shape, batch_size,
                                              random_seed)
    minibatch = self.shared[self.random_slc]
    if broadcastable is None:
        broadcastable = (False,) * minibatch.ndim
    minibatch = tt.patternbroadcast(minibatch, broadcastable)
    self.minibatch = minibatch
    super().__init__(self.minibatch.type, None, None, name=name)
    theano.Apply(theano.compile.view_op, inputs=[self.minibatch], outputs=[self])
    self.tag.test_value = copy(self.minibatch.tag.test_value)
def grad(self, inp, grads):
    weights, top = inp[:2]
    bottom, = grads
    d_weights = AbstractConv2d_gradWeights(
        self.imshp, self.kshp, self.border_mode, self.subsample)(
        bottom, top, weights.shape[-2:])
    d_top = AbstractConv2d(self.imshp, self.kshp, self.border_mode,
                           self.subsample)(bottom, weights)
    # Make sure that the broadcastable pattern of the inputs is used
    # for the gradients, even if the grad opts are not able to infer
    # that the dimensions are broadcastable.
    d_weights = patternbroadcast(d_weights, weights.broadcastable)
    d_top = patternbroadcast(d_top, top.broadcastable)
    d_height_width = (theano.gradient.DisconnectedType()(),)
    return (d_weights, d_top) + d_height_width
def Recurrent(name, hidden_dims, step_fn, inputs, non_sequences=[], h0s=None):
    if not isinstance(inputs, list):
        inputs = [inputs]
    if not isinstance(hidden_dims, list):
        hidden_dims = [hidden_dims]
    if h0s is None:
        h0s = [None] * len(hidden_dims)
    for i in xrange(len(hidden_dims)):
        if h0s[i] is None:
            h0_unbatched = lib.param(
                name + '.h0_' + str(i),
                numpy.zeros((hidden_dims[i],), dtype=theano.config.floatX)
            )
            num_batches = inputs[0].shape[1]
            h0s[i] = T.alloc(h0_unbatched, num_batches, hidden_dims[i])
        h0s[i] = T.patternbroadcast(h0s[i], [False] * h0s[i].ndim)
    outputs, _ = theano.scan(
        step_fn,
        sequences=inputs,
        outputs_info=h0s,
        non_sequences=non_sequences
    )
    return outputs
def __init__(self, input_shape, fantasy_particles=1, n_cd=1, reset_pps_int=-1,
             **kwargs):
    CostCD.__init__(self, **kwargs)
    Persistent.__init__(self, reset_pps_int)
    self.pps_shape = [fantasy_particles] + list(input_shape)
    """Initialize Fantasy particles """
    if len(self.pps_shape) > 2 and (self.pps_shape[3] is None or
                                    self.pps_shape[2] is None):
        raise NotImplementedError("PCD cannot yet deal with dynamic "
                                  "dimension lengths. Hint: Use fixed "
                                  "length training and dynamic length "
                                  "sampling.")
    else:
        self.pps = theano.shared(np.cast[fx](np.random.uniform(
            0, 1, self.pps_shape)), borrow=True)
    if self.pps.broadcastable != self.gibbs_step_(self.pps).broadcastable:
        rebroadcast = T.patternbroadcast(self.gibbs_step_(self.pps),
                                         self.pps.broadcastable)
    else:
        rebroadcast = self.gibbs_step_(self.pps)
    self.pps_gibbs_step = self.pps_gibbs_step_fun(rebroadcast)
    pps_input_step = partial(self.pps_gibbs_step, self.pps.get_value())
    for _ in range(n_cd):
        self.callback_add(pps_input_step, Notifier.BATCH_FINISHED)
def dropout(x, level, noise_shape=None, seed=None):
    '''Sets entries in `x` to zero at random, while scaling the entire tensor.

    # Arguments
        x: tensor
        level: fraction of the entries in the tensor that will be set to 0.
        noise_shape: shape for randomly generated keep/drop flags,
            must be broadcastable to the shape of `x`
        seed: random seed to ensure determinism.
    '''
    if level < 0. or level >= 1:
        raise Exception('Dropout level must be in interval [0, 1[.')
    if seed is None:
        seed = np.random.randint(1, 10e6)
    rng = RandomStreams(seed=seed)
    retain_prob = 1. - level
    if noise_shape is None:
        random_tensor = rng.binomial(x.shape, p=retain_prob, dtype=x.dtype)
    else:
        random_tensor = rng.binomial(noise_shape, p=retain_prob, dtype=x.dtype)
        random_tensor = T.patternbroadcast(random_tensor,
                                           [dim == 1 for dim in noise_shape])
    x *= random_tensor
    x /= retain_prob
    return x
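A small usage sketch for the function above, sharing one dropout mask across the time axis of a (batch, time, features) tensor; the concrete sizes are illustrative assumptions, and the helper is assumed to be in scope:

import numpy as np
import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams

x = T.tensor3('x')
# a 1 in the time axis of noise_shape makes the mask broadcast over time
y = dropout(x, level=0.5, noise_shape=(32, 1, 100), seed=1234)
f = theano.function([x], y)
out = f(np.ones((32, 20, 100), dtype=theano.config.floatX))
assert out.shape == (32, 20, 100)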
def get_output_for(self, input, deterministic=False, **kwargs):
    if deterministic or self.p == 0:
        return input
    else:
        # Using theano constant to prevent upcasting
        one = T.constant(1, dtype='int8')
        retain_prob = one - self.p
        if self.rescale:
            input /= retain_prob
        # use nonsymbolic shape for dropout mask if possible
        mask_shape = self.input_shape
        if any(s is None for s in mask_shape):
            mask_shape = input.shape
        # apply dropout, respecting shared axes
        if self.shared_axes:
            shared_axes = tuple(a if a >= 0 else a + input.ndim
                                for a in self.shared_axes)
            mask_shape = tuple(1 if a in shared_axes else s
                               for a, s in enumerate(mask_shape))
        mask = self._srng.binomial(mask_shape, p=retain_prob,
                                   dtype=input.dtype)
        if self.shared_axes:
            bcast = tuple(bool(s == 1) for s in mask_shape)
            mask = T.patternbroadcast(mask, bcast)
        return input * mask
def get_output_for(self, input, deterministic=False, **kwargs):
    if deterministic or self.p == 0:
        return input
    else:
        # Using theano constant to prevent upcasting
        one = T.constant(1)
        retain_prob = one - self.p
        if self.rescale:
            input /= retain_prob
        # use nonsymbolic shape for dropout mask if possible
        mask_shape = self.input_shape
        if any(s is None for s in mask_shape):
            mask_shape = input.shape
        # apply dropout, respecting shared axes
        if self.shared_axes:
            shared_axes = tuple(a if a >= 0 else a + input.ndim
                                for a in self.shared_axes)
            mask_shape = tuple(1 if a in shared_axes else s
                               for a, s in enumerate(mask_shape))
        mask = self._srng.binomial(mask_shape, p=retain_prob,
                                   dtype=input.dtype)
        if self.shared_axes:
            bcast = tuple(bool(s == 1) for s in mask_shape)
            mask = T.patternbroadcast(mask, bcast)
        return input * mask
def Recurrence(processed_frames, h0, reset):
    """
    processed_frames.shape: (batch size, n frames, DIM)
    h0.shape: (batch size, N_GRUS, DIM)
    reset.shape: ()
    output.shape: (batch size, n frames, DIM)
    """
    # print "warning no recurrence"
    # return T.zeros_like(processed_frames), h0
    learned_h0 = lib.param(
        'Recurrence.h0',
        numpy.zeros((N_GRUS, DIM), dtype=theano.config.floatX))
    learned_h0 = T.alloc(learned_h0, h0.shape[0], N_GRUS, DIM)
    learned_h0 = T.patternbroadcast(learned_h0, [False] * learned_h0.ndim)
    h0 = theano.ifelse.ifelse(reset, learned_h0, h0)
    gru0 = lib.ops.LowMemGRU('Recurrence.GRU0', DIM, DIM, processed_frames,
                             h0=h0[:, 0])
    grus = [gru0]
    for i in xrange(1, N_GRUS):
        gru = lib.ops.LowMemGRU('Recurrence.GRU' + str(i), DIM, DIM, grus[-1],
                                h0=h0[:, i])
        grus.append(gru)
    last_hidden = T.stack([gru[:, -1] for gru in grus], axis=1)
    return (grus[-1], last_hidden)
def local_conv_dnn_alternative(node):
    if not dnn_available():
        return
    if isinstance(node.op, GpuConv):
        border_mode = node.op.border_mode
        subsample = node.op.subsample
        if border_mode not in ['full', 'valid'] or subsample != (1, 1):
            return
        img, kern = node.inputs
        direction_hint = node.op.direction_hint
        if border_mode == 'full':
            # for a full convolution, try using the forward pass instead
            # of the backward pass wrt. inputs
            direction_hint = 'forward!'
        elif border_mode == 'valid':
            # for a valid convolution, try using the backward pass wrt.
            # weights instead of the forward pass and vice versa
            if direction_hint == 'bprop weights':
                direction_hint = 'forward'
            else:
                direction_hint = 'bprop weights'
        rval = dnn_conv(img, kern,
                        border_mode=border_mode, subsample=subsample,
                        direction_hint=direction_hint)
        if node.outputs[0].broadcastable != rval.broadcastable:
            rval = tensor.patternbroadcast(
                rval, node.outputs[0].type.broadcastable)
        return [rval]
def __init__(self, input, input_shape=None):
    if isinstance(input, Layer):
        self.input = input.output
        Layer.linkstruct[input].append(self)
        if input_shape == None:
            input_shape = input.output_shape
    else:
        self.input = input
    self.input_shape = input_shape
    # Only square image allowed
    assert input_shape[2] == input_shape[3]
    # Extend one pixel at each direction
    shapeext = (input_shape[0], input_shape[1],
                input_shape[2] + 2, input_shape[3] + 2)
    inputext = CachedAlloc(dtypeX(-INF), *shapeext)
    inputext = T.set_subtensor(
        inputext[:, :, 1:input_shape[2] + 1, 1:input_shape[3] + 1],
        self.input)
    self.output_shape = (input_shape[0], input_shape[1],
                         (input_shape[2] + 1) / 2, (input_shape[3] + 1) / 2)
    self.output = images2neibs(inputext, (3, 3), (2, 2),
                               'ignore_borders').mean(axis=-1)
    self.output = T.patternbroadcast(
        self.output.reshape(self.output_shape), (False,) * 4)
def forward(self, inputtensor):
    if self.deterministic or self.p == 0:
        return inputtensor
    else:
        x = inputtensor[0]
        # Using theano constant to prevent upcasting
        one = T.constant(1)
        retain_prob = one - self.p
        if self.rescale:
            x /= retain_prob
        mask_shape = x.shape
        # apply dropout, respecting shared axes
        if self.shared_axes:
            shared_axes = tuple(a if a >= 0 else a + x.ndim
                                for a in self.shared_axes)
            mask_shape = tuple(1 if a in shared_axes else s
                               for a, s in enumerate(mask_shape))
        mask = self._srng.binomial(mask_shape, p=retain_prob, dtype=x.dtype)
        if self.shared_axes:
            bcast = tuple(bool(s == 1) for s in mask_shape)
            mask = T.patternbroadcast(mask, bcast)
        x = x * mask
        return (x,)
def grad(self, inp, grads):
    bottom, weights = inp
    top, = grads
    d_bottom = AbstractConv2d_gradInputs(
        self.imshp, self.kshp, self.border_mode, self.subsample,
        self.filter_flip)(weights, top, bottom.shape[-2:])
    d_weights = AbstractConv2d_gradWeights(
        self.imshp, self.kshp, self.border_mode, self.subsample,
        self.filter_flip)(bottom, top, weights.shape[-2:])
    # Make sure that the broadcastable pattern of the inputs is used
    # for the gradients, even if the grad opts are not able to infer
    # that the dimensions are broadcastable.
    d_bottom = patternbroadcast(d_bottom, bottom.broadcastable)
    d_weights = patternbroadcast(d_weights, weights.broadcastable)
    return d_bottom, d_weights
def createGradientFunctions(self):
    # create
    X = T.dmatrices("X")
    mu, logSigma, u, v, f, R = T.dcols("mu", "logSigma", "u", "v", "f", "R")
    mu = sharedX(np.random.normal(10, 10, (self.dimTheta, 1)), name='mu')
    logSigma = sharedX(np.random.uniform(0, 4, (self.dimTheta, 1)),
                       name='logSigma')
    logLambd = sharedX(np.matrix(np.random.uniform(0, 10)), name='logLambd')
    logLambd = T.patternbroadcast(T.dmatrix("logLambd"), [1, 1])
    negKL = 0.5 * T.sum(1 + 2 * logSigma - mu ** 2 - T.exp(logSigma) ** 2)
    theta = mu + T.exp(logSigma) * v
    W = theta
    y = X[:, 0]
    X_sim = X[:, 1:]
    f = (T.dot(X_sim, W) + u).flatten()
    gradvariables = [mu, logSigma, logLambd]
    logLike = T.sum(-(0.5 * np.log(2 * np.pi) + logLambd)
                    - 0.5 * ((y - f) / (T.exp(logLambd))) ** 2)
    logp = (negKL + logLike) / self.m
    optimizer = -logp
    self.negKL = th.function([mu, logSigma], negKL, on_unused_input='ignore')
    self.f = th.function(gradvariables + [X, u, v], f,
                         on_unused_input='ignore')
    self.logLike = th.function(gradvariables + [X, u, v], logLike,
                               on_unused_input='ignore')
    derivatives = T.grad(logp, gradvariables)
    derivatives.append(logp)
    self.gradientfunction = th.function(gradvariables + [X, u, v],
                                        derivatives, on_unused_input='ignore')
    self.lowerboundfunction = th.function(gradvariables + [X, u, v], logp,
                                          on_unused_input='ignore')
    self.optimizer = BatchGradientDescent(objective=optimizer,
                                          params=gradvariables,
                                          inputs=[X, u, v],
                                          conjugate=True, max_iter=1)
def test_local_dimshuffle_subtensor():
    dimshuffle_subtensor = out2in(local_dimshuffle_subtensor)

    x = tensor.dtensor4("x")
    x = tensor.patternbroadcast(x, (False, True, False, False))
    i = tensor.iscalar("i")
    out = x[:, :, 10:30, ::i].dimshuffle(0, 2, 3)
    g = FunctionGraph([x, i], [out])
    dimshuffle_subtensor(g)
    topo = g.toposort()
    assert any([not isinstance(x, DimShuffle) for x in topo])

    # Test that dimshuffle removes dimensions the subtensor doesn't "see".
    x = tensor.tensor(broadcastable=(False, True, False), dtype="float64")
    out = x[i].dimshuffle(1)
    g = FunctionGraph([x, i], [out])
    dimshuffle_subtensor(g)
    topo = g.toposort()
    assert any([not isinstance(x, DimShuffle) for x in topo])

    # Test that dimshuffle removes dimensions the subtensor doesn't "see",
    # with other dimensions in between.
    x = tensor.tensor(broadcastable=(False, True, False, True), dtype="float64")
    out = x[i].dimshuffle(1)
    f = theano.function([x, i], out)
    topo = f.maker.fgraph.toposort()
    assert any([not isinstance(x, DimShuffle) for x in topo])
    assert f(np.random.rand(5, 1, 4, 1), 2).shape == (4,)

    # Test a corner case that previously triggered a bug in Theano.
    x = tensor.dtensor4("x")
    x = tensor.patternbroadcast(x, (False, True, False, False))
    assert x[:, :, 0:3, ::-1].dimshuffle(0, 2, 3).eval(
        {x: np.ones((5, 1, 6, 7))}).shape == (5, 3, 7)
def _step(tensor):
    tensor._keras_shape = (batch_size, 1, input_dim)
    # tensor._uses_learning_phase = x._uses_learning_phase
    tensor._uses_learning_phase = False  # TODO: should this be hard-coded?
    output = self.model(tensor)
    for layer in self.layers:
        layer.initial_state = layer.final_states
    output = T.patternbroadcast(output, tensor.broadcastable)
    return output, self.feedback_function(output)
def euclidean_distance_angles_biwi(y_true, y_pred):
    diff = y_pred - y_true
    weights = theano.shared(
        np.expand_dims(3 * np.array([0.2, 0.35, 0.45]), axis=0))
    weights = T.patternbroadcast(weights, (True, False))
    diff = diff * weights
    return K.sqrt(K.sum(K.square(diff), axis=-1, keepdims=True))
def Recurrent(name, hidden_dims, step_fn, inputs, non_sequences=[], h0s=None,
              reset=None):
    if not isinstance(inputs, list):
        inputs = [inputs]
    if not isinstance(hidden_dims, list):
        hidden_dims = [hidden_dims]
    if h0s is None:
        h0s = [None] * len(hidden_dims)
    for i in xrange(len(hidden_dims)):
        if h0s[i] is None:
            h0_unbatched = lib.param(
                name + '.h0_' + str(i),
                np.zeros((hidden_dims[i],), dtype=theano.config.floatX)
            )
            num_batches = inputs[0].shape[1]
            h0s[i] = T.alloc(h0_unbatched, num_batches, hidden_dims[i])
        h0s[i] = T.patternbroadcast(h0s[i], [False] * h0s[i].ndim)
    if reset is not None:
        last_hiddens = []
        for i in xrange(len(h0s)):
            # The shape of last_hidden doesn't matter right now; we assume
            # it won't be used until we put something proper in it.
            last_hidden = theano.shared(
                np.zeros([1] * h0s[i].ndim, dtype=h0s[i].dtype),
                name=name + '.last_hidden_' + str(i)
            )
            last_hiddens.append(last_hidden)
            h0s[i] = theano.ifelse.ifelse(reset, h0s[i], last_hidden)
    outputs, _ = theano.scan(
        step_fn,
        sequences=inputs,
        outputs_info=h0s,
        non_sequences=non_sequences
    )
    if reset is not None:
        if len(last_hiddens) == 1:
            last_hiddens[0].default_update = outputs[-1]
        else:
            for i in xrange(len(last_hiddens)):
                last_hiddens[i].default_update = outputs[i][-1]
    return outputs
def f(W_real):
    index = 0
    d = {
        W: T.patternbroadcast(W_real, (False, False, False, False))
    }
    return theano.clone(expr, d)
def get_theano_variables(self, inputs=None, outputs=None):
    """
    Returns a dict containing inputs, outputs and graph corresponding to
    the Theano version of the pyfn.

    This version of the function returns a single vector input.
    """
    inputs = utils.as_seq(inputs, tuple)
    outputs = utils.as_seq(outputs, tuple)

    if inputs:
        sym_inputs = [self.get_symbolic(x) for x in inputs]
    else:
        sym_inputs = self.s_inputs.values()

    if outputs:
        sym_outputs = [self.get_symbolic(x) for x in outputs]
    else:
        sym_outputs = self.s_outputs.values()

    if len(sym_outputs) > 1:
        raise ValueError(
            'VectorArg functions should return a single output.')

    # get symbolic inputs corresponding to shared inputs in s_inputs
    s_memo = OrderedDict()
    sym_args = utils.flat_from_doc(sym_inputs)
    real_args = utils.flat_from_doc(self.all_init_args)

    # create a symbolic vector, then split it up into symbolic input args
    inputs_dtype = self.vector_from_args(self.all_init_args).dtype
    theano_input = tt.vector(name='theta', dtype=inputs_dtype)

    i = 0
    for sa, ra in zip(sym_args, real_args):
        if sa.ndim > 0:
            vector_arg = theano_input[i:i + ra.size].reshape(ra.shape)
        else:
            vector_arg = theano_input[i]
        s_memo[sa] = tt.patternbroadcast(
            vector_arg.astype(str(sa.dtype)),
            broadcastable=sa.broadcastable)
        i += ra.size

    # get new graph, replacing shared inputs with symbolic ones
    graph = theano.gof.graph.clone_get_equiv(
        theano.gof.graph.inputs(sym_outputs),
        sym_outputs,
        memo=s_memo.copy())

    # get symbolic outputs
    theano_outputs = graph[sym_outputs[0]]

    f_in, f_out = self.finalize(theano_input, theano_outputs, graph)
    return f_in, f_out, graph
def limit_param_norms(parameter_updater, param, max_norm, input_axes):
    '''
    Modifies the update of an SgdParameterUpdater to limit param L2 norms.

    Parameter norms are computed by summing over the provided input_axes.
    These are so named because you typically want to sum over the axes
    that get dotted with the input to the node (e.g. input_axes=[0] for
    Linear, input_axes=[1, 2, 3] for Conv2D).

    Parameters
    ----------
    parameter_updater: simplelearn.training.ParameterUpdater
      The parameter updater whose updates this will modify.

    param: theano shared variable
      The parameter being updated by parameter_updater.

      (No way to get this from SgdParameterUpdater at present; it updates
      the parameter and its velocity, and there's no way to safely
      distinguish them in parameter_updates.update_pairs.)

    max_norm: floating-point scalar
      The maximum L2 norm to be permitted for the parameters.

    input_axes: Sequence
      A Sequence of ints. The indices to sum over when computing the
      L2 norm of the updated params.
    '''
    assert_is_instance(parameter_updater, ParameterUpdater)
    assert_in(param, parameter_updater.update_pairs)
    assert_floating(max_norm)
    assert_greater(max_norm, 0.0)
    assert_greater(len(input_axes), 0)
    assert_all_integer(input_axes)
    assert_all_greater_equal(input_axes, 0)
    assert_all_less(input_axes, param.ndim)

    input_axes = numpy.asarray(input_axes)
    updated_param = parameter_updater.update_pairs[param]
    norms = T.sqrt(T.sum(T.sqr(updated_param), axis=input_axes,
                         keepdims=True))
    desired_norms = T.clip(norms, 0, max_norm)

    broadcast_mask = numpy.zeros(param.ndim, dtype=bool)
    broadcast_mask[input_axes] = True
    scales = T.patternbroadcast(desired_norms / (1e-7 + norms),
                                broadcast_mask)
    parameter_updater.update_pairs[param] = updated_param * scales
def big_frame_level_rnn(input_sequences, h0, reset):
    """
    input_sequences.shape: (batch size, n big frames * BIG_FRAME_SIZE)
    h0.shape: (batch size, N_BIG_GRUS, BIG_DIM)
    reset.shape: ()
    output[0].shape: (batch size, n frames, DIM)
    output[1].shape: same as h0.shape
    output[2].shape: (batch size, seq len, Q_LEVELS)
    """
    learned_h0 = lib.param(
        'BigFrameLevel.h0',
        numpy.zeros((N_BIG_GRUS, BIG_DIM), dtype=theano.config.floatX))
    learned_h0 = T.alloc(learned_h0, h0.shape[0], N_BIG_GRUS, BIG_DIM)
    learned_h0 = T.patternbroadcast(learned_h0, [False] * learned_h0.ndim)
    h0 = theano.ifelse.ifelse(reset, learned_h0, h0)

    frames = input_sequences.reshape((
        input_sequences.shape[0],
        input_sequences.shape[1] / BIG_FRAME_SIZE,
        BIG_FRAME_SIZE))

    # Rescale frames from ints in [0, Q_LEVELS) to floats in [-2, 2]
    # (a reasonable range to pass as inputs to the RNN)
    frames = (frames.astype('float32') / lib.floatX(Q_LEVELS / 2)) - lib.floatX(1)
    frames *= lib.floatX(2)

    gru0 = lib.ops.LowMemGRU('BigFrameLevel.GRU0', BIG_FRAME_SIZE, BIG_DIM,
                             frames, h0=h0[:, 0])
    grus = [gru0]
    for i in xrange(1, N_BIG_GRUS):
        gru = lib.ops.LowMemGRU('BigFrameLevel.GRU' + str(i), BIG_DIM,
                                BIG_DIM, grus[-1], h0=h0[:, i])
        grus.append(gru)

    output = lib.ops.Linear('BigFrameLevel.Output', BIG_DIM,
                            DIM * BIG_FRAME_SIZE / FRAME_SIZE, grus[-1])
    output = output.reshape((output.shape[0],
                             output.shape[1] * BIG_FRAME_SIZE / FRAME_SIZE,
                             DIM))

    last_hidden = T.stack([gru[:, -1] for gru in grus], axis=1)

    independent_preds = lib.ops.Linear('BigFrameLevel.IndependentPreds',
                                       BIG_DIM, Q_LEVELS * BIG_FRAME_SIZE,
                                       grus[-1])
    independent_preds = independent_preds.reshape(
        (independent_preds.shape[0],
         independent_preds.shape[1] * BIG_FRAME_SIZE,
         Q_LEVELS))

    return (output, last_hidden, independent_preds)
def local_gpuaalloc(node):
    new_out = gpu_alloc(*node.inputs)
    # We need to hide new broadcastable dimensions because
    # ReplaceValidate doesn't like when they change.
    if new_out.broadcastable != node.outputs[0].broadcastable:
        # but if a dim is suddenly not broadcastable anymore then that's a bug
        for b_old, b_new in zip(node.outputs[0].broadcastable,
                                new_out.broadcastable):
            assert b_new or (not b_old)
        new_out = tensor.patternbroadcast(new_out,
                                          node.outputs[0].broadcastable)
    return (new_out,)
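The same trick in isolation, as a hedged sketch: when a rewritten variable comes out "more broadcastable" than the node it replaces, patternbroadcast restores the original, less specific pattern so the replacement still type-checks. The variables below are illustrative, not taken from the optimizer above:

import numpy as np
import theano.tensor as tensor

a = tensor.alloc(np.float32(0), 1, 5)           # broadcastable: (True, False)
assert a.broadcastable == (True, False)
b = tensor.patternbroadcast(a, (False, False))  # hide the new broadcastable dim
assert b.broadcastable == (False, False)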
def make_functions(self):
    for param, update in self.updates.items():
        if param.broadcastable != update.broadcastable:
            self.updates[param] = T.patternbroadcast(update,
                                                     param.broadcastable)
    self.train_func = theano.function(inputs=self.inputs,
                                      outputs=self.train_outputs,
                                      updates=self.updates)
    self.valid_func = theano.function(inputs=self.inputs,
                                      outputs=self.valid_outputs)
def frame_level_rnn(input_sequences, other_input, h0, reset):
    """
    input_sequences.shape: (batch size, n frames * FRAME_SIZE)
    other_input.shape: (batch size, n frames, DIM)
    h0.shape: (batch size, N_GRUS, DIM)
    reset.shape: ()
    output.shape: (batch size, n frames * FRAME_SIZE, DIM)
    """
    learned_h0 = lib.param(
        'FrameLevel.h0',
        numpy.zeros((N_GRUS, DIM), dtype=theano.config.floatX))
    learned_h0 = T.alloc(learned_h0, h0.shape[0], N_GRUS, DIM)
    learned_h0 = T.patternbroadcast(learned_h0, [False] * learned_h0.ndim)
    h0 = theano.ifelse.ifelse(reset, learned_h0, h0)

    frames = input_sequences.reshape((
        input_sequences.shape[0],
        input_sequences.shape[1] / FRAME_SIZE,
        FRAME_SIZE))

    # Rescale frames from ints in [0, Q_LEVELS) to floats in [-2, 2]
    # (a reasonable range to pass as inputs to the RNN)
    frames = (frames.astype('float32') / lib.floatX(Q_LEVELS / 2)) - lib.floatX(1)
    frames *= lib.floatX(2)

    gru_input = lib.ops.Linear('FrameLevel.InputExpand', FRAME_SIZE, DIM,
                               frames) + other_input

    gru0 = lib.ops.LowMemGRU('FrameLevel.GRU0', DIM, DIM, gru_input,
                             h0=h0[:, 0])
    grus = [gru0]
    for i in xrange(1, N_GRUS):
        gru = lib.ops.LowMemGRU('FrameLevel.GRU' + str(i), DIM, DIM, grus[-1],
                                h0=h0[:, i])
        grus.append(gru)

    output = lib.ops.Linear('FrameLevel.Output', DIM, FRAME_SIZE * DIM,
                            grus[-1], initialization='he')
    output = output.reshape((output.shape[0],
                             output.shape[1] * FRAME_SIZE,
                             DIM))

    last_hidden = T.stack([gru[:, -1] for gru in grus], axis=1)

    return (output, last_hidden)
def unflatten(flatarr, shapes, symb_arrs):
    arrs = []
    n = 0
    for (shape, symb_arr) in zip(shapes, symb_arrs):
        size = np.prod(list(shape))
        arr = flatarr[n:n + size].reshape(shape)
        if arr.type.broadcastable != symb_arr.type.broadcastable:
            arr = TT.patternbroadcast(arr, symb_arr.type.broadcastable)
        arrs.append(arr)
        n += size
    return arrs
def apply(self, input_, application_call):
    """Apply the linear transformation followed by masking with noise.

    Parameters
    ----------
    input_ : :class:`~tensor.TensorVariable`
        The input on which to apply the transformations

    Returns
    -------
    output : :class:`~tensor.TensorVariable`
        The transformed input
    """
    # When not in training mode, turn off noise
    if not self._training_mode:
        return input_
    if self.tied_sigma:
        average = tensor.shape_padright(self.flatten.apply(input_), 2)
        noise_level = (self.prior_noise_level -
                       tensor.clip(self.mask.apply(average), -16, 16))
        noise_level = tensor.patternbroadcast(noise_level,
                                              (False, False, True, True))
        noise_level = copy_and_tag_noise(
            noise_level, self, LOG_SIGMA, 'log_sigma')
    else:
        average = input_
        noise_level = (self.prior_noise_level -
                       tensor.clip(self.mask.apply(input_), -16, 16))
        noise_level = copy_and_tag_noise(
            noise_level, self, LOG_SIGMA, 'log_sigma')
    # Allow incomplete batches by just taking the noise that is needed
    if self.tied_noise:
        if self.noise_batch_size is not None:
            noise = self.parameters[0][:input_.shape[0], :]
        else:
            noise = self.theano_rng.normal(input_.shape[0:2])
        noise = tensor.shape_padright(noise, 2)
    else:
        if self.noise_batch_size is not None:
            noise = self.parameters[0][:input_.shape[0], :, :, :]
        else:
            noise = self.theano_rng.normal(input_.shape)
    kl = (
        self.prior_noise_level - noise_level
        + 0.5 * (
            tensor.exp(2 * noise_level)
            + (average - self.prior_mean) ** 2
        ) / tensor.exp(2 * self.prior_noise_level)
        - 0.5
    )
    application_call.add_auxiliary_variable(kl, roles=[NITS], name='nits')
    return input_ + self.noise_rate * tensor.exp(noise_level) * noise
def unflatten_tensor_variables(flatarr, shapes, symb_arrs):
    import theano.tensor as TT
    import numpy as np
    arrs = []
    n = 0
    for (shape, symb_arr) in zip(shapes, symb_arrs):
        size = np.prod(list(shape))
        arr = flatarr[n:n + size].reshape(shape)
        if arr.type.broadcastable != symb_arr.type.broadcastable:
            arr = TT.patternbroadcast(arr, symb_arr.type.broadcastable)
        arrs.append(arr)
        n += size
    return arrs
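A brief usage sketch for the helper above; the shapes and symbolic variables are illustrative assumptions:

import numpy as np
import theano
import theano.tensor as T

W = T.matrix('W')
b = T.row('b')       # broadcastable: (True, False)
flat = T.vector('flat')
# carve one flat parameter vector into a (3, 4) matrix and a (1, 4) row
W_new, b_new = unflatten_tensor_variables(flat, [(3, 4), (1, 4)], [W, b])
f = theano.function([flat], [W_new, b_new])
w_val, b_val = f(np.arange(16, dtype=theano.config.floatX))
assert w_val.shape == (3, 4) and b_val.shape == (1, 4)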
def gaussian(P, rows, cols, components):
    input_size = rows * cols
    points = theano.shared(np.asarray(
        np.dstack(np.meshgrid(np.arange(cols), np.arange(rows)))
        .reshape(input_size, 2),
        dtype=np.float32))
    P.g_mean = np.random.rand(components, 2) * np.array([rows, cols])
    P.g_scale = 5 * np.random.rand(components, 2)
    P.g_thetas = 2 * np.pi * np.random.rand(components)
    shifted = (T.patternbroadcast(points.reshape((input_size, 1, 2)),
                                  (False, True, False))
               - T.patternbroadcast(P.g_mean.reshape((1, components, 2)),
                                    (True, False, False)))
    rot = rotation(P.g_thetas)
    scale = T.patternbroadcast(P.g_scale.reshape((components, 2, 1)),
                               (False, False, True))
    B = T.patternbroadcast((rot / scale).reshape((1, components, 2, 2)),
                           (True, False, False, False))
    decorr = T.sum(
        B * T.patternbroadcast(shifted.reshape((input_size, components, 1, 2)),
                               (False, False, True, False)),
        axis=3)
    Z = T.sum(decorr ** 2, axis=2)
    return T.exp(-Z)
def compute_output(self, network, in_vw):
    inits = list(toolz.concat(network.find_hyperparameters(
        ["bias_inits", "inits"], [])))
    # gather hyperparameters
    broadcastable = network.find_hyperparameter(["broadcastable"], None)
    broadcastable_axes = network.find_hyperparameter(
        ["broadcastable_axes"], None)
    batch_axis = network.find_hyperparameter(["batch_axis"])
    # have broadcastable as a tuple take precedence over broadcastable_axes
    if broadcastable is None:
        if broadcastable_axes is None:
            if batch_axis is None:
                # no minibatch axis = no default broadcasting
                broadcastable_axes = []
            elif batch_axis >= in_vw.ndim:
                # scalar input = no broadcasting
                broadcastable_axes = []
            else:
                # by default, broadcast over minibatch axis, if any
                broadcastable_axes = [batch_axis]
        broadcastable = [False] * in_vw.ndim
        for axis in broadcastable_axes:
            broadcastable[axis] = True
    assert len(broadcastable) == in_vw.ndim
    shape = tuple([1 if is_broadcastable else size
                   for is_broadcastable, size in zip(broadcastable,
                                                     in_vw.shape)])
    b = network.create_vw(
        name="bias",
        is_shared=True,
        shape=shape,
        tags={"parameter", "bias"},
        inits=inits,
    )
    b_var = b.variable
    # not calling patternbroadcast if not broadcastable, because it seems
    # to have a small overhead
    if any(broadcastable):
        b_var = T.patternbroadcast(b_var, broadcastable)
    network.create_vw(
        name="default",
        variable=(in_vw.variable + b_var),
        shape=in_vw.shape,
        tags={"output"},
    )
def test_local_dimshuffle_subtensor():
    dimshuffle_subtensor = out2in(local_dimshuffle_subtensor)

    x = tensor.tensor4('x')
    x = tensor.patternbroadcast(x, (False, True, False, False))
    i = tensor.iscalar('i')
    out = x[:, :, 10:30, ::i].dimshuffle(0, 2, 3)
    g = FunctionGraph([x, i], [out])
    dimshuffle_subtensor(g)
    topo = g.toposort()
    assert any([not isinstance(x, DimShuffle) for x in topo])
def test_broadcast(self):
    # Test that we can rebroadcast
    data = numpy.random.rand(10, 10).astype('float32')
    output_var = f32sc(name="output", value=data)

    up = tensor.unbroadcast(output_var.sum().dimshuffle('x', 'x'), 0, 1)
    output_func = theano.function(inputs=[], outputs=[],
                                  updates=[(output_var, up)])
    output_func()

    up = tensor.patternbroadcast(output_var.sum().dimshuffle('x', 'x'),
                                 output_var.type.broadcastable)
    output_func = theano.function(inputs=[], outputs=[],
                                  updates=[(output_var, up)])
    output_func()