def tensordot(a, b, axes=2):
    """
    Tensordot implemented as a reshape followed by a regular matrix product.

    This allows tensordot to be GPU accelerated, which isn't possible with
    the default Theano implementation (which is just a wrapper around
    numpy.tensordot).

    Based on code from Tijmen Tieleman's gnumpy
    http://www.cs.toronto.edu/~tijmen/gnumpy.html

    :param a: left operand (symbolic tensor).
    :param b: right operand (symbolic tensor).
    :param axes: either a scalar ``n`` -- contract the last ``n`` axes of
        ``a`` with the first ``n`` axes of ``b`` -- or a pair
        ``(a_axes, b_axes)``. Each element of the pair may be a single axis
        or a sequence of axes; negative axes count from the end.
    :returns: the contracted symbolic tensor with
        ``a.ndim + b.ndim - 2 * n`` dimensions.
    :raises ValueError: if ``axes`` is neither a scalar nor a pair, or if
        the two axis lists have different lengths.
    """
    if numpy.isscalar(axes):
        # 'axes' is a number of axes to multiply and sum over (trailing axes
        # of a, leading axes of b): reshape both to matrices and use dot.
        a_kept = a.shape[:a.ndim - axes]
        a_summed = a.shape[a.ndim - axes:]
        b_summed = b.shape[:axes]
        b_kept = b.shape[axes:]

        outshape = tensor.concatenate([a_kept, b_kept])
        outndim = a.ndim + b.ndim - 2 * axes

        a_reshaped = a.reshape((tensor.prod(a_kept), tensor.prod(a_summed)))
        b_reshaped = b.reshape((tensor.prod(b_summed), tensor.prod(b_kept)))

        return tensor.dot(a_reshaped, b_reshaped).reshape(outshape,
                                                          ndim=outndim)
    elif len(axes) == 2:
        # 'axes' is a pair of axis specs: shuffle the axes of a and b so the
        # contracted ones are trailing in a and leading in b, which reduces
        # this to the scalar case (note the recursion).
        def _axis_tuple(spec, ndim):
            # Accept a bare integer as well as a sequence, and map negative
            # indices onto their non-negative equivalents.
            spec = (spec,) if numpy.isscalar(spec) else tuple(spec)
            return tuple(x + ndim if x < 0 else x for x in spec)

        a_other = _axis_tuple(axes[0], a.ndim)
        b_other = _axis_tuple(axes[1], b.ndim)
        if len(a_other) != len(b_other):
            raise ValueError("Both axis lists in 'axes' must have the same "
                             "length.")
        num_axes = len(a_other)

        a_order = tuple(x for x in range(a.ndim) if x not in a_other) + a_other
        b_order = b_other + tuple(x for x in range(b.ndim) if x not in b_other)

        a_shuffled = a.dimshuffle(a_order)
        b_shuffled = b.dimshuffle(b_order)

        return tensordot(a_shuffled, b_shuffled, num_axes)
    else:
        raise ValueError("Axes should be scalar valued or a list/tuple of len 2.")
def __init__(self, rng, input, n_in, n_out, params = None):
    """Build a fully connected layer with a ReLU non-linearity.

    :param rng: random generator exposing ``uniform(low, high, size)``;
        only used when ``params`` is not supplied.
    :param input: symbolic input fed to ``T.dot(input, W)``.
    :param n_in: number of input units (rows of ``W``).
    :param n_out: number of output units (columns of ``W``, length of ``b``).
    :param params: optional ``(W, b)`` pair to reuse instead of creating
        fresh shared variables.
    """
    self.input = input

    if params:
        # Reuse externally provided weights and bias.
        self.W = params[0]
        self.b = params[1]
    else:
        # Uniform initialisation in +/- sqrt(6 / (n_in + n_out)).
        initial_W = np.asarray(
            rng.uniform(
                low=-np.sqrt(6. / (n_in + n_out)),
                high=np.sqrt(6. / (n_in + n_out)),
                size=(n_in, n_out)
            ),
            dtype=theano.config.floatX
        )
        self.W = theano.shared(value=initial_W, name='W', borrow=True)

        initial_b = np.zeros((n_out,), dtype=theano.config.floatX)
        self.b = theano.shared(value=initial_b, name='b', borrow=True)

    pre_activation = T.dot(input, self.W) + self.b
    self.output = T.nnet.relu(pre_activation)
    self.params = [self.W, self.b]

    # Element count of W and b times 4.
    # NOTE(review): the factor 4 presumably means bytes per float32 value --
    # confirm if floatX can be float64.
    n_elements = T.prod(self.W.shape) + T.prod(self.b.shape)
    self.mem_size = n_elements * 4
def maxpool_3D(input, ds, ignore_border=False):
    """Max-pool the three trailing axes of ``input`` in two 2-D passes.

    The last two axes are pooled by ``(ds[1], ds[2])``; the third-from-last
    axis is then pooled by ``ds[0]``. Both passes reuse the 2-D
    ``DownsampleFactorMax`` op on a 4-D view of the data.

    :param input: symbolic tensor with at least 3 dimensions.
    :param ds: length-3 sequence of pooling factors.
    :param ignore_border: forwarded to ``DownsampleFactorMax``.
    :returns: pooled tensor with the same number of dimensions and the same
        axis order as ``input``.
    :raises NotImplementedError: if ``input`` has fewer than 3 dimensions.
    """
    if input.ndim < 3:
        raise NotImplementedError('max_pool_3d requires a dimension >= 3')

    n_dims = input.ndim

    def pool_trailing_pair(tensor_nd, factors):
        # Collapse every leading axis into one batch axis, insert a singleton
        # channel axis, pool the two trailing axes, then restore the leading
        # axes around the pooled trailing pair.
        trailing = tensor_nd.shape[-2:]
        n_stacked = T.shape_padright(T.prod(tensor_nd.shape[:-2]), 1)
        shape_4d = T.cast(
            T.join(0, n_stacked, T.as_tensor([1, ]), trailing), 'int32')
        stacked = T.reshape(tensor_nd, shape_4d, ndim=4)
        pooled = DownsampleFactorMax(factors, ignore_border)(stacked)
        restored_shape = T.join(0, tensor_nd.shape[:-2], pooled.shape[-2:])
        return T.reshape(pooled, restored_shape, ndim=input.ndim)

    # Pass 1: pool the two trailing axes of every frame.
    frames_pooled = pool_trailing_pair(input, (ds[1], ds[2]))

    # Pass 2: rotate the third-from-last axis to the back so the 2-D op can
    # pool it; the other axis of the pair uses factor 1 and is unchanged.
    leading = list(range(n_dims - 3))
    to_back = leading + [n_dims - 2] + [n_dims - 1] + [n_dims - 3]
    time_pooled = pool_trailing_pair(frames_pooled.dimshuffle(to_back),
                                     (1, ds[0]))

    # Undo the rotation so the axis order matches the input again.
    to_front = leading + [n_dims - 1] + [n_dims - 3] + [n_dims - 2]
    return time_pooled.dimshuffle(to_front)
def apply(self, application, *args, **kwargs):
    """Call the wrapped application with leading extra axes folded away.

    ``extra_ndim`` is read from ``kwargs`` (default 0). When it is positive,
    the first ``extra_ndim + 1`` axes of every input are merged into a single
    axis before the wrapped callable runs, and the merged axis of every
    output is split back afterwards.

    :param application: object whose ``inputs`` attribute names the inputs.
    :param args: positional inputs, matched to ``application.inputs`` in
        order.
    :param kwargs: keyword inputs, plus the optional ``extra_ndim``.
    :returns: the wrapped callable's outputs untouched when ``extra_ndim``
        is 0, otherwise a list of reshaped outputs.
    """
    # extra_ndim is passed through **kwargs so that it cannot be mistaken
    # for one of the positional inputs.
    extra_ndim = kwargs.get("extra_ndim", 0)

    call_inputs = dict(zip(application.inputs, args))
    call_inputs.update(
        dict_subset(kwargs, application.inputs, must_have=False))

    # Inputs are only reshaped when needed, so no no-op reshapes end up in
    # the computation graph.
    if extra_ndim > 0:
        for key, value in list(call_inputs.items()):
            in_shape = value.shape
            # The extra dimensions are remembered for restoring the outputs.
            # Whichever input is visited last wins; they are assumed to
            # match across inputs.
            extra_dims = in_shape[:extra_ndim]
            merged_dim = tensor.prod(in_shape[: extra_ndim + 1])
            folded_shape = tensor.join(
                0, merged_dim[None], in_shape[extra_ndim + 1 :])
            call_inputs[key] = value.reshape(
                folded_shape, ndim=value.ndim - extra_ndim)

    outputs = wrapped.__get__(self, None)(**call_inputs)
    if extra_ndim == 0:
        return outputs

    def unfold(out):
        # Split the merged leading axis back into the extra dimensions plus
        # the original first axis.
        out_shape = out.shape
        first_dim = (out_shape[0] // tensor.prod(extra_dims))[None]
        full_shape = tensor.join(0, extra_dims, first_dim, out_shape[1:])
        return out.reshape(full_shape, ndim=out.ndim + extra_ndim)

    return [unfold(out) for out in pack(outputs)]
def output_probabilistic(self, m_w_previous, v_w_previous):
    """Propagate a mean/variance pair through this layer.

    :param m_w_previous: means coming from the previous layer.
    :param v_w_previous: variances coming from the previous layer.
    :returns: ``(mean, variance)`` of this layer's output. The linear branch
        appends a constant bias input (mean 1, variance 0) and scales by the
        number of inputs; the non-linear branch combines ``self.m_w``,
        ``self.v_w`` and ``self.lam`` in closed form.
    """
    if not self.non_linear:
        # Append the bias unit: mean 1, variance 0.
        mean_in = T.concatenate([ m_w_previous, T.alloc(1, 1) ], 0)
        var_in = T.concatenate([ v_w_previous, T.alloc(0, 1) ], 0)

        mean_out = T.dot(self.m_w, mean_in) / T.sqrt(self.n_inputs)
        var_out = (T.dot(self.v_w, var_in) +
                   T.dot(self.m_w**2, var_in) +
                   T.dot(self.v_w, mean_in**2)) / self.n_inputs
        return (mean_out, var_out)

    # Mean and variance after the activation.
    centred = self.m_w - m_w_previous
    spread = self.v_w
    lam = self.lam

    def closed_form(k):
        # s * exp(-0.5 * sum(m^2 * (1 - 1/u) / v)) with u = 1 + k*lam*v and
        # s = prod(u) ** -0.5, both reductions taken over axis 1.
        inflated = 1 + k*lam*spread
        scale = T.prod(inflated, axis=1)**-0.5
        exponent = T.sum(centred**2*(1 - inflated**-1)*spread**-1, axis=1)
        return scale*T.exp(-0.5*exponent)

    m_a = closed_form(2)
    v_a = closed_form(4) - m_a**2
    return (m_a, v_a)
def _theano_cpu_multi_batch_beam_grad(array, start_idxs, batch_lens, beam_width, wrap_mode, pad_left=0, pad_right=0, idx_dim=0, batch_dim=1, output_grad=None):
    """Symbolic gradient of ``multi_batch_beam`` w.r.t. array and pads.

    An index array of the same shape as ``array`` is pushed through
    ``multi_batch_beam`` to learn which flat source position every beam
    element came from; ``output_grad`` is then accumulated into a flat buffer
    at those positions.

    Note: This is slow and hacky. It creates an index-array of the size of
    the original array. This is calculated on the CPU. The subtensor then can
    be done on the GPU, but we should avoid the first part.

    :param array: symbolic source array.
    :param start_idxs, batch_lens, beam_width, idx_dim, batch_dim: forwarded
        unchanged to ``multi_batch_beam``.
    :param wrap_mode: forwarded to ``multi_batch_beam``; when equal to
        ``"pad"`` gradients for the pad values are computed as well.
    :param pad_left, pad_right: pad values. They are only inspected in
        ``"pad"`` mode, where they must expose ``.shape``.
    :param output_grad: gradient w.r.t. the beam output.
    :returns: ``(D_array, D_pad_left, D_pad_right)``; the pad gradients are
        disconnected unless ``wrap_mode == "pad"``.
    """
    D_beam = output_grad
    use_pad = wrap_mode == "pad"
    prod_array_shape = T.prod(array.shape)

    if use_pad:
        # Only touch the pad shapes here: with the default integer pads there
        # is no ``.shape`` attribute, which used to crash every other mode.
        prod_pad_left_shape = T.prod(pad_left.shape)
        prod_pad_right_shape = T.prod(pad_right.shape)
        D_array_tmp_size = (prod_array_shape + prod_pad_left_shape +
                            prod_pad_right_shape)
        # Flat buffer layout: [array | pad_left | pad_right]. These are the
        # positions of the pad entries inside that buffer.
        pad_left_idxs = (T.arange(prod_pad_left_shape) +
                         prod_array_shape).reshape(pad_left.shape)
        pad_right_idxs = (T.arange(prod_pad_right_shape) + prod_array_shape +
                          prod_pad_left_shape).reshape(pad_right.shape)
    else:
        D_array_tmp_size = prod_array_shape
        pad_left_idxs = pad_right_idxs = 0

    D_array_tmp_flat = T.zeros([D_array_tmp_size], dtype="float32")  # with pad values

    all_idxs = T.arange(prod_array_shape).reshape(array.shape)
    idxs = multi_batch_beam(array=all_idxs, start_idxs=start_idxs, batch_lens=batch_lens, beam_width=beam_width,
                            wrap_mode=wrap_mode,
                            pad_left=pad_left_idxs, pad_right=pad_right_idxs,
                            idx_dim=idx_dim, batch_dim=batch_dim)
    # inc_subtensor accumulates, so a source position that appears in several
    # beams receives the sum of their gradients.
    D_array_tmp_flat = T.inc_subtensor(D_array_tmp_flat[idxs.flatten()], D_beam.flatten())

    if use_pad:
        D_array = D_array_tmp_flat[:prod_array_shape].reshape(array.shape)
        D_pad_left = D_array_tmp_flat[pad_left_idxs.flatten()].reshape(pad_left.shape)
        D_pad_right = D_array_tmp_flat[pad_right_idxs.flatten()].reshape(pad_right.shape)
    else:
        D_array = D_array_tmp_flat.reshape(array.shape)
        D_pad_left = D_pad_right = T.DisconnectedType()()

    return D_array, D_pad_left, D_pad_right
def kl_normal_diagonal(mu1,sigma_diag1,mu2,sigma_diag2,dim):
    """KL divergence between two Gaussians with diagonal covariances.

    Computes ``KL( N(mu1, diag(sigma_diag1)) || N(mu2, diag(sigma_diag2)) )``:

        0.5 * ( log(det2 / det1) - dim
                + sum(sigma_diag1 / sigma_diag2)
                + sum((mu2 - mu1)^2 / sigma_diag2) )

    :param mu1: mean of the first Gaussian.
    :param sigma_diag1: diagonal of the first covariance matrix (variances).
    :param mu2: mean of the second Gaussian.
    :param sigma_diag2: diagonal of the second covariance matrix (variances).
    :param dim: dimensionality of the Gaussians.
    :returns: symbolic scalar holding the divergence.
    """
    inv_sigma_diag2 = 1/sigma_diag2
    mu_diff = mu2-mu1

    # log(det2 / det1) evaluated as a difference of summed logs: the plain
    # product of many variances under- or overflows long before the sum does.
    log_det_ratio = T.sum(T.log(sigma_diag2)) - T.sum(T.log(sigma_diag1))

    # trace(Sigma2^-1 Sigma1) for diagonal matrices.
    trace_term = T.sum(inv_sigma_diag2*sigma_diag1)

    # (mu2 - mu1)^T Sigma2^-1 (mu2 - mu1) for a diagonal Sigma2.
    mahalanobis_term = T.sum(mu_diff*inv_sigma_diag2*mu_diff)

    return 0.5 * (log_det_ratio - dim + trace_term + mahalanobis_term)
def max_pool_3d(input, ds, ignore_border=False):
    """
    Takes as input a N-D tensor, where N >= 4. It downscales the input video
    by the specified factor, by keeping only the maximum value of
    non-overlapping patches of size (ds[0],ds[1],ds[2]) (time, height, width)

    :type input: N-D theano tensor of input images.
    :param input: input images. Height and width are the two last dimensions;
        time is the FOURTH-from-last dimension, i.e. the trailing layout is
        (..., time, d, height, width) where the axis ``d`` in between is not
        pooled.
    :type ds: tuple of length 3
    :param ds: factor by which to downscale. (2,2,2) will halve the video in
        each dimension.
    :param ignore_border: boolean value, forwarded to DownsampleFactorMax.
        Example when True, an axis of length 5 pooled by 2 yields length 2;
        length 3 otherwise.
    :raises NotImplementedError: if ``input`` has fewer than 4 dimensions.
    """
    # The time axis lives at position ndim - 4, so a 3-D input has no such
    # axis; it used to pass this guard and then fail inside dimshuffle.
    if input.ndim < 4:
        raise NotImplementedError('max_pool_3d requires a dimension >= 4')

    vid_dim = input.ndim

    # --- Maxpool frame (the two last dimensions) ---
    frame_shape = input.shape[-2:]

    # count the number of "leading" dimensions
    batch_size = T.prod(input.shape[:-2])
    batch_size = T.shape_padright(batch_size, 1)

    # store as 4D tensor with shape: (batch_size,1,height,width)
    new_shape = T.cast(T.join(0, batch_size, T.as_tensor([1, ]), frame_shape),
                       'int32')
    input_4D = T.reshape(input, new_shape, ndim=4)

    # downsample mini-batch of videos in rows and cols
    op = DownsampleFactorMax((ds[1], ds[2]), ignore_border)
    output = op(input_4D)

    # restore to original shape
    outshape = T.join(0, input.shape[:-2], output.shape[-2:])
    out = T.reshape(output, outshape, ndim=input.ndim)

    # --- Maxpool time ---
    # move the time axis (position vid_dim - 4) to the back
    shufl = (list(range(vid_dim - 4)) + list(range(vid_dim - 3, vid_dim)) +
             [vid_dim - 4])
    input_time = out.dimshuffle(shufl)

    # reset dimensions
    vid_shape = input_time.shape[-2:]

    # count the number of "leading" dimensions
    batch_size = T.prod(input_time.shape[:-2])
    batch_size = T.shape_padright(batch_size, 1)

    # store as 4D tensor with shape: (batch_size,1,width,time)
    new_shape = T.cast(T.join(0, batch_size, T.as_tensor([1, ]), vid_shape),
                       'int32')
    input_4D_time = T.reshape(input_time, new_shape, ndim=4)

    # downsample mini-batch of videos in time; width uses factor 1 and is
    # therefore left unchanged
    op = DownsampleFactorMax((1, ds[0]), ignore_border)
    outtime = op(input_4D_time)

    # restore to shape (xxx, rows, cols, time), then move time back to
    # position vid_dim - 4
    outshape = T.join(0, input_time.shape[:-2], outtime.shape[-2:])
    shufl = (list(range(vid_dim - 4)) + [vid_dim - 1] +
             list(range(vid_dim - 4, vid_dim - 1)))
    return T.reshape(outtime, outshape, ndim=input.ndim).dimshuffle(shufl)
def get_norms(model, gradients):
    """Compute norm of weights and their gradients divided by the number of elements.

    :param model: object whose ``params`` attribute maps parameter names to
        parameter variables.
    :param gradients: mapping from each parameter variable to its gradient.
    :returns: ``(norms, grad_norms)``, two lists of symbolic scalars in the
        iteration order of ``model.params``. They are named
        ``'norm_<name>'`` and ``'grad_norm_<name>'`` respectively.
    """
    def scaled_l2(var):
        # L2 norm divided by the element count; the shape is cast to floatX
        # so the division happens in floating point.
        n_elements = T.prod(var.shape.astype(theano.config.floatX))
        return T.sqrt(T.sum(T.square(var))) / n_elements

    norms = []
    grad_norms = []
    # ``items()`` exists on both Python 2 and 3; ``iteritems()`` is
    # Python 2 only.
    for param_name, param in model.params.items():
        norm = scaled_l2(param)
        norm.name = 'norm_' + param_name
        norms.append(norm)

        grad_norm = scaled_l2(gradients[param])
        grad_norm.name = 'grad_norm_' + param_name
        grad_norms.append(grad_norm)
    return norms, grad_norms
def img_2_neibs_with_chans(inputs_sym, patch_size):
    """Extract stride-1 patches and flatten the channel axis into each patch.

    :param inputs_sym: symbolic 4-D tensor (four shape entries are read).
    :param patch_size: ``(patch_rows, patch_cols)``.
    :returns: 2-D tensor with one row per (axis 0, patch row, patch column)
        position. Each row concatenates the patch from every entry of
        axis 1.
    """
    patch_rows, patch_cols = patch_size[0], patch_size[1]
    in_shape = inputs_sym.shape

    # NOTE(review): assumes images2neibs emits patches ordered by
    # (axis 0, axis 1, patch row, patch col) -- confirm against its docs.
    raw_patches = neighbours.images2neibs(inputs_sym, patch_size, (1,1))

    # Give the patches back their topology.
    topo_shape = (in_shape[0],
                  in_shape[1],
                  in_shape[2]-patch_rows+1,
                  in_shape[3]-patch_cols+1,
                  patch_rows,
                  patch_cols)
    topo_patches = T.reshape(raw_patches, topo_shape)

    # Move axis 1 next to the patch axes so it is flattened with them.
    reordered = topo_patches.dimshuffle(0,2,3,1,4,5)
    n_rows = T.prod(reordered.shape[:3])
    n_cols = T.prod(reordered.shape[3:])
    return T.reshape(reordered, (n_rows, n_cols))
def logp_theano_claims(l,nObs,T,Z,L,X,O_on):
    """Log-likelihood of binary observations ``O_on`` given ``X``, ``Z``, ``L``.

    Writing ``q = (1 - L) * prod(1 - X*Z)`` (product over axis 1 of the
    selected entries), the result is the sum of ``log(1 - q)`` over entries
    where ``O_on`` is non-zero plus the sum of ``log(q)`` over entries where
    ``1 - O_on`` is non-zero.

    :param l: not used by this function.
    :param nObs: number of observations (rows of ``X``).
    :param T: not used by this function.
    :param Z, L, X: symbolic model quantities.
        NOTE(review): shapes are only implied by the reshapes below --
        confirm against the caller.
    :param O_on: indicator array selecting the "on" observations.
    :returns: symbolic scalar log-likelihood.
    """
    # 1 - X*Z, broadcast to three axes.
    x_block = X.reshape((nObs,1,X.shape[1]))
    z_block = (Z.T).reshape((1,Z.shape[1],Z.shape[0]))
    one_minus_xz = (1. - x_block*z_block)

    # L repeated for every observation.
    tiled_L = TT.tile(L[np.newaxis,:],(nObs,1))

    on_idx = O_on.nonzero()
    off_idx = (1-O_on).nonzero()

    # Contribution from O = 1.
    q_on = (1-tiled_L[on_idx])*TT.prod(one_minus_xz[on_idx],axis=1,no_zeros_in_input=True)
    on_term = TT.log(1-q_on).sum()

    # Contribution from O = 0.
    q_off = (1-tiled_L[off_idx])*TT.prod(one_minus_xz[off_idx],axis=1,no_zeros_in_input=True)
    off_term = TT.log(q_off).sum()

    return on_term + off_term
def max_pool_3d(input, ds, ignore_border=False):
    """Max-pool a 5-D tensor ``[n, c, x, y, z]`` over its last three axes.

    ``y`` and ``z`` are pooled by ``(ds[1], ds[2])`` first; ``x`` is then
    pooled by ``ds[0]``. Both steps use the 2-D ``DownsampleFactorMax`` op.

    :param input: symbolic 5-D tensor.
    :param ds: length-3 sequence of pooling factors.
    :param ignore_border: forwarded to ``DownsampleFactorMax``.
    :returns: pooled 5-D tensor with the same axis order as ``input``.
    :raises NotImplementedError: if ``input`` is not 5-dimensional.
    """
    # Only inputs of the form [n, c, x, y, z] are accepted.
    if input.ndim != 5:
        raise NotImplementedError(
            'max_pool_3d requires a input [n, c, x, y, z]')

    # Number of input dimensions.
    rank = input.ndim

    # ---- step 1: pool the [y, z] frames ----
    yz_shape = input.shape[-2:]
    # Batch size: the element counts of every non-frame axis multiplied
    # together, padded to a length-1 vector
    # (http://deeplearning.net/software/theano/library/tensor/basic.html#theano.tensor.shape_padright).
    n_frames = T.shape_padright(T.prod(input.shape[:-2]), 1)
    frames_4d_shape = T.cast(
        T.join(0, n_frames, T.as_tensor([1, ]), yz_shape), 'int32')
    frames_4d = T.reshape(input, frames_4d_shape, ndim=4)

    pooled_frames = DownsampleFactorMax((ds[1], ds[2]), ignore_border)(frames_4d)
    frames_out_shape = T.join(0, input.shape[:-2], pooled_frames.shape[-2:])
    frames_out = T.reshape(pooled_frames, frames_out_shape, ndim=input.ndim)

    # ---- step 2: pool the x axis ----
    # Rotate x (position rank - 3) to the back so the 2-D op can reach it.
    head = list(range(rank - 3))
    x_last = frames_out.dimshuffle(head + [rank - 2] + [rank - 1] + [rank - 3])

    zx_shape = x_last.shape[-2:]
    n_slices = T.shape_padright(T.prod(x_last.shape[:-2]), 1)
    slices_4d_shape = T.cast(
        T.join(0, n_slices, T.as_tensor([1, ]), zx_shape), 'int32')
    slices_4d = T.reshape(x_last, slices_4d_shape, ndim=4)

    # Factor 1 leaves the other axis of the pair unchanged.
    pooled_x = DownsampleFactorMax((1, ds[0]), ignore_border)(slices_4d)
    x_out_shape = T.join(0, x_last.shape[:-2], pooled_x.shape[-2:])

    # Rotate x back to its original position.
    restore_order = head + [rank - 1] + [rank - 3] + [rank - 2]
    return T.reshape(pooled_x, x_out_shape,
                     ndim=input.ndim).dimshuffle(restore_order)
def __init__(self, input=None):
    """Flatten every axis after the first into a single axis.

    :param input: symbolic tensor. Its ``shape`` is read immediately, so
        despite the ``None`` default a real tensor has to be supplied.
    """
    self.input = input

    # Keep axis 0, collapse all remaining axes into one.
    leading = self.input.shape[0]
    flattened = T.prod(self.input.shape[1:])
    self.output = self.input.reshape((leading, flattened))

    # This layer adds neither a non-linearity nor noise, so the auxiliary
    # outputs are aliases of the plain output.
    self.linear_output = self.output
    self.noisy_linear_output = self.output
def get_reg_ind(self):
    """Return the regulariser built from the last two parameters.

    Each of ``self.params[-2]`` and ``self.params[-1]`` is mapped to
    ``alpha = exp(p) ** 2`` and plugged into
    ``0.5*log(alpha) + c1*alpha + c2*alpha^2 + c3*alpha^3``. The same
    expression evaluated at ``self.noise_lvl`` is then subtracted. The two
    results are weighted by the element counts of ``self.params[3]`` and
    ``self.params[4]`` and summed.

    ``c1``, ``c2`` and ``c3`` are coefficients defined outside this method.
    """
    def squared_exp(log_param):
        return T.pow(T.exp(log_param), 2)

    def n_elements(param):
        return T.cast(T.prod(param.shape), theano.config.floatX)

    alpha_x = squared_exp(self.params[-2])
    alpha_y = squared_exp(self.params[-1])

    # The polynomial evaluated numerically at the reference noise level.
    offset = np.cast[theano.config.floatX](
        .5 * np.log(self.noise_lvl) + c1 * self.noise_lvl +
        c2 * (self.noise_lvl**2) + c3 * (self.noise_lvl**3))

    def penalty(alpha):
        return (.5 * T.log(alpha) + c1 * alpha + c2 * T.pow(alpha, 2) +
                c3 * T.pow(alpha, 3) - offset)

    penalty_x = penalty(alpha_x)
    penalty_y = penalty(alpha_y)

    return (n_elements(self.params[3]) * penalty_x +
            n_elements(self.params[4]) * penalty_y)
def make_consensus(self, networks, axis=2):
    """Combine the stacked network outputs along ``axis``.

    The strategy is selected by ``self.attrs['consensus']``:

    * ``'max'``, ``'min'``, ``'mean'``, ``'sum'``, ``'prod'``, ``'var'`` --
      the corresponding reduction over ``axis``;
    * ``'flat'`` -- returns ``networks`` untouched when ``self.depth == 1``,
      otherwise flattens to 3 dimensions if ``axis == 2`` and to 2
      dimensions for any other axis;
    * ``'project'`` -- contracts ``axis`` with a newly added weight
      parameter;
    * ``'random'`` -- picks a randomly drawn index along ``axis`` (axes 0-3
      only).

    Any other value fails an assertion.
    """
    method = self.attrs['consensus']

    if method == 'flat':
        if self.depth == 1:
            return networks
        return networks.flatten(ndim=3 if axis == 2 else 2)

    if method == 'max':
        return T.max(networks, axis=axis)
    if method == 'min':
        return T.min(networks, axis=axis)
    if method == 'mean':
        return T.mean(networks, axis=axis)
    if method == 'sum':
        return T.sum(networks, axis=axis, acc_dtype=theano.config.floatX)
    if method == 'prod':
        return T.prod(networks, axis=axis)
    if method == 'var':
        return T.var(networks, axis=axis)

    if method == 'project':
        projection = self.add_param(
            self.create_random_uniform_weights(
                self.attrs['n_out'], 1,
                self.attrs['n_out'] + self.depth + 1))
        return T.tensordot(projection, networks, [[1], [axis]])

    if method == 'random':
        # NOTE(review): if random_integers treats ``high`` as inclusive, the
        # drawn index can equal self.depth -- confirm this stays in range.
        idx = self.rng.random_integers(size=(1,), low=0, high=self.depth)
        if axis in (0, 1, 2, 3):
            # Full slices for every axis before ``axis``, then the index.
            return networks[(slice(None),) * axis + (idx,)]
        assert False, "axis too large"

    assert False, "consensus method unknown: " + method
def max_pool_2d(input, ds, ignore_border=False, st=None, padding=(0, 0),
                mode='max'):
    """
    Pool the two last dimensions of an N-D tensor, N >= 2.

    The input is downscaled by applying ``mode`` to patches of size
    (ds[0], ds[1]).

    :type input: N-D theano tensor of input images.
    :param input: input images. Pooling is done over the 2 last dimensions.
    :type ds: tuple of length 2
    :param ds: factor by which to downscale (vertical ds, horizontal ds).
        (2,2) will halve the image in each dimension.
    :type ignore_border: bool
    :param ignore_border: When True, (5,5) input with ds=(2,2) will generate
        a (2,2) output. (3,3) otherwise.
    :type st: tuple of length 2
    :param st: stride size, i.e. the number of shifts over rows/cols to get
        the next pool region. If st is None, it is considered equal to ds
        (no overlap between pooling regions).
    :type padding: tuple of two ints
    :param padding: (pad_h, pad_w), pad zeros to extend beyond the four
        borders of the images; pad_h is the size of the top and bottom
        margins, pad_w the size of the left and right margins.
    :type mode: string
    :param mode: 'max', 'average_inc_pad' or 'average_exc_pad'. Operation
        executed on each window. `max` always excludes the padding in the
        computation. `average` gives you the choice to include or exclude
        it.
    :raises NotImplementedError: if ``input`` has fewer than 2 dimensions.
    """
    if input.ndim < 2:
        raise NotImplementedError('max_pool_2d requires a dimension >= 2')

    pool = DownsampleFactorMax(ds, ignore_border, st=st, padding=padding,
                               mode=mode)

    # A 4-D input already has the layout the op works on.
    if input.ndim == 4:
        return pool(input)

    # Otherwise fold all leading dimensions into one batch axis and add a
    # singleton channel axis: (batch_size, 1, height, width).
    leading_dims = input.shape[:-2]
    image_dims = input.shape[-2:]
    n_images = tensor.shape_padright(tensor.prod(leading_dims), 1)
    shape_4d = tensor.cast(
        tensor.join(0, n_images, tensor.as_tensor([1]), image_dims), 'int64')
    pooled = pool(tensor.reshape(input, shape_4d, ndim=4))

    # Put the leading dimensions back in front of the pooled image axes.
    final_shape = tensor.join(0, leading_dims, pooled.shape[-2:])
    return tensor.reshape(pooled, final_shape, ndim=input.ndim)
def add_normal(model, name, m, v):
    """Register a normally distributed variable with mean ``m``, variance ``v``.

    A sample is drawn from ``model['stream']`` with standard deviation
    ``sqrt(v)`` and handed to ``add_stochastic`` together with four factors.

    :param model: mapping that provides the random stream under ``'stream'``.
    :param name: name forwarded to ``add_stochastic``.
    :param m: mean of the distribution.
    :param v: variance of the distribution; the first factor requires
        ``v > 0``.
    :returns: whatever ``add_stochastic`` returns.
    """
    sample = model['stream'].normal(avg=m, std=T.sqrt(v))

    positive_variance = require(T.gt(v,0))
    # NOTE(review): this term enters with a positive sign and is not scaled
    # by the element count -- confirm how add_stochastic combines factors.
    two_pi_term = T.log(2*np.pi)
    variance_term = -T.prod(T.shape(sample))*T.log(v)/2
    quadratic_term = -(sample-m)**2/2/v

    factors = [positive_variance, two_pi_term, variance_term, quadratic_term]
    return add_stochastic(model, name, sample, factors)
def build(self, output, tparams=None, BNparams=None):
    """Apply optional batch normalisation, then the activation.

    :param output: symbolic pre-activation of the layer.
    :param tparams: mapping with the trainable ``BN_scale`` / ``BN_shift``
        entries (keys built with ``p_(self.prefix, ...)``); only read when
        batch normalisation is enabled.
    :param BNparams: mapping with the stored ``mean`` / ``std`` statistics
        that are used when ``self.training`` is false.

    ``self.BN_mode`` selects the behaviour: falsy disables normalisation,
    ``1`` normalises with statistics over the whole tensor, ``2`` normalises
    per entry of axis 1 (statistics over axes 0, 2 and 3). The result is
    stored in ``self.output``.
    """
    if self.BN_mode:
        self.BN_eps = npt(self.BN_eps)
        if not hasattr(self, 'BN_mean'):
            self.BN_mean = T.mean(output)
        if not hasattr(self, 'BN_std'):
            # Correction n / (n - 1), written as 1 + 1 / (n - 1). The count
            # is cast to floatX first: dividing by the integer-typed shape
            # product can truncate 1 / (n - 1) to 0.
            n_elems = T.cast(T.prod(output.shape), floatX)
            m2 = 1 + 1 / (n_elems - 1)
            self.BN_std = T.sqrt(m2 * T.var(output) + self.BN_eps)

        if self.BN_mode == 2:
            # Per-channel statistics, broadcast back over axes 0, 2 and 3.
            t_mean = T.mean(output, axis=[0, 2, 3], keepdims=True)
            t_var = T.var(output, axis=[0, 2, 3], keepdims=True)
            BN_mean = BNparams[p_(self.prefix, 'mean')].dimshuffle(
                'x', 0, 'x', 'x')
            BN_std = BNparams[p_(self.prefix, 'std')].dimshuffle(
                'x', 0, 'x', 'x')
            # Batch statistics while training, stored statistics otherwise.
            output = ifelse(
                self.training,
                (output - t_mean) / T.sqrt(t_var + self.BN_eps),
                (output - BN_mean) / BN_std)
            output *= tparams[p_(self.prefix, 'BN_scale')].dimshuffle(
                'x', 0, 'x', 'x')
            output += tparams[p_(self.prefix, 'BN_shift')].dimshuffle(
                'x', 0, 'x', 'x')
        elif self.BN_mode == 1:
            # A single mean/variance for the whole tensor.
            t_mean = T.mean(output)
            t_var = T.var(output)
            output = ifelse(
                self.training,
                (output - t_mean) / T.sqrt(t_var + self.BN_eps),
                ((output - BNparams[p_(self.prefix, 'mean')]) /
                 BNparams[p_(self.prefix, 'std')]))
            output *= tparams[p_(self.prefix, 'BN_scale')]
            output += tparams[p_(self.prefix, 'BN_shift')]

    self.output = self.activation(output)
def gaussian_likelihood_diagonal_variance(t, mu, sig, dim):
    """
    Gaussian likelihood along the first dimension, diagonal covariance.

    For every column the density is

        exp(-0.5 * sum_i (t_i - mu_i)^2 / sig_i) / prod_i sqrt(2*pi*sig_i)

    with the sum and product taken over axis 0.

    Parameters
    ----------
    t : TensorVariable
        Target values.
    mu : TensorVariable
        Predicted means.
    sig : TensorVariable
        Predicted variances (diagonal of the covariance matrix).
    dim : int
        First dimension of the target vector t. Not needed by the
        computation; kept for interface compatibility.

    Returns
    -------
    TensorVariable
        The density, with axis 0 reduced away.
    """
    # Keep the variances strictly positive and finite.
    sig_clip = T.clip(sig, 1e-40, 1e40)

    # Work in log-space. The covariance is diagonal, so the log of the
    # normalisation constant is a plain sum; the product of
    # sqrt(2*pi*sig_i) used previously goes to 0 (or overflows) for long
    # vectors.
    log_normalization = 0.5 * T.sum(T.log(2 * math.pi * sig_clip), axis=0)

    # Diagonal covariance: term-by-term division replaces the inverse-matrix
    # product. The quadratic form is summed BEFORE exponentiating -- the
    # density is the exponential of the sum, not the sum of exponentials.
    quadratic = T.sum((t - mu) * (t - mu) / sig_clip, axis=0)

    log_pdf = -0.5 * quadratic - log_normalization
    return T.exp(log_pdf)
def process(self, input, tparams, BNparams):
    """Convolve ``input`` and record per-channel batch statistics.

    :param input: symbolic 4-D input of the convolution.
    :param tparams: mapping holding the filters under ``p_(prefix, 'W')``
        and, when ``self.with_bias`` is set, the bias under
        ``p_(prefix, 'b')``.
    :param BNparams: not used by this method.
    :returns: the convolution output (plus bias if enabled).

    Side effects: sets ``self.BN_mean`` and ``self.BN_std`` to the
    mean / corrected standard deviation over axes 0, 2 and 3 of the output.
    """
    # 'same' is emulated by a 'full' convolution followed by cropping.
    mode = 'full' if self.border_mode == 'same' else self.border_mode
    output = conv.conv2d(
        input=input,
        filters=tparams[p_(self.prefix, 'W')],
        image_shape=[self.batch_size, self.n_in[0]] + self.image_shape,
        filter_shape=[self.n_out] + self.n_in,
        border_mode=mode,
        subsample=self.stride)

    if self.border_mode == 'same':
        a1 = (self.filter_size[0] - 1) // 2
        b1 = (self.filter_size[1] - 1) // 2
        a2 = self.filter_size[0] - a1
        b2 = self.filter_size[1] - b1
        # Drop a1 (b1) leading and a2 - 1 (b2 - 1) trailing rows (columns).
        # A stop of -0 would produce an empty slice, so "nothing to drop" is
        # expressed as None.
        row_stop = None if a2 == 1 else 1 - a2
        col_stop = None if b2 == 1 else 1 - b2
        output = output[:, :, a1:row_stop, b1:col_stop]

    if self.with_bias:
        output += tparams[p_(self.prefix, 'b')].dimshuffle('x', 0, 'x', 'x')

    self.BN_mean = T.mean(output, axis=[0, 2, 3])
    # Values per channel, in floating point: dividing the integer-typed
    # shape product can truncate 1 / (n - 1) to 0 and silently drop the
    # n / (n - 1) variance correction.
    n_per_channel = T.cast(T.prod(output.shape), floatX) / self.n_out
    m2 = 1 + 1 / (n_per_channel - 1)
    self.BN_std = T.sqrt(m2 * T.var(output, axis=[0, 2, 3]) +
                         npt(self.BN_eps))
    return output
def liks(self,a_na, b_nb):
    """Row-wise diagonal-Gaussian likelihood of ``b_nb``.

    :param a_na: matrix whose first ``self.d`` columns are the means and
        whose remaining columns are the standard deviations.
    :param b_nb: matrix of points to evaluate, one row per Gaussian.
    :returns: vector with one likelihood per row, of dtype ``floatX``.
    """
    d = self.d
    mu_nd, sig_nd = a_na[:, :d], a_na[:, d:]

    # Squared standardised distance, summed over the feature axis.
    z_nd = (mu_nd - b_nb)/sig_nd
    sqdist_n = TT.square(z_nd).sum(axis=1)

    # Normaliser: (sqrt(2*pi))^d * prod(sigma).
    norm_n = np.cast[floatX](np.sqrt(2*np.pi)**self.d) * TT.prod(sig_nd,axis=1)

    out = TT.exp( sqdist_n * -.5 ) / norm_n
    assert out.dtype==floatX
    return out
def max_pool_2d(input, ds, ignore_border=False):
    """
    Max-pool the two last dimensions of an N-D tensor, N >= 2.

    Only the maximum value of each non-overlapping patch of size
    (ds[0], ds[1]) is kept.

    :type input: N-D theano tensor of input images.
    :param input: input images. Max pooling is done over the 2 last
        dimensions.
    :type ds: tuple of length 2
    :param ds: factor by which to downscale. (2,2) will halve the image in
        each dimension.
    :param ignore_border: boolean value. When True, (5,5) input with
        ds=(2,2) will generate a (2,2) output. (3,3) otherwise.
    :raises NotImplementedError: if ``input`` has fewer than 2 dimensions.
    """
    if input.ndim < 2:
        raise NotImplementedError("max_pool_2d requires a dimension >= 2")

    leading_dims = input.shape[:-2]
    image_dims = input.shape[-2:]

    # Fold every leading dimension into one batch axis and insert a singleton
    # channel axis: (batch_size, 1, height, width).
    n_images = tensor.shape_padright(tensor.prod(leading_dims), 1)
    shape_4d = tensor.cast(
        tensor.join(0, n_images, tensor.as_tensor([1]), image_dims), "int64")
    images_4d = tensor.reshape(input, shape_4d, ndim=4)

    # Downsample the mini-batch of images.
    pooled = DownsampleFactorMax(ds, ignore_border)(images_4d)

    # Re-attach the original leading dimensions to the pooled image axes.
    final_shape = tensor.join(0, leading_dims, pooled.shape[-2:])
    return tensor.reshape(pooled, final_shape, ndim=input.ndim)
def relevance_conv_z(out_relevances, inputs, weights, bias=None):
    """Propagate relevances backwards through a convolution.

    :param out_relevances: relevances at the output of the convolution.
    :param inputs: inputs that were fed to the convolution.
    :param weights: convolution filters (4-D).
    :param bias: optional per-output-channel bias.
    :returns: relevances at the input of the convolution.
    """
    def transposed_flipped(filters):
        # Swap the two leading axes and flip both spatial axes, as needed to
        # convolve back towards the input.
        return filters.dimshuffle(1, 0, 2, 3)[:, :, ::-1, ::-1]

    has_bias = bias is not None

    activations = conv2d(inputs, weights)
    if has_bias:
        activations = activations + bias.dimshuffle("x", 0, "x", "x")

    # Stabilise: push every value away from zero by eps in the direction of
    # its sign; values that are still exactly zero are moved to +eps. This
    # prevents division by 0 and by very small numbers.
    eps = 1e-4
    activations = activations + T.sgn(activations) * eps
    activations = activations + T.eq(activations, 0) * eps
    normed_relevances = out_relevances / activations

    # Convolve the normalised relevances back to the input ("upconv").
    upconv = conv2d(normed_relevances, transposed_flipped(weights),
                    border_mode="full")
    in_relevances_proper = upconv * inputs
    if not has_bias:
        return in_relevances_proper

    bias_relevance = bias.dimshuffle("x", 0, "x", "x") * normed_relevances
    # Divide the bias share by the weight size before convolving back.
    # NOTE(review): the original author was unsure whether averaging over
    # these dims is correct.
    n_weights = T.prod(weights.shape[1:]).astype(theano.config.floatX)
    fraction_bias = bias_relevance / n_weights
    bias_rel_in = conv2d(fraction_bias,
                         transposed_flipped(T.ones_like(weights)),
                         border_mode="full")
    return in_relevances_proper + bias_rel_in
def _backward_negative_z(inputs, weights, normed_relevances, bias=None):
    """Back-propagate relevance through the negative contributions only.

    The negative contributions are positive weights times negative inputs
    plus negative weights times positive inputs, and optionally the negative
    part of the bias.

    :param inputs: inputs that were fed to the convolution.
    :param weights: convolution filters (4-D).
    :param normed_relevances: output relevances, already normalised.
    :param bias: optional per-output-channel bias.
    :returns: relevances at the input of the convolution.
    """
    def backward(filters):
        # Convolve the relevances back with transposed, spatially flipped
        # filters.
        return conv2d(normed_relevances,
                      filters.dimshuffle(1, 0, 2, 3)[:, :, ::-1, ::-1],
                      border_mode="full")

    # Sign-split copies of inputs and weights.
    pos_inputs = inputs * T.gt(inputs, 0)
    pos_weights = weights * T.gt(weights, 0)
    neg_inputs = inputs * T.lt(inputs, 0)
    neg_weights = weights * T.lt(weights, 0)

    # weights+ * inputs-  and  weights- * inputs+
    from_pos_weights = backward(pos_weights) * neg_inputs
    from_neg_weights = backward(neg_weights) * pos_inputs
    together = from_pos_weights + from_neg_weights

    if bias is None:
        return together

    neg_bias = bias * T.lt(bias, 0)
    bias_relevance = neg_bias.dimshuffle("x", 0, "x", "x") * normed_relevances
    # Divide the bias share by the weight size before convolving back.
    # NOTE(review): the original author was unsure whether averaging over
    # these dims is correct.
    n_weights = T.prod(weights.shape[1:]).astype(theano.config.floatX)
    fraction_bias = bias_relevance / n_weights
    bias_rel_in = conv2d(
        fraction_bias,
        T.ones_like(weights).dimshuffle(1, 0, 2, 3)[:, :, ::-1, ::-1],
        border_mode="full")
    return together + bias_rel_in
def __init__(self, input, n_in, n_out, prefix='hidden', W=None, b=None,
             activation=T.tanh):
    """Fully connected layer: ``activation(dot(input, W) + b)``.

    :param input: symbolic input.
    :param n_in: number of input units.
    :param n_out: number of output units.
    :param prefix: name prefix for the parameters (combined via ``_p``).
    :param W: optional shared weight matrix; initialised uniformly in
        +/- sqrt(6 / (n_in + n_out)) when omitted, and scaled by 4 if the
        activation is the sigmoid.
    :param b: optional shared bias vector; zeros when omitted.
    :param activation: callable applied to the pre-activation, or ``None``
        for a linear layer. Softmax is applied over the last axis for
        inputs of any rank.
    """
    self.input = input

    if W is None:
        initial_W = numpy.asarray(
            np.random.uniform(
                low=-numpy.sqrt(6. / (n_in + n_out)),
                high=numpy.sqrt(6. / (n_in + n_out)),
                size=(n_in, n_out)
            ),
            dtype=theano.config.floatX
        )
        if activation == theano.tensor.nnet.sigmoid:
            initial_W *= 4
        W = theano.shared(value=initial_W, name=_p(prefix, 'W'), borrow=True)

    if b is None:
        initial_b = numpy.zeros((n_out,), dtype=theano.config.floatX)
        b = theano.shared(value=initial_b, name=_p(prefix, 'b'), borrow=True)

    self.W = W
    self.b = b
    self.pre_activation = T.dot(input, self.W) + self.b

    if activation is None:
        self.output = self.pre_activation
    elif activation == T.nnet.softmax:
        # Collapse every leading axis into the rows of a 2-D matrix, apply
        # softmax to it, then restore the original shape.
        full_shape = self.pre_activation.shape
        as_matrix = self.pre_activation.reshape(
            (T.prod(full_shape[:-1]), full_shape[-1]))
        self.output = T.nnet.softmax(as_matrix).reshape(full_shape)
    else:
        self.output = activation(self.pre_activation)

    self.params = {_p(prefix, 'W'):W, _p(prefix, 'b'):b}
def relevance_conv_z_plus(out_relevances, inputs, weights, bias=None):
    """Propagate relevances backwards using only the positive weights.

    :param out_relevances: relevances at the output of the convolution.
    :param inputs: inputs that were fed to the convolution.
    :param weights: convolution filters (4-D); negative entries are zeroed.
    :param bias: ignored apart from a logged warning.
    :returns: relevances at the input of the convolution.
    """
    if bias is not None:
        log.warning("Bias not respected for conv z_plus")

    positive_weights = weights * T.gt(weights, 0)
    activations = conv2d(inputs, positive_weights)

    # Prevent division by 0: every activation that is exactly 0 becomes 1.
    is_zero = T.eq(activations, 0)
    activations = activations + is_zero * 1
    normed_relevances = out_relevances / activations

    # Convolve the normalised relevances back to the input ("upconv").
    upconv = conv2d(
        normed_relevances,
        positive_weights.dimshuffle(1, 0, 2, 3)[:, :, ::-1, ::-1],
        border_mode="full")
    in_relevances_proper = upconv * inputs

    # Where all inputs of a relevance were zero, spread that relevance
    # equally over them.
    # NOTE(review): the original author was unsure whether averaging over
    # these dims is correct.
    uniform = T.ones(positive_weights.shape, dtype=np.float32)
    n_weights = T.prod(positive_weights.shape[1:]).astype(theano.config.floatX)
    pool_fractions = uniform / n_weights
    in_relevances_from_0 = conv2d(
        out_relevances * is_zero,
        pool_fractions.dimshuffle(1, 0, 2, 3),
        subsample=(1, 1),
        border_mode="full")

    return in_relevances_proper + in_relevances_from_0
def max_pool_switch_2d(input, ds, ignore_border=None, st=None, padding=(0, 0),
                       index_type='flattened', index_scope='local'):
    """Apply ``MaxPoolSwitch`` over the two last dimensions of ``input``.

    :param input: symbolic tensor with at least 2 dimensions.
    :param ds: pooling size, forwarded to ``MaxPoolSwitch``.
    :param ignore_border: forwarded to the op; ``None`` is treated as False.
    :param st, padding, index_type, index_scope: forwarded to the op
        unchanged.
    :returns: the op output. For non-4-D inputs the leading dimensions of
        ``input`` are restored around the op's two trailing axes.
    :raises NotImplementedError: if ``input`` has fewer than 2 dimensions.
    """
    if input.ndim < 2:
        raise NotImplementedError('max_pool_switched_2d requires a dimension >= 2')

    if ignore_border is None:
        ignore_border = False

    switch_pool = MaxPoolSwitch(ds, ignore_border, st=st, padding=padding,
                                index_type=index_type,
                                index_scope=index_scope)

    # 4-D inputs can be handed to the op directly.
    if input.ndim == 4:
        return switch_pool(input)

    # Otherwise fold the leading dimensions into one batch axis and insert a
    # singleton channel axis: (batch_size, 1, height, width).
    leading_dims = input.shape[:-2]
    image_dims = input.shape[-2:]
    n_images = T.shape_padright(T.prod(leading_dims), 1)
    shape_4d = T.cast(T.join(0, n_images, T.as_tensor([1]), image_dims),
                      'int64')
    pooled = switch_pool(T.reshape(input, shape_4d, ndim=4))

    # Restore the leading dimensions in front of the pooled image axes.
    final_shape = T.join(0, leading_dims, pooled.shape[-2:])
    return T.reshape(pooled, final_shape, ndim=input.ndim)
def unpool_switch_2d(input, ds, st=None, index_type='flattened',
                     index_scope='local', original_input_shape=None):
    """Apply ``UnpoolSwitch`` to ``input``.

    :param input: symbolic tensor with at least 3 dimensions.
    :param ds: pooling size, forwarded to ``UnpoolSwitch``.
    :param st, index_type, index_scope, original_input_shape: forwarded to
        the op unchanged.
    :returns: the op output. For non-4-D inputs it is reshaped so that all
        but the last two dimensions of ``input`` are kept and the last two
        come from the op output.
    :raises NotImplementedError: if ``input`` has fewer than 3 dimensions.
    """
    if input.ndim < 3:
        raise NotImplementedError('unpool_switched_2d requires a dimension >= 3')

    unpool = UnpoolSwitch(ds, st=st, index_type=index_type,
                          index_scope=index_scope,
                          original_input_shape=original_input_shape)

    # 4-D inputs can be handed to the op directly.
    if input.ndim == 4:
        return unpool(input)

    # Keep the last three dimensions as they are and fold everything in
    # front of them into a single batch axis, giving a 4-D tensor.
    trailing_dims = input.shape[-3:]
    n_items = T.shape_padright(T.prod(input.shape[:-3]), 1)
    shape_4d = T.cast(T.join(0, n_items, trailing_dims), 'int64')
    unpooled = unpool(T.reshape(input, shape_4d, ndim=4))

    # Restore the original leading dimensions around the new trailing pair.
    final_shape = T.join(0, input.shape[:-2], unpooled.shape[-2:])
    return T.reshape(unpooled, final_shape, ndim=input.ndim)
def connect(self, inputs):
    """Flatten ``inputs`` to 2-D and record the input/output pair.

    The first dimension is kept; all remaining dimensions are collapsed into
    one.

    :param inputs: symbolic tensor.
    :returns: the flattened tensor, which is also appended to
        ``self.outputs`` (``inputs`` is appended to ``self.inputs``).
    """
    n_rows = inputs.shape[0]
    n_cols = T.prod(inputs.shape[1:])
    flattened = inputs.reshape((n_rows, n_cols))

    self.inputs.append(inputs)
    self.outputs.append(flattened)
    return flattened
def build_obj(z_sample,z_mu,z_sigma,x_orig,x_out):
    """Build the training objective together with a list of diagnostics.

    The objective is ``0.5 * sum((x_orig - x_out)^2)`` plus
    ``kl_normal_diagonal_vs_unit(z_mu, z_sigma, z_dim)``. ``z_dim`` and
    ``x_sigma`` are read from the enclosing scope.

    :param z_sample: sampled latent code.
    :param z_mu: mean of the approximate posterior.
    :param z_sigma: diagonal of the approximate posterior.
    :param x_orig: original input.
    :param x_out: reconstruction.
    :returns: ``(objective, diagnostics)``. ``objective`` is reshaped to a
        0-d tensor and ``diagnostics`` is a list of 14 intermediate
        expressions in a fixed order.
    """
    # ---- objective ----
    squared_error = (x_orig-x_out)**2
    recon_proper = 0.5*T.sum(squared_error)
    recon = recon_proper  # the constant term of the reconstruction error is omitted
    kl_term = kl_normal_diagonal_vs_unit(z_mu,z_sigma,z_dim)
    objective = (recon + kl_term).reshape((),ndim=0)

    # ---- diagnostics only; none of these enter the objective ----
    sigma_fixed = z_sigma
    sigma_inv = 1/(sigma_fixed)
    sigma_det = T.prod(z_sigma)
    norm_const = 1./(T.sqrt(((2*np.pi)**z_dim) * sigma_det))
    # log(norm_const) can be omitted.
    log_q = - 0.5*T.dot(sigma_inv, ((z_sample-z_mu)**2).T) + T.log(norm_const)
    # NOTE(review): log_q already contains log(norm_const), so norm_const
    # enters this product twice -- confirm that is intended.
    q = norm_const * T.exp(log_q)
    # p(x|z) is Gaussian.
    log_p_x = -(1/(x_sigma))*(((x_orig-x_out)**2).sum())
    # Gaussian prior with mean 0 and covariance I.
    log_p_z = - (z_sample**2).sum()

    diagnostics = [
        recon,         #1
        kl_term,       #2
        log_q,         #3
        sigma_det,     #4
        q,             #5
        log_p_x,       #6
        log_p_z,       #7
        z_sample,      #8
        z_mu,          #9
        z_sigma,       #10
        sigma_inv,     #11
        sigma_fixed,   #12
        norm_const,    #13
        recon_proper,  #14
    ]
    return objective, diagnostics
def numel(x):
    """Return the product of the entries of ``x.shape`` (element count)."""
    dims = x.shape
    return T.prod(dims)
def build_simple_model(
        input_var,
        filter_size=[5, 11, 25],
        n_filters=64,
        n_classes=6,
        depth=1,
        last_filter_size=1,
        nb_in_channels=1,
        out_nonlin=softmax):
    '''
    Build a multi-branch 1D convolutional network as a dict of layers.

    One branch is created per entry of `filter_size`; each branch stacks
    `depth` conv -> batch-norm -> rectify groups.  The branches are
    concatenated, passed through a final convolution with `n_classes`
    filters, then dimshuffled and reshaped to 2D so `out_nonlin` can be
    applied.  Two extra layers reshape the result back to 3D.

    Parameters:
    -----------
    input_var : theano tensor
    filter_size : list of odd int (to fit with same padding), one entry
        per parallel convolution branch
    n_filters : int, number of filters for each convolution
    n_classes : int, number of filters of the final convolution
    depth : int, number of stacked convolutions before concatenation
    last_filter_size : int, filter size of the final convolution; the
        original author notes it must be 1 and only exists so weights
        from an older version (which used 3) can be reassigned
    nb_in_channels : int, channel count of the input layer
    out_nonlin : default=softmax, non linearity applied to the 2D output

    Returns:
    --------
    ([net['last_layer']], net) where net maps layer names to layers.
    '''
    net = {}
    net['input'] = InputLayer((None, nb_in_channels, 200), input_var)

    # Parallel convolution branches, each `depth` groups deep.
    branches = range(len(filter_size))
    for level in range(depth):
        for branch in branches:
            suffix = '{}_{}'.format(level, branch)
            if level == 0:
                source = 'input'
            else:
                source = 'relu{}_{}'.format(level - 1, branch)

            # Batch normalisation sits between the convolution and the
            # rectifier, hence nonlinearity=None on the convolution.
            net['conv' + suffix] = ConvLayer(net[source],
                                             num_filters=n_filters,
                                             filter_size=filter_size[branch],
                                             pad='same',
                                             nonlinearity=None)
            net['bn' + suffix] = BatchNormLayer(net['conv' + suffix])
            net['relu' + suffix] = NonlinearityLayer(net['bn' + suffix],
                                                     nonlinearity=rectify)

    # Merge the last group of every branch.
    net['concat'] = ConcatLayer(
        [net['relu{}_{}'.format(depth - 1, branch)] for branch in branches])

    # Output convolution.
    net['final_conv'] = ConvLayer(net['concat'],
                                  num_filters=n_classes,
                                  filter_size=last_filter_size,
                                  pad='same')

    # The softmax implementation needs a 1D or 2D tensor, so move the
    # class axis last and merge the first two axes.
    net['final_dimshuffle'] = DimshuffleLayer(net['final_conv'], (0, 2, 1))
    out_shape = lasagne.layers.get_output(net['final_dimshuffle']).shape
    net['final_reshape'] = ReshapeLayer(
        net['final_dimshuffle'],
        (T.prod(out_shape[0:2]), out_shape[2]))

    # This is the layer that computes the prediction.
    net['last_layer'] = NonlinearityLayer(net['final_reshape'],
                                          nonlinearity=out_nonlin)

    # Layers used to view the prediction in the 3D layout again.
    net['probs_reshape'] = ReshapeLayer(
        net['last_layer'], (out_shape[0], out_shape[1], n_classes))
    net['probs_dimshuffle'] = DimshuffleLayer(net['probs_reshape'],
                                              (0, 2, 1))

    return [net['last_layer']], net
def build(self):
    """
    Build the model variables.

    Variables are created through ``self.Det`` and ``self.LogNorm`` in the
    order below; bracketed tags in the comments give the intended axes.
    """
    cm_reduction = self.build_reduction_var()

    # Window of active countermeasures extended into the past.
    window_start = self.d.Ds[0] - pd.DateOffset(self.CMDelayCut)
    earlier_active_cms = self.d.get_ActiveCMs(window_start, self.d.Ds[-1])

    # [region, CM, day] Reduction factor for each CM, region and day.
    per_cm_reduction = (
        T.reshape(cm_reduction, (1, self.nCMs, 1)) ** earlier_active_cms)

    # [region, day] Combined reduction factor from all CMs (noise is
    # added further down).
    growth_reduction = self.Det("GrowthReduction",
                                T.prod(per_cm_reduction, axis=1),
                                plot_trace=False)

    # [region, day] GrowthReduction convolved with DelayProb along days;
    # the first CMDelayCut days are dropped again.
    convolved = geom_convolution(growth_reduction, self.CMDelayProb, axis=1)
    delayed_growth_reduction = self.Det(
        "DelayedGrowthReduction",
        convolved[:, self.CMDelayCut:],
        plot_trace=False,
    )

    # [] Baseline growth rate (wide prior OK, mean estimates ~10% daily
    # growth).
    base_growth_rate = self.LogNorm("BaseGrowthRate", 1.2, 2.3)

    # [region] Region growth rate.
    # TODO: Estimate growth rate variance
    region_growth_rate = self.LogNorm("RegionGrowthRate",
                                      base_growth_rate,
                                      0.3,
                                      shape=(self.nRs, ))

    # [region] Region unreliability as a common scale multiplier of its
    #   * measurements (measurement unreliability)
    #   * expected growth noise
    # TODO: Estimate good prior (but can be weak?)
    region_scale_mult = self.LogNorm("RegionScaleMult",
                                     1.0,
                                     1.0,
                                     shape=(self.nRs, ))

    # [region, day] The ideal predicted daily growth.
    predicted_growth = self.Det(
        "PredictedGrowth",
        T.reshape(region_growth_rate, (self.nRs, 1)) * delayed_growth_reduction,
        plot_trace=False,
    )

    # [region, day] The actual (still hidden) growth rate each day.
    # TODO: Estimate noise variance (should be small, measurement
    # variance below).
    # Miscalibration: too low pushes time effects into CMs, too high
    # explains away CMs.
    real_growth = self.LogNorm(
        "RealGrowth",
        predicted_growth,
        region_scale_mult.reshape((self.nRs, 1)) * 0.1,
        shape=(self.nRs, self.nDs),
        plot_trace=False,
    )

    # [region, day] Multiplicative noise applied to predicted growth rate.
    self.Det("RealGrowthNoise",
             real_growth / predicted_growth,
             plot_trace=False)

    # [region] Initial size of the epidemic (the day before the start,
    # only those detected; wide prior OK).
    initial_size = self.LogNorm("InitialSize", 1.0, 10, shape=(self.nRs, ))

    # [region, day] The number of cases that would be detected with
    # noiseless testing.  (Noise sources include both false-P/N rates and
    # local variance in test volume and targeting.  Since we only care
    # about growth rates and assume consistent testing, the real size can
    # be ignored.)
    # NOTE(review): this reads self.RealGrowth rather than the local
    # returned by self.LogNorm above -- presumably the same variable,
    # exposed as an attribute; confirm.
    size = self.Det(
        "Size",
        T.reshape(initial_size, (self.nRs, 1)) * self.RealGrowth.cumprod(axis=1),
        plot_trace=False,
    )

    # [region, day] Cumulative tested positives.
    observed = self.LogNorm(
        "Observed",
        size,
        0.4,  # self.RegionScaleMult.reshape((self.nRs, 1)) * 0.4,
        shape=(self.nRs, self.nDs),
        observed=self.d.Confirmed,
        plot_trace=False,
    )

    # [region, day] Multiplicative noise applied to the observed counts.
    # Computed backwards, since Observed needs to be a distribution.
    self.Det("ObservedNoise", observed / size, plot_trace=False)
def buildFCN8(nb_in_channels, input_var,
              path_weights='/Tmp/romerosa/itinf/models/' +
              'camvid/new_fcn8_model_best.npz',
              n_classes=21, load_weights=True,
              void_labels=[], trainable=False,
              layer=['probs_dimshuffle'], pascal=False,
              temperature=1.0, dropout=0.5):
    '''
    Build the FCN8 network as a dict of Lasagne layers.

    The contracting path is five groups of 3x3 convolutions, each followed
    by pooling, then two convolutional "fc" layers with dropout and a
    scoring convolution.  The upsampling path fuses the scores with
    1x1-convolved pool4 and pool3 features through deconvolutions, crops
    the result against an input-shaped layer, and applies a softmax over
    the class axis.

    Parameters
    ----------
    nb_in_channels : number of channels of the input layer
    input_var : theano variable fed to the input layers
    n_classes : number of output classes
    load_weights : when true, the 'upsample' layer's W and b are divided
        by `temperature`; no weight file is read in this function
    trainable : when false, freezeParameters is applied to net['probs']
    layer : names of the layers to return
    temperature : divisor applied to the 'upsample' parameters
    dropout : dropout probability of the two dropout layers
    path_weights, void_labels, pascal : accepted but not used by the
        code in this function

    Returns
    -------
    list with net[name] for every name in `layer`
    '''
    net = {}

    def vgg_conv(source, n_filters, size=3, pad='same'):
        # Convolution used throughout the contracting path.
        return ConvLayer(net[source], n_filters, size, pad=pad,
                         flip_filters=False)

    # Contracting path: (stage suffix, filters, number of convolutions).
    net['input'] = InputLayer((None, nb_in_channels, None, None), input_var)
    stages = (
        ('1', 64, 2),
        ('2', 128, 2),
        ('3', 256, 3),
        ('4', 512, 3),
        ('5', 512, 3),
    )
    previous = 'input'
    for stage, n_filters, n_convs in stages:
        for idx in range(1, n_convs + 1):
            name = 'conv{}_{}'.format(stage, idx)
            # Only the very first convolution pads by 100.
            pad = 100 if previous == 'input' else 'same'
            net[name] = vgg_conv(previous, n_filters, pad=pad)
            previous = name
        net['pool' + stage] = PoolLayer(net[previous], 2)
        previous = 'pool' + stage

    # fc6 / fc7 as convolutions, then class scores.
    net['fc6'] = vgg_conv('pool5', 4096, size=7, pad='valid')
    net['fc6_dropout'] = DropoutLayer(net['fc6'], p=dropout)
    net['fc7'] = vgg_conv('fc6_dropout', 4096, size=1, pad='valid')
    net['fc7_dropout'] = DropoutLayer(net['fc7'], p=dropout)
    net['score_fr'] = vgg_conv('fc7_dropout', n_classes, size=1, pad='valid')

    # Upsampling path.
    center_crop = [None, None, 'center', 'center']

    # Unpool and fuse with pool4 scores.
    net['score2'] = DeconvLayer(net['score_fr'], n_classes, 4, stride=2,
                                crop='valid', nonlinearity=linear)
    net['score_pool4'] = ConvLayer(net['pool4'], n_classes, 1, pad='same')
    net['score_fused'] = ElemwiseSumLayer(
        (net['score2'], net['score_pool4']), cropping=center_crop)

    # Unpool and fuse with pool3 scores.
    net['score4'] = DeconvLayer(net['score_fused'], n_classes, 4, stride=2,
                                crop='valid', nonlinearity=linear)
    net['score_pool3'] = ConvLayer(net['pool3'], n_classes, 1, pad='valid')
    net['score_final'] = ElemwiseSumLayer(
        (net['score4'], net['score_pool3']), cropping=center_crop)

    # Final unpool, then crop against an input-shaped layer.  The merge
    # function keeps only the deconvolution output.
    net['upsample'] = DeconvLayer(net['score_final'], n_classes, 16,
                                  stride=8, crop='valid',
                                  nonlinearity=linear)
    n_upsample_channels = lasagne.layers.get_output_shape(net['upsample'])[1]
    net['input_tmp'] = InputLayer((None, n_upsample_channels, None, None),
                                  input_var)
    net['score'] = ElemwiseMergeLayer(
        (net['input_tmp'], net['upsample']),
        merge_function=lambda _reference, deconv: deconv,
        cropping=center_crop)

    # Final dimshuffle, reshape and softmax.
    net['final_dimshuffle'] = lasagne.layers.DimshuffleLayer(
        net['score'], (0, 2, 3, 1))
    score_shape = lasagne.layers.get_output(net['final_dimshuffle']).shape
    net['final_reshape'] = lasagne.layers.ReshapeLayer(
        net['final_dimshuffle'],
        (T.prod(score_shape[0:3]), score_shape[3]))
    net['probs'] = lasagne.layers.NonlinearityLayer(net['final_reshape'],
                                                    nonlinearity=softmax)

    # Do not train.
    if not trainable:
        freezeParameters(net['probs'])

    # Go back to 4D.
    net['probs_reshape'] = ReshapeLayer(
        net['probs'],
        (score_shape[0], score_shape[1], score_shape[2], n_classes))
    net['probs_dimshuffle'] = DimshuffleLayer(net['probs_reshape'],
                                              (0, 3, 1, 2))

    # Apply temperature to the last deconvolution's parameters.
    if load_weights:
        upsample = net['upsample']
        for param in (upsample.W, upsample.b):
            param.set_value(param.get_value() / temperature)

    return [net[name] for name in layer]
def batch_flatten(x):
    '''Reshape an n-D tensor to 2D, keeping the first dimension.

    The second dimension is the total element count divided by the size
    of the first dimension.
    '''
    n_rows = x.shape[0]
    n_cols = T.prod(x.shape) // n_rows
    return T.reshape(x, (n_rows, n_cols))
def max_pool_2d(input, ds, ignore_border=None, st=None, padding=(0, 0),
                mode='max'):
    """
    Pool an N-D tensor (N >= 2) over its last two dimensions.

    The input is downscaled by the given factor by applying ``mode`` to
    patches of size (ds[0], ds[1]).  Inputs that are not 4D are reshaped
    to (prod(leading dims), 1, height, width) for the op and reshaped
    back afterwards.

    Parameters
    ----------
    input : N-D theano tensor of input images
        Input images. Pooling is done over the 2 last dimensions.
    ds : tuple of length 2
        Factor by which to downscale (vertical ds, horizontal ds).
        (2,2) will halve the image in each dimension.
    ignore_border : bool (default None, which warns and acts as False)
        When True, (5,5) input with ds=(2,2) will generate a (2,2) output.
        (3,3) otherwise.
    st : tuple of length 2
        Stride size, which is the number of shifts over rows/cols to get
        the next pool region. If st is None, it is considered equal to ds
        (no overlap on pooling regions).
    padding : tuple of two ints
        (pad_h, pad_w), pad zeros to extend beyond four borders of the
        images, pad_h is the size of the top and bottom margins, and
        pad_w is the size of the left and right margins.
    mode : {'max', 'sum', 'average_inc_pad', 'average_exc_pad'}
        Operation executed on each window. `max` and `sum` always exclude
        the padding in the computation. `average` gives you the choice to
        include or exclude it.

    Raises
    ------
    NotImplementedError
        If ``input`` has fewer than two dimensions.
    """
    rank = input.ndim
    if rank < 2:
        raise NotImplementedError('max_pool_2d requires a dimension >= 2')

    if ignore_border is None:
        warnings.warn(
            "max_pool_2d() will have the parameter ignore_border"
            " default value changed to True (currently"
            " False). To have consistent behavior with all Theano"
            " version, explicitly add the parameter"
            " ignore_border=True. (this is also faster than"
            " ignore_border=False)",
            stacklevel=2)
        ignore_border = False

    pool = DownsampleFactorMax(ds, ignore_border, st=st, padding=padding,
                               mode=mode)

    # The op works on 4D tensors directly.
    if rank == 4:
        return pool(input)

    # Fold every leading axis into one batch axis and add a singleton
    # channel axis: (batch, 1, height, width).
    image_dims = input.shape[-2:]
    n_images = tensor.shape_padright(tensor.prod(input.shape[:-2]), 1)
    shape_4d = tensor.cast(
        tensor.join(0, n_images, tensor.as_tensor([1]), image_dims),
        'int64')
    pooled = pool(tensor.reshape(input, shape_4d, ndim=4))

    # Restore the leading axes in front of the pooled image axes.
    restored_shape = tensor.join(0, input.shape[:-2], pooled.shape[-2:])
    return tensor.reshape(pooled, restored_shape, ndim=rank)
def max_pool_3d(input, ds, ignore_border=False):
    """
    Pool an N-D tensor (N >= 3) over its last three dimensions.

    The last three axes are treated as (time, height, width).  Pooling is
    done in two passes with the 2D ``Pool`` op: first over (height, width)
    with window (ds[1], ds[2]), then, after moving time to the last axis,
    over time with window (1, ds[0]).  The axes are put back in their
    original order before returning.

    :type input: N-D theano tensor of input images.
    :param input: input images. Pooling is done over the 3 last
        dimensions.
    :type ds: tuple of length 3
    :param ds: factor by which to downscale. (2,2,2) will halve the video
        in each dimension.
    :param ignore_border: boolean value. When True, (5,5,5) input with
        ds=(2,2,2) will generate a (2,2,2) output. (3,3,3) otherwise.
    :raises NotImplementedError: if ``input`` has fewer than three
        dimensions.
    """
    if input.ndim < 3:
        raise NotImplementedError('max_pool_3d requires a dimension >= 3')

    rank = input.ndim
    leading_axes = list(range(rank - 3))

    def pool_last_two(data, window):
        # Fold the leading axes into (batch, 1, a, b), pool over the last
        # two axes, then restore the leading axes.
        trailing = data.shape[-2:]
        n_batch = tensor.shape_padright(tensor.prod(data.shape[:-2]), 1)
        shape_4d = tensor.cast(
            tensor.join(0, n_batch, tensor.as_tensor([1, ]), trailing),
            'int32')
        data_4d = tensor.reshape(data, shape_4d, ndim=4)
        pooled = Pool(ignore_border)(data_4d, ws=window)
        restored = tensor.join(0, data.shape[:-2], pooled.shape[-2:])
        return tensor.reshape(pooled, restored, ndim=rank)

    # Pass 1: pool every frame over rows and columns.
    frames_pooled = pool_last_two(input, (ds[1], ds[2]))

    # Move time to the back: (..., time, rows, cols) -> (..., rows, cols,
    # time).
    time_last = frames_pooled.dimshuffle(
        leading_axes + [rank - 2] + [rank - 1] + [rank - 3])

    # Pass 2: pool over time only.
    time_pooled = pool_last_two(time_last, (1, ds[0]))

    # Back to (..., time, rows, cols).
    return time_pooled.dimshuffle(
        leading_axes + [rank - 1] + [rank - 3] + [rank - 2])
def get_regs(self, x0_, x, M):
    """
    Sum the regularization terms enabled in ``self.config``.

    Three terms are considered, each active only when its weight is
    positive:

    * ``L1_Wrec`` -- mean absolute value of ``Wrec``.
    * ``L2_Wrec`` -- squared entries of ``Wrec`` and ``Wrec_gates``,
      divided by their combined element count.
    * ``L2_r``    -- squared ``self.f_hidden`` output over ``x0_``
      prepended to ``x``, weighted by the mask ``M`` and divided by the
      mask sum.

    Returns 0 when no term is active.
    """
    total = 0

    # L1 on the recurrent weights (gates are not included here).
    l1_weight = self.config['L1_Wrec']
    if l1_weight > 0:
        print("L1_Wrec = {}".format(l1_weight))
        w_rec = self.get('Wrec')
        penalty = tensor.sum(abs(w_rec))
        count = tensor.prod(w_rec.shape)
        total = total + l1_weight * penalty/count

    # L2 on the recurrent weights and the gate weights.
    l2_weight = self.config['L2_Wrec']
    if l2_weight > 0:
        print("L2_Wrec = {}".format(l2_weight))
        w_rec = self.get('Wrec')
        w_gates = self.get('Wrec_gates')
        penalty = (tensor.sum(tensor.sqr(w_rec))
                   + tensor.sum(tensor.sqr(w_gates)))
        count = tensor.prod(w_rec.shape) + tensor.prod(w_gates.shape)
        total = total + l2_weight * penalty/count

    # L2 on the firing rates.
    rate_weight = self.config['L2_r']
    if rate_weight > 0:
        # Repeat the mask along a new last axis of length x.shape[-1].
        mask = (tensor.tile(M.T, (x.shape[-1], 1, 1))).T

        # Prepend the initial state so t=0 is penalized together with t>0.
        initial = x0_.reshape((1, x0_.shape[0], x0_.shape[1]))
        states = tensor.concatenate([initial, x], axis=0)

        rates = self.f_hidden(states)
        total = total + (rate_weight * tensor.sum(tensor.sqr(rates)*mask)
                         / tensor.sum(mask))

    return total
def normal(self, size, avg=0.0, std=1.0, ndim=None, dtype=None,
           nstreams=None, truncate=False, **kwargs):
    """
    Sample a tensor of values from a normal distribution.

    Samples are produced with the Box-Muller transform from uniform
    draws obtained through ``self.uniform``, then scaled by ``std`` and
    shifted by ``avg``.

    Parameters
    ----------
    size : int_vector_like
        Array dimensions for the output tensor.
    avg : float_like, optional
        The mean value of the distribution to sample from
        (defaults to 0.0).
    std : float_like, optional
        The standard deviation of the distribution to sample from
        (defaults to 1.0).
    truncate : bool, optional
        Truncates the normal distribution at 2 standard deviations if True
        (defaults to False). When this flag is set, the standard deviation of
        the result will be less than the one specified.
    ndim : int, optional
        The number of dimensions for the output tensor (defaults to None).
        This argument is necessary if the size argument is ambiguous on the
        number of dimensions.
    dtype : str, optional
        The data-type for the output tensor. If not specified,
        the dtype is inferred from avg and std, but it is at least as
        precise as floatX.
    nstreams : optional
        Forwarded to ``self.uniform``.
    kwargs
        Other keyword arguments for random number generation (see uniform).

    Returns
    -------
    samples : TensorVariable
        A Theano tensor of samples randomly drawn from a normal distribution.

    """
    size = _check_size(size)
    avg = undefined_grad(as_tensor_variable(avg))
    std = undefined_grad(as_tensor_variable(std))

    if dtype is None:
        dtype = scal.upcast(config.floatX, avg.dtype, std.dtype)

    avg = tensor.cast(avg, dtype=dtype)
    std = tensor.cast(std, dtype=dtype)

    # generate even number of uniform samples
    # Do manual constant folding to lower optimizer work.
    if isinstance(size, theano.Constant):
        n_odd_samples = size.prod(dtype='int64')
    else:
        n_odd_samples = tensor.prod(size, dtype='int64')
    # Despite its name, n_odd_samples is simply the requested count; it is
    # rounded up to the next even number so the draws split into halves.
    n_even_samples = n_odd_samples + n_odd_samples % 2
    uniform = self.uniform((n_even_samples, ), low=0., high=1.,
                           ndim=1, dtype=dtype, nstreams=nstreams, **kwargs)

    # box-muller transform: first half gives the radius, second the angle
    u1 = uniform[:n_even_samples // 2]
    u2 = uniform[n_even_samples // 2:]
    r = tensor.sqrt(-2.0 * tensor.log(u1))
    theta = np.array(2.0 * np.pi, dtype=dtype) * u2
    cos_theta, sin_theta = tensor.cos(theta), tensor.sin(theta)
    z0 = r * cos_theta
    z1 = r * sin_theta

    if truncate:
        # use valid samples
        to_fix0 = (z0 < -2.) | (z0 > 2.)
        to_fix1 = (z1 < -2.) | (z1 > 2.)
        z0_valid = z0[tensor.nonzero(~to_fix0)]
        z1_valid = z1[tensor.nonzero(~to_fix1)]

        # re-sample invalid samples
        to_fix0 = tensor.nonzero(to_fix0)[0]
        to_fix1 = tensor.nonzero(to_fix1)[0]
        n_fix_samples = to_fix0.size + to_fix1.size
        # Drawing u from [1/e**2, 1] bounds the new radius
        # sqrt(-2 * log(u)) by 2; the original angles are reused, so the
        # replacement samples stay within [-2, 2].
        lower = tensor.constant(1. / np.e**2, dtype=dtype)
        u_fix = self.uniform((n_fix_samples, ), low=lower, high=1.,
                             ndim=1, dtype=dtype, nstreams=nstreams, **kwargs)
        r_fix = tensor.sqrt(-2. * tensor.log(u_fix))
        z0_fixed = r_fix[:to_fix0.size] * cos_theta[to_fix0]
        z1_fixed = r_fix[to_fix0.size:] * sin_theta[to_fix1]

        # pack everything together to a useful result
        norm_samples = tensor.join(0, z0_valid, z0_fixed, z1_valid, z1_fixed)
    else:
        norm_samples = tensor.join(0, z0, z1)
    # Drop the padding sample that was added when the requested count was
    # odd.  For a symbolic count, slicing to the requested length covers
    # both cases.
    if isinstance(n_odd_samples, theano.Variable):
        samples = norm_samples[:n_odd_samples]
    elif n_odd_samples % 2 == 1:
        samples = norm_samples[:-1]
    else:
        samples = norm_samples
    samples = tensor.reshape(samples, newshape=size, ndim=ndim)
    samples *= std
    samples += avg

    return samples
def init_opt(self):
    """Build the symbolic surrogate loss and register it with the optimizer.

    Four inputs are declared (raw 3D observations, sparse 2D
    observations, actions, advantages).  The raw observations are
    reshaped to 2D, the low-level policy's action likelihoods are
    multiplied over each block of ``self.period`` steps for every latent,
    and the result is weighted by the manager's latent probabilities.

    Returns an empty dict.
    """
    floatX = theano.config.floatX

    obs_var_raw = ext.new_tensor('obs', ndim=3, dtype=floatX)  # todo: check the dtype
    action_var = self.env.action_space.new_tensor_variable(
        'action',
        extra_dims=1,
    )
    # One advantage value every self.period timesteps.
    advantage_var = ext.new_tensor('advantage', ndim=1, dtype=floatX)
    # todo: check this with carlos, refer to discrete.py in rllab.spaces
    obs_var_sparse = ext.new_tensor('sparse_obs', ndim=2, dtype=floatX)

    assert isinstance(self.policy, HierarchicalPolicy)

    # todo: assumptions: 1 trajectory, which is a multiple of p; that the
    # obs_var_probs is valid
    # Undo the grouping of observations so batch sampling is ok.
    flat_rows = obs_var_raw.shape[0] * obs_var_raw.shape[1]
    obs_var = TT.reshape(obs_var_raw, [flat_rows, obs_var_raw.shape[2]])

    # Entry (i, j): probability of latent j at time step self.period * i.
    latent_probs = self.policy.manager.dist_info_sym(obs_var_sparse)['prob']

    # Distribution parameters of the low-level policy for every latent.
    dist_info_vars = self.policy.low_policy.dist_info_sym_all_latents(obs_var)

    # For each latent: per-step action likelihood, grouped by period and
    # multiplied out within each group.
    per_latent = []
    for dist_info in dist_info_vars:
        step_prob = TT.exp(
            self.diagonal.log_likelihood_sym(action_var, dist_info))
        grouped = TT.reshape(
            step_prob, [obs_var.shape[0] // self.period, self.period])
        per_latent.append(TT.prod(grouped, axis=1))
    subtrajectory_probs = TT.stack(per_latent, axis=1)  # shape error might come out of here

    # Weight by the latent probabilities, reduce each row, take the log.
    # NOTE(review): the reduction is a max over latents, although the
    # previous comment described a sum -- confirm which is intended.
    likelihood = TT.log(TT.max(subtrajectory_probs * latent_probs, axis=1))
    surr_loss = -TT.mean(likelihood * advantage_var)

    # npo has state_info_vars and old_dist_info_vars; not needed until
    # NPO/TRPO is used.
    self.optimizer.update_opt(
        loss=surr_loss,
        target=self.policy,
        inputs=[obs_var_raw, obs_var_sparse, action_var, advantage_var],
    )
    return {}
def normal(self, size=None, avg=0.0, std=1.0, ndim=None, dtype=None,
           nstreams=None):
    """
    Sample from a normal distribution using the Box-Muller transform.

    An even number of uniform samples is drawn through ``self.uniform``
    and split in two halves: the first half provides the U1 values, the
    second the U2 values.  See
    http://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform

    :param size: Can be a list of integers or Theano variables (ex: the
        shape of another Theano Variable).  NOTE(review): the default of
        None is passed straight to ``prod``; confirm whether calling
        without a size is supported.
    :param avg: Mean of the distribution.
    :param std: Standard deviation of the distribution.
    :param ndim: Not used by this method.
    :param dtype: The output data type. If dtype is not specified, it will
        be inferred from the dtype of avg and std, but will be at least as
        precise as floatX.
    :param nstreams: Number of streams.
    """
    avg = as_tensor_variable(avg)
    std = as_tensor_variable(std)
    if dtype is None:
        dtype = scal.upcast(config.floatX, avg.dtype, std.dtype)
    avg, std = cast(avg, dtype), cast(std, dtype)

    # A tuple of Python ints lets the sample count be computed eagerly.
    static_size = (isinstance(size, tuple)
                   and all(isinstance(dim, int) for dim in size))

    padded = False
    if static_size:
        n_uniform = numpy.prod(size)
        if n_uniform % 2 == 1:
            n_uniform += 1
            padded = True
    else:
        # If even, don't change; if odd, add one.
        n_uniform = prod(size) + (prod(size) % 2)

    uniform = self.uniform(size=(n_uniform, ), dtype=dtype,
                           nstreams=nstreams)

    if static_size:
        half = n_uniform // 2
    else:
        half = prod(uniform.shape) // 2
    u1, u2 = uniform[:half], uniform[half:]

    # TensorVariables do not support item assignment, so the two halves
    # are built separately and joined.
    radius = sqrt(-2.0 * log(u1))
    angle = numpy.array(2.0 * numpy.pi, dtype=dtype) * u2
    samples = join(0, radius * cos(angle), radius * sin(angle))

    # Drop the extra sample that was added to make the count even.
    if padded:
        samples = samples[:-1]
    elif not static_size:
        samples = samples[:prod(size)]

    if size:
        samples = samples.reshape(size)

    samples = avg + std * samples
    assert samples.dtype == dtype
    return samples
def _get_scaling(total_size, shape, ndim): """ Gets scaling constant for logp Parameters ---------- total_size : int or list[int] shape : shape shape to scale ndim : int ndim hint Returns ------- scalar """ if total_size is None: coef = pm.floatX(1) elif isinstance(total_size, int): if ndim >= 1: denom = shape[0] else: denom = 1 coef = pm.floatX(total_size) / pm.floatX(denom) elif isinstance(total_size, (list, tuple)): if not all( isinstance(i, int) for i in total_size if (i is not Ellipsis and i is not None)): raise TypeError('Unrecognized `total_size` type, expected ' 'int or list of ints, got %r' % total_size) if Ellipsis in total_size: sep = total_size.index(Ellipsis) begin = total_size[:sep] end = total_size[sep + 1:] if Ellipsis in end: raise ValueError( 'Double Ellipsis in `total_size` is restricted, got %r' % total_size) else: begin = total_size end = [] if (len(begin) + len(end)) > ndim: raise ValueError('Length of `total_size` is too big, ' 'number of scalings is bigger that ndim, got %r' % total_size) elif (len(begin) + len(end)) == 0: return pm.floatX(1) if len(end) > 0: shp_end = shape[-len(end):] else: shp_end = np.asarray([]) shp_begin = shape[:len(begin)] begin_coef = [ pm.floatX(t) / shp_begin[i] for i, t in enumerate(begin) if t is not None ] end_coef = [ pm.floatX(t) / shp_end[i] for i, t in enumerate(end) if t is not None ] coefs = begin_coef + end_coef coef = tt.prod(coefs) else: raise TypeError('Unrecognized `total_size` type, expected ' 'int or list of ints, got %r' % total_size) return tt.as_tensor(pm.floatX(coef))
def batch_flatten(x):
    """Reshape ``x`` to 2D: the first axis is kept, the rest is merged.

    The size of the merged axis is the total element count divided by the
    size of the first axis.
    """
    first = x.shape[0]
    remainder = T.prod(x.shape) // first
    flat_shape = (first, remainder)
    return T.reshape(x, flat_shape)
def prod(self, x, axis=None, keepdims=False):
    '''Multiply the values in a tensor along the specified axis.

    Thin wrapper that forwards ``x``, ``axis`` and ``keepdims`` to
    ``T.prod`` and returns its result.
    '''
    product = T.prod(x, axis=axis, keepdims=keepdims)
    return product