def tensordot(a, b, axes=2):
    """
    Tensor contraction expressed as a single matrix product.

    Both operands are reshaped to matrices so the contraction becomes one
    ``tensor.dot`` call (which can be GPU accelerated, unlike a wrapper
    around ``numpy.tensordot``).
    Based on code from Tijmen Tieleman's gnumpy
    http://www.cs.toronto.edu/~tijmen/gnumpy.html

    :param a: left operand (symbolic tensor).
    :param b: right operand (symbolic tensor).
    :param axes: either a scalar ``n`` (contract the last ``n`` axes of ``a``
        with the first ``n`` axes of ``b``), or a pair
        ``(a_axes, b_axes)`` where each entry is a sequence of axis indices
        or a single integer axis.
    :raises ValueError: if ``axes`` is neither a scalar nor of length 2.
    :return: the contracted tensor, of rank ``a.ndim + b.ndim - 2 * n``.
    """
    if numpy.isscalar(axes):
        # 'axes' counts the axes to multiply and sum over (trailing axes of
        # a, leading axes of b): flatten both operands to matrices and dot.
        a_keep = a.ndim - axes
        outshape = tensor.concatenate([a.shape[:a_keep], b.shape[axes:]])
        outndim = a.ndim + b.ndim - 2 * axes
        a_reshaped = a.reshape((tensor.prod(a.shape[:a_keep]),
                                tensor.prod(a.shape[a_keep:])))
        b_reshaped = b.reshape((tensor.prod(b.shape[:axes]),
                                tensor.prod(b.shape[axes:])))
        return tensor.dot(a_reshaped, b_reshaped).reshape(outshape, ndim=outndim)
    elif len(axes) == 2:
        # 'axes' is a pair of axis lists: move the contracted axes to the end
        # of a and to the front of b, then recurse into the scalar case.
        # A bare integer is accepted as a one-element axis list.
        a_other, b_other = (
            (ax,) if numpy.isscalar(ax) else tuple(ax) for ax in axes
        )
        num_axes = len(a_other)
        # range() instead of xrange() so this also runs on Python 3.
        a_order = tuple(x for x in range(a.ndim) if x not in a_other) + a_other
        b_order = b_other + tuple(x for x in range(b.ndim) if x not in b_other)
        a_shuffled = a.dimshuffle(a_order)
        b_shuffled = b.dimshuffle(b_order)
        return tensordot(a_shuffled, b_shuffled, num_axes)
    else:
        raise ValueError("Axes should be scalar valued or a list/tuple of len 2.")
Exemple #2
0
    def __init__(self, rng, input, n_in, n_out, params=None):
        """
        Fully connected layer with a ReLU non-linearity.

        :param rng: random generator exposing ``uniform(low, high, size)``;
            only used when ``params`` is not supplied.
        :param input: symbolic input fed to ``T.dot(input, W)``.
        :param n_in: number of input units (rows of ``W``).
        :param n_out: number of output units (columns of ``W``, length of ``b``).
        :param params: optional pair ``[W, b]`` of existing parameters to
            reuse; any falsy value means "create fresh parameters".
        """
        self.input = input
        if params:
            # Reuse externally supplied weights and bias.
            self.W, self.b = params[0], params[1]
        else:
            # Uniform initialisation in +/- sqrt(6 / (n_in + n_out)).
            bound = np.sqrt(6. / (n_in + n_out))
            W_values = np.asarray(
                rng.uniform(low=-bound, high=bound, size=(n_in, n_out)),
                dtype=theano.config.floatX
            )
            self.W = theano.shared(value=W_values, name='W', borrow=True)

            b_values = np.zeros((n_out,), dtype=theano.config.floatX)
            self.b = theano.shared(value=b_values, name='b', borrow=True)

        self.output = T.nnet.relu(T.dot(input, self.W) + self.b)

        self.params = [self.W, self.b]

        # Symbolic element count of W and b times 4.
        # NOTE(review): the factor 4 presumably means 4 bytes per element
        # (float32); confirm this is still right when floatX is float64.
        self.mem_size = (T.prod(self.W.shape) + T.prod(self.b.shape)) * 4
def maxpool_3D(input, ds, ignore_border=False):
    """
    Max-pool the three trailing axes of ``input`` using two 2D pooling passes.

    Pass 1 pools the last two axes with window ``(ds[1], ds[2])``.  Pass 2
    moves axis ``ndim - 3`` to the back, pools it with window ``ds[0]`` and
    then restores the original axis order.

    :param input: symbolic tensor with at least 3 dimensions.
    :param ds: sequence of three pooling factors, ordered like the three
        trailing axes of ``input``.
    :param ignore_border: forwarded to ``DownsampleFactorMax``.
    :raises NotImplementedError: if ``input.ndim < 3``.
    """
    if input.ndim < 3:
        raise NotImplementedError('max_pool_3d requires a dimension >= 3')

    rank = input.ndim

    # ---- pass 1: pool the two trailing axes --------------------------------
    # Collapse every leading axis into one batch axis and add a dummy channel
    # axis, giving the 4D layout (batch, 1, rows, cols) the 2D op works on.
    leading = input.shape[:-2]
    n_frames = T.shape_padright(T.prod(leading), 1)
    frames_shape = T.cast(
        T.join(0, n_frames, T.as_tensor([1,]), input.shape[-2:]), 'int32')
    frames = T.reshape(input, frames_shape, ndim=4)
    pooled_frames = DownsampleFactorMax((ds[1], ds[2]), ignore_border)(frames)
    # Put the leading axes back in front of the pooled frame axes.
    spatial_shape = T.join(0, leading, pooled_frames.shape[-2:])
    spatially_pooled = T.reshape(pooled_frames, spatial_shape, ndim=rank)

    # ---- pass 2: pool axis ``rank - 3`` -------------------------------------
    # Rotate the three trailing axes so the remaining one ends up last.
    to_back = list(range(rank - 3)) + [rank - 2] + [rank - 1] + [rank - 3]
    time_last = spatially_pooled.dimshuffle(to_back)
    n_rows = T.shape_padright(T.prod(time_last.shape[:-2]), 1)
    rows_shape = T.cast(
        T.join(0, n_rows, T.as_tensor([1,]), time_last.shape[-2:]), 'int32')
    rows = T.reshape(time_last, rows_shape, ndim=4)
    # Window of 1 on the second-to-last axis: only the last axis shrinks.
    pooled_rows = DownsampleFactorMax((1, ds[0]), ignore_border)(rows)

    # Restore the leading axes, then undo the rotation.
    final_shape = T.join(0, time_last.shape[:-2], pooled_rows.shape[-2:])
    to_front = list(range(rank - 3)) + [rank - 1] + [rank - 3] + [rank - 2]
    return T.reshape(pooled_rows, final_shape, ndim=input.ndim).dimshuffle(to_front)
Exemple #4
0
        def apply(self, application, *args, **kwargs):
            # Calls the wrapped application after folding the first
            # `extra_ndim` leading axes of every input into the axis that
            # follows them, then unfolds those axes again on every output.
            #
            # `extra_ndim` is conceptually mandatory, but it is read from
            # **kwargs (defaulting to 0) so that it cannot be mistaken for a
            # positional input.
            extra_ndim = kwargs.get("extra_ndim", 0)

            # Map positional values to input names, then add named inputs.
            call_inputs = dict(zip(application.inputs, args))
            call_inputs.update(
                dict_subset(kwargs, application.inputs, must_have=False))

            # Only reshape when asked to, so the computation graph is not
            # polluted with no-op reshapes.
            if extra_ndim > 0:
                for key, value in call_inputs.items():
                    in_shape = value.shape
                    # Kept for unfolding the outputs; any input will do since
                    # the extra dimensions are assumed to match across inputs.
                    extra_dims = in_shape[:extra_ndim]
                    folded = tensor.prod(in_shape[: extra_ndim + 1])
                    folded_shape = tensor.join(
                        0, folded[None], in_shape[extra_ndim + 1 :])
                    call_inputs[key] = value.reshape(
                        folded_shape, ndim=value.ndim - extra_ndim)

            outputs = wrapped.__get__(self, None)(**call_inputs)
            if extra_ndim == 0:
                return outputs

            unfolded = []
            for result in pack(outputs):
                out_shape = result.shape
                # Split the folded leading axis back into the extra axes plus
                # what is left of the original first regular axis.
                remainder = (out_shape[0] // tensor.prod(extra_dims))[None]
                full_shape = tensor.join(0, extra_dims, remainder, out_shape[1:])
                unfolded.append(
                    result.reshape(full_shape, ndim=result.ndim + extra_ndim))
            return unfolded
    def output_probabilistic(self, m_w_previous, v_w_previous):
        """
        Propagate a mean/variance pair through this layer.

        Linear layer (``self.non_linear`` falsy): a constant bias input with
        mean 1 and variance 0 is appended to the incoming moments, and the
        returned moments are built from dot products with ``self.m_w`` /
        ``self.v_w``, scaled by ``self.n_inputs``.

        Non-linear layer: with ``d = self.m_w - m_w_previous`` and
        ``v = self.v_w`` the method returns, reducing over axis 1,
        ``m_a = prod(1 + 2*lam*v)**-0.5 * exp(-lam * sum(d**2 / (1 + 2*lam*v)))``
        and the same expression with ``4*lam`` minus ``m_a**2`` as variance.
        ``v_w_previous`` is not used in this branch.
        NOTE(review): the original comment called this a ReLU; the formulas
        look like the moments of ``exp(-lam * ||w - x||**2)`` under a
        diagonal Gaussian on ``w`` -- confirm the intended activation.

        :return: tuple ``(mean, variance)`` of symbolic expressions.
        """
        if not self.non_linear:
            # Append the bias unit: mean 1, variance 0.
            prev_mean = T.concatenate([m_w_previous, T.alloc(1, 1)], 0)
            prev_var = T.concatenate([v_w_previous, T.alloc(0, 1)], 0)

            m_linear = T.dot(self.m_w, prev_mean) / T.sqrt(self.n_inputs)
            v_linear = (T.dot(self.v_w, prev_var)
                        + T.dot(self.m_w**2, prev_var)
                        + T.dot(self.v_w, prev_mean**2)) / self.n_inputs
            return (m_linear, v_linear)

        m_in = self.m_w - m_w_previous
        v_in = self.v_w
        lam = self.lam
        v_inv = v_in**-1

        def _moment(factor):
            # factor=2 yields the first moment, factor=4 the second one.
            inflated = 1 + factor*lam*v_in
            scale = T.prod(inflated, axis=1)**-0.5
            exponent = T.sum(m_in**2*(1 - inflated**-1)*v_inv, axis=1)
            return scale*T.exp(-0.5*exponent)

        m_a = _moment(2)
        v_a = _moment(4) - m_a**2
        return (m_a, v_a)
Exemple #6
0
def _theano_cpu_multi_batch_beam_grad(array, start_idxs, batch_lens, beam_width, wrap_mode, pad_left=0, pad_right=0, idx_dim=0, batch_dim=1, output_grad=None):
  """
  Gradient of ``multi_batch_beam`` w.r.t. ``array`` (and the pad values).

  The forward op is re-run on an index array shaped like ``array`` so that
  every beam entry knows which flat source position it came from; the output
  gradient is then scatter-added into a flat buffer at those positions.

  Note: This is slow and hacky. It creates an index array of the size of the
  original array. The original author notes the index computation runs on
  the CPU; the subtensor can then be done on the GPU, but the first part
  should be avoided.

  :param array: forward input whose gradient is wanted.
  :param start_idxs, batch_lens, beam_width, idx_dim, batch_dim: forwarded
    unchanged to ``multi_batch_beam``.
  :param wrap_mode: forwarded to ``multi_batch_beam``; with ``"pad"`` the
    pad values get gradients too.
  :param pad_left, pad_right: pad values; only inspected when
    ``wrap_mode == "pad"``, in which case they must expose ``.shape``.
  :param output_grad: gradient w.r.t. the forward output.
  :return: ``(D_array, D_pad_left, D_pad_right)``; the pad gradients are
    disconnected unless ``wrap_mode == "pad"``.
  """
  D_beam = output_grad
  use_pad = wrap_mode == "pad"
  prod_array_shape = T.prod(array.shape)
  D_array_tmp_size = prod_array_shape
  if use_pad:
    # The pad shapes are only touched here, so the integer defaults for
    # pad_left / pad_right keep working in every other wrap mode.
    prod_pad_left_shape = T.prod(pad_left.shape)
    prod_pad_right_shape = T.prod(pad_right.shape)
    D_array_tmp_size = prod_array_shape + prod_pad_left_shape + prod_pad_right_shape
    # Indices of D_pad_left / D_pad_right inside the flat buffer: they are
    # stored right after the entries belonging to `array`.
    pad_left_idxs = T.arange(prod_pad_left_shape) + prod_array_shape
    pad_right_idxs = T.arange(prod_pad_right_shape) + prod_array_shape + prod_pad_left_shape
    pad_left_idxs = pad_left_idxs.reshape(pad_left.shape)
    pad_right_idxs = pad_right_idxs.reshape(pad_right.shape)
  else:
    pad_left_idxs = pad_right_idxs = 0
  # Flat accumulation buffer (array entries, then the pad entries if any).
  D_array_tmp_flat = T.zeros([D_array_tmp_size], dtype="float32")
  all_idxs = T.arange(prod_array_shape).reshape(array.shape)
  idxs = multi_batch_beam(array=all_idxs, start_idxs=start_idxs, batch_lens=batch_lens, beam_width=beam_width,
                          wrap_mode=wrap_mode,
                          pad_left=pad_left_idxs, pad_right=pad_right_idxs,
                          idx_dim=idx_dim, batch_dim=batch_dim)
  # Scatter-add: positions selected several times accumulate their gradients.
  D_array_tmp_flat = T.inc_subtensor(D_array_tmp_flat[idxs.flatten()], D_beam.flatten())
  if use_pad:
    D_array = D_array_tmp_flat[:prod_array_shape].reshape(array.shape)
    D_pad_left = D_array_tmp_flat[pad_left_idxs.flatten()].reshape(pad_left.shape)
    D_pad_right = D_array_tmp_flat[pad_right_idxs.flatten()].reshape(pad_right.shape)
  else:
    D_array = D_array_tmp_flat.reshape(array.shape)
    D_pad_left = D_pad_right = T.DisconnectedType()()

  return D_array, D_pad_left, D_pad_right
Exemple #7
0
def kl_normal_diagonal(mu1,sigma_diag1,mu2,sigma_diag2,dim):
    """
    KL divergence ``KL(N(mu1, diag(sigma_diag1)) || N(mu2, diag(sigma_diag2)))``.

    Computes ``0.5 * (log(det2 / det1) - dim + tr(S2^-1 S1)
    + (mu2 - mu1)^T S2^-1 (mu2 - mu1))`` for diagonal covariances.

    :param mu1, mu2: mean vectors.
    :param sigma_diag1, sigma_diag2: diagonals of the covariance matrices
        (variances); assumed to be 1-D and aligned element-wise with the
        means -- TODO confirm against callers.
    :param dim: dimensionality of the distributions.
    :return: symbolic scalar.
    """
    inv_sigma_diag2 = 1 / sigma_diag2
    mu_diff = mu2 - mu1
    # log(det2 / det1) as a difference of log-sums: same value for positive
    # variances, but the plain product of many variances under/overflows.
    log_det_ratio = T.sum(T.log(sigma_diag2)) - T.sum(T.log(sigma_diag1))
    # tr(S2^-1 S1) for diagonal matrices.
    trace_term = T.sum(inv_sigma_diag2 * sigma_diag1)
    # Quadratic form with a diagonal S2^-1 is an element-wise weighted sum.
    quad_term = T.sum(mu_diff * inv_sigma_diag2 * mu_diff)
    return 0.5 * (log_det_ratio - dim + trace_term + quad_term)
Exemple #8
0
def max_pool_3d(input, ds, ignore_border=False):
    """
    Max-pool a video tensor with two passes of the 2D pooling op.

    The last two axes are pooled with window ``(ds[1], ds[2])``.  The axis
    pooled with ``ds[0]`` is axis ``ndim - 4``; axis ``ndim - 3`` is left
    untouched.  In a 5D input that means layout
    ``(batch, time, <axis kept as is>, height, width)``.
    NOTE(review): the untouched axis is presumably the channel axis --
    confirm against callers.

    :type input: N-D theano tensor of input videos, N >= 4.
    :param input: input videos.
    :type ds: tuple of length 3
    :param ds: factor by which to downscale, ordered (time, height, width).
        (2,2,2) will halve the video in each pooled dimension.
    :param ignore_border: boolean value, forwarded to ``DownsampleFactorMax``.
        Example when True, (5,5,5) input with ds=(2,2,2) will generate a
        (2,2,2) output. (3,3,3) otherwise.
    :raises NotImplementedError: if ``input.ndim < 4``.
    """
    # The axis shuffles below address axis ``ndim - 4``, so at least four
    # dimensions are needed (the previous ``< 3`` guard let 3-D input through
    # to an invalid dimshuffle pattern).
    if input.ndim < 4:
        raise NotImplementedError('max_pool_3d requires a dimension >= 4')

    vid_dim = input.ndim

    # ---- Maxpool frames ------------------------------------------------------
    frame_shape = input.shape[-2:]
    # Collapse all leading axes into one batch axis of length prod(leading).
    batch_size = T.prod(input.shape[:-2])
    batch_size = T.shape_padright(batch_size, 1)
    # 4D layout expected by the 2D op: (batch_size, 1, height, width).
    new_shape = T.cast(T.join(0, batch_size, T.as_tensor([1,]), frame_shape), 'int32')
    input_4D = T.reshape(input, new_shape, ndim=4)
    # Downsample the mini-batch of videos in rows and cols.
    op = DownsampleFactorMax((ds[1], ds[2]), ignore_border)
    output = op(input_4D)
    # Restore the leading axes.
    outshape = T.join(0, input.shape[:-2], output.shape[-2:])
    out = T.reshape(output, outshape, ndim=input.ndim)

    # ---- Maxpool time --------------------------------------------------------
    # Move axis ``vid_dim - 4`` to the back so the 2D op can reach it.
    shufl = (list(range(vid_dim - 4)) + list(range(vid_dim - 3, vid_dim)) + [vid_dim - 4])
    input_time = out.dimshuffle(shufl)
    vid_shape = input_time.shape[-2:]
    batch_size = T.prod(input_time.shape[:-2])
    batch_size = T.shape_padright(batch_size, 1)
    # 4D layout: (batch_size, 1, width, time).
    new_shape = T.cast(T.join(0, batch_size, T.as_tensor([1,]), vid_shape), 'int32')
    input_4D_time = T.reshape(input_time, new_shape, ndim=4)
    # Window of 1 on the width axis: only the time axis is downsampled.
    op = DownsampleFactorMax((1, ds[0]), ignore_border)
    outtime = op(input_4D_time)
    # Restore the leading axes, then move the time axis back to ``vid_dim - 4``.
    outshape = T.join(0, input_time.shape[:-2], outtime.shape[-2:])
    shufl = (list(range(vid_dim - 4)) + [vid_dim - 1] + list(range(vid_dim - 4, vid_dim - 1)))
    return T.reshape(outtime, outshape, ndim=input.ndim).dimshuffle(shufl)
def get_norms(model, gradients):
    """
    Compute the L2 norm of each weight and of its gradient, divided by the
    number of elements.

    :param model: object whose ``params`` attribute maps parameter names to
        symbolic parameters.
    :param gradients: mapping from each parameter to its gradient.
    :return: ``(norms, grad_norms)``, two lists in ``model.params`` order;
        each entry is named ``'norm_<name>'`` / ``'grad_norm_<name>'``.
    """
    norms = []
    grad_norms = []
    # .items() works on Python 2 and 3 alike; .iteritems() is Python-2-only.
    for param_name, param in model.params.items():
        norm = T.sqrt(T.sum(T.square(param))) / T.prod(param.shape.astype(theano.config.floatX))
        norm.name = 'norm_' + param_name
        norms.append(norm)
        grad = gradients[param]
        grad_norm = T.sqrt(T.sum(T.square(grad))) / T.prod(grad.shape.astype(theano.config.floatX))
        grad_norm.name = 'grad_norm_' + param_name
        grad_norms.append(grad_norm)
    return norms, grad_norms
Exemple #10
0
def img_2_neibs_with_chans(inputs_sym, patch_size):
    """
    Extract all stride-1 patches of a 4D input as rows of a matrix, keeping
    the values of axis 1 of the input together in each row.

    ``images2neibs`` output is first viewed as a 6D tensor
    ``(in0, in1, in2 - ph + 1, in3 - pw + 1, ph, pw)`` (``in*`` being the
    input shape and ``ph, pw = patch_size``), then reordered so that axis 1
    sits next to the patch axes.

    :return: matrix with ``in0 * (in2 - ph + 1) * (in3 - pw + 1)`` rows and
        ``in1 * ph * pw`` columns.
    """
    patch_h, patch_w = patch_size[0], patch_size[1]
    in_shape = inputs_sym.shape

    neibs = neighbours.images2neibs(inputs_sym, patch_size, (1,1))
    six_d_shape = (in_shape[0],
                   in_shape[1],
                   in_shape[2]-patch_h+1,
                   in_shape[3]-patch_w+1,
                   patch_h,
                   patch_w)
    # (in0, rows, cols, in1, ph, pw): one patch position per leading triple.
    per_position = T.reshape(neibs, six_d_shape).dimshuffle(0,2,3,1,4,5)

    n_positions = T.prod(per_position.shape[:3])
    patch_len = T.prod(per_position.shape[3:])
    return T.reshape(per_position, (n_positions, patch_len))
def logp_theano_claims(l,nObs,T,Z,L,X,O_on):
    """
    Symbolic log-likelihood summed over the entries selected by ``O_on``.

    With ``q = (1 - L) * prod(1 - X * Z)`` (product over axis 1 of the
    selected rows), entries where ``O_on`` is non-zero contribute
    ``log(1 - q)`` and entries where ``1 - O_on`` is non-zero contribute
    ``log(q)``.
    NOTE(review): this has the form of a noisy-OR likelihood with leak
    ``L`` -- confirm.

    ``l`` and ``T`` are accepted but not used here.  ``X`` is reshaped to
    ``(nObs, 1, X.shape[1])`` and ``Z.T`` to ``(1, Z.shape[1], Z.shape[0])``
    before being multiplied.
    """
    # 1 - X*Z for every pairing produced by the two reshapes.
    x_3d = X.reshape((nObs,1,X.shape[1]))
    z_3d = (Z.T).reshape((1,Z.shape[1],Z.shape[0]))
    one_minus_xz = (1. - x_3d*z_3d)

    # L repeated once per observation.
    leak = TT.tile(L[np.newaxis,:],(nObs,1))

    on_idx = O_on.nonzero()
    off_idx = (1-O_on).nonzero()

    # Contribution from O = 1
    q_on = (1-leak[on_idx])*TT.prod(one_minus_xz[on_idx],axis=1,no_zeros_in_input=True)
    total = TT.log(1-q_on).sum()

    # Contribution from O = 0
    q_off = (1-leak[off_idx])*TT.prod(one_minus_xz[off_idx],axis=1,no_zeros_in_input=True)
    total += TT.log(q_off).sum()

    return total
Exemple #12
0
def max_pool_3d(input, ds, ignore_border=False):
    """
    3D max pooling of a 5D tensor ``[n, c, x, y, z]`` via two 2D passes.

    The ``[y, z]`` frames are pooled with window ``(ds[1], ds[2])``, then the
    ``x`` axis is moved to the back, pooled with window ``ds[0]`` and moved
    back to its original position.

    :raises NotImplementedError: if ``input`` is not 5-dimensional.
    """
    # Only [n, c, x, y, z] inputs are accepted.
    if input.ndim != 5:
        raise NotImplementedError(
            'max_pool_3d requires a input [n, c, x, y, z]')

    # Number of input dimensions.
    rank = input.ndim

    # ---- pass 1: pool the [y, z] frames --------------------------------------
    # Batch size: product of every dimension except the two frame dimensions.
    # http://deeplearning.net/software/theano/library/tensor/basic.html#theano.tensor.shape_padright
    n_frames = T.shape_padright(T.prod(input.shape[:-2]), 1)
    frames_shape = T.cast(
        T.join(0, n_frames, T.as_tensor([1, ]), input.shape[-2:]), 'int32')
    frames = T.reshape(input, frames_shape, ndim=4)
    pooled_frames = DownsampleFactorMax((ds[1], ds[2]), ignore_border)(frames)
    restored_shape = T.join(0, input.shape[:-2], pooled_frames.shape[-2:])
    frame_pooled = T.reshape(pooled_frames, restored_shape, ndim=input.ndim)

    # ---- pass 2: pool the x axis ---------------------------------------------
    # Rotate the last three axes so that x becomes the last one.
    x_to_back = list(range(rank - 3)) + [rank - 2] + [rank - 1] + [rank - 3]
    x_last = frame_pooled.dimshuffle(x_to_back)

    n_rows = T.shape_padright(T.prod(x_last.shape[:-2]), 1)
    rows_shape = T.cast(
        T.join(0, n_rows, T.as_tensor([1, ]), x_last.shape[-2:]), 'int32')
    rows = T.reshape(x_last, rows_shape, ndim=4)
    # Window of 1 on the second-to-last axis: only the x axis shrinks.
    pooled_rows = DownsampleFactorMax((1, ds[0]), ignore_border)(rows)

    final_shape = T.join(0, x_last.shape[:-2], pooled_rows.shape[-2:])
    # Inverse rotation: bring x back in front of y and z.
    x_to_front = list(range(rank - 3)) + [rank - 1] + [rank - 3] + [rank - 2]
    return T.reshape(pooled_rows, final_shape, ndim=input.ndim).dimshuffle(x_to_front)
	def __init__(self, input=None):
		"""
		Flatten every axis after the first one into a single axis.

		``self.output`` is ``input`` reshaped to
		``(input.shape[0], prod(input.shape[1:]))``; ``linear_output`` and
		``noisy_linear_output`` refer to that same expression.
		"""
		self.input = input
		in_shape = self.input.shape
		flat_shape = (in_shape[0], T.prod(in_shape[1:]))
		self.output = self.input.reshape(flat_shape)
		# No non-linearity or noise here: all three outputs coincide.
		self.linear_output = self.noisy_linear_output = self.output
 def get_reg_ind(self):
     """
     Build the symbolic regulariser for the two noise parameters.

     For ``alpha = exp(p)**2`` with ``p`` being ``self.params[-2]`` and
     ``self.params[-1]``, evaluates
     ``0.5*log(alpha) + c1*alpha + c2*alpha**2 + c3*alpha**3`` minus the same
     polynomial evaluated at ``self.noise_lvl``.  The two terms are weighted
     by the element counts of ``self.params[3]`` and ``self.params[4]``.
     ``c1``, ``c2`` and ``c3`` are module-level constants.
     """
     float_type = theano.config.floatX
     lvl = self.noise_lvl
     # Value of the polynomial at the reference noise level.
     offset = np.cast[float_type](.5 * np.log(lvl) + c1 * lvl + c2 * (lvl**2) + c3 * (lvl**3))

     def _term(raw_param):
         alpha = T.pow(T.exp(raw_param), 2)
         return .5 * T.log(alpha) + c1 * alpha + c2 * T.pow(alpha, 2) + c3 * T.pow(alpha, 3) - offset

     term_x = _term(self.params[-2])
     term_y = _term(self.params[-1])
     weight_x = T.cast(T.prod(self.params[3].shape), float_type)
     weight_y = T.cast(T.prod(self.params[4].shape), float_type)
     return weight_x * term_x + weight_y * term_y
Exemple #15
0
 def make_consensus(self, networks, axis=2):
   """
   Combine ``networks`` along ``axis`` according to
   ``self.attrs['consensus']``.

   Supported methods: 'max', 'min', 'mean', 'sum', 'prod', 'var'
   (reductions over ``axis``), 'flat' (flatten to 3 dims when ``axis == 2``,
   else to 2 dims; returned untouched when ``self.depth == 1``), 'project'
   (tensordot with a newly added parameter) and 'random' (select one index
   along ``axis``, for ``axis`` in 0..3).

   Any other method, or a 'random' axis above 3, fails an assertion.
   """
   method = self.attrs['consensus']

   # Plain reductions that map directly onto a function of the same name.
   if method in ('max', 'min', 'mean', 'prod', 'var'):
     return getattr(T, method)(networks, axis=axis)

   if method == 'sum':
     return T.sum(networks, axis=axis, acc_dtype=theano.config.floatX)

   if method == 'flat':
     if self.depth == 1:
       return networks
     return networks.flatten(ndim=3 if axis == 2 else 2)

   if method == 'project':
     n_out = self.attrs['n_out']
     weights = self.create_random_uniform_weights(n_out, 1, n_out + self.depth + 1)
     projection = self.add_param(weights)
     return T.tensordot(projection, networks, [[1], [axis]])

   if method == 'random':
     # NOTE(review): `high=self.depth` is passed as is; if the generator
     # treats `high` as inclusive and `self.depth` is the length of `axis`,
     # the drawn index can be out of range -- confirm.
     pick = self.rng.random_integers(size=(1,), low=0, high=self.depth)
     if axis == 0:
       return networks[pick]
     if axis == 1:
       return networks[:,pick]
     if axis == 2:
       return networks[:,:,pick]
     if axis == 3:
       return networks[:,:,:,pick]
     assert False, "axis too large"

   assert False, "consensus method unknown: " + method
Exemple #16
0
def max_pool_2d(input, ds, ignore_border=False, st=None, padding=(0, 0),
                mode='max'):
    """
    Pool the two trailing dimensions of an N-D tensor (N >= 2).

    The input is downscaled by applying the ``mode`` operation to patches of
    size (ds[0], ds[1]).

    :type input: N-D theano tensor of input images.
    :param input: input images. Pooling is done over the 2 last dimensions.
    :type ds: tuple of length 2
    :param ds: factor by which to downscale (vertical ds, horizontal ds).
        (2,2) will halve the image in each dimension.
    :type ignore_border: bool
    :param ignore_border: When True, (5,5) input with ds=(2,2)
        will generate a (2,2) output. (3,3) otherwise.
    :type st: tuple of length 2
    :param st: stride size, which is the number of shifts
        over rows/cols to get the next pool region.
        If st is None, it is considered equal to ds
        (no overlap on pooling regions).
    :type padding: tuple of two ints
    :param padding: (pad_h, pad_w), pad zeros to extend beyond four borders
        of the images, pad_h is the size of the top and bottom margins,
        and pad_w is the size of the left and right margins.
    :type mode: string
    :param mode: 'max', 'average_inc_pad' or 'average_exc_pad'.
        Operation executed on each window.  `max` always excludes the padding
        in the computation. `average` gives you the choice to include or
        exclude it.
    :raises NotImplementedError: if ``input.ndim < 2``.
    """
    if input.ndim < 2:
        raise NotImplementedError('max_pool_2d requires a dimension >= 2')

    pool = DownsampleFactorMax(ds, ignore_border, st=st, padding=padding,
                               mode=mode)

    # 4D input already has the layout the op expects.
    if input.ndim == 4:
        return pool(input)

    leading = input.shape[:-2]

    # Fold all leading dimensions into one batch axis and insert a dummy
    # channel axis: (batch_size, 1, height, width).
    n_images = tensor.shape_padright(tensor.prod(leading), 1)
    shape_4d = tensor.cast(
        tensor.join(0, n_images, tensor.as_tensor([1]), input.shape[-2:]),
        'int64')
    pooled = pool(tensor.reshape(input, shape_4d, ndim=4))

    # Unfold the leading dimensions again.
    restored_shape = tensor.join(0, leading, pooled.shape[-2:])
    return tensor.reshape(pooled, restored_shape, ndim=input.ndim)
Exemple #17
0
def add_normal(model, name, m, v):
    """
    Register a normally distributed variable with mean ``m`` and variance
    ``v`` on ``model``.

    A sample is drawn from ``model['stream'].normal`` with ``std = sqrt(v)``
    and handed to ``add_stochastic`` together with four factors: the
    ``v > 0`` requirement, ``log(2*pi)``, ``-size * log(v) / 2`` and
    ``-(x - m)**2 / (2*v)``.
    NOTE(review): the ``log(2*pi)`` factor enters unscaled and with a
    positive sign -- confirm this is intended.

    :return: whatever ``add_stochastic`` returns.
    """
    sample = model['stream'].normal(avg=m, std=T.sqrt(v))
    n_elements = T.prod(T.shape(sample))

    positivity = require(T.gt(v,0))
    const_term = T.log(2*np.pi)
    log_var_term = -n_elements*T.log(v)/2
    quad_term = -(sample-m)**2/2/v

    return add_stochastic(model, name, sample,
                          [positivity, const_term, log_var_term, quad_term])
Exemple #18
0
 def build(self, output, tparams=None, BNparams=None):
     """
     Optionally normalise ``output``, then apply the activation.

     The result is stored in ``self.output``; nothing is returned.

     :param output: symbolic pre-activation of the layer.
     :param tparams: mapping holding the ``BN_scale`` / ``BN_shift``
         parameters under ``p_(self.prefix, ...)`` keys; only read when
         ``self.BN_mode`` is 1 or 2.
     :param BNparams: mapping holding the stored ``mean`` / ``std``
         statistics under ``p_(self.prefix, ...)`` keys; only read when
         ``self.BN_mode`` is 1 or 2.
     """
     if self.BN_mode:
         # Note: this converts (and overwrites) self.BN_eps on every call.
         self.BN_eps = npt(self.BN_eps)
         # Global statistics are only set when the attribute does not exist
         # yet on this object.
         if not hasattr(self, 'BN_mean'):
             self.BN_mean = T.mean(output)
         if not hasattr(self, 'BN_std'):
             # 1 + 1/(N - 1) == N/(N - 1), N being the number of elements:
             # rescales T.var by N/(N - 1) (Bessel-style correction).
             m2 = (1 + 1 / (T.prod(output.shape) - 1)).astype(floatX)
             self.BN_std = T.sqrt(m2 * T.var(output) + self.BN_eps)
         if self.BN_mode == 2:
             # Statistics over axes 0, 2 and 3, i.e. one value per index of
             # axis 1; all parameters are broadcast as ('x', 0, 'x', 'x').
             t_mean = T.mean(output, axis=[0, 2, 3], keepdims=True)
             t_var = T.var(output, axis=[0, 2, 3], keepdims=True)
             BN_mean = BNparams[p_(self.prefix, 'mean')].dimshuffle(
                 'x', 0, 'x', 'x')
             BN_std = BNparams[p_(self.prefix, 'std')].dimshuffle(
                 'x', 0, 'x', 'x')
             # Batch statistics when self.training is true, stored
             # statistics otherwise.
             output = ifelse(
                 self.training,
                 (output - t_mean) / T.sqrt(t_var + self.BN_eps),
                 (output - BN_mean) / BN_std)
             output *= tparams[p_(self.prefix, 'BN_scale')].dimshuffle(
                 'x', 0, 'x', 'x')
             output += tparams[p_(self.prefix, 'BN_shift')].dimshuffle(
                 'x', 0, 'x', 'x')
         elif self.BN_mode == 1:
             # Same scheme with a single mean/variance over all elements.
             t_mean = T.mean(output)
             t_var = T.var(output)
             output = ifelse(
                 self.training,
                 (output - t_mean) / T.sqrt(t_var + self.BN_eps),
                 ((output - BNparams[p_(self.prefix, 'mean')])
                  / BNparams[p_(self.prefix, 'std')]))
             output *= tparams[p_(self.prefix, 'BN_scale')]
             output += tparams[p_(self.prefix, 'BN_shift')]
     self.output = self.activation(output)
Exemple #19
0
def gaussian_likelihood_diagonal_variance(t, mu, sig, dim):
    """
    Gaussian likelihood with diagonal covariance, along the first dimension.

    Returns ``exp(-0.5 * sum((t - mu)**2 / sig)) / prod(sqrt(2*pi*sig))``
    where both the sum and the product run over axis 0.  The value is
    assembled in log-space and exponentiated once, so the normalisation term
    no longer collapses to 0 (or blows up) when many variances are
    multiplied together.

    Parameters
    ----------
    t   : TensorVariable
        Target values.
    mu  : FullyConnected (Linear)
        Mean of the Gaussian.
    sig : FullyConnected (Softplus)
        Diagonal of the covariance (variances); clipped to [1e-40, 1e40].
    dim : First dimension of the target vector t (unused, kept for
        interface compatibility).
    """
    # Keep the variances strictly positive and finite.
    sig_clip = T.clip(sig, 1e-40, 1e40)

    # log of prod(sqrt(2*pi*sig)) -- a sum of logs instead of a product.
    log_normalization = 0.5 * T.sum(T.log(2 * math.pi * sig_clip), axis=0)

    # Since the covariance is diagonal the quadratic form is a term-by-term
    # division.  The terms are summed *before* exponentiating: the joint
    # density is the product of the per-dimension factors, not the sum of
    # their exponentials.
    log_exp_term = -0.5 * T.sum((t - mu) * (t - mu) / sig_clip, axis=0)

    pdf = T.exp(log_exp_term - log_normalization)
    return pdf
Exemple #20
0
    def process(self, input, tparams, BNparams):
        """
        Convolve ``input`` with this layer's filters and record statistics.

        With ``border_mode == 'same'`` a 'full' convolution is computed and
        then cropped by ``(k - 1) // 2`` at the start of each spatial axis
        and ``k - (k - 1) // 2 - 1`` at its end (``k`` being the filter size
        along that axis).  The bias is added when ``self.with_bias`` is set.

        Side effects: ``self.BN_mean`` and ``self.BN_std`` are set from
        statistics over axes 0, 2 and 3 of the result.  ``BNparams`` is not
        used here.

        :return: the symbolic convolution output.
        """
        pad_same = self.border_mode == 'same'
        output = conv.conv2d(
            input=input,
            filters=tparams[p_(self.prefix, 'W')],
            image_shape=[self.batch_size, self.n_in[0]] + self.image_shape,
            filter_shape=[self.n_out] + self.n_in,
            border_mode='full' if pad_same else self.border_mode,
            subsample=self.stride)

        if pad_same:
            top = (self.filter_size[0] - 1) // 2
            left = (self.filter_size[1] - 1) // 2
            bottom = self.filter_size[0] - top
            right = self.filter_size[1] - left
            # A trailing crop of 0 has to be an open-ended slice, since
            # `x[a:-0]` would be empty.
            rows = slice(top, None) if bottom == 1 else slice(top, -bottom + 1)
            cols = slice(left, None) if right == 1 else slice(left, -right + 1)
            output = output[:, :, rows, cols]

        if self.with_bias:
            bias = tparams[p_(self.prefix, 'b')].dimshuffle('x', 0, 'x', 'x')
            output = output + bias

        reduce_axes = [0, 2, 3]
        self.BN_mean = T.mean(output, axis=reduce_axes)
        # 1 + 1/(N - 1) == N/(N - 1) with N = element count / n_out.
        m2 = (1 + 1 / (T.prod(output.shape) / self.n_out - 1)).astype(floatX)
        self.BN_std = T.sqrt(m2 * T.var(output, axis=reduce_axes)
                             + npt(self.BN_eps))
        return output
Exemple #21
0
 def liks(self,a_na, b_nb):
     """
     Row-wise Gaussian density of ``b_nb``.

     The first ``self.d`` columns of ``a_na`` are used as means and the
     remaining columns as standard deviations (they divide the residual and
     are multiplied into the normaliser).

     :return: one density value per row; asserted to have dtype ``floatX``.
     """
     d = self.d
     mu_nd, sig_nd = a_na[:, :d], a_na[:, d:]
     # Standardised residuals and the resulting exponent per row.
     z_nd = (mu_nd - b_nb)/sig_nd
     exponent_n = TT.square(z_nd).sum(axis=1) * -.5
     # (sqrt(2*pi))**d * prod(sigma)
     normalizer_n = np.cast[floatX](np.sqrt(2*np.pi)**d) * TT.prod(sig_nd,axis=1)
     out = TT.exp(exponent_n) / normalizer_n
     assert out.dtype==floatX
     return out
Exemple #22
0
def max_pool_2d(input, ds, ignore_border=False):
    """
    Max-pool the two trailing dimensions of an N-D tensor (N >= 2).

    Only the maximum of each non-overlapping patch of size (ds[0], ds[1]) is
    kept.

    :type input: N-D theano tensor of input images.
    :param input: input images. Max pooling will be done over the 2 last dimensions.
    :type ds: tuple of length 2
    :param ds: factor by which to downscale. (2,2) will halve the image in each dimension.
    :param ignore_border: boolean value. When True, (5,5) input with ds=(2,2) will generate a
      (2,2) output. (3,3) otherwise.
    :raises NotImplementedError: if ``input.ndim < 2``.
    """
    if input.ndim < 2:
        raise NotImplementedError("max_pool_2d requires a dimension >= 2")

    leading = input.shape[:-2]

    # Fold all leading dimensions into one batch axis and insert a dummy
    # channel axis: (batch_size, 1, height, width).
    n_images = tensor.shape_padright(tensor.prod(leading), 1)
    shape_4d = tensor.cast(
        tensor.join(0, n_images, tensor.as_tensor([1]), input.shape[-2:]), "int64")
    images_4d = tensor.reshape(input, shape_4d, ndim=4)

    # Downsample the mini-batch of images.
    pooled = DownsampleFactorMax(ds, ignore_border)(images_4d)

    # Unfold the leading dimensions again.
    restored_shape = tensor.join(0, leading, pooled.shape[-2:])
    return tensor.reshape(pooled, restored_shape, ndim=input.ndim)
Exemple #23
0
def relevance_conv_z(out_relevances, inputs, weights, bias=None):
    """
    Propagate relevances backwards through a convolution (z-rule).

    The forward pre-activations ``conv2d(inputs, weights) (+ bias)`` serve
    as normalisers.  The normalised relevances are sent back with a 'full'
    convolution against the flipped, axis-swapped weights and multiplied by
    the inputs.  When a bias is given, its share of the relevance is divided
    by ``prod(weights.shape[1:])`` and spread back with an all-ones kernel.

    :return: symbolic relevances with respect to ``inputs``.
    """
    with_bias = bias is not None

    def _backward_kernel(kernel):
        # Swap the first two axes and flip both spatial axes.
        return kernel.dimshuffle(1, 0, 2, 3)[:, :, ::-1, ::-1]

    activations = conv2d(inputs, weights)
    if with_bias:
        activations = activations + bias.dimshuffle("x", 0, "x", "x")

    # Stabilise: push values away from 0 by eps in the direction of their
    # sign, then replace values that are exactly 0 by eps, to prevent
    # division by 0 and by very small numbers.
    eps = 1e-4
    activations = activations + T.sgn(activations) * eps
    activations = activations + T.eq(activations, 0) * eps

    normed_relevances = out_relevances / activations

    # "Upconvolution" back to input space.
    back_projected = conv2d(normed_relevances, _backward_kernel(weights), border_mode="full")
    result = back_projected * inputs

    if with_bias:
        bias_share = bias.dimshuffle("x", 0, "x", "x") * normed_relevances
        # Divide the bias relevance by the weight size before convolving
        # back (mean across channel, 0, 1 dims -- the original author was
        # unsure whether this is correct).
        n_weights = T.prod(weights.shape[1:]).astype(theano.config.floatX)
        bias_back = conv2d(
            bias_share / n_weights, _backward_kernel(T.ones_like(weights)), border_mode="full"
        )
        result = result + bias_back

    return result
Exemple #24
0
def _backward_negative_z(inputs, weights, normed_relevances, bias=None):
    """
    Backward pass for the negative contributions of a convolution.

    Sends ``normed_relevances`` back through the positive weights (times the
    negative inputs) and through the negative weights (times the positive
    inputs) and adds both parts.  When a bias is given, the relevance of its
    negative part is divided by ``prod(weights.shape[1:])`` and spread back
    with an all-ones kernel.

    :return: symbolic relevances with respect to ``inputs``.
    """
    def _backward_kernel(kernel):
        # Swap the first two axes and flip both spatial axes.
        return kernel.dimshuffle(1, 0, 2, 3)[:, :, ::-1, ::-1]

    def _send_back(kernel):
        return conv2d(normed_relevances, _backward_kernel(kernel), border_mode="full")

    # Sign-split inputs and weights.
    pos_inputs = inputs * T.gt(inputs, 0)
    pos_weights = weights * T.gt(weights, 0)
    neg_inputs = inputs * T.lt(inputs, 0)
    neg_weights = weights * T.lt(weights, 0)

    # weights+ * inputs-  and  weights- * inputs+
    mixed_a = _send_back(pos_weights) * neg_inputs
    mixed_b = _send_back(neg_weights) * pos_inputs
    total = mixed_a + mixed_b

    if bias is not None:
        neg_bias = bias * T.lt(bias, 0)
        bias_share = neg_bias.dimshuffle("x", 0, "x", "x") * normed_relevances
        # Divide the bias relevance by the weight size before convolving
        # back (mean across channel, 0, 1 dims -- the original author was
        # unsure whether this is correct).
        n_weights = T.prod(weights.shape[1:]).astype(theano.config.floatX)
        bias_back = conv2d(
            bias_share / n_weights, _backward_kernel(T.ones_like(weights)), border_mode="full"
        )
        total = total + bias_back
    return total
Exemple #25
0
    def __init__(self, input, n_in, n_out, prefix='hidden', W=None, b=None,
            activation=T.tanh):
        """
        Fully connected layer: ``activation(dot(input, W) + b)``.

        :param input: symbolic input.
        :param n_in: number of input units (rows of ``W``).
        :param n_out: number of output units (columns of ``W``).
        :param prefix: name prefix; parameter names are built with
            ``_p(prefix, 'W')`` and ``_p(prefix, 'b')``.
        :param W: optional existing weights; when None they are drawn
            uniformly in +/- sqrt(6 / (n_in + n_out)) and multiplied by 4
            for a sigmoid activation.
        :param b: optional existing bias; when None it is zero-initialised.
        :param activation: non-linearity, or None for a linear layer.  A
            softmax is applied over the last axis by flattening all leading
            axes first.
        """
        float_type = theano.config.floatX
        w_name = _p(prefix, 'W')
        b_name = _p(prefix, 'b')

        self.input = input

        if W is None:
            limit = numpy.sqrt(6. / (n_in + n_out))
            initial_W = numpy.asarray(
                np.random.uniform(low=-limit, high=limit, size=(n_in, n_out)),
                dtype=float_type
            )
            if activation == theano.tensor.nnet.sigmoid:
                initial_W *= 4
            W = theano.shared(value=initial_W, name=w_name, borrow=True)

        if b is None:
            initial_b = numpy.zeros((n_out,), dtype=float_type)
            b = theano.shared(value=initial_b, name=b_name, borrow=True)

        self.W = W
        self.b = b

        pre_activation = T.dot(input, self.W) + self.b
        self.pre_activation = pre_activation

        if activation is None:
            self.output = pre_activation
        elif activation == T.nnet.softmax:
            # Collapse the leading axes to a matrix, apply softmax, restore.
            full_shape = pre_activation.shape
            as_matrix = pre_activation.reshape(
                (T.prod(full_shape[:-1]), full_shape[-1]))
            self.output = T.nnet.softmax(as_matrix).reshape(full_shape)
        else:
            self.output = activation(pre_activation)

        self.params = {w_name: W, b_name: b}
Exemple #26
0
def relevance_conv_z_plus(out_relevances, inputs, weights, bias=None):
    """Redistribute output relevances of a convolution onto its inputs.

    Only the positive part of ``weights`` takes part in the redistribution.
    A given ``bias`` is not used; a warning is logged instead.

    Parameters
    ----------
    out_relevances : relevances attached to the convolution output.
    inputs : the input that was fed to the convolution.
    weights : the convolution filters.
    bias : ignored (apart from triggering the warning when not None).

    Returns
    -------
    Symbolic expression holding the relevances for ``inputs``.
    """
    if bias is not None:
        log.warning("Bias not respected for conv z_plus")

    # Positive filter part only (a disabled hack for negative inputs used
    # T.abs_(inputs) here).
    positive_weights = weights * T.gt(weights, 0)

    # Forward convolution with the positive filters gives the normaliser of
    # every output position.
    normalisers = conv2d(inputs, positive_weights)

    # Guard against division by zero: wherever the normaliser is exactly 0,
    # turn it into 1 and remember the position for the correction below.
    zero_mask = T.eq(normalisers, 0)
    normalisers = normalisers + zero_mask * 1

    scaled_relevances = out_relevances / normalisers

    # Upconvolution: swap the two filter axes and flip both spatial axes,
    # then convolve in "full" mode.
    flipped_filters = positive_weights.dimshuffle(1, 0, 2, 3)[:, :, ::-1, ::-1]
    back_projected = conv2d(scaled_relevances, flipped_filters,
                            border_mode="full")

    input_relevances = back_projected * inputs

    # Output positions whose normaliser was zero: spread their relevance
    # equally over the contributing inputs by convolving with a constant
    # filter of 1 / prod(weights.shape[1:]).
    # NOTE(review): the original author was unsure whether averaging over
    # these three axes is the right normalisation - confirm.
    uniform_filter = T.ones(positive_weights.shape, dtype=np.float32)
    uniform_filter = uniform_filter / T.prod(
        positive_weights.shape[1:]).astype(theano.config.floatX)
    relevances_from_zero = conv2d(
        out_relevances * zero_mask,
        uniform_filter.dimshuffle(1, 0, 2, 3),
        subsample=(1, 1),
        border_mode="full",
    )

    input_relevances = input_relevances + relevances_from_zero
    return input_relevances
def max_pool_switch_2d(input, ds, ignore_border=None, st=None, padding=(0, 0),
            index_type='flattened', index_scope='local'):
    """Apply ``MaxPoolSwitch`` over the two trailing axes of ``input``.

    A 4D input is handed to the op directly. Any other rank (>= 2) is first
    reshaped to ``(prod(leading dims), 1, rows, cols)``, pooled, and then
    reshaped back to the original number of dimensions.

    ``ds``, ``st``, ``padding``, ``index_type`` and ``index_scope`` are
    forwarded unchanged to ``MaxPoolSwitch``; ``ignore_border=None`` is
    treated as ``False``.

    Raises NotImplementedError when ``input`` has fewer than 2 dimensions.
    """
    if input.ndim < 2:
        raise NotImplementedError('max_pool_switched_2d requires a dimension >= 2')
    if ignore_border is None:
        ignore_border = False

    # Both code paths use an identically configured op.
    pool_op = MaxPoolSwitch(ds, ignore_border, st=st, padding=padding,
                            index_type=index_type, index_scope=index_scope)

    if input.ndim == 4:
        return pool_op(input)

    leading_dims = input.shape[:-2]
    image_dims = input.shape[-2:]

    # Product of all leading axes, padded to a length-1 vector so it can be
    # joined with the other shape pieces.
    flat_batch = T.shape_padright(T.prod(leading_dims), 1)

    # Target shape: (flat_batch, 1, rows, cols).
    shape_4d = T.cast(T.join(0, flat_batch, T.as_tensor([1]), image_dims),
                      'int64')
    pooled = pool_op(T.reshape(input, shape_4d, ndim=4))

    # Put the leading axes back in front of the pooled image axes.
    # NOTE(review): this presumes the op keeps the singleton second axis -
    # confirm against MaxPoolSwitch.
    restored_shape = T.join(0, leading_dims, pooled.shape[-2:])
    return T.reshape(pooled, restored_shape, ndim=input.ndim)
def unpool_switch_2d(input, ds, st=None,
            index_type='flattened', index_scope='local',
            original_input_shape=None):
    """Apply ``UnpoolSwitch`` to ``input``, accepting ranks other than 4.

    A 4D input is handed to the op directly. Any other rank (>= 3) keeps its
    three trailing axes, collapses all leading axes into one, runs the op on
    the resulting 4D tensor and reshapes the result back to the original
    number of dimensions.

    ``ds``, ``st``, ``index_type``, ``index_scope`` and
    ``original_input_shape`` are forwarded unchanged to ``UnpoolSwitch``.

    Raises NotImplementedError when ``input`` has fewer than 3 dimensions.
    """
    if input.ndim < 3:
        raise NotImplementedError('unpool_switched_2d requires a dimension >= 3')

    # Both code paths use an identically configured op.
    unpool_op = UnpoolSwitch(ds, st=st,
                             index_type=index_type, index_scope=index_scope,
                             original_input_shape=original_input_shape)

    if input.ndim == 4:
        return unpool_op(input)

    # The three trailing axes are kept as they are.
    trailing_dims = input.shape[-3:]

    # Product of the remaining leading axes as a length-1 vector.
    flat_batch = T.shape_padright(T.prod(input.shape[:-3]), 1)

    # Target shape: (flat_batch,) + trailing_dims.
    shape_4d = T.cast(T.join(0, flat_batch, trailing_dims), 'int64')
    unpooled = unpool_op(T.reshape(input, shape_4d, ndim=4))

    # Restore: every input axis except the last two, followed by the last two
    # axes of the op output.
    # NOTE(review): this reuses the input's third-from-last axis size, so it
    # presumes the op does not change that axis - confirm against
    # UnpoolSwitch.
    restored_shape = T.join(0, input.shape[:-2], unpooled.shape[-2:])
    return T.reshape(unpooled, restored_shape, ndim=input.ndim)
Exemple #29
0
    def connect(self, inputs):
        """Flatten ``inputs`` to 2D, record the pair and return the result.

        The first axis is kept; all remaining axes are merged into one.
        ``inputs`` is appended to ``self.inputs`` and the flattened tensor to
        ``self.outputs``.
        """
        # Axis 0 stays as it is, everything after it collapses into axis 1.
        n_rows = inputs.shape[0]
        n_cols = T.prod(inputs.shape[1:])
        flattened = inputs.reshape((n_rows, n_cols))

        self.inputs.append(inputs)
        self.outputs.append(flattened)
        return flattened
Exemple #30
0
def build_obj(z_sample,z_mu,z_sigma,x_orig,x_out):
    """Build the scalar training objective plus a list of diagnostics.

    The objective is ``0.5 * sum((x_orig - x_out)**2)`` plus the value of
    ``kl_normal_diagonal_vs_unit(z_mu, z_sigma, z_dim)``. All other
    quantities are only computed so that they can be returned for inspection.
    ``z_dim`` and ``x_sigma`` are read from module scope.

    Returns
    -------
    (objective, diagnostics) where ``objective`` is reshaped to a 0-d tensor
    and ``diagnostics`` is a list of 14 expressions in a fixed order (see the
    numbered comments on the return statement).
    """
    sigma = z_sigma
    sigma_inverse = 1/(sigma)

    # Product of the sigma entries; enters the normalising constant below.
    sigma_product = T.prod(z_sigma)
    norm_const = 1./(T.sqrt(((2*np.pi)**z_dim) * sigma_product))

    # log q(z|x); the log(norm_const) term can be omitted.
    squared_dev = ((z_sample-z_mu)**2).T
    log_q = -0.5*T.dot(sigma_inverse, squared_dev) + T.log(norm_const)
    q = norm_const * T.exp(log_q)

    # p(x|z) is taken to be gaussian.
    log_px = -(1/(x_sigma))*(((x_orig-x_out)**2).sum())
    # gaussian prior with mean 0 and identity covariance.
    log_pz = -(z_sample**2).sum()

    # The constant part of the reconstruction error is left out.
    recon_proper = 0.5*T.sum((x_orig-x_out)**2)
    recon = recon_proper
    kl_term = kl_normal_diagonal_vs_unit(z_mu,z_sigma,z_dim)

    objective = (recon + kl_term).reshape((),ndim=0)

    diagnostics = [
        recon,          # 1  reconstruction error
        kl_term,        # 2  regularizer
        log_q,          # 3  log q(z|x)
        sigma_product,  # 4  prod(z_sigma)
        q,              # 5  q(z|x)
        log_px,         # 6  log p(x|z)
        log_pz,         # 7  log p(z)
        z_sample,       # 8
        z_mu,           # 9
        z_sigma,        # 10
        sigma_inverse,  # 11
        sigma,          # 12 (same object as z_sigma)
        norm_const,     # 13
        recon_proper,   # 14 (same object as entry 1)
    ]
    return objective, diagnostics
Exemple #31
0
def numel(x):
    """Return the number of elements of ``x`` as the product of its shape."""
    dims = x.shape
    return T.prod(dims)
def build_simple_model(
        input_var,
        filter_size=(5, 11, 25),
        n_filters=64,
        n_classes=6,
        depth=1,
        last_filter_size=1,
        nb_in_channels=1,
        #bn_relu_conv = False, #unused for now
        out_nonlin=softmax):
    '''
    Build a 1D convolutional network made of parallel conv/batch-norm/relu
    branches that are concatenated and classified position by position.

    The input layer is declared with shape (None, nb_in_channels, 200).

    Parameters:
    -----------
    input_var : theano tensor
    filter_size : non-empty sequence of odd int (to fit with same padding),
                its length determines the number of parallel branches
                (ConvLayer) that get concatenated
    n_filters : int, number of filters for each ConvLayer
    n_classes : int
    depth : int >= 1, number of stacked convolutions in every branch
            before the concatenation
    last_filter_size : int, must be set to 1 (the older version had
            a last_filter_size of 3, that was an error
            the argument is there to be able to reassign weights correctly
            when testing)
    nb_in_channels : int, number of channels of the input
    out_nonlin : default=softmax, non linearity function

    Returns:
    --------
    ([last_layer], net) : a one-element list holding the layer that computes
            the prediction, and the dictionary {name : Layer instance}
            with every layer that was created

    Raises:
    -------
    ValueError : if depth < 1 or filter_size is empty
    '''
    n_conv = len(filter_size)

    # Fail early with a clear message. Without these checks depth < 1 only
    # shows up later as a KeyError on the non-existent 'relu-1_0' layer, and
    # an empty filter_size leaves nothing to concatenate.
    if depth < 1:
        raise ValueError('depth must be >= 1, got %r' % (depth,))
    if n_conv < 1:
        raise ValueError('filter_size must contain at least one filter size')

    net = {}

    net['input'] = InputLayer((None, nb_in_channels, 200), input_var)

    # Convolution layers: one branch per filter size, `depth` levels each.
    for d in range(depth):
        for i in range(n_conv):
            suffix = '{}_{}'.format(d, i)

            # The first level reads the input, deeper levels read the relu
            # output of the same branch one level up.
            if d == 0:
                incoming_layer = 'input'
            else:
                incoming_layer = 'relu{}_{}'.format(d - 1, i)

            net['conv' + suffix] = ConvLayer(
                net[incoming_layer],
                num_filters=n_filters,
                filter_size=filter_size[i],
                pad='same',
                nonlinearity=None)

            # Batch normalization sits BETWEEN the convolution and the relu.
            net['bn' + suffix] = BatchNormLayer(net['conv' + suffix])
            net['relu' + suffix] = NonlinearityLayer(
                net['bn' + suffix], nonlinearity=rectify)

    # Concatenation of the last relu of every branch
    layers_to_concatenate = [
        net['relu{}_{}'.format(depth - 1, i)] for i in range(n_conv)
    ]

    net['concat'] = ConcatLayer(layers_to_concatenate)
    incoming_layer = 'concat'

    # Output layer
    net['final_conv'] = ConvLayer(net[incoming_layer],
                                  num_filters=n_classes,
                                  filter_size=last_filter_size,
                                  pad='same')
    incoming_layer = 'final_conv'

    # DimshuffleLayer and ReshapeLayer to fit the softmax implementation
    # (it needs a 1D or 2D tensor, not a 3D tensor)
    net['final_dimshuffle'] = DimshuffleLayer(net[incoming_layer], (0, 2, 1))
    incoming_layer = 'final_dimshuffle'

    layerSize = lasagne.layers.get_output(net[incoming_layer]).shape
    # Merge the first two axes: (200*batch_size, n_classes)
    net['final_reshape'] = ReshapeLayer(net[incoming_layer],
                                        (T.prod(layerSize[0:2]), layerSize[2]))
    incoming_layer = 'final_reshape'

    # This is the layer that computes the prediction
    net['last_layer'] = NonlinearityLayer(net[incoming_layer],
                                          nonlinearity=out_nonlin)
    incoming_layer = 'last_layer'

    # Layers needed to visualize the prediction of the network: undo the
    # reshape and the dimshuffle applied before the non linearity.
    net['probs_reshape'] = ReshapeLayer(
        net[incoming_layer], (layerSize[0], layerSize[1], n_classes))
    incoming_layer = 'probs_reshape'

    net['probs_dimshuffle'] = DimshuffleLayer(net[incoming_layer], (0, 2, 1))

    return [net['last_layer']], net
Exemple #33
0
    def build(self):
        """
        Build the model variables.

        Every quantity is created through ``self.Det`` (derived values) or
        ``self.LogNorm`` and is referred to afterwards through the local
        name bound to the helper's return value. Nothing is returned.
        """
        CMReduction = self.build_reduction_var()

        # Window of active countermeasures extended into the past
        Earlier_ActiveCMs = self.d.get_ActiveCMs(
            self.d.Ds[0] - pd.DateOffset(self.CMDelayCut), self.d.Ds[-1])

        # [region, CM, day] Reduction factor for each CM,C,D
        ActiveCMReduction = (T.reshape(CMReduction,
                                       (1, self.nCMs, 1))**Earlier_ActiveCMs)

        # [region, day] Reduction factor from CMs for each C,D (noise added below)
        GrowthReduction = self.Det("GrowthReduction",
                                   T.prod(ActiveCMReduction, axis=1),
                                   plot_trace=False)

        # [region, day] Convolution of GrowthReduction by DelayProb along days;
        # the first CMDelayCut days (the extension into the past) are cut off
        DelayedGrowthReduction = self.Det(
            "DelayedGrowthReduction",
            geom_convolution(GrowthReduction, self.CMDelayProb,
                             axis=1)[:, self.CMDelayCut:],
            plot_trace=False,
        )

        # [] Baseline growth rate (wide prior OK, mean estimates ~10% daily growth)
        BaseGrowthRate = self.LogNorm("BaseGrowthRate", 1.2, 2.3)

        # [region] Region growth rate
        # TODO: Estimate growth rate variance
        RegionGrowthRate = self.LogNorm("RegionGrowthRate",
                                        BaseGrowthRate,
                                        0.3,
                                        shape=(self.nRs, ))

        # [region] Region unreliability as common scale multiplier of its:
        # * measurements (measurement unreliability)
        # * expected growth noise
        # TODO: Estimate good prior (but can be weak?)
        RegionScaleMult = self.LogNorm("RegionScaleMult",
                                       1.0,
                                       1.0,
                                       shape=(self.nRs, ))

        # [region, day] The ideal predicted daily growth
        PredictedGrowth = self.Det(
            "PredictedGrowth",
            T.reshape(RegionGrowthRate,
                      (self.nRs, 1)) * DelayedGrowthReduction,
            plot_trace=False,
        )

        # [region, day] The actual (still hidden) growth rate each day
        # TODO: Estimate noise variance (should be small, measurement variance below)
        #       Miscalibration: too low: time effects pushed into CMs, too high: explains away CMs
        RealGrowth = self.LogNorm(
            "RealGrowth",
            PredictedGrowth,
            RegionScaleMult.reshape((self.nRs, 1)) * 0.1,
            shape=(self.nRs, self.nDs),
            plot_trace=False,
        )

        # [region, day] Multiplicative noise applied to predicted growth rate
        RealGrowthNoise = self.Det("RealGrowthNoise",
                                   RealGrowth / PredictedGrowth,
                                   plot_trace=False)

        # [region] Initial size of epidemic (the day before the start, only those detected; wide prior OK)
        InitialSize = self.LogNorm("InitialSize", 1.0, 10, shape=(self.nRs, ))

        # [region, day] The number of cases that would be detected with noiseless testing
        # (Noise source includes both false-P/N rates and local variance in test volume and targeting)
        # (Since we only care about growth rates and assume consistent testing, it is fine to ignore real size)
        # Uses the local RealGrowth like every other reference in this method,
        # rather than relying on the helper having also set self.RealGrowth.
        Size = self.Det(
            "Size",
            T.reshape(InitialSize,
                      (self.nRs, 1)) * RealGrowth.cumprod(axis=1),
            plot_trace=False,
        )

        # [region, day] Cumulative tested positives
        Observed = self.LogNorm(
            "Observed",
            Size,
            0.4,  # self.RegionScaleMult.reshape((self.nRs, 1)) * 0.4,
            shape=(self.nRs, self.nDs),
            observed=self.d.Confirmed,
            plot_trace=False,
        )

        # [region, day] Multiplicative noise of the observations relative to Size
        # Note: computed backwards, since Observed needs to be a distribution
        ObservedNoise = self.Det("ObservedNoise",
                                 Observed / Size,
                                 plot_trace=False)
def buildFCN8(nb_in_channels,
              input_var,
              path_weights='/Tmp/romerosa/itinf/models/' +
              'camvid/new_fcn8_model_best.npz',
              n_classes=21,
              load_weights=True,
              void_labels=[],
              trainable=False,
              layer=['probs_dimshuffle'],
              pascal=False,
              temperature=1.0,
              dropout=0.5):
    '''
    Build fcn8 model

    The network has a contracting path of five conv/pool stages followed by
    two convolutional "fc" layers with dropout, and an upsampling path that
    fuses the class scores with the pool4 and pool3 features before the final
    x8 upsampling. A softmax over the class axis is applied on the flattened
    scores and then reshaped back to 4D.

    Parameters:
    -----------
    nb_in_channels : number of channels of the input layer
    input_var : theano tensor used as network input
    n_classes : number of score channels / classes
    load_weights : when true, W and b of the 'upsample' layer are divided by
            `temperature` (no weight file is read inside this function)
    trainable : when false, freezeParameters is called on net['probs']
    layer : names of the layers to return
    temperature : divisor applied to the 'upsample' parameters
    dropout : dropout probability after fc6 and fc7
    path_weights, void_labels, pascal : not referenced in the body

    Returns:
    --------
    list with the Layer instances named in `layer`, in that order
    '''
    net = {}

    net['input'] = InputLayer((None, nb_in_channels, None, None), input_var)

    # Contracting path. Each entry is (stage number, filters, number of 3x3
    # convolutions); every stage ends with a pooling layer of size 2.
    encoder_stages = (
        (1, 64, 2),
        (2, 128, 2),
        (3, 256, 3),
        (4, 512, 3),
        (5, 512, 3),
    )
    top = net['input']
    for stage, n_filters, n_convs in encoder_stages:
        for k in range(1, n_convs + 1):
            # Only the very first convolution pads by 100, the rest use 'same'.
            padding = 100 if (stage == 1 and k == 1) else 'same'
            conv_name = 'conv{}_{}'.format(stage, k)
            net[conv_name] = ConvLayer(top,
                                       n_filters,
                                       3,
                                       pad=padding,
                                       flip_filters=False)
            top = net[conv_name]
        pool_name = 'pool{}'.format(stage)
        net[pool_name] = PoolLayer(top, 2)
        top = net[pool_name]

    # fc6 / fc7 expressed as convolutions, each followed by dropout
    net['fc6'] = ConvLayer(
        net['pool5'], 4096, 7, pad='valid', flip_filters=False)
    net['fc6_dropout'] = DropoutLayer(net['fc6'], p=dropout)

    net['fc7'] = ConvLayer(
        net['fc6_dropout'], 4096, 1, pad='valid', flip_filters=False)
    net['fc7_dropout'] = DropoutLayer(net['fc7'], p=dropout)

    # Per-class scores at the coarsest resolution
    net['score_fr'] = ConvLayer(
        net['fc7_dropout'], n_classes, 1, pad='valid', flip_filters=False)

    # Upsampling path. Merged layers are center-cropped on the two spatial
    # axes; a fresh list is handed to every layer.
    center_crop = (None, None, 'center', 'center')

    # x2 upsampling, fused with the pool4 scores
    net['score2'] = DeconvLayer(
        net['score_fr'], n_classes, 4,
        stride=2, crop='valid', nonlinearity=linear)
    net['score_pool4'] = ConvLayer(net['pool4'], n_classes, 1, pad='same')
    net['score_fused'] = ElemwiseSumLayer(
        (net['score2'], net['score_pool4']),
        cropping=list(center_crop))

    # x2 upsampling, fused with the pool3 scores
    net['score4'] = DeconvLayer(
        net['score_fused'], n_classes, 4,
        stride=2, crop='valid', nonlinearity=linear)
    net['score_pool3'] = ConvLayer(net['pool3'], n_classes, 1, pad='valid')
    net['score_final'] = ElemwiseSumLayer(
        (net['score4'], net['score_pool3']),
        cropping=list(center_crop))

    # x8 upsampling
    net['upsample'] = DeconvLayer(
        net['score_final'], n_classes, 16,
        stride=8, crop='valid', nonlinearity=linear)

    # Second input layer with as many channels as the upsampled scores; the
    # merge below returns the upsampled scores only, the extra input takes
    # part solely in the center cropping.
    upsample_shape = lasagne.layers.get_output_shape(net['upsample'])[1]
    net['input_tmp'] = InputLayer((None, upsample_shape, None, None),
                                  input_var)

    net['score'] = ElemwiseMergeLayer(
        (net['input_tmp'], net['upsample']),
        merge_function=lambda reference, scores: scores,
        cropping=list(center_crop))

    # Final dimshuffle, reshape and softmax: move the class axis last, merge
    # the three leading axes and normalise over the classes.
    net['final_dimshuffle'] = lasagne.layers.DimshuffleLayer(
        net['score'], (0, 2, 3, 1))
    laySize = lasagne.layers.get_output(net['final_dimshuffle']).shape
    net['final_reshape'] = lasagne.layers.ReshapeLayer(
        net['final_dimshuffle'], (T.prod(laySize[0:3]), laySize[3]))
    net['probs'] = lasagne.layers.NonlinearityLayer(
        net['final_reshape'], nonlinearity=softmax)

    # Do not train
    if not trainable:
        freezeParameters(net['probs'])

    # Go back to 4D and put the class axis back in second position
    net['probs_reshape'] = ReshapeLayer(
        net['probs'], (laySize[0], laySize[1], laySize[2], n_classes))
    net['probs_dimshuffle'] = DimshuffleLayer(
        net['probs_reshape'], (0, 3, 1, 2))

    # Apply temperature to the parameters of the last deconvolution
    if load_weights:
        last_deconv = net['upsample']
        soft_value = last_deconv.W.get_value() / temperature
        last_deconv.W.set_value(soft_value)
        soft_value = last_deconv.b.get_value() / temperature
        last_deconv.b.set_value(soft_value)

    return [net[el] for el in layer]
Exemple #35
0
def batch_flatten(x):
    '''Reshape an n-D tensor to 2D, keeping the first dimension and
    merging all the others into the second one.
    '''
    n_rows = x.shape[0]
    n_cols = T.prod(x.shape) // n_rows
    return T.reshape(x, (n_rows, n_cols))
Exemple #36
0
def max_pool_2d(input,
                ds,
                ignore_border=None,
                st=None,
                padding=(0, 0),
                mode='max'):
    """
    Pool an N-D tensor (N >= 2) over its two last dimensions.

    A 4D input goes straight through ``DownsampleFactorMax``. Any other rank
    is reshaped to ``(prod(leading dims), 1, rows, cols)``, pooled, and
    reshaped back to the original number of dimensions.

    Parameters
    ----------
    input : N-D theano tensor of input images
        Pooling is done over the 2 last dimensions.
    ds : tuple of length 2
        Downscaling factor (vertical ds, horizontal ds); (2,2) halves the
        image in each dimension.
    ignore_border : bool (default None, which warns and then acts as False)
        When True, a (5,5) input with ds=(2,2) gives a (2,2) output,
        otherwise (3,3).
    st : tuple of length 2
        Stride, i.e. the shift over rows/cols between pooling regions.
        None means equal to ds (non-overlapping regions).
    padding : tuple of two ints
        (pad_h, pad_w): zero margins added at top/bottom and left/right.
    mode : {'max', 'sum', 'average_inc_pad', 'average_exc_pad'}
        Operation applied to each window. `max` and `sum` always exclude the
        padding; the two `average` modes include or exclude it.

    Raises
    ------
    NotImplementedError
        If ``input`` has fewer than 2 dimensions.

    """
    if input.ndim < 2:
        raise NotImplementedError('max_pool_2d requires a dimension >= 2')

    if ignore_border is None:
        warnings.warn(
            "max_pool_2d() will have the parameter ignore_border"
            " default value changed to True (currently"
            " False). To have consistent behavior with all Theano"
            " version, explicitly add the parameter"
            "  ignore_border=True. (this is also faster than"
            " ignore_border=False)",
            stacklevel=2)
        ignore_border = False

    # Both code paths use an identically configured op.
    pool_op = DownsampleFactorMax(
        ds, ignore_border, st=st, padding=padding, mode=mode)

    if input.ndim == 4:
        return pool_op(input)

    leading_dims = input.shape[:-2]
    image_dims = input.shape[-2:]

    # Product of all leading axes, padded to a length-1 vector so it can be
    # joined with the other shape pieces.
    flat_batch = tensor.shape_padright(tensor.prod(leading_dims), 1)

    # Target shape: (flat_batch, 1, height, width)
    shape_4d = tensor.cast(
        tensor.join(0, flat_batch, tensor.as_tensor([1]), image_dims),
        'int64')
    pooled = pool_op(tensor.reshape(input, shape_4d, ndim=4))

    # Leading axes go back in front of the pooled image axes.
    restored_shape = tensor.join(0, leading_dims, pooled.shape[-2:])
    return tensor.reshape(pooled, restored_shape, ndim=input.ndim)
Exemple #37
0
def max_pool_3d(input, ds, ignore_border=False):
    """
    Pool an N-D tensor (N >= 3) over its three last dimensions
    (time, height, width) using two passes of the 2D ``Pool`` op.

    :type input: N-D theano tensor of input images.
    :param input: input images. Pooling is done over the 3 last dimensions.
    :type ds: tuple of length 3
    :param ds: window per dimension, ordered (time, height, width).
      (2,2,2) will halve the video in each dimension.
    :param ignore_border: boolean value. When True, (5,5,5) input with
      ds=(2,2,2) will generate a (2,2,2) output. (3,3,3) otherwise.
    :raises NotImplementedError: if ``input`` has fewer than 3 dimensions.
    """
    if input.ndim < 3:
        raise NotImplementedError('max_pool_3d requires a dimension >= 3')

    def pool_last_two_axes(x, window):
        # Merge every leading axis into one batch axis, pool the two trailing
        # axes with the 2D op, then give the result its leading axes back.
        trailing = x.shape[-2:]
        flat_batch = tensor.shape_padright(tensor.prod(x.shape[:-2]), 1)
        # 4D shape: (flat_batch, 1, <two trailing axes>)
        shape_4d = tensor.cast(
            tensor.join(0, flat_batch, tensor.as_tensor([1]), trailing),
            'int32')
        x_4d = tensor.reshape(x, shape_4d, ndim=4)
        pool_op = Pool(ignore_border)
        pooled = pool_op(x_4d, ws=window)
        restored = tensor.join(0, x.shape[:-2], pooled.shape[-2:])
        return tensor.reshape(pooled, restored, ndim=x.ndim)

    n_dims = input.ndim
    leading_axes = list(range(n_dims - 3))

    # Pass 1: pool rows and cols of every frame.
    frames_pooled = pool_last_two_axes(input, (ds[1], ds[2]))

    # Pass 2: pool over time. Move time to the last position,
    # (..., time, rows, cols) -> (..., rows, cols, time), and pool the two
    # trailing axes with a window of 1 along cols and ds[0] along time.
    time_last = leading_axes + [n_dims - 2, n_dims - 1, n_dims - 3]
    time_pooled = pool_last_two_axes(
        frames_pooled.dimshuffle(time_last), (1, ds[0]))

    # Back to (..., time, rows, cols).
    time_first = leading_axes + [n_dims - 1, n_dims - 3, n_dims - 2]
    return time_pooled.dimshuffle(time_first)
Exemple #38
0
    def get_regs(self, x0_, x, M):
        """
        Sum of the enabled regularization terms.

        Three coefficients are read from ``self.config``; a term is added
        only when its coefficient is > 0:

        * ``L1_Wrec`` : mean absolute value of ``Wrec``.
        * ``L2_Wrec`` : mean squared value over ``Wrec`` and ``Wrec_gates``.
        * ``L2_r``    : mask-weighted mean of the squared firing rates
          ``self.f_hidden`` computes from ``x0_`` stacked in front of ``x``.

        Returns 0 when no term is enabled.
        """
        total = 0

        #---------------------------------------------------------------------
        # L1 on the recurrent weights ('Wrec_gates' is not part of this term)
        #---------------------------------------------------------------------

        l1_coeff = self.config['L1_Wrec']
        if l1_coeff > 0:
            print("L1_Wrec = {}".format(l1_coeff))

            Wrec = self.get('Wrec')
            abs_sum = tensor.sum(abs(Wrec))
            n_weights = tensor.prod(Wrec.shape)

            total = total + l1_coeff * abs_sum/n_weights

        #---------------------------------------------------------------------
        # L2 on the recurrent weights, gates included
        #---------------------------------------------------------------------

        l2_coeff = self.config['L2_Wrec']
        if l2_coeff > 0:
            print("L2_Wrec = {}".format(l2_coeff))

            Wrec = self.get('Wrec')
            sq_sum = tensor.sum(tensor.sqr(Wrec))
            n_weights = tensor.prod(Wrec.shape)

            Wgates = self.get('Wrec_gates')
            sq_sum = sq_sum + tensor.sum(tensor.sqr(Wgates))
            n_weights = n_weights + tensor.prod(Wgates.shape)

            total = total + l2_coeff * sq_sum/n_weights

        #---------------------------------------------------------------------
        # L2 on the firing rates
        #---------------------------------------------------------------------

        rate_coeff = self.config['L2_r']
        if rate_coeff > 0:
            # Repeat the mask along a new last axis: (T, B) -> (T, B, N)
            mask = (tensor.tile(M.T, (x.shape[-1], 1, 1))).T

            # Put the initial state in front of the states for t > 0
            initial = x0_.reshape((1, x0_.shape[0], x0_.shape[1]))
            states = tensor.concatenate([initial, x], axis=0)

            # Firing rates
            rates = self.f_hidden(states)

            # Masked mean of the squared rates
            penalty = tensor.sum(tensor.sqr(rates)*mask)
            total = total + rate_coeff * penalty/tensor.sum(mask)

        return total
    def normal(self,
               size,
               avg=0.0,
               std=1.0,
               ndim=None,
               dtype=None,
               nstreams=None,
               truncate=False,
               **kwargs):
        """
        Sample a tensor of values from a normal distribution.

        Parameters
        ----------
        size : int_vector_like
            Array dimensions for the output tensor.
        avg : float_like, optional
            The mean value for the truncated normal to sample from (defaults to 0.0).
        std : float_like, optional
            The standard deviation for the truncated normal to sample from (defaults to 1.0).
        truncate : bool, optional
            Truncates the normal distribution at 2 standard deviations if True (defaults to False).
            When this flag is set, the standard deviation of the result will be less than the one specified.
        ndim : int, optional
            The number of dimensions for the output tensor (defaults to None).
            This argument is necessary if the size argument is ambiguous on the number of dimensions.
        dtype : str, optional
            The data-type for the output tensor. If not specified,
            the dtype is inferred from avg and std, but it is at least as precise as floatX.
        kwargs
            Other keyword arguments for random number generation (see uniform).

        Returns
        -------
        samples : TensorVariable
            A Theano tensor of samples randomly drawn from a normal distribution.

        """
        size = _check_size(size)
        avg = undefined_grad(as_tensor_variable(avg))
        std = undefined_grad(as_tensor_variable(std))

        if dtype is None:
            dtype = scal.upcast(config.floatX, avg.dtype, std.dtype)

        avg = tensor.cast(avg, dtype=dtype)
        std = tensor.cast(std, dtype=dtype)

        # generate even number of uniform samples
        # Do manual constant folding to lower optiimizer work.
        if isinstance(size, theano.Constant):
            n_odd_samples = size.prod(dtype='int64')
        else:
            n_odd_samples = tensor.prod(size, dtype='int64')
        n_even_samples = n_odd_samples + n_odd_samples % 2
        uniform = self.uniform((n_even_samples, ),
                               low=0.,
                               high=1.,
                               ndim=1,
                               dtype=dtype,
                               nstreams=nstreams,
                               **kwargs)

        # box-muller transform
        u1 = uniform[:n_even_samples // 2]
        u2 = uniform[n_even_samples // 2:]
        r = tensor.sqrt(-2.0 * tensor.log(u1))
        theta = np.array(2.0 * np.pi, dtype=dtype) * u2
        cos_theta, sin_theta = tensor.cos(theta), tensor.sin(theta)
        z0 = r * cos_theta
        z1 = r * sin_theta

        if truncate:
            # use valid samples
            to_fix0 = (z0 < -2.) | (z0 > 2.)
            to_fix1 = (z1 < -2.) | (z1 > 2.)
            z0_valid = z0[tensor.nonzero(~to_fix0)]
            z1_valid = z1[tensor.nonzero(~to_fix1)]

            # re-sample invalid samples
            to_fix0 = tensor.nonzero(to_fix0)[0]
            to_fix1 = tensor.nonzero(to_fix1)[0]
            n_fix_samples = to_fix0.size + to_fix1.size
            lower = tensor.constant(1. / np.e**2, dtype=dtype)
            u_fix = self.uniform((n_fix_samples, ),
                                 low=lower,
                                 high=1.,
                                 ndim=1,
                                 dtype=dtype,
                                 nstreams=nstreams,
                                 **kwargs)
            r_fix = tensor.sqrt(-2. * tensor.log(u_fix))
            z0_fixed = r_fix[:to_fix0.size] * cos_theta[to_fix0]
            z1_fixed = r_fix[to_fix0.size:] * sin_theta[to_fix1]

            # pack everything together to a useful result
            norm_samples = tensor.join(0, z0_valid, z0_fixed, z1_valid,
                                       z1_fixed)
        else:
            norm_samples = tensor.join(0, z0, z1)
        if isinstance(n_odd_samples, theano.Variable):
            samples = norm_samples[:n_odd_samples]
        elif n_odd_samples % 2 == 1:
            samples = norm_samples[:-1]
        else:
            samples = norm_samples
        samples = tensor.reshape(samples, newshape=size, ndim=ndim)
        samples *= std
        samples += avg

        return samples
    def init_opt(self):
        """
        Build the symbolic surrogate loss and hand it to the optimizer.

        Creates the input tensors ('obs', 'sparse_obs', 'action', 'advantage'),
        combines the per-latent action probabilities of the low-level policy
        with the manager's latent probabilities, and registers the resulting
        loss through ``self.optimizer.update_opt``.

        Returns
        -------
        dict
            Always an empty dict.
        """
        # Raw observations come in as a 3-D tensor and are flattened to 2-D
        # further down.
        obs_var_raw = ext.new_tensor(
            'obs', ndim=3, dtype=theano.config.floatX)  # todo: check the dtype

        action_var = self.env.action_space.new_tensor_variable(
            'action', extra_dims=1)

        # this will have to be the advantage every self.period timesteps
        advantage_var = ext.new_tensor(
            'advantage', ndim=1, dtype=theano.config.floatX)

        # todo: check this dtype with carlos, refer to discrete.py in
        # rllab.spaces
        obs_var_sparse = ext.new_tensor(
            'sparse_obs', ndim=2, dtype=theano.config.floatX)

        assert isinstance(self.policy, HierarchicalPolicy)

        # todo: assumptions: 1 trajectory, which is a multiple of p; that the
        # obs_var_probs is valid

        # Undo the reshape so that batch sampling is ok: merge the first two
        # axes of the raw observations into one.
        flat_shape = [
            obs_var_raw.shape[0] * obs_var_raw.shape[1],
            obs_var_raw.shape[2],
        ]
        obs_var = TT.reshape(obs_var_raw, flat_shape)

        # Entry (i, j) is expected to be the probability of latent j at time
        # step self.period * i, i.e. a len(obs) // self.period by
        # len(self.latent) tensor -- TODO confirm against the manager policy.
        manager_info = self.policy.manager.dist_info_sym(obs_var_sparse)
        latent_probs = manager_info['prob']

        # Distribution parameters of the low-level policy, one per latent.
        dist_info_vars = self.policy.low_policy.dist_info_sym_all_latents(
            obs_var)

        # For each latent: per-step action probability, regrouped into rows of
        # self.period steps, then multiplied out along each row.
        rows = obs_var.shape[0] // self.period
        per_latent_probs = []
        for dist_info in dist_info_vars:
            step_prob = TT.exp(
                self.diagonal.log_likelihood_sym(action_var, dist_info))
            grouped = TT.reshape(step_prob, [rows, self.period])
            per_latent_probs.append(TT.prod(grouped, axis=1))

        # One column per latent; a shape error might come out of here.
        subtrajectory_probs = TT.stack(per_latent_probs, axis=1)

        # Elementwise product with the latent probabilities, then the max over
        # axis 1 and its log.
        # NOTE(review): an earlier comment described summing each row here,
        # but the code takes the max -- confirm which is intended.
        weighted = subtrajectory_probs * latent_probs
        likelihood = TT.log(TT.max(weighted, axis=1))

        surr_loss = -TT.mean(likelihood * advantage_var)

        # npo has state_info_vars and old_dist_info_vars; probably not needed
        # until this goes for NPO/TRPO.
        input_list = [obs_var_raw, obs_var_sparse, action_var, advantage_var]

        self.optimizer.update_opt(
            loss=surr_loss, target=self.policy, inputs=input_list)
        return {}
Exemple #41
0
    def normal(self,
               size=None,
               avg=0.0,
               std=1.0,
               ndim=None,
               dtype=None,
               nstreams=None):
        """
        Sample from a normal distribution using the Box-Muller transform on
        uniform samples drawn with ``self.uniform``.

        :param size: Can be a list of integers or Theano variables (ex: the
        shape of another Theano Variable). When it is a tuple of Python ints
        the sample count is computed with numpy, otherwise symbolically.

        :param avg: Mean of the samples; added after scaling by ``std``.

        :param std: Standard deviation; multiplies the standard samples.

        :param ndim: Not used by this implementation.

        :param dtype: The output data type. If dtype is not specified, it will
        be inferred from the dtype of avg and std, but will be at least as
        precise as floatX.

        :param nstreams: Number of streams, forwarded to ``self.uniform``.

        :return: ``avg + std * samples``, reshaped to ``size`` when ``size``
        is truthy.
        """
        # Box-Muller needs an even number of ]0,1[ samples: the first half is
        # used as the U1's and the second half as the U2's. See
        # http://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform
        avg = as_tensor_variable(avg)
        std = as_tensor_variable(std)

        if dtype is None:
            dtype = scal.upcast(config.floatX, avg.dtype, std.dtype)

        avg = cast(avg, dtype)
        std = cast(std, dtype)

        # A tuple of plain ints lets the sample count be worked out up front.
        static_size = isinstance(size, tuple) and all(
            isinstance(dim, int) for dim in size)
        padded = False
        if static_size:
            n_samples = numpy.prod(size)
            if n_samples % 2 == 1:
                # odd count: draw one extra sample and drop it at the end
                n_samples += 1
                padded = True
        else:
            # if even, don't change, if odd, +1
            n_samples = prod(size) + (prod(size) % 2)

        flattened = self.uniform(size=(n_samples, ),
                                 dtype=dtype,
                                 nstreams=nstreams)

        if static_size:
            half = n_samples // 2
        else:
            half = prod(flattened.shape) // 2
        U1 = flattened[:half]
        U2 = flattened[half:]

        # TensorVariables do not support item assignment, so the two halves
        # are computed separately and joined instead of filled in place.
        radius = sqrt(-2.0 * log(U1))
        angle = numpy.array(2.0 * numpy.pi, dtype=dtype) * U2
        normal_samples = join(0, radius * cos(angle), radius * sin(angle))

        if padded:
            final_samples = normal_samples[:-1]
        elif static_size:
            final_samples = normal_samples
        else:
            final_samples = normal_samples[:prod(size)]

        if size:
            final_samples = final_samples.reshape(size)

        final_samples = avg + std * final_samples

        assert final_samples.dtype == dtype
        return final_samples
Exemple #42
0
def _get_scaling(total_size, shape, ndim):
    """
    Gets scaling constant for logp

    Parameters
    ----------
    total_size : int or list[int]
    shape : shape
        shape to scale
    ndim : int
        ndim hint

    Returns
    -------
    scalar
    """
    if total_size is None:
        coef = pm.floatX(1)
    elif isinstance(total_size, int):
        if ndim >= 1:
            denom = shape[0]
        else:
            denom = 1
        coef = pm.floatX(total_size) / pm.floatX(denom)
    elif isinstance(total_size, (list, tuple)):
        if not all(
                isinstance(i, int)
                for i in total_size if (i is not Ellipsis and i is not None)):
            raise TypeError('Unrecognized `total_size` type, expected '
                            'int or list of ints, got %r' % total_size)
        if Ellipsis in total_size:
            sep = total_size.index(Ellipsis)
            begin = total_size[:sep]
            end = total_size[sep + 1:]
            if Ellipsis in end:
                raise ValueError(
                    'Double Ellipsis in `total_size` is restricted, got %r' %
                    total_size)
        else:
            begin = total_size
            end = []
        if (len(begin) + len(end)) > ndim:
            raise ValueError('Length of `total_size` is too big, '
                             'number of scalings is bigger that ndim, got %r' %
                             total_size)
        elif (len(begin) + len(end)) == 0:
            return pm.floatX(1)
        if len(end) > 0:
            shp_end = shape[-len(end):]
        else:
            shp_end = np.asarray([])
        shp_begin = shape[:len(begin)]
        begin_coef = [
            pm.floatX(t) / shp_begin[i] for i, t in enumerate(begin)
            if t is not None
        ]
        end_coef = [
            pm.floatX(t) / shp_end[i] for i, t in enumerate(end)
            if t is not None
        ]
        coefs = begin_coef + end_coef
        coef = tt.prod(coefs)
    else:
        raise TypeError('Unrecognized `total_size` type, expected '
                        'int or list of ints, got %r' % total_size)
    return tt.as_tensor(pm.floatX(coef))
def batch_flatten(x):
    """Reshape `x` to 2-D, keeping its first axis and collapsing the rest.

    The second dimension is the total number of elements of `x` divided
    (integer division) by the length of the first axis.
    """
    leading = x.shape[0]
    trailing = T.prod(x.shape) // leading
    return T.reshape(x, (leading, trailing))
Exemple #44
0
 def prod(self, x, axis=None, keepdims=False):
     '''Multiply the values in a tensor along the specified axis.

     Delegates to `T.prod`, forwarding `axis` and `keepdims` unchanged.
     '''
     product = T.prod(x, axis=axis, keepdims=keepdims)
     return product