def blah(i, node, thunk):
    imap[node] = str(i)
    if print_prog:  # and node.op.__class__ is T.DimShuffle:
        if False and node.op == T.DimShuffle((), ['x', 'x'], inplace=True):
            print node.op == T.DimShuffle((), ['x', 'x'], inplace=True),
            print node.inputs[0], type(node.inputs[0]),
            print node.inputs[0].equals(T.constant(2)),
        outputs = node.outputs
        inputs = theano.gof.graph.inputs(outputs)
        print 'node ', i, node,
        print ':'.join([imap[inp.owner] for inp in node.inputs])
def test_sparseblockgemvF(self):
    # Test the Fortran order for W (which can happen in the grad for some
    # graphs).
    b = tensor.fmatrix()
    W = tensor.ftensor4()
    h = tensor.ftensor3()
    iIdx = tensor.imatrix()
    oIdx = tensor.imatrix()

    o = self.gemv_op(
        b.take(oIdx, axis=0),
        tensor.DimShuffle((False, False, False, False), (0, 1, 3, 2))(
            tensor.as_tensor_variable(W)
        ),
        h,
        iIdx,
        oIdx,
    )

    f = theano.function([W, h, iIdx, b, oIdx], o, mode=self.mode)

    W_val, h_val, iIdx_val, b_val, oIdx_val = self.gemv_data()

    th_out = f(np.swapaxes(W_val, 2, 3), h_val, iIdx_val, b_val, oIdx_val)
    ref_out = self.gemv_numpy(
        b_val.take(oIdx_val, axis=0), W_val, h_val, iIdx_val, oIdx_val
    )

    utt.assert_allclose(ref_out, th_out)
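# Hedged aside, not part of the test above (variable names here are
# assumptions): the DimShuffle((False,)*4, (0, 1, 3, 2)) call used in
# test_sparseblockgemvF is the symbolic counterpart of numpy's swapaxes on
# the last two axes, which is why the test feeds np.swapaxes(W_val, 2, 3).
import numpy as np
import theano
import theano.tensor as tensor

W = tensor.ftensor4('W')
W_t = tensor.DimShuffle((False, False, False, False), (0, 1, 3, 2))(W)
swap = theano.function([W], W_t)
W_val = np.random.rand(2, 3, 4, 5).astype('float32')
assert np.allclose(swap(W_val), np.swapaxes(W_val, 2, 3))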
def test_bug_2009_06_02_trac_387():
    y = tensor.lvector('y')
    f = theano.function([y],
                        tensor.int_div(
                            tensor.DimShuffle(y[0].broadcastable,
                                              ['x'])(y[0]), 2))
    print(f(numpy.ones(1, dtype='int64') * 3))
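# Hedged illustration (a sketch, not from the original source): the
# regression test above dimshuffles the 0-d scalar y[0] into a length-1
# vector with a broadcastable axis before the integer division; here is the
# same pattern written out step by step.
import numpy as np
import theano
import theano.tensor as tensor

y = tensor.lvector('y')
scalar = y[0]                                                    # 0-d int64
as_vec = tensor.DimShuffle(scalar.broadcastable, ['x'])(scalar)  # shape (1,)
f = theano.function([y], tensor.int_div(as_vec, 2))
assert np.array_equal(f(np.ones(1, dtype='int64') * 3), [1])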
def max_pool(images, imgshp, maxpoolshp):
    """Implements a max pooling layer

    Takes as input a 2D tensor of shape batch_size x img_size and
    performs max pooling.  Max pooling downsamples by taking the max
    value in a given area, here defined by maxpoolshp. Outputs a 2D
    tensor of shape batch_size x output_size.

    :param images: 2D tensor containing images on which to apply max
                   pooling. Assumed to be of shape batch_size x img_size
    :param imgshp: tuple containing image dimensions
    :param maxpoolshp: tuple containing shape of area to max pool over

    :return: out1, symbolic result (2D tensor)
    :return: out2, logical shape of the output
    """
    N = numpy
    poolsize = N.int64(N.prod(maxpoolshp))

    # imgshp contains either 2 entries (height,width) or 3 (nfeatures,h,w)
    # in the first case, default nfeatures to 1
    if N.size(imgshp) == 2:
        imgshp = (1,) + imgshp

    # construct indices and index pointers for sparse matrix, which,
    # when multiplied with input images will generate a stack of image
    # patches
    indices, indptr, spmat_shape, sptype, outshp = \
        convolution_indices.conv_eval(imgshp, maxpoolshp,
                                      maxpoolshp, mode='valid')

    # print 'XXXXXXXXXXXXXXXX MAX POOLING LAYER XXXXXXXXXXXXXXXXXXXX'
    # print 'imgshp = ', imgshp
    # print 'maxpoolshp = ', maxpoolshp
    # print 'outshp = ', outshp

    # build sparse matrix, then generate stack of image patches
    csc = theano.sparse.CSM(sptype)(N.ones(indices.size), indices,
                                    indptr, spmat_shape)
    patches = sparse.structured_dot(csc, images.T).T

    pshape = tensor.stack([images.shape[0] *
                           tensor.as_tensor(N.prod(outshp)),
                           tensor.as_tensor(imgshp[0]),
                           tensor.as_tensor(poolsize)])
    patch_stack = tensor.reshape(patches, pshape, ndim=3)

    out1 = tensor.max(patch_stack, axis=2)

    pshape = tensor.stack([images.shape[0],
                           tensor.as_tensor(N.prod(outshp)),
                           tensor.as_tensor(imgshp[0])])
    out2 = tensor.reshape(out1, pshape, ndim=3)

    out3 = tensor.DimShuffle(out2.broadcastable, (0, 2, 1))(out2)

    return tensor.flatten(out3, 2), outshp
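# Hypothetical usage sketch for max_pool above (the variable names and the
# 28x28 / 2x2 shapes are assumptions, not from the original source): pool a
# minibatch of flattened images with a 2x2 window and compile the result.
import numpy
import theano
import theano.tensor as tensor

minibatch = tensor.dmatrix('minibatch')            # batch_size x img_size
pooled, out_logical_shape = max_pool(minibatch, imgshp=(28, 28),
                                     maxpoolshp=(2, 2))
pool_fn = theano.function([minibatch], pooled)
result = pool_fn(numpy.random.rand(16, 28 * 28))   # 16 flattened 28x28 images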
def makeKeepDims_local(self, x, y, axis):
    x = tensor.as_tensor_variable(x)
    y = tensor.as_tensor_variable(y)

    if axis is None:
        axis = numpy.arange(x.ndim)
    i = 0
    new_dims = []
    for j, _ in enumerate(x.shape):
        if j in axis:
            new_dims.append('x')
        else:
            new_dims.append(i)
            i += 1
    return tensor.DimShuffle(y.type.broadcastable, new_dims)(y)
def makeKeepDims_local(self, x, y, axis):
    x = tensor.as_tensor_variable(x)
    y = tensor.as_tensor_variable(y)

    if axis is None:
        axis = numpy.arange(x.ndim)
    elif isinstance(axis, int):
        axis = [axis]
    i = 0
    newaxis = []
    for a in axis:
        if a < 0:
            a += x.type.ndim
        newaxis.append(a)
    new_dims = []
    for j, _ in enumerate(x.shape):
        if j in newaxis:
            new_dims.append('x')
        else:
            new_dims.append(i)
            i += 1
    return tensor.DimShuffle(y.type.broadcastable, new_dims)(y)
def makeKeepDims_local(self, x, y, axis):
    if axis is None:
        newaxis = list(range(x.ndim))
    elif isinstance(axis, integer_types):
        if axis < 0:
            newaxis = [axis + x.type.ndim]
        else:
            newaxis = [axis]
    else:
        newaxis = []
        for a in axis:
            if a < 0:
                a += x.type.ndim
            newaxis.append(a)
    i = 0
    new_dims = []
    for j, _ in enumerate(x.shape):
        if j in newaxis:
            new_dims.append('x')
        else:
            new_dims.append(i)
            i += 1
    return tensor.DimShuffle(y.type.broadcastable, new_dims)(y)
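# Hedged sketch of the idea behind makeKeepDims_local (variable names are
# assumptions): re-inserting the reduced axis as a broadcastable 'x'
# dimension mimics numpy's keepdims=True, so the reduced value can
# broadcast against the original tensor.
import numpy
import theano
import theano.tensor as tensor

x = tensor.dmatrix('x')
m = tensor.max(x, axis=1)                                      # shape (rows,)
m_kept = tensor.DimShuffle(m.type.broadcastable, (0, 'x'))(m)  # shape (rows, 1)
f = theano.function([x], x - m_kept)                           # broadcasts
data = numpy.arange(6.).reshape(2, 3)
assert numpy.allclose(f(data), data - data.max(axis=1, keepdims=True))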
def convolve(kerns, kshp, nkern, images, imgshp, step=(1, 1), bias=None,
             mode='valid', flatten=True):
    """Convolution implementation by sparse matrix multiplication.

    :note: For best speed, put the matrix which you expect to be
           smaller as the 'kernel' argument

    "images" is assumed to be a matrix of shape batch_size x img_size,
    where the second dimension represents each image in raster order

    If flatten is "False", the output feature map will have shape:

    .. code-block:: python

        batch_size x number of kernels x output_size

    If flatten is "True", the output feature map will have shape:

    .. code-block:: python

        batch_size x number of kernels * output_size

    .. note::

        IMPORTANT: note that this means that each feature map (image
        generated by each kernel) is contiguous in memory. The memory
        layout will therefore be:
        [ <feature_map_0> <feature_map_1> ... <feature_map_n>],
        where <feature_map> represents a "feature map" in raster order

    kerns is a 2D tensor of shape nkern x N.prod(kshp)

    :param kerns: 2D tensor containing kernels which are applied at every
                  pixel
    :param kshp: tuple containing actual dimensions of kernel (not symbolic)
    :param nkern: number of kernels/filters to apply.
                  nkern=1 will apply one common filter to all input pixels
    :param images: tensor containing images on which to apply convolution
    :param imgshp: tuple containing image dimensions
    :param step: determines number of pixels between adjacent receptive
                 fields (tuple containing dx,dy values)
    :param mode: 'full', 'valid' see CSM.evaluate function for details
    :param flatten: flatten the last 2 dimensions of the output. By default,
                    instead of generating a batchsize x outsize x nkern
                    tensor, will flatten to batchsize x outsize*nkern

    :return: out1, symbolic result
    :return: out2, logical shape of the output img (nkern,height,width)

    :TODO: test for 1D and think of how to do n-d convolutions
    """
    N = numpy
    # start by computing output dimensions, size, etc
    kern_size = N.int64(N.prod(kshp))

    # imgshp contains either 2 entries (height,width) or 3 (nfeatures,h,w)
    # in the first case, default nfeatures to 1
    if N.size(imgshp) == 2:
        imgshp = (1,) + imgshp

    # construct indices and index pointers for sparse matrix, which,
    # when multiplied with input images will generate a stack of image
    # patches
    indices, indptr, spmat_shape, sptype, outshp = \
        convolution_indices.conv_eval(imgshp, kshp, step, mode)

    # build sparse matrix, then generate stack of image patches
    csc = theano.sparse.CSM(sptype)(N.ones(indices.size), indices,
                                    indptr, spmat_shape)
    patches = (sparse.structured_dot(csc, images.T)).T

    # compute output of linear classifier
    pshape = tensor.stack(images.shape[0] * tensor.as_tensor(N.prod(outshp)),
                          tensor.as_tensor(imgshp[0] * kern_size))
    patch_stack = tensor.reshape(patches, pshape, ndim=2)

    # kern is of shape: nkern x ksize*number_of_input_features
    # output is thus of shape: bsize*outshp x nkern
    output = tensor.dot(patch_stack, kerns.T)

    # add bias across each feature map (more efficient to do it now)
    if bias is not None:
        output += bias

    # now to have feature maps in raster order ...
    # go from bsize*outshp x nkern to bsize x nkern*outshp
    newshp = tensor.stack(images.shape[0],
                          tensor.as_tensor(N.prod(outshp)),
                          tensor.as_tensor(nkern))
    tensout = tensor.reshape(output, newshp, ndim=3)
    output = tensor.DimShuffle((False,) * tensout.ndim, (0, 2, 1))(tensout)
    if flatten:
        output = tensor.flatten(output, 2)

    return output, N.hstack((nkern, outshp))
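# Hypothetical usage sketch for convolve above (kernel count and shapes are
# assumptions, not from the original source): apply 5 kernels of shape 3x3
# to a minibatch of flattened 28x28 images and compile the symbolic output.
import numpy
import theano
import theano.tensor as tensor

images = tensor.dmatrix('images')                    # batch_size x (28*28)
kerns = tensor.dmatrix('kerns')                      # nkern x (3*3)
out, out_logical_shape = convolve(kerns, kshp=(3, 3), nkern=5,
                                  images=images, imgshp=(28, 28))
conv_fn = theano.function([kerns, images], out)
result = conv_fn(numpy.random.rand(5, 9), numpy.random.rand(16, 28 * 28))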
def __init__(self, numpy_rng, theano_rng=None, input=None,
             n_visible=784, n_hidden=500,
             W=None, bhid=None, bvis=None):
    """
    Initialize the dA class by specifying the number of visible units (the
    dimension d of the input), the number of hidden units (the dimension d'
    of the latent or hidden space) and the corruption level. The constructor
    also receives symbolic variables for the input, weights and bias. Such
    symbolic variables are useful when, for example, the input is the result
    of some computations, or when weights are shared between the dA and an
    MLP layer. When dealing with SdAs this always happens: the dA on layer 2
    gets as input the output of the dA on layer 1, and the weights of the dA
    are used in the second stage of training to construct an MLP.

    :type numpy_rng: numpy.random.RandomState
    :param numpy_rng: numpy random number generator used to generate weights

    :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
    :param theano_rng: Theano random generator; if None is given one is
                       generated based on a seed drawn from `rng`

    :type input: theano.tensor.TensorType
    :param input: a symbolic description of the input or None for
                  standalone dA

    :type n_visible: int
    :param n_visible: number of visible units

    :type n_hidden: int
    :param n_hidden: number of hidden units

    :type W: theano.tensor.TensorType
    :param W: Theano variable pointing to a set of weights that should be
              shared between the dA and another architecture; if dA should
              be standalone set this to None

    :type bhid: theano.tensor.TensorType
    :param bhid: Theano variable pointing to a set of bias values (for
                 hidden units) that should be shared between dA and another
                 architecture; if dA should be standalone set this to None

    :type bvis: theano.tensor.TensorType
    :param bvis: Theano variable pointing to a set of bias values (for
                 visible units) that should be shared between dA and another
                 architecture; if dA should be standalone set this to None
    """
    self.n_visible = n_visible
    self.n_hidden = n_hidden

    # create a Theano random generator that gives symbolic random values
    if not theano_rng:
        theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

    # note : W' was written as `W_prime` and b' as `b_prime`
    if not W:
        # W is initialized with `initial_W` which is uniformly sampled
        # from -4*sqrt(6./(n_visible+n_hidden)) and
        # 4*sqrt(6./(n_hidden+n_visible)); the output of uniform is
        # converted using asarray to dtype theano.config.floatX so that
        # the code is runnable on GPU
        initial_W = numpy.asarray(numpy_rng.uniform(
            low=-4 * numpy.sqrt(6. / (n_hidden + n_visible)),
            high=4 * numpy.sqrt(6. / (n_hidden + n_visible)),
            size=(n_visible, n_hidden)), dtype=theano.config.floatX)
        W = theano.shared(value=initial_W, name='W', borrow=True)

    if not bvis:
        bvis = theano.shared(value=numpy.zeros(n_visible,
                                               dtype=theano.config.floatX),
                             borrow=True)

    if not bhid:
        bhid = theano.shared(value=numpy.zeros(n_hidden,
                                               dtype=theano.config.floatX),
                             name='b',
                             borrow=True)

    self.W = W
    # b corresponds to the bias of the hidden
    self.b = bhid
    # b_prime corresponds to the bias of the visible
    self.b_prime = bvis
    # tied weights, therefore W_prime is W transpose, written here as an
    # explicit DimShuffle over the (rows, cols) axes
    self.W_prime = T.DimShuffle(self.W.broadcastable, (1, 0))(self.W)
    self.theano_rng = theano_rng
    # if no input is given, generate a variable representing the input
    if input is None:
        # we use a matrix because we expect a minibatch of several
        # examples, each example being a row
        self.x = T.dmatrix(name='input')
    else:
        self.x = input

    self.params = [self.W, self.b, self.b_prime]
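# Hedged aside (assumed names, not part of the dA class): for a matrix,
# `W.T` and an explicit DimShuffle((False, False), (1, 0)) build the same
# transposed graph, so the tied-weight W_prime above is simply the
# transpose of W.
import numpy
import theano
import theano.tensor as T

W = T.dmatrix('W')
W_prime = T.DimShuffle(W.broadcastable, (1, 0))(W)
f = theano.function([W], [W_prime, W.T])
a, b = f(numpy.arange(6.).reshape(2, 3))
assert numpy.allclose(a, b)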
class Opt(object):
    merge = theano.gof.MergeOptimizer()
    gemm_opt_1 = theano.gof.TopoOptimizer(theano.tensor_opt.gemm_pattern_1)

    gemm_opt_2 = theano.gof.TopoOptimizer(
        # d -= a * (dot()+transpose(dot))
        theano.gof.PatternSub(
            (T.sub_inplace, 'd',
             (T.mul,
              dict(pattern=(T.DimShuffle((), ['x', 'x'], inplace=True), 'a'),
                   allow_multiple_clients=True),
              (T.add,
               (T.dot, 'b', 'c'),
               (T.transpose_inplace, (T.dot, 'f', 'g'))))),
            (T.gemm,
             (T.gemm, 'd', (T.neg, 'a'),
              (T.transpose_inplace, 'g'), (T.transpose_inplace, 'f'),
              T.constant(1.0)),
             (T.neg, 'a'), 'b', 'c', T.constant(1.0)),
            allow_multiple_clients=False))

    sqr = []
    sqr.append(
        theano.gof.TopoOptimizer(
            theano.gof.PatternSub((T.mul, 'x', 'x'),
                                  (T.sqr, 'x'),
                                  allow_multiple_clients=True)))
    sqr.append(
        theano.gof.TopoOptimizer(
            theano.gof.PatternSub(
                (T.pow, 'x', (T.DimShuffle((), ['x', 'x'], inplace=True),
                              T.constant(2))),
                (T.sqr, 'x'),
                allow_multiple_clients=True)))

    ident_opt_list = []
    ident_opt_list.append(
        # remove explicit copies
        theano.gof.TopoOptimizer(
            theano.gof.PatternSub((T.tensor_copy, 'x'),
                                  'x',
                                  allow_multiple_clients=True)))
    ident_opt_list.append(
        # remove double-transpose
        theano.gof.TopoOptimizer(
            theano.gof.PatternSub(
                (T.transpose_inplace, (T.transpose_inplace, 'x')),
                'x',
                allow_multiple_clients=True)))
    ident_opt_list.append(
        theano.gof.TopoOptimizer(
            theano.gof.PatternSub((T.sqr, (T.sqrt, 'x')),
                                  'x',
                                  allow_multiple_clients=True)))
    ident_opt_list.append(
        theano.gof.TopoOptimizer(
            theano.gof.PatternSub((T.sqrt, (T.sqr, 'x')),
                                  'x',
                                  allow_multiple_clients=True)))
    ident_opt_list.append(
        theano.gof.TopoOptimizer(
            theano.gof.PatternSub((T.mul, 'x', (T.div, 'y', 'x')),
                                  'y',
                                  allow_multiple_clients=True)))
    ident_opt_list.append(
        theano.gof.TopoOptimizer(
            theano.gof.PatternSub((T.mul, (T.div, 'y', 'x'), 'x'),
                                  'y',
                                  allow_multiple_clients=True)))
    ident_opt_list.append(
        theano.gof.TopoOptimizer(
            theano.gof.PatternSub((T.div, (T.mul, 'y', 'x'), 'x'),
                                  'y',
                                  allow_multiple_clients=True)))
    ident_opt_list.append(
        theano.gof.TopoOptimizer(
            theano.gof.PatternSub((T.div, (T.mul, 'y', 'x'), 'y'),
                                  'x',
                                  allow_multiple_clients=True)))

    def __call__(self, env):
        self.merge(env)
        # eliminate identities
        if 0:
            print('SKIPPING optimizations')
        else:
            for opt in self.ident_opt_list:
                opt(env)
            for opt in self.sqr:
                opt(env)
            self.gemm_opt_1(env)
            self.gemm_opt_2(env)
            self.merge(env)
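# Hedged illustration (assumed names, not part of the Opt class): the
# sub-pattern (T.DimShuffle((), ['x', 'x'], inplace=True), 'a') used above
# matches a scalar 'a' broadcast up to a matrix, which is how
# `scalar * matrix` appears in the graph that the gemm substitution rewrites.
import theano
import theano.tensor as T

a = T.dscalar('a')
m = T.dmatrix('m')
prod = a * m                      # the scalar is broadcast via a DimShuffle{x,x} node
theano.printing.debugprint(prod)  # the printout shows the dimshuffled scalar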