def max_pool(images, imgshp, maxpoolshp):
    """Implements a max pooling layer

    Takes as input a 2D tensor of shape batch_size x img_size and
    performs max pooling. Max pooling downsamples by taking the max
    value in a given area, here defined by maxpoolshp. Outputs a 2D
    tensor of shape batch_size x output_size.

    :param images: 2D tensor containing images on which to apply max
                   pooling. Assumed to be of shape batch_size x img_size
    :param imgshp: tuple containing image dimensions
    :param maxpoolshp: tuple containing shape of area to max pool over
    :return: out1, symbolic result (2D tensor)
    :return: out2, logical shape of the output
    """
    poolsize = np.int64(np.prod(maxpoolshp))

    # imgshp contains either 2 entries (height, width) or 3 (nfeatures, h, w);
    # in the first case, default nfeatures to 1
    if np.size(imgshp) == 2:
        imgshp = (1,) + imgshp

    # construct indices and index pointers for the sparse matrix which,
    # when multiplied with the input images, will generate a stack of
    # image patches
    indices, indptr, spmat_shape, sptype, outshp = convolution_indices.conv_eval(
        imgshp, maxpoolshp, maxpoolshp, mode="valid"
    )

    # build sparse matrix, then generate stack of image patches
    csc = aesara.sparse.CSM(sptype)(
        np.ones(indices.size), indices, indptr, spmat_shape
    )
    patches = sparse.structured_dot(csc, images.T).T

    pshape = aet.stack(
        [
            images.shape[0] * aet.as_tensor(np.prod(outshp)),
            aet.as_tensor(imgshp[0]),
            aet.as_tensor(poolsize),
        ]
    )
    patch_stack = reshape(patches, pshape, ndim=3)

    out1 = tt_max(patch_stack, axis=2)

    pshape = aet.stack(
        [
            images.shape[0],
            aet.as_tensor(np.prod(outshp)),
            aet.as_tensor(imgshp[0]),
        ]
    )
    out2 = reshape(out1, pshape, ndim=3)

    # put the feature axis before the pixel axis so each feature map is
    # contiguous in memory, then flatten back to 2D
    out3 = DimShuffle(out2.broadcastable, (0, 2, 1))(out2)

    return aet.flatten(out3, 2), outshp
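
# A minimal usage sketch for ``max_pool`` (illustrative only; the variable
# names ``x``, ``pooled`` and ``f`` are hypothetical, and this assumes the
# module-level imports of ``aesara``, ``np``, ``aet``, etc. used above):
#
#     import aesara
#     import aesara.tensor as aet
#
#     x = aet.matrix("x")  # batch_size x 64: flattened 8x8 images
#     pooled, outshp = max_pool(x, imgshp=(8, 8), maxpoolshp=(2, 2))
#     f = aesara.function([x], pooled)
#     # with 'valid' pooling and a stride equal to maxpoolshp, each 8x8
#     # image should yield a (8 - 2) // 2 + 1 = 4 by 4 output, so f(batch)
#     # should return an array of shape (batch_size, 16)
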
def convolve(
    kerns,
    kshp,
    nkern,
    images,
    imgshp,
    step=(1, 1),
    bias=None,
    mode="valid",
    flatten=True,
):
    """Convolution implementation by sparse matrix multiplication.

    :note: For best speed, put the matrix which you expect to be
           smaller as the 'kernel' argument.

    "images" is assumed to be a matrix of shape batch_size x img_size,
    where the second dimension represents each image in raster order.

    If flatten is "False", the output feature map will have shape:

    .. code-block:: python

        batch_size x number of kernels x output_size

    If flatten is "True", the output feature map will have shape:

    .. code-block:: python

        batch_size x number of kernels * output_size

    .. note::

        IMPORTANT: this means that each feature map (the image generated
        by each kernel) is contiguous in memory. The memory layout will
        therefore be: [<feature_map_0> <feature_map_1> ... <feature_map_n>],
        where <feature_map> represents a "feature map" in raster order.

    kerns is a 2D tensor of shape nkern x np.prod(kshp)

    :param kerns: 2D tensor containing kernels which are applied at every pixel
    :param kshp: tuple containing actual dimensions of kernel (not symbolic)
    :param nkern: number of kernels/filters to apply. nkern=1 will apply
                  one common filter to all input pixels
    :param images: tensor containing images on which to apply convolution
    :param imgshp: tuple containing image dimensions
    :param step: determines number of pixels between adjacent receptive
                 fields (tuple containing dx, dy values)
    :param mode: 'full' or 'valid'; see CSM.evaluate function for details
    :param flatten: flatten the last 2 dimensions of the output. By default,
                    instead of generating a batchsize x outsize x nkern
                    tensor, will flatten to batchsize x outsize*nkern
    :return: out1, symbolic result
    :return: out2, logical shape of the output img (nkern, height, width)

    :TODO: test for 1D and think of how to do n-d convolutions
    """
    # start by computing output dimensions, size, etc.
    kern_size = np.int64(np.prod(kshp))

    # imgshp contains either 2 entries (height, width) or 3 (nfeatures, h, w);
    # in the first case, default nfeatures to 1
    if np.size(imgshp) == 2:
        imgshp = (1,) + imgshp

    # construct indices and index pointers for the sparse matrix which,
    # when multiplied with the input images, will generate a stack of
    # image patches
    indices, indptr, spmat_shape, sptype, outshp = convolution_indices.conv_eval(
        imgshp, kshp, step, mode
    )

    # build sparse matrix, then generate stack of image patches
    csc = aesara.sparse.CSM(sptype)(
        np.ones(indices.size), indices, indptr, spmat_shape
    )
    patches = sparse.structured_dot(csc, images.T).T

    # compute output of linear classifier
    pshape = aet.stack(
        [
            images.shape[0] * aet.as_tensor(np.prod(outshp)),
            aet.as_tensor(imgshp[0] * kern_size),
        ]
    )
    patch_stack = reshape(patches, pshape, ndim=2)

    # kerns is of shape: nkern x ksize*number_of_input_features
    # output is thus of shape: bsize*outshp x nkern
    output = dot(patch_stack, kerns.T)

    # add bias across each feature map (more efficient to do it now)
    if bias is not None:
        output += bias

    # now to have feature maps in raster order ...
    # go from bsize*outshp x nkern to bsize x nkern*outshp
    newshp = aet.stack(
        [
            images.shape[0],
            aet.as_tensor(np.prod(outshp)),
            aet.as_tensor(nkern),
        ]
    )
    tensout = reshape(output, newshp, ndim=3)
    output = DimShuffle((False,) * tensout.ndim, (0, 2, 1))(tensout)

    if flatten:
        output = aet.flatten(output, 2)

    return output, np.hstack((nkern, outshp))
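
# A minimal usage sketch for ``convolve`` (illustrative only; the variable
# names ``x``, ``w``, ``out`` and ``f`` are hypothetical, and this assumes
# the module-level imports used above):
#
#     import aesara
#     import aesara.tensor as aet
#
#     x = aet.matrix("x")  # batch_size x 64: flattened 8x8 images
#     w = aet.matrix("w")  # nkern x prod(kshp) == 4 x 25
#     out, outshp = convolve(w, kshp=(5, 5), nkern=4, images=x, imgshp=(8, 8))
#     f = aesara.function([x, w], out)
#     # in 'valid' mode each 8x8 image should yield a 4x4 map per kernel,
#     # so outshp should be (nkern, height, width) = (4, 4, 4) and, with
#     # flatten=True (the default), f(batch, kernels) should have shape
#     # (batch_size, 4 * 16) = (batch_size, 64)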