Ejemplo n.º 1
0
def max_pool(images, imgshp, maxpoolshp):
    """Implements a max pooling layer

    Takes as input a 2D tensor of shape batch_size x img_size and
    performs max pooling.  Max pooling downsamples by taking the max
    value in a given area, here defined by maxpoolshp. Outputs a 2D
    tensor of shape batch_size x output_size.

    :param images: 2D tensor containing images on which to apply convolution.
                   Assumed to be of shape batch_size x img_size
    :param imgshp: tuple containing image dimensions
    :param maxpoolshp: tuple containing shape of area to max pool over

    :return: out1, symbolic result (2D tensor)
    :return: out2, logical shape of the output
    """
    poolsize = np.int64(np.prod(maxpoolshp))

    # imgshp contains either 2 entries (height,width) or 3 (nfeatures,h,w)
    # in the first case, default nfeatures to 1
    if np.size(imgshp) == 2:
        imgshp = (1, ) + imgshp

    # construct indices and index pointers for sparse matrix, which,
    # when multiplied with input images will generate a stack of image
    # patches
    indices, indptr, spmat_shape, sptype, outshp = convolution_indices.conv_eval(
        imgshp, maxpoolshp, maxpoolshp, mode="valid")

    #    print 'XXXXXXXXXXXXXXXX MAX POOLING LAYER XXXXXXXXXXXXXXXXXXXX'
    #    print 'imgshp = ', imgshp
    #    print 'maxpoolshp = ', maxpoolshp
    #    print 'outshp = ', outshp

    # build sparse matrix, then generate stack of image patches
    csc = aesara.sparse.CSM(sptype)(np.ones(indices.size), indices, indptr,
                                    spmat_shape)
    patches = sparse.structured_dot(csc, images.T).T

    pshape = aet.stack([
        images.shape[0] * aet.as_tensor(np.prod(outshp)),
        aet.as_tensor(imgshp[0]),
        aet.as_tensor(poolsize),
    ])
    patch_stack = reshape(patches, pshape, ndim=3)

    out1 = tt_max(patch_stack, axis=2)

    pshape = aet.stack([
        images.shape[0],
        aet.as_tensor(np.prod(outshp)),
        aet.as_tensor(imgshp[0]),
    ])
    out2 = reshape(out1, pshape, ndim=3)

    out3 = DimShuffle(out2.broadcastable, (0, 2, 1))(out2)

    return aet.flatten(out3, 2), outshp
Ejemplo n.º 2
0
def convolve(
        kerns,
        kshp,
        nkern,
        images,
        imgshp,
        step=(1, 1),
        bias=None,
        mode="valid",
        flatten=True,
):
    """Convolution implementation by sparse matrix multiplication.

    :note: For best speed, put the matrix which you expect to be
           smaller as the 'kernel' argument

    "images" is assumed to be a matrix of shape batch_size x img_size,
    where the second dimension represents each image in raster order

    If flatten is "False", the output feature map will have shape:

    .. code-block:: python

        batch_size x number of kernels x output_size

    If flatten is "True", the output feature map will have shape:

    .. code-block:: python

        batch_size x number of kernels * output_size

    .. note::

        IMPORTANT: note that this means that each feature map (image
        generate by each kernel) is contiguous in memory. The memory
        layout will therefore be: [ <feature_map_0> <feature_map_1>
        ... <feature_map_n>], where <feature_map> represents a
        "feature map" in raster order

    kerns is a 2D tensor of shape nkern x N.prod(kshp)

    :param kerns: 2D tensor containing kernels which are applied at every pixel
    :param kshp: tuple containing actual dimensions of kernel (not symbolic)
    :param nkern: number of kernels/filters to apply.
                  nkern=1 will apply one common filter to all input pixels
    :param images: tensor containing images on which to apply convolution
    :param imgshp: tuple containing image dimensions
    :param step: determines number of pixels between adjacent receptive fields
                 (tuple containing dx,dy values)
    :param mode: 'full', 'valid' see CSM.evaluate function for details
    :param sumdims: dimensions over which to sum for the tensordot operation.
                    By default ((2,),(1,)) assumes kerns is a nkern x kernsize
                    matrix and images is a batchsize x imgsize matrix
                    containing flattened images in raster order
    :param flatten: flatten the last 2 dimensions of the output. By default,
                    instead of generating a batchsize x outsize x nkern tensor,
                    will flatten to batchsize x outsize*nkern

    :return: out1, symbolic result
    :return: out2, logical shape of the output img (nkern,heigt,width)

    :TODO: test for 1D and think of how to do n-d convolutions
    """
    # start by computing output dimensions, size, etc
    kern_size = np.int64(np.prod(kshp))

    # inshp contains either 2 entries (height,width) or 3 (nfeatures,h,w)
    # in the first case, default nfeatures to 1
    if np.size(imgshp) == 2:
        imgshp = (1, ) + imgshp

    # construct indices and index pointers for sparse matrix, which,
    # when multiplied with input images will generate a stack of image
    # patches
    indices, indptr, spmat_shape, sptype, outshp = convolution_indices.conv_eval(
        imgshp, kshp, step, mode)

    # build sparse matrix, then generate stack of image patches
    csc = aesara.sparse.CSM(sptype)(np.ones(indices.size), indices, indptr,
                                    spmat_shape)
    patches = (sparse.structured_dot(csc, images.T)).T

    # compute output of linear classifier
    pshape = aet.stack([
        images.shape[0] * aet.as_tensor(np.prod(outshp)),
        aet.as_tensor(imgshp[0] * kern_size),
    ])
    patch_stack = reshape(patches, pshape, ndim=2)

    # kern is of shape: nkern x ksize*number_of_input_features
    # output is thus of shape: bsize*outshp x nkern
    output = dot(patch_stack, kerns.T)

    # add bias across each feature map (more efficient to do it now)
    if bias is not None:
        output += bias

    # now to have feature maps in raster order ...
    # go from bsize*outshp x nkern to bsize x nkern*outshp
    newshp = aet.stack([
        images.shape[0],
        aet.as_tensor(np.prod(outshp)),
        aet.as_tensor(nkern)
    ])
    tensout = reshape(output, newshp, ndim=3)
    output = DimShuffle((False, ) * tensout.ndim, (0, 2, 1))(tensout)
    if flatten:
        output = aet.flatten(output, 2)

    return output, np.hstack((nkern, outshp))