def test_alloc_empty():
    # GpuAllocEmpty is parameterized by dtype and context name; the call
    # receives the shape scalars of the uninitialized buffer to allocate.
    for dt in ["float32", "int8"]:
        f = theano.function(
            [], GpuAllocEmpty(dt, context_name=test_ctx_name)(2, 3))
        assert len(f.maker.fgraph.apply_nodes) == 1
        out = f()
        # Only shape and dtype are guaranteed; the contents are uninitialized.
        assert out.shape == (2, 3)
        assert out.dtype == dt

    f = theano.function(
        [],
        [
            GpuAllocEmpty("uint64", test_ctx_name)(3, 2),
            GpuAllocEmpty("uint64", test_ctx_name)(3, 2),
        ],
    )
    out = f()
    assert out[0].shape == (3, 2)
    assert out[0].dtype == "uint64"
    assert out[1].shape == (3, 2)
    assert out[1].dtype == "uint64"
    # The two identical GpuAllocEmpty applications must be merged into a
    # single apply node by the graph optimizer.
    assert len([
        node for node in f.maker.fgraph.apply_nodes
        if isinstance(node.op, GpuAllocEmpty)
    ]) == 1
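For quick reference, here is a minimal, standalone sketch of the pattern this test exercises. It assumes the usual `theano.gpuarray.basic_ops` import path and a configured default GPU context; the `None` context name follows the deconv examples further below.

# Minimal sketch, assuming the theano.gpuarray backend and a configured GPU context.
import theano
from theano.gpuarray.basic_ops import GpuAllocEmpty

# The op takes dtype and context_name at construction time; None selects the
# default context (assumption). The shape is given when the op is applied.
alloc = GpuAllocEmpty("float32", context_name=None)
f = theano.function([], alloc(4, 5))
buf = f()
# The buffer is deliberately left uninitialized, so only shape/dtype are checked.
assert buf.shape == (4, 5)
assert buf.dtype == "float32"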
Example #2
def deconv(self, X, subsample=(2, 2), border_mode=(2, 2), conv_mode='conv', atype='sigmoid'):
    """
    Sets up a dummy convolutional forward pass and uses its gradient as the
    deconvolution. Currently only tested/working with 'same' padding.
    """

    # Always return a c-contiguous output.
    # Copy the input only if it is not already c-contiguous.
    img = gpu_contiguous(X)
    kerns = gpu_contiguous(self.W)

    # Allocate the output on the GPU without initializing its memory.
    if someconfigs.backend == 'gpuarray':
        gpu_alloc_img_shape = GpuAllocEmpty('float32', None)(
            img.shape[0], kerns.shape[1],
            img.shape[2] * subsample[0], img.shape[3] * subsample[1]).shape
        # This op builds a convolution descriptor for use in the other
        # convolution operations.
        desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample,
                              conv_mode=conv_mode)(kerns.shape)
        out = GpuAllocEmpty('float32', None)(
            img.shape[0], kerns.shape[1],
            img.shape[2] * subsample[0], img.shape[3] * subsample[1])
    elif someconfigs.backend == 'cudandarray':
        gpu_alloc_img_shape = gpu_alloc_empty(
            img.shape[0], kerns.shape[1],
            img.shape[2] * subsample[0], img.shape[3] * subsample[1]).shape
        desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample,
                              conv_mode=conv_mode)(gpu_alloc_img_shape, kerns.shape)
        out = gpu_alloc_empty(
            img.shape[0], kerns.shape[1],
            img.shape[2] * subsample[0], img.shape[3] * subsample[1])

    # The convolution gradient with respect to the inputs acts as the deconvolution.
    d_img = GpuDnnConvGradI()(kerns, img, out, desc)
    return activation_fn_th(d_img, atype=atype)
    def deconv(self,
               X,
               subsample=(2, 2),
               border_mode=(2, 2),
               conv_mode='conv',
               atype='sigmoid',
               testF=False):
        """
        Sets up a dummy convolutional forward pass and uses its gradient as the
        deconvolution. Currently only tested/working with 'same' padding.
        """

        # Always return a c-contiguous output.
        # Copy the input only if it is not already c-contiguous.
        img = gpu_contiguous(X)
        kerns = gpu_contiguous(self.W)

        # Allocate the output on the GPU without initializing its memory.
        if someconfigs.backend == 'gpuarray':
            gpu_alloc_img_shape = GpuAllocEmpty('float32', None)(
                img.shape[0], kerns.shape[1],
                img.shape[2] * subsample[0], img.shape[3] * subsample[1]).shape
            # This op builds a convolution descriptor for use in the other
            # convolution operations.
            desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample,
                                  conv_mode=conv_mode)(kerns.shape)
            out = GpuAllocEmpty('float32', None)(
                img.shape[0], kerns.shape[1],
                img.shape[2] * subsample[0], img.shape[3] * subsample[1])
        elif someconfigs.backend == 'cudandarray':
            gpu_alloc_img_shape = gpu_alloc_empty(
                img.shape[0], kerns.shape[1],
                img.shape[2] * subsample[0], img.shape[3] * subsample[1]).shape
            desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample,
                                  conv_mode=conv_mode)(gpu_alloc_img_shape, kerns.shape)
            out = gpu_alloc_empty(
                img.shape[0], kerns.shape[1],
                img.shape[2] * subsample[0], img.shape[3] * subsample[1])

        # The convolution gradient with respect to the inputs acts as the deconvolution.
        d_img = GpuDnnConvGradI()(kerns, img, out, desc)
        ConH0 = d_img  # + self.b.dimshuffle('x', 0, 'x', 'x')

        if testF:
            # At test time, normalize with the stored population statistics.
            ConH1 = ((ConH0 - self.stat_mean.dimshuffle('x', 0, 'x', 'x'))
                     / (self.stat_std.dimshuffle('x', 0, 'x', 'x') + TINY))
        else:
            # At training time, normalize with the batch mean and variance.
            mean = ConH0.mean(axis=[0, 2, 3]).dimshuffle('x', 0, 'x', 'x')
            # Note: 'std' actually holds the variance; sqrt is applied below.
            std = T.mean(T.sqr(ConH0 - mean),
                         axis=[0, 2, 3]).dimshuffle('x', 0, 'x', 'x')
            ConH1 = (ConH0 - mean) / T.sqrt(std + TINY)

        # Learned scale (eta) and shift (beta).
        ConH2 = (self.eta.dimshuffle('x', 0, 'x', 'x') * ConH1
                 + self.beta.dimshuffle('x', 0, 'x', 'x'))

        return activation_fn_th(ConH2, atype=atype)
def deconv(X, w, subsample=(1, 1), border_mode=(0, 0), conv_mode='conv'):
    # Ensure c-contiguous float32 inputs for the cuDNN ops.
    img = gpu_contiguous(T.cast(X, 'float32'))
    kerns = gpu_contiguous(T.cast(w, 'float32'))
    # Descriptor of the forward convolution whose input gradient we want.
    desc = GpuDnnConvDesc(border_mode=border_mode,
                          subsample=subsample,
                          conv_mode=conv_mode)(kerns.shape)
    # Uninitialized output buffer with the upsampled spatial dimensions.
    out = GpuAllocEmpty(dtype='float32', context_name=infer_context_name(X))(
        img.shape[0], kerns.shape[1], img.shape[2] * subsample[0],
        img.shape[3] * subsample[1])
    # The gradient of the forward convolution w.r.t. its inputs is the
    # transposed convolution (deconvolution).
    d_img = GpuDnnConvGradI()(kerns, img, out, desc)
    return d_img
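A brief, hedged usage sketch for the `deconv` helper above. The variable names and shapes are illustrative assumptions, and actually running it requires a cuDNN-enabled GPU context; the only claim taken from the code itself is that the output channels come from `w.shape[1]` and the spatial size is scaled by `subsample`.

# Usage sketch (assumptions: cuDNN-enabled GPU configured; shapes are illustrative).
import numpy as np
import theano
import theano.tensor as T

X = T.tensor4("X")   # (batch, deconv_in_channels, rows, cols)
w = T.tensor4("w")   # forward kernels: (deconv_in_channels, deconv_out_channels, kh, kw)
y = deconv(X, w, subsample=(2, 2), border_mode=(2, 2))
f = theano.function([X, w], y)

out = f(np.random.randn(8, 16, 7, 7).astype("float32"),
        np.random.randn(16, 32, 5, 5).astype("float32"))
# Channels come from w.shape[1] and the spatial size is upsampled by subsample:
# (8, 16, 7, 7) -> (8, 32, 14, 14)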
Example #5
def opt(node):
    # `op`, `idx` and `maker` are free variables from the enclosing scope.
    if type(node.op) != op or node.op.inplace:
        return
    inputs = list(node.inputs)
    alloc = inputs[idx]
    if (alloc.owner and isinstance(alloc.owner.op, GpuAllocEmpty)
            and len(alloc.clients) > 1):
        # The uninitialized buffer has other clients: give the inplace op its
        # own GpuAllocEmpty node so it can safely overwrite the memory.
        alloc_op = GpuAllocEmpty(alloc.owner.op.dtype,
                                 alloc.owner.op.context_name)
        inputs[idx] = alloc_op(*alloc.owner.inputs)
    with inherit_stack_trace(node.outputs):
        return maker(node, inputs)
Example #6
def apply(self, input_):
    if self.use_bias:
        W, b = self.parameters
    else:
        W, = self.parameters
    W = W.dimshuffle(1, 0, 2, 3)
    img = GpuContiguous(input_)
    kerns = GpuContiguous(W)
    desc = GpuDnnConvDesc(border_mode=self.pad,
                          subsample=self.stride,
                          conv_mode='conv')(
        GpuAllocEmpty(img.shape[0], kerns.shape[1],
                      img.shape[2] * self.stride[0],
                      img.shape[3] * self.stride[1]).shape,
        kerns.shape)
    out = GpuAllocEmpty(img.shape[0], kerns.shape[1],
                        img.shape[2] * self.stride[0],
                        img.shape[3] * self.stride[1])
    output = GpuDnnConvGradI()(kerns, img, out, desc)
    if self.use_bias:
        output += b.dimshuffle('x', 0, 'x', 'x')
    return output
Example #7
def local_abstractconv_cudnn_alt(node):
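    """
    Alternative cuDNN lowering: rewrites AbstractConv2d,
    AbstractConv2d_gradWeights and AbstractConv2d_gradInputs into cuDNN ops
    by reusing a different convolution direction (e.g. the weight gradient
    as a forward convolution over swapped axes).
    """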
    if not isinstance(node.op, (AbstractConv2d, AbstractConv2d_gradWeights,
                                AbstractConv2d_gradInputs)):
        return

    if version(raises=False) < 6000 and node.op.filter_dilation != (1, 1):
        return None
    if node.op.unshared:
        return None
    if isinstance(node.op.border_mode, tuple) and any(
            isinstance(p, tuple) for p in node.op.border_mode):
        # Asymmetric padding not yet supported
        return None
    inp1 = node.inputs[0]
    inp2 = node.inputs[1]

    if not dnn_available(inp1.type.context_name):
        return

    op = node.op
    border_mode = node.op.border_mode
    subsample = node.op.subsample
    filter_dilation = node.op.filter_dilation
    num_groups = node.op.num_groups
    precision, _ = get_precision(None, [inp1, inp2])

    if node.op.filter_flip:
        conv_mode = "conv"
    else:
        conv_mode = "cross"

    if isinstance(op, AbstractConv2d):
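        # The forward convolution is delegated to dnn_conv with a
        # direction_hint, so it is implemented through one of the backward
        # cuDNN directions instead of the plain forward algorithm.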
        if border_mode == "half" or subsample != (1, 1) or num_groups != 1:
            return None
        if border_mode == "full":
            direction_hint = "bprop inputs"
        elif border_mode == "valid" and filter_dilation == (1, 1):
            direction_hint = "bprop weights"
        else:
            return None

        rval = dnn_conv(
            inp1,
            inp2,
            border_mode=border_mode,
            subsample=subsample,
            dilation=filter_dilation,
            direction_hint=direction_hint,
            conv_mode=conv_mode,
            num_groups=num_groups,
        )

    elif isinstance(op, AbstractConv2d_gradWeights):
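        # Compute the weight gradient as a forward convolution over
        # batch/channel-swapped images and output gradients.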
        if (border_mode == "valid" and subsample == (1, 1)
                and filter_dilation == (1, 1) and num_groups == 1):
            img = gpu_contiguous(inp1)
            topgrad = gpu_contiguous(inp2)
            ctx_name = infer_context_name(img, topgrad)
            img = gpu_contiguous(img.dimshuffle(1, 0, 2, 3))
            topgrad = gpu_contiguous(topgrad.dimshuffle(1, 0, 2, 3))
            ishape = [shape_i_op(i)(img) for i in range(img.ndim)]
            tshape = [shape_i_op(i)(topgrad) for i in range(topgrad.ndim)]
            out_shp = get_conv_output_shape(
                ishape,
                tshape,
                border_mode=border_mode,
                subsample=subsample,
                filter_dilation=filter_dilation,
            )

            out_shp = assert_conv_shape(out_shp)
            out = GpuAllocEmpty(dtype=img.dtype,
                                context_name=ctx_name)(*out_shp)
            desc = GpuDnnConvDesc(
                border_mode=border_mode,
                subsample=subsample,
                dilation=filter_dilation,
                conv_mode="cross",
                precision=precision,
            )(out.shape)

            conv = GpuDnnConv(algo=None, num_groups=num_groups)(img, topgrad,
                                                                out, desc)
            if conv_mode == "conv":
                conv = conv[:, :, ::-1, ::-1]

            rval = as_gpuarray_variable(conv.dimshuffle(1, 0, 2, 3), ctx_name)
        else:
            return None

    elif isinstance(op, AbstractConv2d_gradInputs):
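        # Compute the input gradient as a full-mode forward convolution of the
        # output gradient with channel-swapped kernels.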
        if border_mode == "valid" and subsample == (1, 1) and num_groups == 1:
            kerns = gpu_contiguous(inp1.dimshuffle(1, 0, 2, 3))
            topgrad = gpu_contiguous(inp2)
            ctx_name = infer_context_name(kerns, topgrad)
            conv_mode = "cross" if conv_mode == "conv" else "conv"
            desc = GpuDnnConvDesc(
                border_mode="full",
                subsample=subsample,
                dilation=filter_dilation,
                conv_mode=conv_mode,
                precision=precision,
            )(kerns.shape)

            tshape = [shape_i_op(i)(topgrad) for i in range(topgrad.ndim)]
            kshape = [shape_i_op(i)(kerns) for i in range(kerns.ndim)]
            shape = get_conv_output_shape(
                tshape,
                kshape,
                border_mode="full",
                subsample=subsample,
                filter_dilation=filter_dilation,
            )

            shape = assert_conv_shape(shape)
            out = GpuAllocEmpty(dtype=topgrad.dtype,
                                context_name=ctx_name)(*shape)
            rval = GpuDnnConv(algo=None, num_groups=num_groups)(topgrad, kerns,
                                                                out, desc)
        else:
            return None

    return [rval]