Beispiel #1
    def make_node(self, images, hid_grads, output_shape):
        """
        Build the Apply node that produces weight gradients and partial sums.

        Parameters
        ----------
        images : variable
            Must have a ``CudaNdarrayType`` type and 4 dimensions.
        hid_grads : variable
            Must have a ``CudaNdarrayType`` type and 4 dimensions.
        output_shape : variable
            Passed through unchanged as the third input of the node.

        Returns
        -------
        Apply
            Node with inputs ``[images, hid_grads, output_shape]`` and two
            outputs: the 4-d weight gradients and a 5-d partial-sums
            variable with no broadcastable dimension.

        Raises
        ------
        TypeError
            If ``images`` or ``hid_grads`` is not of ``CudaNdarrayType``.
        """
        # The message names the argument that actually failed the check.
        for name, var in (("images", images), ("hid_grads", hid_grads)):
            if not isinstance(var.type, CudaNdarrayType):
                raise TypeError("WeightActs: expected %s.type "
                                "to be CudaNdarrayType, "
                                "got %s" % (name, var.type))

        assert images.ndim == 4
        assert hid_grads.ndim == 4

        input_channels_broadcastable = images.type.broadcastable[0]
        # We don't know anything about filter_rows or filter_cols at compile
        # time, so we assume they're not broadcastable.
        filter_rows_broadcastable = False
        filter_cols_broadcastable = False
        output_channels_broadcastable = hid_grads.type.broadcastable[0]

        weights_grads_type = CudaNdarrayType(
            (input_channels_broadcastable, filter_rows_broadcastable,
             filter_cols_broadcastable, output_channels_broadcastable))

        partial_sums_type = CudaNdarrayType((False, ) * 5)
        weights_grads = weights_grads_type()
        partial_sums = partial_sums_type()

        return Apply(self, [images, hid_grads, output_shape],
                     [weights_grads, partial_sums])
Beispiel #2
    def make_node(self, images, top_down):
        """
        Build the Apply node for this op.

        Parameters
        ----------
        images : variable
            Converted with ``as_cuda_ndarray_variable``; must be 4-d.
        top_down : variable
            Converted with ``as_cuda_ndarray_variable``; must be 4-d.

        Returns
        -------
        Apply
            Node with inputs ``[images, top_down]`` and two outputs
            ``[houtput, poutput]``. Both outputs are 4-d; their first and
            last broadcastable flags are copied from ``images`` and the two
            middle flags are always False.
        """
        images = as_cuda_ndarray_variable(images)
        top_down = as_cuda_ndarray_variable(top_down)

        assert images.ndim == 4
        assert top_down.ndim == 4

        channels_broadcastable = images.type.broadcastable[0]
        batch_broadcastable = images.type.broadcastable[3]

        # Rows and columns are never declared broadcastable here.
        rows_broadcastable = False
        cols_broadcastable = False

        houtput_broadcastable = (channels_broadcastable, rows_broadcastable,
                                 cols_broadcastable, batch_broadcastable)
        houtput_type = CudaNdarrayType(broadcastable=houtput_broadcastable)
        houtput = houtput_type()

        poutput_broadcastable = (channels_broadcastable, rows_broadcastable,
                                 cols_broadcastable, batch_broadcastable)
        poutput_type = CudaNdarrayType(broadcastable=poutput_broadcastable)
        poutput = poutput_type()

        return Apply(self, [images, top_down], [houtput, poutput])
Beispiel #3
def test_float32_shared_constructor():
    """Check the variable types produced by the float32 shared constructor."""
    row = numpy.zeros((1, 10), dtype='float32')

    # A plain call yields a CudaNdarray type with nothing broadcastable.
    assert f32sc(row).type == CudaNdarrayType((False, False))

    # The broadcastable argument is accepted as a tuple, a list, or a
    # numpy array; it does not strictly have to be a tuple.
    cases = [
        ((True, False), CudaNdarrayType((True, False))),
        ([True, False], CudaNdarrayType((True, False))),
        (numpy.array([True, False]), CudaNdarrayType([True, False])),
    ]
    for bcast, expected in cases:
        assert f32sc(row, broadcastable=bcast).type == expected

    # Non-matrix shared variables can be created as well.
    four_d = numpy.zeros((2, 3, 4, 5), dtype='float32')
    assert f32sc(four_d).type == CudaNdarrayType((False, ) * 4)
Beispiel #4
 def tensor_type(cls, dtype, ndim):
   """
   Return a CudaNdarrayType with `ndim` non-broadcastable dimensions.

   Any dtype other than "float32" triggers a printed warning and is
   replaced by "float32".
   """
   # noinspection PyUnresolvedReferences,PyPackageRequirements
   from theano.sandbox.cuda import CudaNdarrayType
   not_float32 = dtype != "float32"
   if not_float32:
     print("%s: WARNING: cannot handle type %r, will use float32 instead" % ("GpuNativeOp", dtype))
     dtype = "float32"
   bcast = (False,) * ndim
   return CudaNdarrayType(dtype=dtype, broadcastable=bcast)
Beispiel #5
 def make_node(self, pvals):
     """
     Build the Apply node with outputs ``[vals, indx]`` for input ``pvals``.

     ``pvals`` must be a float32 variable of ``CudaNdarrayType``. Both
     outputs reuse the first two broadcastable flags of ``pvals``.

     Raises
     ------
     TypeError
         If ``pvals`` is not of ``CudaNdarrayType``.
     NotImplementedError
         If the resolved output dtype differs from ``pvals.dtype``.
     """
     assert pvals.dtype == 'float32'
     if not isinstance(pvals.type, CudaNdarrayType):
         raise TypeError('pvals must be cudandarray', pvals)
     # 'auto' means "same dtype as the input"; otherwise use the dtype
     # requested on the op.
     if self.odtype == 'auto':
         odtype = pvals.dtype
     else:
         odtype = self.odtype
     if odtype != pvals.dtype:
         raise NotImplementedError('GpuKArgmax works only if '
             'self.odtype == pvals.dtype', odtype, pvals.dtype)
     br = (pvals.broadcastable[0], pvals.broadcastable[1])
     vals = CudaNdarrayType(broadcastable=br)()
     indx = CudaNdarrayType(broadcastable=br)()
     return Apply(self, [pvals], [vals, indx])
    def make_node(self, images, acts, denoms, dout):
        """
        Build the Apply node with outputs ``[targets, out_acts]``.

        All four inputs must be 4-d and share the same broadcastable
        pattern; both outputs use that pattern too.

        Raises
        ------
        TypeError
            If ``images`` is not of ``CudaNdarrayType``.
        """
        if not isinstance(images.type, CudaNdarrayType):
            # NOTE(review): the per-input loop only runs when `images` itself
            # has the wrong type, so in practice it always reports `images`.
            # Kept as-is to avoid rejecting inputs that were accepted before.
            inputs = images, acts, denoms, dout
            names = "images", "acts", "denoms", "dout"
            for name, var in zip(names, inputs):
                if not isinstance(var.type, CudaNdarrayType):
                    raise TypeError("CrossMapNormUndo: expected %s.type "
                                    "to be CudaNdarrayType, "
                                    "got %s" % (name, str(var.type)))
        assert images.ndim == 4
        assert acts.ndim == 4
        assert denoms.ndim == 4
        assert dout.ndim == 4
        # Not strictly necessary I don't think
        assert images.type.broadcastable == acts.type.broadcastable
        assert images.type.broadcastable == denoms.type.broadcastable
        assert images.type.broadcastable == dout.type.broadcastable

        targets_broadcastable = tuple(images.type.broadcastable)
        targets_type = CudaNdarrayType(broadcastable=targets_broadcastable)
        targets = targets_type()
        out_acts = targets_type()
        return Apply(self, [images, acts, denoms, dout], [targets, out_acts])
Beispiel #7
 def make_node(self, x):
     """
     Build the Apply node mapping a float32 GpuArray variable to a
     CudaNdarray variable with the same broadcastable pattern.

     Raises TypeError(x) when `x` is not a GpuArrayType variable or its
     dtype is not 'float32'.
     """
     from theano.sandbox.cuda import CudaNdarrayType
     accepted = (isinstance(x.type, GpuArrayType) and
                 x.type.dtype == 'float32')
     if not accepted:
         raise TypeError(x)
     out_type = CudaNdarrayType(broadcastable=x.broadcastable)
     return Apply(self, [x], [out_type()])
Beispiel #8
    def make_node(self, hid_acts, filters):
        """
        Build the Apply node with a single 4-d output for
        ``[hid_acts, filters]``.

        Both inputs must be 4-d variables of CudaNdarrayType; a TypeError is
        raised otherwise.
        """
        for label, var in (("hid_acts", hid_acts), ("filters", filters)):
            if not isinstance(var.type, CudaNdarrayType):
                raise TypeError(
                    "ImageActs: expected %s.type to be CudaNdarrayType, "
                    "got %s" % (label, var.type))

        assert hid_acts.ndim == 4
        assert filters.ndim == 4

        # Whether rows and columns are broadcastable depends on runtime-only
        # quantities (the concrete image and kernel shapes), so both flags
        # are fixed to False. The outer flags come from the inputs.
        out_broadcastable = (
            filters.type.broadcastable[3],
            False,
            False,
            hid_acts.type.broadcastable[3],
        )
        out_var = CudaNdarrayType(broadcastable=out_broadcastable)()

        return Apply(self, [hid_acts, filters], [out_var])
Beispiel #9
def test_optimization():
    """
    Check that the compiled gradient graph of CrossMapNorm contains exactly
    one CrossMapNormUndo node and that this node's op is marked inplace.
    """
    op = CrossMapNorm(16, 15. / 16., 1, True)
    x_ = theano.tensor.TensorVariable(CudaNdarrayType([False] * 4))
    f = theano.function([x_], theano.grad(op(x_)[0].sum(), x_))
    # Exact-class match on purpose: only CrossMapNormUndo itself counts.
    nodes = [
        x for x in f.maker.fgraph.apply_nodes if type(x.op) == CrossMapNormUndo
    ]
    assert len(nodes) == 1
    assert nodes[0].op.inplace
Beispiel #10
 def make_node(self, mask_idx, image):
     # Builds an Apply node taking [mask_idx, image] as inputs. Both inputs
     # are first passed through `contiguouse` and must then be float32.
     mask_idx = contiguouse(mask_idx)
     image = contiguouse(image)
     assert mask_idx.dtype == "float32"
     assert image.dtype == "float32"
     # Every output shares one 5-d type with no broadcastable dimension.
     output_type = CudaNdarrayType(broadcastable=(False, ) * 5)
     # NOTE(review): the output list below is cut off in this excerpt; the
     # number of outputs cannot be determined from here.
     return theano.Apply(
         self, [mask_idx, image],
         [output_type(), output_type(),
Beispiel #11
def values_eq_approx_high_tol(a, b):
    """
    Compare `a` and `b` approximately, with a looser tolerance for big inputs.

    This keeps DebugMode from raising useless errors caused by rounding.
    Such errors show up with big inputs because the order of operations
    changes.

    When ``a.size`` exceeds 100000 the relative tolerance is raised to 5e-5;
    otherwise ``rtol=None`` is passed on to
    ``CudaNdarrayType.values_eq_approx``.
    """
    rtol = None
    if a.size > 100000:
        # For float32 the default rtol is 1e-5
        rtol = 5e-5
    return CudaNdarrayType.values_eq_approx(a, b, rtol=rtol)
Beispiel #12
Beispiel #13
    def make_node(self, W, b, d, H, RShape=None):
        # Converts W, b and H to CUDA ndarray variables and d (plus the
        # optional RShape) to tensor variables, then builds the Apply node.
        W_ = as_cuda_ndarray_variable(W)
        b_ = as_cuda_ndarray_variable(b)
        d_ = T.as_tensor_variable(d)
        H_ = as_cuda_ndarray_variable(H)
        if RShape:
            RShape_ = T.as_tensor_variable(RShape)
            # NOTE(review): as written, the next line overwrites the value
            # just computed and RShape_ stays unbound when RShape is falsy.
            # An `else:` branch appears to be missing; confirm upstream.
            RShape_ = T.as_tensor_variable([-1, -1, -1])

        # NOTE(review): the statement below is cut off in this excerpt; the
        # outputs argument is not visible.
        return theano.Apply(self, inputs=[W_, b_, d_, H_, RShape_],
    def make_node(self, ten4, neib_shape, neib_step):
        # Builds an Apply node over [ten4, neib_shape, neib_step].
        # ten4 must be a float32, 4-d CudaNdarrayType variable; neib_shape
        # and neib_step must each be 1-d.
        assert ten4.dtype == 'float32'
        if not isinstance(ten4.type, CudaNdarrayType):
            raise TypeError('ten4 must be cudandarray', ten4)

        assert ten4.ndim == 4
        assert neib_shape.ndim == 1
        assert neib_step.ndim == 1

        # NOTE(review): the output type construction is cut off in this
        # excerpt; only the 2-d broadcastable pattern is visible.
        return Apply(self, [ten4, neib_shape, neib_step], [
            CudaNdarrayType(broadcastable=(False, False),
Beispiel #15
    def make_node(self, images):
        """
        Build the Apply node with outputs ``[targets, denoms]`` for
        ``images``.

        ``images`` must be a 4-d variable of CudaNdarrayType (TypeError
        otherwise). Both outputs share its broadcastable pattern.
        """
        if not isinstance(images.type, CudaNdarrayType):
            raise TypeError(
                "CrossMapNorm: expected images.type to be CudaNdarrayType, "
                "got %s" % (images.type,))

        assert images.ndim == 4

        out_type = CudaNdarrayType(broadcastable=images.type.broadcastable)
        # Instantiate in the original order: denoms first, then targets.
        denoms = out_type()
        targets = out_type()

        return Apply(self, [images], [targets, denoms])
Beispiel #16
    def make_node(self, V, d, WShape, dCdH):
        """
        Build the Apply node for this op.

        :param V: visible
        :param d: strides
        :param WShape: shapes of the weights -> shape of this op output
        :param dCdH: other input with what V will be convolved.

        :return: Apply node over the converted inputs with a single 5-d
            output of V's dtype and no broadcastable dimension.
        """
        V_ = as_cuda_ndarray_variable(V)
        d_ = T.as_tensor_variable(d)
        WShape_ = T.as_tensor_variable(WShape)
        dCdH_ = as_cuda_ndarray_variable(dCdH)

        out_type = CudaNdarrayType(dtype=V_.dtype, broadcastable=(False,) * 5)
        return theano.Apply(self, inputs=[V_, d_, WShape_, dCdH_],
                            outputs=[out_type()])
Beispiel #17
    def make_node(self, ten4, neib_shape, neib_step):
        # Builds an Apply node over [ten4, neib_shape, neib_step] after
        # converting ten4 to a CUDA ndarray variable and the other two
        # arguments to tensor variables.
        ten4 = as_cuda_ndarray_variable(ten4)
        neib_shape = tensor.as_tensor_variable(neib_shape)
        neib_step = tensor.as_tensor_variable(neib_step)

        # ten4 must be 4-d float32; shape and step must be 1-d with an
        # integer dtype (dtype name containing "int").
        assert ten4.ndim == 4
        assert ten4.dtype == 'float32'
        assert neib_shape.ndim == 1
        assert neib_step.ndim == 1
        assert "int" in neib_shape.dtype
        assert "int" in neib_step.dtype

        # NOTE(review): the output type construction is cut off in this
        # excerpt; only the 2-d broadcastable pattern is visible.
        return Apply(self, [ten4, neib_shape, neib_step],
                     [CudaNdarrayType(broadcastable=(False, False),
 def make_node(self, images, filters):
     """
     Build the Apply node with a single 5-d output for ``[images, filters]``.

     The unpacking below requires ``images`` to have exactly 5 dimensions
     and ``filters`` exactly 7. The output's broadcastable pattern is
     assembled from the last two and first two filter flags plus the last
     image flag.

     Raises
     ------
     TypeError
         If ``images`` or ``filters`` is not of ``CudaNdarrayType``.
     """
     ibcast = images.broadcastable
     fbcast = filters.broadcastable
     igroups, icolors_per_group, irows, icols, icount = ibcast
     fmodulesR, fmodulesC, fcolors, frows, fcols = fbcast[:-2]
     fgroups, filters_per_group = fbcast[-2:]
     hbcast = (fgroups, filters_per_group, fmodulesR, fmodulesC, icount)
     # The offending variable is attached as the second exception argument.
     if not isinstance(images.type, CudaNdarrayType):
         raise TypeError('gpu_filter_acts requires CudaNdarray images',
                         images)
     if not isinstance(filters.type, CudaNdarrayType):
         raise TypeError('gpu_filter_acts requires CudaNdarray filters',
                         filters)
     htype = CudaNdarrayType(broadcastable=hbcast)
     return theano.gof.Apply(self, [images, filters], [htype()])
Beispiel #19
 def make_node(self, V, W, b, d):
     """
     Build the Apply node for this op.

     :param V: Visible unit, input
     :param W: Weights, filter
     :param b: bias
     :param d: strides when moving the filter over the input

     :return: Apply node over the converted inputs with one 5-d output of
         V's dtype. Its first two broadcastable flags come from V and W;
         the remaining three are False.
     """
     V_ = as_cuda_ndarray_variable(V)
     W_ = as_cuda_ndarray_variable(W)
     b_ = as_cuda_ndarray_variable(b)
     d_ = T.as_tensor_variable(d)
     broad = (V_.broadcastable[0], W_.broadcastable[0], False, False, False)
     # Apply needs the op itself as its first argument.
     return theano.Apply(
         self,
         inputs=[V_, W_, b_, d_],
         outputs=[CudaNdarrayType(dtype=V_.dtype, broadcastable=broad)()])
Beispiel #20
    def make_node(self, *inputs):
        """
        Build the Apply node for an elementwise op over two GPU inputs.

        Each input is converted to a contiguous CUDA ndarray variable. All
        inputs must have the same rank and no broadcastable dimension. The
        single output has that rank and no broadcastable dimension.

        Raises
        ------
        TypeError
            On a wrong number of inputs or mismatching ranks.
        Exception
            If any input has a broadcastable dimension.
        """
        assert self.nout == 1
        assert len(inputs) == 2  # TODO remove
        _inputs = [gpu_contiguous(as_cuda_ndarray_variable(i)) for i in inputs]
        if self.nin > 0 and len(_inputs) != self.nin:
            raise TypeError('Wrong argument count', (self.nin, len(_inputs)))

        # Validate the converted variables: raw arguments need not carry a
        # `.type` attribute until they have been converted.
        ndim = _inputs[0].type.ndim
        for i in _inputs[1:]:
            if i.type.ndim != ndim:
                raise TypeError('different ranks among inputs')

        if any(any(i.type.broadcastable) for i in _inputs):
            raise Exception("pycuda don't support broadcasted dimensions")

        otype = CudaNdarrayType(broadcastable=[False] * ndim)
        # `range` instead of `xrange` so this also runs on Python 3.
        out_node = Apply(self, _inputs, [otype() for _ in range(self.nout)])
        return out_node
Beispiel #21
    def make_node(self, hid_acts, filters, output_shape=None):
        """
        Build the Apply node with a single 4-d output.

        Parameters
        ----------
        hid_acts : variable
            Must have a ``CudaNdarrayType`` type and 4 dimensions.
        filters : variable
            Must have a ``CudaNdarrayType`` type and 4 dimensions.
        output_shape : 2-element TensorVariable, optional
            The spatial shape of the image. When omitted it is derived from
            the shapes of ``hid_acts`` and ``filters`` and from ``self.pad``,
            which is only allowed when ``self.stride`` is 1.

        Returns
        -------
        Apply
            Node with inputs ``[hid_acts, filters, output_shape]`` and one
            output.

        Raises
        ------
        TypeError
            If ``hid_acts`` or ``filters`` is not of ``CudaNdarrayType``.
        ValueError
            If ``output_shape`` is None while ``self.stride`` is not 1.
        """
        if not isinstance(hid_acts.type, CudaNdarrayType):
            raise TypeError("ImageActs: expected hid_acts.type to be CudaNdarrayType, "
                    "got " + str(hid_acts.type))

        if not isinstance(filters.type, CudaNdarrayType):
            raise TypeError("ImageActs: expected filters.type to be CudaNdarrayType, "
                    "got " + str(filters.type))

        if output_shape is None:
            if self.stride != 1:
                raise ValueError("You must specify an output_shape for ImageActs if the stride is not 1.")
            hid_shape = hid_acts.shape[1:3]
            kernel_shape = filters.shape[1:3]
            output_shape = hid_shape + kernel_shape - 2 * self.pad - 1

        assert hid_acts.ndim == 4
        assert filters.ndim == 4

        channels_broadcastable = filters.type.broadcastable[3]
        batch_broadcastable = hid_acts.type.broadcastable[3]
        # Computing whether the rows and columns are broadcastable requires doing
        # arithmetic on quantities that are known only at runtime, like the specific
        # shape of the image and kernel
        rows_broadcastable = False
        cols_broadcastable = False

        targets_broadcastable = (channels_broadcastable, rows_broadcastable,
                cols_broadcastable, batch_broadcastable)
        targets_type = CudaNdarrayType(broadcastable=targets_broadcastable)
        targets = targets_type()

        return Apply(self, [hid_acts, filters, output_shape], [targets])
Beispiel #22
 def make_node(self, pvals, unis):
     """
     Build the Apply node with one output for ``[pvals, unis]``.

     Both inputs must be float32 variables of ``CudaNdarrayType``. The
     output reuses the first two broadcastable flags of ``unis``.

     Raises
     ------
     TypeError
         If ``pvals`` or ``unis`` is not of ``CudaNdarrayType``.
     NotImplementedError
         If the resolved output dtype differs from ``pvals.dtype``.
     """
     assert pvals.dtype == 'float32'
     assert unis.dtype == 'float32'
     if not isinstance(pvals.type, CudaNdarrayType):
         raise TypeError('pvals must be cudandarray', pvals)
     if not isinstance(unis.type, CudaNdarrayType):
         raise TypeError('unis must be cudandarray', unis)
     # 'auto' means "same dtype as the input"; otherwise use the dtype
     # requested on the op.
     if self.odtype == 'auto':
         odtype = pvals.dtype
     else:
         odtype = self.odtype
     if odtype != pvals.dtype:
         raise NotImplementedError('GpuMultinomialFromUniform2 works only if '
             'self.odtype == pvals.dtype', odtype, pvals.dtype)
     br = (unis.broadcastable[0], unis.broadcastable[1])
     out = CudaNdarrayType(broadcastable=br)()
     return Apply(self, [pvals, unis], [out])
Beispiel #23
    def make_node(self, mask_idx, image, og_sum, og_pow):
        """
        Build the Apply node with one 4-d, non-broadcastable output for
        ``[mask_idx, image]``.

        Raises ValueError when both ``og_sum`` and ``og_pow`` print as
        "<DisconnectedType>".
        """
        mask_idx = contiguouse(mask_idx)
        image = contiguouse(image)

        sum_disconnected = str(og_sum) == "<DisconnectedType>"
        pow_disconnected = str(og_pow) == "<DisconnectedType>"
        if sum_disconnected and pow_disconnected:
            raise ValueError("At least sum or pow gradient must be provided")

        # NOTE(review): the converted gradients are not added to the node
        # inputs here; confirm against the caller whether that is intended.
        if not sum_disconnected:
            og_sum = contiguouse(og_sum)
        if not pow_disconnected:
            og_pow = contiguouse(og_pow)

        out_type = CudaNdarrayType(broadcastable=(False, ) * 4)
        return theano.Apply(self, [mask_idx, image], [out_type()])
Beispiel #24
    def make_node(self, images):
        """
        Build the Apply node with one 4-d output for ``images``.

        ``images`` is converted with ``as_cuda_ndarray_variable`` and must be
        4-d. The output keeps the first and last broadcastable flags of
        ``images``; the two middle flags are always False.
        """
        images = as_cuda_ndarray_variable(images)

        assert images.ndim == 4

        in_bcast = images.type.broadcastable
        out_bcast = (in_bcast[0], False, False, in_bcast[3])
        out_var = CudaNdarrayType(broadcastable=out_bcast)()

        return Apply(self, [images], [out_var])
Beispiel #25
    def new_auto_update(cls, generator, ndim, dtype, size, seed):
        # NOTE(review): the two prose lines below look like the remains of a
        # docstring whose quotes were lost in this excerpt.
        Return a symbolic sample from generator.

        cls dictates the random variable (e.g. uniform, normal)

        v_size = theano.tensor.as_tensor_variable(size)
        # When ndim is not given, it is taken from the length of the size
        # vector.
        if ndim is None:
            ndim = get_vector_length(v_size)
        # NOTE(review): this constructor call is cut off in this excerpt; the
        # remaining arguments are not visible.
        self = cls(output_type=CudaNdarrayType((False, ) * ndim),

        # Calling the op yields the updated generator and the sample.
        o_gen, sample = self(generator, cast(v_size, 'int32'))

        sample.generator = generator  # for user
        sample.update = (generator, o_gen)  # for CURAND_RandomStreams
        generator.default_update = o_gen  # for pfunc uses this attribute
        return sample
Beispiel #26
    def make_node(self, *inputs):
        _inputs = [gpu_contiguous(as_cuda_ndarray_variable(i)) for i in inputs]
        if self.nin > 0 and len(_inputs) != self.nin:
            raise TypeError('Wrong argument count', (self.nin, len(_inputs)))
        for i in _inputs[1:]:
            if i.type.ndim != inputs[0].type.ndim:
                raise TypeError('different ranks among inputs')

        if any([any(i.type.broadcastable) for i in inputs]):
            raise Exception("pycuda don't support broadcasted dimensions")
        assert len(inputs) == 2  # TODO remove

        otype = CudaNdarrayType(broadcastable=[False] * _inputs[0].type.ndim)
        assert self.nout == 1

        fct_name = "pycuda_elemwise_%s" % str(self.scalar_op)
        out_node = Apply(self, _inputs, [otype() for o in xrange(self.nout)])
        in_name = ["i" + str(id) for id in range(len(inputs))]
        out_name = ["o" + str(id) for id in range(self.nout)]
        c_code = self.scalar_op.c_code(out_node, "some_name",
                                       tuple([n + "[i]" for n in in_name]),
                                       tuple(n + "[i]" for n in out_name), {})
        c_code_param = ", ".join([var.type.dtype_specs()[1] + " *" + name
                                  for var, name in (zip(inputs, in_name) +
                                                        out_name))] +
                                 ["int size"])
        mod = SourceModule("""
#include <numpy/arrayobject.h>
  __global__ void %s(%s)
    int i = (blockIdx.x+blockIdx.y*gridDim.x)*(blockDim.x*blockDim.y);
    i += threadIdx.x + threadIdx.y*blockDim.x;
  """ % (fct_name, c_code_param, c_code))
        self.pycuda_fct = mod.get_function(fct_name)
        return out_node
Beispiel #27
    def make_node(self, images):
        """
        Build the Apply node with one 4-d output for ``[images, seed]``.

        ``images`` is converted with ``as_cuda_ndarray_variable`` and must be
        4-d. The second input is ``self.seed_state``, converted the same way.
        The output keeps the first and last broadcastable flags of
        ``images``; the two middle flags are always False.
        """
        images = as_cuda_ndarray_variable(images)

        assert images.ndim == 4

        channels_broadcastable = images.type.broadcastable[0]
        batch_broadcastable = images.type.broadcastable[3]

        # Rows and columns are never declared broadcastable here.
        rows_broadcastable = False
        cols_broadcastable = False

        targets_broadcastable = (channels_broadcastable, rows_broadcastable,
                                 cols_broadcastable, batch_broadcastable)
        targets_type = CudaNdarrayType(broadcastable=targets_broadcastable)
        targets = targets_type()
        seed = self.seed_state
        seed = as_cuda_ndarray_variable(seed)
        return Apply(self, [images, seed], [targets])
Beispiel #28
    def make_node(self, V, d, WShape, dCdH):
        # NOTE(review): the two prose lines below look like the remains of a
        # docstring whose quotes and parameter headings were lost.

            Shapes of the weights -> shape of this op output.
            Other input with what V will be convolved.

        # V and dCdH become CUDA ndarray variables; d and WShape become
        # tensor variables.
        V_ = as_cuda_ndarray_variable(V)
        d_ = T.as_tensor_variable(d)
        WShape_ = T.as_tensor_variable(WShape)
        dCdH_ = as_cuda_ndarray_variable(dCdH)
        # 5-d pattern with no broadcastable dimension.
        broad = (False,) * 5
        # NOTE(review): the statement below is cut off in this excerpt; the
        # outputs argument is not visible.
        return theano.Apply(self, inputs=[V_, d_, WShape_, dCdH_],
Beispiel #29
 def output_type(self, inp):
     """Return a CudaNdarrayType with the rank of `inp` and no
     broadcastable dimension."""
     rank = inp.type.ndim
     return CudaNdarrayType(broadcastable=[False] * rank)
Beispiel #30
 def output_type(self, inp):
     """Return a CudaNdarrayType with one more dimension than `inp` and no
     broadcastable dimension."""
     # The extra trailing dimension holds the real/imag parts.
     rank = inp.type.ndim + 1
     return CudaNdarrayType(broadcastable=[False] * rank)
Beispiel #31
 def output_type(self, inp):
     """Return a CudaNdarrayType with one dimension fewer than `inp` and no
     broadcastable dimension."""
     # The real/imag dimension of the input is dropped.
     rank = inp.type.ndim - 1
     return CudaNdarrayType(broadcastable=[False] * rank)