Code example #1
    def make_node(self, images, hid_grads, output_shape):
        if not isinstance(images.type, CudaNdarrayType):
            raise TypeError("WeightActs: expected images.type "
                            "to be CudaNdarrayType, "
                            "got " + str(images.type))

        if not isinstance(hid_grads.type, CudaNdarrayType):
            raise TypeError("WeightActs: expected hid_acts.type "
                            "to be CudaNdarrayType, "
                            "got " + str(hid_grads.type))

        assert images.ndim == 4
        assert hid_grads.ndim == 4

        input_channels_broadcastable = images.type.broadcastable[0]
        # We don't know anything about filter_rows or filter_cols at compile
        # time, so we assume they're not broadcastable.
        filter_rows_broadcastable = False
        filter_cols_broadcastable = False
        output_channels_broadcastable = hid_grads.type.broadcastable[0]

        weights_grads_type = CudaNdarrayType(
            (input_channels_broadcastable, filter_rows_broadcastable,
             filter_cols_broadcastable, output_channels_broadcastable))

        partial_sums_type = CudaNdarrayType((False, ) * 5)
        weights_grads = weights_grads_type()
        partial_sums = partial_sums_type()

        return Apply(self, [images, hid_grads, output_shape],
                     [weights_grads, partial_sums])
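The pattern above recurs throughout these examples: build a CudaNdarrayType from a tuple of broadcastable flags, call the type instance to get a fresh symbolic variable, and wire inputs to outputs with Apply. A minimal sketch of that pattern, assuming the old theano.sandbox.cuda backend is installed and a GPU is configured:

from theano.sandbox.cuda import CudaNdarrayType

# A 4-D float32 GPU type; False means the axis is not broadcastable.
ttype = CudaNdarrayType(broadcastable=(False,) * 4)

# Calling the type creates a new symbolic variable of that type,
# which is exactly what make_node returns as an output.
var = ttype()
assert var.ndim == 4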
Code example #2
    def make_node(self, images, top_down):
        """
        .. todo::

            WRITEME
        """
        images = as_cuda_ndarray_variable(images)
        top_down = as_cuda_ndarray_variable(top_down)

        assert images.ndim == 4
        assert top_down.ndim == 4

        channels_broadcastable = images.type.broadcastable[0]
        batch_broadcastable = images.type.broadcastable[3]

        rows_broadcastable = False
        cols_broadcastable = False

        houtput_broadcastable = (channels_broadcastable, rows_broadcastable,
                                 cols_broadcastable, batch_broadcastable)
        houtput_type = CudaNdarrayType(broadcastable=houtput_broadcastable)
        houtput = houtput_type()

        poutput_broadcastable = (channels_broadcastable, rows_broadcastable,
                                 cols_broadcastable, batch_broadcastable)
        poutput_type = CudaNdarrayType(broadcastable=poutput_broadcastable)
        poutput = poutput_type()

        return Apply(self, [images, top_down], [houtput, poutput])
Code example #3
def test_float32_shared_constructor():

    npy_row = numpy.zeros((1, 10), dtype='float32')

    def eq(a, b):
        return a == b

    # test that we can create a CudaNdarray
    assert (f32sc(npy_row).type == CudaNdarrayType((False, False)))

    # test that the broadcastable arg is accepted, and that it
    # does not strictly have to be a tuple
    assert eq(
        f32sc(npy_row, broadcastable=(True, False)).type,
        CudaNdarrayType((True, False)))
    assert eq(
        f32sc(npy_row, broadcastable=[True, False]).type,
        CudaNdarrayType((True, False)))
    assert eq(
        f32sc(npy_row, broadcastable=numpy.array([True, False])).type,
        CudaNdarrayType([True, False]))

    # test that we can make non-matrix shared vars
    assert eq(
        f32sc(numpy.zeros((2, 3, 4, 5), dtype='float32')).type,
        CudaNdarrayType((False, ) * 4))
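The snippet never shows what f32sc is; presumably it is the float32 shared-variable constructor of the old CUDA backend. A plausible binding (an assumption, not part of the snippet):

import numpy
# Hypothetical import matching the name used in the test above:
from theano.sandbox.cuda.var import float32_shared_constructor as f32sc

# A shared variable backed by GPU storage; its .type is a CudaNdarrayType.
s = f32sc(numpy.zeros((1, 10), dtype='float32'))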
Code example #4
 def tensor_type(cls, dtype, ndim):
   # noinspection PyUnresolvedReferences,PyPackageRequirements
   from theano.sandbox.cuda import CudaNdarrayType
   if dtype != "float32":
     print("%s: WARNING: cannot handle type %r, will use float32 instead" % ("GpuNativeOp", dtype))
     dtype = "float32"
   return CudaNdarrayType(dtype=dtype, broadcastable=(False,) * ndim)
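The float32 fallback above reflects a hard limit of this backend: CudaNdarrayType only implements float32 storage. A quick sketch of the expected behaviour, assuming the backend's usual dtype check:

from theano.sandbox.cuda import CudaNdarrayType

CudaNdarrayType(dtype="float32", broadcastable=(False, False))  # accepted
try:
    CudaNdarrayType(dtype="float64", broadcastable=(False, False))
except TypeError:
    # Non-float32 dtypes are rejected, hence the warn-and-fall-back above.
    pass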
Code example #5
 def make_node(self, pvals):
     assert pvals.dtype == 'float32'
     if not isinstance(pvals.type, CudaNdarrayType):
         raise TypeError('pvals must be cudandarray', pvals)
     if self.odtype == 'auto':
         odtype = pvals.dtype
     else:
         odtype = self.odtype
     if odtype != pvals.dtype:
          raise NotImplementedError('GpuKArgmax works only if '
              'self.odtype == pvals.dtype', odtype, pvals.dtype)
     
     br = (pvals.broadcastable[0], pvals.broadcastable[1])
     vals = CudaNdarrayType(broadcastable=br)()
     indx = CudaNdarrayType(broadcastable=br)()
     return Apply(self, [pvals], [vals, indx])
Code example #6
    def make_node(self, images, acts, denoms, dout):
        """
        .. todo::

            WRITEME
        """
        inputs = images, acts, denoms, dout
        names = "images", "acts", "denoms", "dout"
        for name, var in zip(names, inputs):
            if not isinstance(var.type, CudaNdarrayType):
                raise TypeError("CrossMapNormUndo: expected %s.type "
                                "to be CudaNdarrayType, "
                                "got %s" % (name, str(var.type)))
        assert images.ndim == 4
        assert acts.ndim == 4
        assert denoms.ndim == 4
        assert dout.ndim == 4
        # Probably not strictly necessary, but a cheap sanity check.
        assert images.type.broadcastable == acts.type.broadcastable
        assert images.type.broadcastable == denoms.type.broadcastable
        assert images.type.broadcastable == dout.type.broadcastable

        targets_broadcastable = tuple(images.type.broadcastable)
        targets_type = CudaNdarrayType(broadcastable=targets_broadcastable)
        targets = targets_type()
        out_acts = targets_type()
        return Apply(self, [images, acts, denoms, dout], [targets, out_acts])
Code example #7
 def make_node(self, x):
     from theano.sandbox.cuda import CudaNdarrayType
     if not isinstance(x.type, GpuArrayType):
         raise TypeError(x)
     if x.type.dtype != 'float32':
         raise TypeError(x)
     return Apply(self, [x], [CudaNdarrayType(broadcastable=x.broadcastable)()])
Code example #8
File: img_acts.py Project: sdmassey27/pylearn2
    def make_node(self, hid_acts, filters):

        if not isinstance(hid_acts.type, CudaNdarrayType):
            raise TypeError("ImageActs: expected hid_acts.type to be CudaNdarrayType, "
                    "got " + str(hid_acts.type))

        if not isinstance(filters.type, CudaNdarrayType):
            raise TypeError("ImageActs: expected filters.type to be CudaNdarrayType, "
                    "got " + str(filters.type))


        assert hid_acts.ndim == 4
        assert filters.ndim == 4

        channels_broadcastable = filters.type.broadcastable[3]
        batch_broadcastable = hid_acts.type.broadcastable[3]
        # Computing whether the rows and columns are broadcastable requires doing
        # arithmetic on quantities that are known only at runtime, like the specific
        # shape of the image and kernel
        rows_broadcastable = False
        cols_broadcastable = False

        targets_broadcastable = (channels_broadcastable, rows_broadcastable,
                cols_broadcastable, batch_broadcastable)
        targets_type = CudaNdarrayType(broadcastable=targets_broadcastable)
        targets = targets_type()

        return Apply(self, [hid_acts, filters], [targets])
Code example #9
def test_optimization():
    op = CrossMapNorm(16, 15. / 16., 1, True)
    x_ = theano.tensor.TensorVariable(CudaNdarrayType([False] * 4))
    f = theano.function([x_], theano.grad(op(x_)[0].sum(), x_))
    nodes = [
        x for x in f.maker.fgraph.apply_nodes if type(x.op) == CrossMapNormUndo
    ]
    assert len(nodes) == 1
    assert nodes[0].op.inplace
Code example #10
File: mask_loss.py Project: GALI472/deepdecoder
 def make_node(self, mask_idx, image):
     mask_idx = contiguouse(mask_idx)
     image = contiguouse(image)
     assert mask_idx.dtype == "float32"
     assert image.dtype == "float32"
     output_type = CudaNdarrayType(broadcastable=(False, ) * 5)
     return theano.Apply(
         self, [mask_idx, image],
         [output_type(), output_type(),
          output_type()])
Code example #11
File: extra_ops.py Project: Ambier/Theano
def values_eq_approx_high_tol(a, b):
    """This fct is needed to don't have DebugMode raise useless
    error due to rounding error.

    This happen with big input size due to change in the order of
    operation.
    """
    rtol = None
    if a.size > 100000:
        # For float32 the default rtol is 1e-5
        rtol = 5e-5
    return CudaNdarrayType.values_eq_approx(a, b, rtol=rtol)
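A sketch of how such a comparison function is typically wired up, following Theano's values_eq_approx convention (assumed usage, not shown in the snippet): it is attached to a variable's tag, and DebugMode consults it when validating that variable's values.

# Assumed usage: DebugMode checks var.tag.values_eq_approx if present.
out = CudaNdarrayType(broadcastable=(False, False))()
out.tag.values_eq_approx = values_eq_approx_high_tol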
Code example #12
    def make_node(self, W, b, d, H, RShape=None):
        W_ = as_cuda_ndarray_variable(W)
        b_ = as_cuda_ndarray_variable(b)
        d_ = T.as_tensor_variable(d)
        H_ = as_cuda_ndarray_variable(H)
        if RShape:
            RShape_ = T.as_tensor_variable(RShape)
        else:
            RShape_ = T.as_tensor_variable([-1, -1, -1])

        return theano.Apply(self, inputs=[W_, b_, d_, H_, RShape_],
                            outputs=[CudaNdarrayType(dtype=H_.dtype,
                                                     broadcastable=(False,)*5)()])
Code example #13
    def make_node(self, ten4, neib_shape, neib_step):
        assert ten4.dtype == 'float32'
        if not isinstance(ten4.type, CudaNdarrayType):
            raise TypeError('ten4 must be cudandarray', ten4)

        assert ten4.ndim == 4
        assert neib_shape.ndim == 1
        assert neib_step.ndim == 1

        return Apply(self, [ten4, neib_shape, neib_step], [
            CudaNdarrayType(broadcastable=(False, False),
                            dtype=ten4.type.dtype)()
        ])
Code example #14
    def make_node(self, images):
        if not isinstance(images.type, CudaNdarrayType):
            raise TypeError("CrossMapNorm: expected images.type to be CudaNdarrayType, "
                    "got " + str(images.type))

        assert images.ndim == 4

        targets_broadcastable = images.type.broadcastable
        targets_type = CudaNdarrayType(broadcastable=targets_broadcastable)
        denoms = targets_type()
        targets = targets_type()

        return Apply(self, [images], [targets, denoms])
Code example #15
File: GpuConvGrad3D.py Project: wycg1984/Theano
    def make_node(self, V, d, WShape, dCdH):
        """
        :param V: visible
        :param d: strides
        :param WShape: shapes of the weights -> shape of this op output
        :param dCdH: other input with what V will be convolved.
        """
        V_ = as_cuda_ndarray_variable(V)
        d_ = T.as_tensor_variable(d)
        WShape_ = T.as_tensor_variable(WShape)
        dCdH_ = as_cuda_ndarray_variable(dCdH)

        return theano.Apply(self, inputs=[V_, d_, WShape_, dCdH_],
                            outputs = [ CudaNdarrayType(dtype=V_.dtype, broadcastable=(False,)*5)()])
Code example #16
File: neighbours.py Project: xiaozhuka/Theano
    def make_node(self, ten4, neib_shape, neib_step):
        ten4 = as_cuda_ndarray_variable(ten4)
        neib_shape = tensor.as_tensor_variable(neib_shape)
        neib_step = tensor.as_tensor_variable(neib_step)

        assert ten4.ndim == 4
        assert ten4.dtype == 'float32'
        assert neib_shape.ndim == 1
        assert neib_step.ndim == 1
        assert "int" in neib_shape.dtype
        assert "int" in neib_step.dtype

        return Apply(self, [ten4, neib_shape, neib_step],
                     [CudaNdarrayType(broadcastable=(False, False),
                                      dtype=ten4.type.dtype)()])
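Both neighbourhood examples (#13 and #16) return a 2-D output from a 4-D input: conceptually, each row of the result is one flattened patch. A shape-level sketch of that contract, with illustrative numbers:

# input:  (2, 3, 8, 8) images, neib_shape = (4, 4), neib_step = (4, 4)
# output: (2 * 3 * 2 * 2, 16) = (24, 16), one row per 4x4 patch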
Code example #17
 def make_node(self, images, filters):
     ibcast = images.broadcastable
     fbcast = filters.broadcastable
     igroups, icolors_per_group, irows, icols, icount = ibcast
     fmodulesR, fmodulesC, fcolors, frows, fcols = fbcast[:-2]
     fgroups, filters_per_group = fbcast[-2:]
     hbcast = (fgroups, filters_per_group, fmodulesR, fmodulesC, icount)
     if not isinstance(images.type, CudaNdarrayType):
         raise TypeError('gpu_filter_acts requires CudaNdarray images',
                         images)
     if not isinstance(filters.type, CudaNdarrayType):
         raise TypeError('gpu_filter_acts requires CudaNdarray filters',
                         filters)
     htype = CudaNdarrayType(broadcastable=hbcast)
     return theano.gof.Apply(self, [images, filters], [htype()])
Code example #18
 def make_node(self, V, W, b, d):
     """
         :param V: Visible unit, input
         :param W: Weights, filter
         :param b: bias
         :param d: strides when moving the filter over the input
     """
     V_ = as_cuda_ndarray_variable(V)
     W_ = as_cuda_ndarray_variable(W)
     b_ = as_cuda_ndarray_variable(b)
     d_ = T.as_tensor_variable(d)
     broad = (V_.broadcastable[0], W_.broadcastable[0], False, False, False)
     return theano.Apply(
         self,
         inputs=[V_, W_, b_, d_],
         outputs=[CudaNdarrayType(dtype=V_.dtype, broadcastable=broad)()])
Code example #19
File: pycuda_example.py Project: bottler/Theano
    def make_node(self, *inputs):
        assert self.nout == 1
        assert len(inputs) == 2  # TODO remove
        _inputs = [gpu_contiguous(as_cuda_ndarray_variable(i)) for i in inputs]
        if self.nin > 0 and len(_inputs) != self.nin:
            raise TypeError('Wrong argument count', (self.nin, len(_inputs)))
        for i in _inputs[1:]:
            if i.type.ndim != inputs[0].type.ndim:
                raise TypeError('different ranks among inputs')

        if any(any(i.type.broadcastable) for i in inputs):
            raise Exception("pycuda does not support broadcastable dimensions")

        otype = CudaNdarrayType(broadcastable=[False] * _inputs[0].type.ndim)
        out_node = Apply(self, _inputs, [otype() for o in xrange(self.nout)])
        return out_node
Code example #20
File: img_acts.py Project: yo-ga/TextDetector
    def make_node(self, hid_acts, filters, output_shape=None):
        """
        .. todo::

            WRITEME

        Parameters
        ----------
        hid_acts : WRITEME
        filters : WRITEME
        output_shape : 2-element TensorVariable, optional
            The spatial shape of the image
        """

        if not isinstance(hid_acts.type, CudaNdarrayType):
            raise TypeError("ImageActs: expected hid_acts.type to be CudaNdarrayType, "
                    "got " + str(hid_acts.type))

        if not isinstance(filters.type, CudaNdarrayType):
            raise TypeError("ImageActs: expected filters.type to be CudaNdarrayType, "
                    "got " + str(filters.type))


        if output_shape is None:
            if self.stride != 1:
                raise ValueError("You must specify an output_shape for ImageActs if the stride is not 1.")
            hid_shape = hid_acts.shape[1:3]
            kernel_shape = filters.shape[1:3]
            output_shape = hid_shape + kernel_shape - 2 * self.pad - 1

        assert hid_acts.ndim == 4
        assert filters.ndim == 4

        channels_broadcastable = filters.type.broadcastable[3]
        batch_broadcastable = hid_acts.type.broadcastable[3]
        # Computing whether the rows and columns are broadcastable requires doing
        # arithmetic on quantities that are known only at runtime, like the specific
        # shape of the image and kernel
        rows_broadcastable = False
        cols_broadcastable = False

        targets_broadcastable = (channels_broadcastable, rows_broadcastable,
                cols_broadcastable, batch_broadcastable)
        targets_type = CudaNdarrayType(broadcastable=targets_broadcastable)
        targets = targets_type()

        return Apply(self, [hid_acts, filters, output_shape], [targets])
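At stride 1 the default output_shape inverts a valid convolution: an image with R rows convolved with a K-row kernel yields R - K + 1 rows of hidden activations, so ImageActs recovers R = hid + K - 2 * pad - 1. A quick check with concrete numbers:

# valid convolution, pad = 0: image rows 14, kernel rows 5 -> hid rows 10
# inverse: 10 + 5 - 2 * 0 - 1 = 14, the original image rows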
Code example #21
 def make_node(self, pvals, unis):
     assert pvals.dtype == 'float32'
     assert unis.dtype == 'float32'
     if not isinstance(pvals.type, CudaNdarrayType):
         raise TypeError('pvals must be cudandarray', pvals)
     if not isinstance(unis.type, CudaNdarrayType):
         raise TypeError('unis must be cudandarray', unis)       
     if self.odtype == 'auto':
         odtype = pvals.dtype
     else:
         odtype = self.odtype
     if odtype != pvals.dtype:
          raise NotImplementedError('GpuMultinomialFromUniform2 works only if '
              'self.odtype == pvals.dtype', odtype, pvals.dtype)
     br = (unis.broadcastable[0], unis.broadcastable[1])
     out = CudaNdarrayType(broadcastable=br)()
     return Apply(self, [pvals, unis], [out])
Code example #22
File: mask_loss.py Project: GALI472/deepdecoder
    def make_node(self, mask_idx, image, og_sum, og_pow):
        mask_idx = contiguouse(mask_idx)
        image = contiguouse(image)
        inputs = [mask_idx, image]
        if str(og_sum) == "<DisconnectedType>" and \
                str(og_pow) == "<DisconnectedType>":
            raise ValueError("At least sum or pow gradient must be provided")

        if str(og_sum) != "<DisconnectedType>":
            og_sum = contiguouse(og_sum)
            inputs.append(og_sum)
        if str(og_pow) != "<DisconnectedType>":
            og_pow = contiguouse(og_pow)
            inputs.append(og_pow)

        output_type = CudaNdarrayType(broadcastable=(False, ) * 4)
        return theano.Apply(self, inputs, [output_type()])
Code example #23
    def make_node(self, images):
        images = as_cuda_ndarray_variable(images)

        assert images.ndim == 4

        channels_broadcastable = images.type.broadcastable[0]
        batch_broadcastable = images.type.broadcastable[3]

        rows_broadcastable = False
        cols_broadcastable = False

        targets_broadcastable = (channels_broadcastable, rows_broadcastable,
                                 cols_broadcastable, batch_broadcastable)
        targets_type = CudaNdarrayType(broadcastable=targets_broadcastable)
        targets = targets_type()

        return Apply(self, [images], [targets])
Code example #24
    def new_auto_update(cls, generator, ndim, dtype, size, seed):
        """
        Return a symbolic sample from generator.

        cls dictates the random variable (e.g. uniform, normal)

        """
        v_size = theano.tensor.as_tensor_variable(size)
        if ndim is None:
            ndim = get_vector_length(v_size)
        self = cls(output_type=CudaNdarrayType((False, ) * ndim),
                   seed=seed,
                   destructive=False)

        o_gen, sample = self(generator, cast(v_size, 'int32'))

        sample.generator = generator  # for user
        sample.update = (generator, o_gen)  # for CURAND_RandomStreams
        generator.default_update = o_gen  # for pfunc uses this attribute
        return sample
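For context, this classmethod appears to back the CURAND-based random streams; a hedged usage sketch, assuming theano.sandbox.rng_curand is available:

from theano.sandbox.rng_curand import CURAND_RandomStreams

rng = CURAND_RandomStreams(seed=234)
# Each sample carries the generator/update wiring set up by
# new_auto_update, so pfunc can keep the CURAND state fresh.
u = rng.uniform(size=(10, 10))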
Code example #25
    def make_node(self, *inputs):
        _inputs = [gpu_contiguous(as_cuda_ndarray_variable(i)) for i in inputs]
        if self.nin > 0 and len(_inputs) != self.nin:
            raise TypeError('Wrong argument count', (self.nin, len(_inputs)))
        for i in _inputs[1:]:
            if i.type.ndim != inputs[0].type.ndim:
                raise TypeError('different ranks among inputs')

        if any(any(i.type.broadcastable) for i in inputs):
            raise Exception("pycuda does not support broadcastable dimensions")
        assert len(inputs) == 2  # TODO remove

        otype = CudaNdarrayType(broadcastable=[False] * _inputs[0].type.ndim)
        assert self.nout == 1

        fct_name = "pycuda_elemwise_%s" % str(self.scalar_op)
        out_node = Apply(self, _inputs, [otype() for o in xrange(self.nout)])
        in_name = ["i" + str(id) for id in range(len(inputs))]
        out_name = ["o" + str(id) for id in range(self.nout)]
        c_code = self.scalar_op.c_code(out_node, "some_name",
                                       tuple([n + "[i]" for n in in_name]),
                                       tuple(n + "[i]" for n in out_name), {})
        c_code_param = ", ".join([var.type.dtype_specs()[1] + " *" + name
                                  for var, name in (zip(inputs, in_name) +
                                                    zip(out_node.outputs,
                                                        out_name))] +
                                 ["int size"])
        mod = SourceModule("""
#include<Python.h>
#include <numpy/arrayobject.h>
  __global__ void %s(%s)
  {
    int i = (blockIdx.x+blockIdx.y*gridDim.x)*(blockDim.x*blockDim.y);
    i += threadIdx.x + threadIdx.y*blockDim.x;
    if(i<size){
        %s
    }
  }
  """ % (fct_name, c_code_param, c_code))
        self.pycuda_fct = mod.get_function(fct_name)
        return out_node
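The SourceModule call above compiles an elementwise kernel while the graph is being built; at execution time the op would launch self.pycuda_fct over the flattened arrays. A hedged launch sketch (the block/grid values are illustrative, not from the source):

# Hypothetical launch, mirroring how pycuda kernels are usually invoked:
# self.pycuda_fct(inp0, inp1, out0, numpy.intc(size),
#                 block=(32, 32, 1), grid=(grid_x, grid_y))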
Code example #26
    def make_node(self, images):
        """
        .. todo::

            WRITEME
        """
        images = as_cuda_ndarray_variable(images)

        assert images.ndim == 4

        channels_broadcastable = images.type.broadcastable[0]
        batch_broadcastable = images.type.broadcastable[3]

        rows_broadcastable = False
        cols_broadcastable = False

        targets_broadcastable = (channels_broadcastable, rows_broadcastable,
                                 cols_broadcastable, batch_broadcastable)
        targets_type = CudaNdarrayType(broadcastable=targets_broadcastable)
        targets = targets_type()
        seed = self.seed_state
        seed = as_cuda_ndarray_variable(seed)
        return Apply(self, [images, seed], [targets])
Code example #27
    def make_node(self, V, d, WShape, dCdH):
        """

        Parameters
        ----------
        V
            Visible.
        d
            Strides.
        WShape
            Shapes of the weights -> shape of this op output.
        dCdH
            Other input with what V will be convolved.

        """
        V_ = as_cuda_ndarray_variable(V)
        d_ = T.as_tensor_variable(d)
        WShape_ = T.as_tensor_variable(WShape)
        dCdH_ = as_cuda_ndarray_variable(dCdH)
        broad = (False,) * 5
        return theano.Apply(self, inputs=[V_, d_, WShape_, dCdH_],
                            outputs=[CudaNdarrayType(dtype=V_.dtype,
                                                     broadcastable=broad)()])
Code example #28
 def output_type(self, inp):
     return CudaNdarrayType(broadcastable=[False] * inp.type.ndim)
Code example #29
 def output_type(self, inp):
     # add one extra dim for real/imag
     return CudaNdarrayType(broadcastable=[False] * (inp.type.ndim + 1))
Code example #30
 def output_type(self, inp):
     # remove extra real/imag dim
     return CudaNdarrayType(broadcastable=[False] * (inp.type.ndim - 1))
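The last two output_type definitions read like the two halves of an FFT pair: complex values are stored in a trailing length-2 axis (real, imag), so the forward op adds one dimension and the inverse removes it. A shape-level sketch of that convention (names are illustrative, not from the source):

real_ndim = 3                 # e.g. (batch, rows, cols) float32 input
complex_ndim = real_ndim + 1  # forward FFT appends a (real, imag) axis
assert complex_ndim - 1 == real_ndim  # the inverse restores the rank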