def make_node(self, data, rois):
    data = as_cuda_ndarray_variable(data)
    rois = as_cuda_ndarray_variable(rois)
    assert data.ndim == 4
    assert rois.ndim == 2

    return Apply(self, [data, rois], [data.type(), data.type()])
Example #2
    def make_node(self, data, rois):
        data = as_cuda_ndarray_variable(data)
        rois = as_cuda_ndarray_variable(rois)
        assert data.ndim == 4
        assert rois.ndim == 2

        return Apply(self, [data, rois], [data.type(), data.type()])
Example #3
def local_gpu_advanced_incsubtensor1_scal_floats(node):
    supported_dims = {
        # x.ndim, y.ndim
        (1, 0): GpuAdvancedIncSubtensor1Floats_scal_dev20,
        (2, 2): GpuAdvancedIncSubtensor1Floats_dev20,
    }

    if isinstance(node.op, GpuFromHost):
        host_input = node.inputs[0]
        # Should not execute for GpuAdvancedIncSubtensor1
        if host_input.owner and \
           host_input.owner.op.__class__ is AdvancedIncSubtensor1Floats:
            x, y = host_input.owner.inputs[0:2]
            dims = (x.ndim, y.ndim)
            if dims not in supported_dims:
                return False

            coords = host_input.owner.inputs[2:]
            set_instead_of_inc = host_input.owner.op.set_instead_of_inc
            inplace = host_input.owner.op.inplace

            gpu_op = supported_dims[dims](
                inplace=inplace, set_instead_of_inc=set_instead_of_inc)
            return [
                gpu_op(as_cuda_ndarray_variable(x),
                       as_cuda_ndarray_variable(y), *coords)
            ]

    # Should not execute for GpuAdvancedIncSubtensor1
    if (node.op.__class__ is AdvancedIncSubtensor1Floats
            and node.inputs[0].dtype == "float32"
            and node.inputs[1].dtype == "float32"
            and node.inputs[2].dtype == "float32"):
        x, y = node.inputs[0:2]
        dims = (x.ndim, y.ndim)
        if dims not in supported_dims:
            return False

        coords = node.inputs[2:]
        go_gpu = False
        if x.owner and isinstance(x.owner.op, HostFromGpu):
            go_gpu = True
            gpu_x, = x.owner.inputs
        else:
            gpu_x = as_cuda_ndarray_variable(x)
        if y.owner and isinstance(y.owner.op, HostFromGpu):
            go_gpu = True
            gpu_y, = y.owner.inputs
        else:
            gpu_y = as_cuda_ndarray_variable(y)
        if go_gpu:
            set_instead_of_inc = node.op.set_instead_of_inc
            inplace = node.op.inplace

            gpu_op = supported_dims[dims](
                inplace=inplace, set_instead_of_inc=set_instead_of_inc)
            return [host_from_gpu(gpu_op(gpu_x, gpu_y, *coords))]
    return False
Example #4
    def make_node(self, img, kern):
        img = as_cuda_ndarray_variable(img)
        kern = as_cuda_ndarray_variable(kern)
        if img.type.ndim != 4:
            raise TypeError('img must be 4D tensor')
        if kern.type.ndim != 4:
            raise TypeError('kern must be 4D tensor')

        broadcastable = [img.type.broadcastable[0], kern.type.broadcastable[0],
                         False, False]
        return Apply(self, [img, kern], [CudaNdarrayType(broadcastable)()])
Example #5
  def make_node(self, data, rois, argmaxes, out_grad):
    data = as_cuda_ndarray_variable(data)
    rois = as_cuda_ndarray_variable(rois)
    argmaxes = as_cuda_ndarray_variable(argmaxes)
    out_grad = as_cuda_ndarray_variable(out_grad)
    assert data.ndim == 4
    assert rois.ndim == 2
    assert argmaxes.ndim == 4
    assert out_grad.ndim == 4

    return Apply(self, [data, rois, argmaxes, out_grad], [data.type()])
Example #6
    def make_node(self, data, rois, argmaxes, out_grad):
        data = as_cuda_ndarray_variable(data)
        rois = as_cuda_ndarray_variable(rois)
        argmaxes = as_cuda_ndarray_variable(argmaxes)
        out_grad = as_cuda_ndarray_variable(out_grad)
        assert data.ndim == 4
        assert rois.ndim == 2
        assert argmaxes.ndim == 4
        assert out_grad.ndim == 4

        return Apply(self, [data, rois, argmaxes, out_grad], [data.type()])
Example #7
    def make_node(self, bottom0, bottom1):
        bottom0 = as_cuda_ndarray_variable(bottom0)
        bottom1 = as_cuda_ndarray_variable(bottom1)

        assert bottom0.ndim == 4
        assert bottom1.ndim == 4

        return Apply(
            self, [bottom0, bottom1],
            [bottom0.type(), bottom0.type(),
             bottom0.type()])
Example #8
    def make_node(self, img, kern):
        img = as_cuda_ndarray_variable(img)
        kern = as_cuda_ndarray_variable(kern)
        if img.type.ndim != 4:
            raise TypeError('img must be 4D tensor')
        if kern.type.ndim != 4:
            raise TypeError('kern must be 4D tensor')

        broadcastable = [img.type.broadcastable[0], kern.type.broadcastable[0],
                         False, False]
        return Apply(self, [img, kern], [CudaNdarrayType(broadcastable)()])
Example #9
    def make_node(self, bottom0, bottom1, rbot0, rbot1, out_grad):
        bottom0 = as_cuda_ndarray_variable(bottom0)
        bottom1 = as_cuda_ndarray_variable(bottom1)
        rbot0 = as_cuda_ndarray_variable(rbot0)
        rbot1 = as_cuda_ndarray_variable(rbot1)
        out_grad = as_cuda_ndarray_variable(out_grad)

        assert bottom0.ndim == 4
        assert bottom1.ndim == 4
        assert rbot0.ndim == 4
        assert rbot1.ndim == 4
        assert out_grad.ndim == 4

        return Apply(self, [bottom0, bottom1, rbot0, rbot1, out_grad],
                     [bottom0.type(), bottom0.type()])
Example #10
    def make_node(self, input):
        input = gpu_contiguous(as_cuda_ndarray_variable(input))

        self.destructive = True

        assert input.dtype == "float32"
        assert input.ndim == 3  # (batch, a, b)

        return theano.Apply(self, [input],
                            [self.output_type(input)()])
Example #11
    def make_node(self, cond, ift, iff):
        if any(ift.broadcastable) or any(iff.broadcastable):
            raise ValueError(
                "GPURowSwitch cannot operate on broadcastable "
                "output arguments (ift %s, iff %s)."
                % (ift.broadcastable, iff.broadcastable))
        out_type = ift.dtype

        cond = as_cuda_ndarray_variable(T.cast(cond.flatten(), "float32"))
        ift = as_cuda_ndarray_variable(ift)
        iff = as_cuda_ndarray_variable(iff)

        assert ift.type.dtype == iff.type.dtype
        assert cond.ndim == 1, cond.ndim
        assert ift.ndim == iff.ndim

        return theano.gof.Apply(self, [cond, ift, iff], [
            CudaNdarrayType(broadcastable=ift.broadcastable, dtype=out_type)()
        ])
Example #12
    def make_node(self, cond, ift, iff):
        if any(ift.broadcastable) or any(iff.broadcastable):
            raise ValueError(
                "GpuMaskedCAReduce cannot operate on "
                "broadcastable output arguments (ift %s, iff %s)."
                % (ift.broadcastable, iff.broadcastable))
        out_type = ift.dtype

        cond = as_cuda_ndarray_variable(T.cast(cond.flatten(), "float32"))
        ift = as_cuda_ndarray_variable(ift)
        iff = as_cuda_ndarray_variable(iff)
        # TODO check contiguous?

        assert ift.type.dtype == iff.type.dtype
        assert cond.ndim == 1, cond.ndim
        assert ift.ndim == iff.ndim

        out_bcast = ift.broadcastable[1:]
        return theano.gof.Apply(
            self, [cond, ift, iff],
            [CudaNdarrayType(broadcastable=out_bcast, dtype=out_type)()])
Example #13
def local_gpua_row_switch(node):
    """
    Detects eligible Switch instances and replaces them with a GPU
    row switch.
    """

    # Bail out unless this node is an Elemwise wrapping a scalar Switch.
    if (node.op.__class__ is not T.Elemwise
            or node.op.scalar_op.__class__ is not theano.scalar.Switch):
        return False

    cond, ift, iff = node.inputs
    out, = node.outputs

    # Only applies to Switch instances where a vector mask broadcasts over
    # matrices.
    bcast = cond.broadcastable
    if not bcast or not (not bcast[0] and all(bcast[1:])
                         and ift.ndim in [2, 3]):
        return False

    if not (ift.dtype == iff.dtype == "float32"):
        return False

    if cond.owner and isinstance(cond.owner.op, HostFromGpu):
        gpu_cond, = cond.owner.inputs
    else:
        gpu_cond = as_cuda_ndarray_variable(T.cast(cond.flatten(), "float32"))

    if ift.owner and isinstance(ift.owner.op, HostFromGpu):
        gpu_ift, = ift.owner.inputs
    else:
        gpu_ift = as_cuda_ndarray_variable(ift)

    if iff.owner and isinstance(iff.owner.op, HostFromGpu):
        gpu_iff, = iff.owner.inputs
    else:
        gpu_iff = as_cuda_ndarray_variable(iff)

    gpu_op = GpuRowSwitch()
    return [HostFromGpu()(gpu_op(gpu_cond, gpu_ift, gpu_iff))]
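
The rewrite in Example #13 only takes effect once it is registered with Theano's optimizer database. Below is a minimal registration sketch, not taken from the example itself; it assumes the standard local_optimizer decorator from theano.gof and the register_opt helper from the old theano.sandbox.cuda.opt backend.

# Registration sketch (an assumption, not part of Example #13): hook the
# rewrite into the CUDA optimization database so it runs during compilation.
import theano.tensor as T
from theano.gof import local_optimizer
from theano.sandbox.cuda.opt import register_opt

@register_opt()                   # add the rewrite to the CUDA optimizer database
@local_optimizer([T.Elemwise])    # visit only Elemwise nodes (Switch is an Elemwise op)
def local_gpua_row_switch(node):
    ...  # body as shown in Example #13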
Example #14
    def make_node(self, x, y, ilist):
        x_ = as_cuda_ndarray_variable(x)
        y_ = as_cuda_ndarray_variable(y)
        ilist_ = gpu_contiguous(T.cast(ilist, config.floatX))

        assert x_.type.dtype == y_.type.dtype
        assert x_.type.ndim >= y_.type.ndim

        #if ilist_.type.dtype[:3] not in ('int', 'uin'):
        #    raise TypeError('index must be integers')
        if ilist_.type.ndim != 1:
            raise TypeError('index must be vector')
        if x_.type.ndim == 0:
            raise TypeError('cannot index into a scalar')
        if y_.type.ndim > x_.type.ndim:
            if self.set_instead_of_inc:
                opname = 'set'
            else:
                opname = 'increment'
            raise TypeError('cannot %s x subtensor with ndim=%s'
                            ' by y with ndim=%s' %
                            (opname, x_.type.ndim, y_.type.ndim))

        return theano.gof.Apply(self, [x_, y_, ilist_], [x_.type()])
Example #15
    def make_node(self, x, ilist):
        x_ = as_cuda_ndarray_variable(x)
        ilist_ = gpu_contiguous(T.cast(
            ilist, dtype=config.floatX))  # T.as_tensor_variable(ilist)
        #if ilist_.type.dtype[:3] not in ('int', 'uin'):
        #    raise TypeError('index must be integers')
        if ilist_.type.ndim != 1:
            raise TypeError('index must be vector')
        if x_.type.ndim == 0:
            raise TypeError('cannot index into a scalar')

        # # c code suppose it is int64
        # if x.ndim in [1, 2, 3] and ilist_.dtype in [
        #     'int8', 'int16', 'int32', 'uint8', 'uint16', 'uint32']:
        #     ilist_ = tensor.cast(ilist_, 'int64')

        bcast = (ilist_.broadcastable[0], ) + x_.broadcastable[1:]
        return theano.gof.Apply(
            self, [x_, ilist_],
            [CudaNdarrayType(dtype=x.dtype, broadcastable=bcast)()])
Example #16
def local_gpu_join_unsafe(node):
    """
    Inspired by the opt for convop.
    Very loose notation follows.
    Subgraphs concerned first look like
        [array of HostTensor] -> HostToGpu -> GpuToHost
        -> Join -> HostToGpu -> GpuToHost
    First we apply this Opt:
    join(host_from_gpu) -> host_from_gpu(gpu_join)
    then, as an intermediate result, there should be
    host_from_gpu(gpu_join) -> HostToGpu -> GpuToHost
    this unnecessary GpuToHost -> HostToGpu should be removed
    by other opts, leaving us with
    host_from_gpu(gpu_join)
    For intermediate places in the graph not covered by the first opt, the
    following could be useful:
    gpu_from_host(join) -> gpu_join(gpu_from_host)
    not implemented yet.
    """
    if isinstance(node.op, JoinUnsafe):
        # optimizing this case:
        # join(host_from_gpu) -> host_from_gpu(gpu_join)

        axis_and_tensors = node.inputs

        matches = [
            t.dtype == 'float32'
            and ((t.owner is not None and isinstance(t.owner.op, HostFromGpu))
                 or isinstance(t, theano.gof.Constant))
            for t in axis_and_tensors[1:]
        ]

        if all(matches):
            new_tensors = [
                as_cuda_ndarray_variable(t) for t in axis_and_tensors[1:]
            ]
            new_a_and_t = [axis_and_tensors[0]] + new_tensors

            replacement_node = host_from_gpu(GpuJoinUnsafe()(*new_a_and_t))

            return [replacement_node]
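
A rough usage sketch for the join rewrite follows. It is hypothetical: it assumes JoinUnsafe takes the same (axis, tensor, tensor, ...) inputs as theano.tensor.join and that float32 shared variables live on the GPU, so the rewrite turns join(host_from_gpu(a), host_from_gpu(b)) into host_from_gpu(gpu_join(a, b)) during function compilation.

# Hypothetical usage sketch (assumes JoinUnsafe mirrors theano.tensor.join and
# that float32 shared variables are stored on the GPU, i.e. device=gpu):
import numpy
import theano

a = theano.shared(numpy.zeros((3, 4), dtype="float32"))  # GPU shared variable
b = theano.shared(numpy.ones((5, 4), dtype="float32"))   # GPU shared variable
joined = JoinUnsafe()(0, a, b)     # graph: join(host_from_gpu(a), host_from_gpu(b))
f = theano.function([], joined)    # rewritten to host_from_gpu(gpu_join(a, b))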
Example #17
def local_gpu_advanced_subtensor1_floats(node):
    if isinstance(node.op, GpuFromHost):
        host_input = node.inputs[0]
        if host_input.owner and \
           host_input.owner.op.__class__ is AdvancedSubtensor1Floats:
            x = host_input.owner.inputs[0]
            coords = host_input.owner.inputs[1:]
            return [
                GpuAdvancedSubtensor1Floats(host_input.owner.op._tag)(
                    as_cuda_ndarray_variable(x), *coords)
            ]
    if node.op.__class__ is AdvancedSubtensor1Floats:
        x = node.inputs[0]
        coords = node.inputs[1:]
        # print x.owner.op, x.type, node.op._tag # DEV
        if (x.owner and isinstance(x.owner.op, HostFromGpu)
                and x.dtype == "float32"):
            gpu_x, = x.owner.inputs
            return [
                host_from_gpu(
                    GpuAdvancedSubtensor1Floats(node.op._tag)(gpu_x, *coords))
            ]
    return False