Example #1
def tensor_to_cuda(x):
    if isinstance(x.type, tensor.TensorType) and x.type.dtype == "float32":
        y = CudaNdarrayType(broadcastable=x.type.broadcastable)()
        if x.name:
            y.name = x.name + "[cuda]"
        return y
    else:
        return x
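
A minimal usage sketch for tensor_to_cuda, assuming theano.tensor is imported as tensor and the CUDA backend is available (variable names are illustrative):

x = tensor.fmatrix("x")        # float32 TensorType variable
y = tensor_to_cuda(x)          # fresh CudaNdarrayType variable, named "x[cuda]"
z = tensor.imatrix("z")        # non-float32 input is returned unchanged
assert tensor_to_cuda(z) is z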
Example #2
    def make_node(
            self,
            # model parameters and bookkeeping variables
            V,
            UT,
            Uinv,
            QT,
            omega,
            w_bar,
            # minibatch value inputs
            HT,
            KindexesT):
        """outputs will be: AT, q, s, work_d, work_m"""

        # The following are supposed to reside on the GPU
        V = as_cuda_ndarray_variable(V)
        UT = as_cuda_ndarray_variable(UT)
        Uinv = as_cuda_ndarray_variable(Uinv)
        QT = as_cuda_ndarray_variable(QT)
        omega = as_cuda_ndarray_variable(omega)
        w_bar = as_cuda_ndarray_variable(w_bar)

        HT = as_cuda_ndarray_variable(HT)

        # This is on CPU
        KindexesT = as_tensor_variable(KindexesT)

        # List of op parameters
        params = [V, UT, Uinv, QT, omega, w_bar, HT, KindexesT]

        # make sure parameters are either all of dtype float32 or all of dtype float64 (except for Kindexes which are integers)
        elem_type = V.dtype
        if elem_type != "float32" and elem_type != "float64":
            raise TypeError(
                "GpuFactoredSphericalOp parameter V must have dtype of float32 or float64"
            )

        check_tensor_variables_ndim_and_dtype(2, elem_type,
                                              ["V", "UT", "Uinv", "QT", "HT"],
                                              locals())
        check_tensor_variables_ndim_and_dtype(1, elem_type, ["omega", "w_bar"],
                                              locals())
        check_tensor_variables_ndim_and_dtype(2, "int32", ["KindexesT"],
                                              locals())

        # Now properly set up outputs to compute: AT, q, s
        outputs = [
            CudaNdarrayType(broadcastable=(False, False))(),  # AT
            CudaNdarrayType(broadcastable=(False, ))(),  # q
            CudaNdarrayType(broadcastable=(False, ))(),  # s
            CudaNdarrayType(broadcastable=(False, False))(),  # work_d
            CudaNdarrayType(broadcastable=(False, False))()  # work_m
        ]

        return Apply(self, params, outputs)
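
The helper check_tensor_variables_ndim_and_dtype is not defined in this snippet. Judging only from how it is called above (an ndim, a dtype, a list of variable names, and locals()), a hypothetical minimal version might look like the following sketch; the real implementation may differ:

def check_tensor_variables_ndim_and_dtype(ndim, dtype, varnames, scope):
    # Hypothetical sketch: look up each variable by name in the caller's locals()
    # and verify its rank and element type.
    for varname in varnames:
        var = scope[varname]
        if var.ndim != ndim:
            raise TypeError("%s must have ndim %d (got %d)" % (varname, ndim, var.ndim))
        if var.dtype != dtype:
            raise TypeError("%s must have dtype %s (got %s)" % (varname, dtype, var.dtype))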
Example #3
def cuda_shared_constructor(value, name=None, strict=False,
        allow_downcast=None, borrow=False, broadcastable=None):
    """SharedVariable Constructor for CudaNdarrayType"""

    # THIS CONSTRUCTOR TRIES TO CAST VALUE TO A FLOAT32, WHICH THEN GOES ONTO THE CARD
    # SO INT shared vars, float64 shared vars, etc. all end up on the card.
    # THIS IS NOT THE DEFAULT BEHAVIOUR THAT WE WANT.
    # SEE float32_shared_constructor

    #TODO: what should strict mean in this context, since we always have to make a copy?
    if strict:
        _value = value
    else:
        _value = theano._asarray(value, dtype='float32')

    if not isinstance(_value, numpy.ndarray):
        raise TypeError('ndarray required')
    if _value.dtype.num != CudaNdarrayType.typenum:
        raise TypeError('float32 ndarray required')

    if broadcastable is None:
        broadcastable = (False,) * len(value.shape)
    type = CudaNdarrayType(broadcastable=broadcastable)
    print "trying to return?"
    try:
        rval = CudaNdarraySharedVariable(type=type, value=_value, name=name, strict=strict)
    except Exception, e:
        print "ERROR", e
        raise
Example #4
def may_share_memory(a, b, raise_other_type=True):
    a_ndarray = isinstance(a, np.ndarray)
    b_ndarray = isinstance(b, np.ndarray)
    if a_ndarray and b_ndarray:
        return TensorType.may_share_memory(a, b)
    a_cuda = _is_cuda(a)
    b_cuda = _is_cuda(b)
    if a_cuda and b_cuda:
        return CudaNdarrayType.may_share_memory(a, b)
    a_gpua = _is_gpua(a)
    b_gpua = _is_gpua(b)
    if a_gpua and b_gpua:
        return gpuarray.pygpu.gpuarray.may_share_memory(a, b)

    a_sparse = _is_sparse(a)
    b_sparse = _is_sparse(b)
    if (not(a_ndarray or a_sparse or a_cuda or a_gpua) or
            not(b_ndarray or b_sparse or b_cuda or b_gpua)):
        if raise_other_type:
            raise TypeError("may_share_memory support only ndarray"
                            " and scipy.sparse, CudaNdarray or GpuArray type")
        return False

    if a_cuda or b_cuda or a_gpua or b_gpua:
        return False
    return SparseType.may_share_memory(a, b)
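
For two plain ndarrays the check above falls through to TensorType.may_share_memory, which for ndarray inputs amounts to numpy.may_share_memory; a small illustration:

import numpy as np

a = np.zeros((4, 4), dtype="float32")
b = a[1:3]                                    # a view backed by a's buffer
print(may_share_memory(a, b))                 # True
print(may_share_memory(a, np.zeros(3, dtype="float32")))  # False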
Example #5
    def __init__(self, computeGradient=True):
        super(GpuCtc, self).__init__()
        self.computeGradient = computeGradient
        self.costs = T.fvector(name="ctc_cost")
        if self.computeGradient:
            self.gradients = CudaNdarrayVariable(
                name="ctc_grad",
                type=CudaNdarrayType(broadcastable=[False, False, False]))
Example #6
    def make_node(self, output_spike, H_out, weights):
        if output_spike.type.ndim != 4:
            raise TypeError('output_spike must be 4D tensor')
        if H_out.type.ndim != 4:
            raise TypeError('H_out must be 4D tensor')
        if weights.type.ndim != 4:
            raise TypeError('weights must be 4D tensor')
        # if LR.type.ndim != 1:
        #     raise TypeError('LR must be 1D tensor')
        # if weight_update.type.ndim != 4:
        #     raise TypeError('weight_update must be 4D tensor')

        output_spike = as_cuda_ndarray_variable(output_spike)
        H_out = as_cuda_ndarray_variable(H_out)
        weights = as_cuda_ndarray_variable(weights)
        # LR= as_cuda_ndarray_variable(LR)
        #weight_update = as_cuda_ndarray_variable(weight_update)

        print('MAKENODE: ', output_spike.shape, H_out.shape, weights.shape)
        # broadcastable = [output_spike.type.broadcastable[0], H_out.type.broadcastable[0],weights.type.broadcastable[0],
        #                  weight_update,False, False, False, False]
        # otype = CudaNdarrayType(broadcastable=[False] * 4)
        broadcastable = [False, False, False, False, False]
        return Apply(self, [output_spike, H_out, weights],
                     [CudaNdarrayType(broadcastable)()])
Example #8
    def make_node(self, initial_state, inp_state, inp_update, inp_reset,
                  state_to_state, state_to_update, state_to_reset):

        weights = [state_to_state, state_to_update, state_to_reset]

        batch_size = inp_state.shape[1]
        assert initial_state.dtype == "float32"
        assert initial_state.ndim == 1

        initial_state = as_cuda_ndarray_variable(
            tensor.repeat(initial_state[None, :], batch_size, 0))

        for i, w in enumerate(weights):
            weights[i] = as_cuda_ndarray_variable(w)

        inputs = [inp_state, inp_update, inp_reset]
        for i, b in enumerate(inputs):
            inputs[i] = as_cuda_ndarray_variable(b)

        for w in weights:
            assert w.dtype == "float32"
            assert w.ndim == 2

        for i in inputs:
            assert i.dtype == "float32"
            assert i.ndim == 3

        out_type = CudaNdarrayType((False, False))
        return theano.Apply(self, [initial_state] + inputs + weights,
                            [out_type()])
Example #9
    def make_node(self, x, y):
        if x.type.ndim != 2:
            raise TypeError(x)
        if y.type.ndim != 2:
            raise TypeError(y)
        otype = CudaNdarrayType(
            (x.type.broadcastable[0], y.type.broadcastable[1]))
        return Apply(self, [x, y], [otype()])
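
The output type keeps the row pattern of x and the column pattern of y, mirroring the shape rule of a matrix product. A small illustration with hypothetical inputs:

x = CudaNdarrayType(broadcastable=(True, False))()   # 1 x n: first dim broadcastable
y = CudaNdarrayType(broadcastable=(False, False))()  # n x m
otype = CudaNdarrayType((x.type.broadcastable[0], y.type.broadcastable[1]))
assert otype.broadcastable == (True, False)          # result behaves as 1 x m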
Example #10
def float32_shared_constructor(value,
                               name=None,
                               strict=False,
                               allow_downcast=None,
                               borrow=False,
                               broadcastable=None,
                               target='gpu'):
    """
    SharedVariable Constructor for CudaNdarrayType from numpy.ndarray or
    CudaNdarray.

    """
    if target != 'gpu':
        raise TypeError('not for gpu')
    if theano.sandbox.cuda.use.device_number is None:
        theano.sandbox.cuda.use("gpu",
                                force=True,
                                default_to_move_computation_to_gpu=False,
                                move_shared_float32_to_gpu=False,
                                enable_cuda=False)

    # if value isn't a float32 ndarray, or a CudaNdarray then raise

    if not isinstance(value, (numpy.ndarray, theano.sandbox.cuda.CudaNdarray)):
        raise TypeError('ndarray or CudaNdarray required')
    if isinstance(
            value,
            numpy.ndarray) and value.dtype.num != CudaNdarrayType.typenum:
        raise TypeError('float32 ndarray required')

    if broadcastable is None:
        broadcastable = (False, ) * len(value.shape)
    type = CudaNdarrayType(broadcastable=broadcastable)
    get_value_return_ndarray = True
    if isinstance(value, theano.sandbox.cuda.CudaNdarray):
        get_value_return_ndarray = False
        if borrow:
            deviceval = value
        else:
            deviceval = value.copy()
    else:
        # type.broadcastable is guaranteed to be a tuple, which this next
        # function requires
        deviceval = type_support_filter(value, type.broadcastable, False, None)

    try:
        rval = CudaNdarraySharedVariable(type=type,
                                         value=deviceval,
                                         name=name,
                                         strict=strict)
    except Exception as e:
        print("ERROR", e)
        raise

    rval.get_value_return_ndarray = get_value_return_ndarray

    return rval
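
A minimal usage sketch, assuming a configured GPU and a float32 ndarray (values are illustrative). In practice this constructor is normally reached through theano.shared(...) once registered for the 'gpu' target rather than called directly:

import numpy

w0 = numpy.zeros((3, 4), dtype="float32")
w = float32_shared_constructor(w0, name="w", borrow=False)
print(type(w.type))            # CudaNdarrayType
print(w.get_value().shape)     # (3, 4), returned as a numpy.ndarray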
Example #11
    def make_node(self, img, kern):
        if img.type.ndim != 4:
            raise TypeError('img must be 4D tensor')
        if kern.type.ndim != 4:
            raise TypeError('kern must be 4D tensor')

        broadcastable = [img.type.broadcastable[0], kern.type.broadcastable[0],
                         False, False]
        return Apply(self, [img, kern], [CudaNdarrayType(broadcastable)()])
Example #12
    def make_node(self, x, y):
        # we suppose type checking has been done, but make sure.
        assert (x.type.ndim == 1 and y.type.ndim == 1
                and x.type.dtype == 'float32' and y.type.dtype == 'float32')

        bz = [x.type.broadcastable[0], y.type.broadcastable[0]]

        outputs = [CudaNdarrayType(dtype='float32', broadcastable=bz)()]
        return Apply(self, [x, y], outputs)
Example #13
    def make_node(self, inp1, inp2):
        inp1 = as_cuda_ndarray_variable(inp1)
        inp2 = as_cuda_ndarray_variable(inp2)

        assert inp1.ndim == 2
        assert inp2.ndim == 2
        return theano.Apply(
            self, [inp1, inp2],
            [CudaNdarrayType(broadcastable=[False] * inp1.type.ndim)()])
Example #14
    def make_node(self, x, y, a):
        if x.type.ndim != 2:
            raise TypeError(x)
        if y.type.ndim != 2:
            raise TypeError(y)
        if not tensor.blas._as_scalar(a):
            raise TypeError(a)
        otype = CudaNdarrayType(
            (x.type.broadcastable[0], y.type.broadcastable[1]))
        return Apply(self, [x, y, a], [otype()])
Example #15
def test_int_pow():
    a = CudaNdarrayType([False])()

    f = theano.function([a], (a * 4).sum(), mode=mode_with_gpu)

    op_names = [n.op.__class__.__name__ for n in f.maker.fgraph.toposort()]
    assert op_names == ['GpuCAReduce', 'GpuElemwise', 'HostFromGpu']

    f = theano.function([a], tensor.pow(a, 4).sum(), mode=mode_with_gpu)
    op_names = [n.op.__class__.__name__ for n in f.maker.fgraph.toposort()]
    assert op_names == ['GpuElemwise', 'GpuCAReduce', 'HostFromGpu']
Example #16
    def make_node(self, img, kern, desc):
        if img.type.ndim != 4:
            raise TypeError('img must be 4D tensor')
        if kern.type.ndim != 4:
            raise TypeError('kern must be 4D tensor')

        if not isinstance(desc.type, CDataType) \
                or desc.type.ctype != 'cudnnConvolutionDescriptor_t':
            raise TypeError('desc must be cudnnConvolutionDescriptor_t')

        broadcastable = (img.type.broadcastable[0], kern.type.broadcastable[0],
                         False, False)

        return Apply(self, [img, kern, desc],
                     [CudaNdarrayType(broadcastable)()])
Example #17
    def test_dump_load(self):
        if not cuda_ndarray.cuda_enabled:
            raise SkipTest('Optional package cuda disabled')

        x = CudaNdarraySharedVariable('x', CudaNdarrayType((1, 1), name='x'),
                                      [[1]], False)

        with open('test', 'wb') as f:
            dump(x, f)

        with open('test', 'rb') as f:
            x = load(f)

        assert x.name == 'x'
        assert_allclose(x.get_value(), [[1]])
Example #18
    def make_node(self, kern, topgrad, desc):
        kern = as_cuda_ndarray_variable(kern)
        topgrad = as_cuda_ndarray_variable(topgrad)
        if kern.type.ndim != 4:
            raise TypeError('kern must be 4D tensor')
        if topgrad.type.ndim != 4:
            raise TypeError('topgrad must be 4D tensor')

        if not isinstance(desc.type, CDataType) \
                or desc.type.ctype != 'cudnnConvolutionDescriptor_t':
            raise TypeError('desc must be cudnnConvolutionDescriptor_t')

        broadcastable = [topgrad.type.broadcastable[0],
                         kern.type.broadcastable[1],
                         False, False]
        return Apply(self, [kern, topgrad, desc],
                     [CudaNdarrayType(broadcastable)()])
Example #19
    def make_node(self, acts, input_lengths, flat_labels, label_lengths):
        if not isinstance(acts.type, CudaNdarrayType):
            raise Exception("Activations should be CudaNdarrayType, not %s" %
                            (acts.type, ))
        acts_ = acts
        input_lengths_ = T.as_tensor_variable(input_lengths)
        flat_labels_ = T.as_tensor_variable(flat_labels)
        label_lengths_ = T.as_tensor_variable(label_lengths)

        if acts_.dtype != "float32":
            raise Exception("acts must be float32 instead of %s" % acts.dtype)
        if input_lengths.dtype != "int32":
            raise Exception("input_lengths must be int32 instead of %s" %
                            input_lengths.dtype)
        if flat_labels.dtype != "int32":
            raise Exception("flat_labels must be int32 instead of %s" %
                            flat_labels.dtype)
        if label_lengths.dtype != "int32":
            raise Exception("label_lengths must be int32 instead of %s" %
                            label_lengths.dtype)

        # Normally a singleton Op instance is created, and different Apply nodes are
        # created for different inputs.
        # Here, we create an Op instance specifically for this application,
        # and store the gradient variable in it so that it can be used by grad().
        op = GpuCtc()
        op.costs = T.fvector(name="ctc_cost")
        op.gradients = CudaNdarrayVariable(
            name="gpu_ctc_grad",
            type=CudaNdarrayType(broadcastable=[False, False, False]))

        # Don't compute gradient unless needed
        op.computeGradient = theano.shared(np.asarray([1], dtype=np.int32))

        applyNode = theano.Apply(op,
                                 inputs=[
                                     acts_, input_lengths_, flat_labels_,
                                     label_lengths_, op.computeGradient
                                 ],
                                 outputs=[op.costs, op.gradients])

        # Return only the cost. Gradient will be returned by grad()
        self.default_output = 0
        return applyNode
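
Because default_output is set to 0, calling the op returns only the cost vector, while the per-node gradients variable created above remains available for grad(). A hedged usage sketch (the input variables are hypothetical):

# acts: float32 CudaNdarray activations; the remaining three inputs are int32 tensors
ctc_costs = GpuCtc()(acts, input_lengths, flat_labels, label_lengths)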
Example #20
    def make_node(self, dCdy, x, a, b, l, s):
        for input, ndim in ((dCdy, 2 + len(self.patch_shape)),
                            (x, 2 + len(self.patch_shape)), (a, 2), (b, 2),
                            (l, 2), (s, 2)):
            if not input.type.ndim == ndim:
                raise TypeError()
        dCdy, x, a, b, l, s = tuple(map(gpu_contiguous, (dCdy, x, a, b, l, s)))
        inputs = list(map(as_cuda_ndarray_variable, (dCdy, x, a, b, l, s)))

        # we could return the much smaller dCdl, dCds but that
        # gives us very little room to parallelize (e.g. with batch
        # size 100 and 3 spatial dimensions we have only 600
        # independently computable output elements).
        output_type = CudaNdarrayType(
            broadcastable=list(inputs[0].type.broadcastable) + [False],
            dtype=inputs[0].type.dtype)
        dydl = output_type()
        dyds = output_type()
        return Apply(self, inputs, [dydl, dyds])
Example #21
    def make_node(self, kern, topgrad, shape=None):
        kern = as_cuda_ndarray_variable(kern)
        topgrad = as_cuda_ndarray_variable(topgrad)
        if kern.type.ndim != 4:
            raise TypeError('kern must be 4D tensor')
        if topgrad.type.ndim != 4:
            raise TypeError('topgrad must be 4D tensor')
        if shape is None:
            if self.subsample != (1, 1):
                raise ValueError('shape must be given if subsample != (1, 1)')
            height_width = []
        else:
            height_width = [shape[0], shape[1]]
            assert shape[0].ndim == 0
            assert shape[1].ndim == 0

        broadcastable = [topgrad.type.broadcastable[0], kern.type.broadcastable[1],
                         False, False]
        return Apply(self, [kern, topgrad] + height_width, [CudaNdarrayType(broadcastable)()])
Example #22
    def make_node(self, cond, ift, iff):
        if any(ift.broadcastable) or any(iff.broadcastable):
            raise ValueError(
                "GPURowSwitch cannot operate on broadcastable "
                "output arguments (ift %s, iff %s)."
                % (ift.broadcastable, iff.broadcastable))
        out_type = ift.dtype

        cond = as_cuda_ndarray_variable(T.cast(cond.flatten(), "float32"))
        ift = as_cuda_ndarray_variable(ift)
        iff = as_cuda_ndarray_variable(iff)

        assert ift.type.dtype == iff.type.dtype
        assert cond.ndim == 1, cond.ndim
        assert ift.ndim == iff.ndim

        return theano.gof.Apply(self, [cond, ift, iff], [
            CudaNdarrayType(broadcastable=ift.broadcastable, dtype=out_type)()
        ])
Example #23
    def make_node(self, img, topgrad, shape=None):
        img = as_cuda_ndarray_variable(img)
        topgrad = as_cuda_ndarray_variable(topgrad)
        if img.type.ndim != 4:
            raise TypeError('img must be 4D tensor')
        if topgrad.type.ndim != 4:
            raise TypeError('topgrad must be 4D tensor')
        if shape is None:
            if self.subsample != (1, 1) or self.border_mode == "half":
                raise ValueError('shape must be given if subsample != (1, 1)'
                                 ' or border_mode == "half"')
            height_width = []
        else:
            height_width = [shape[0], shape[1]]
            assert shape[0].ndim == 0
            assert shape[1].ndim == 0

        broadcastable = [topgrad.type.broadcastable[1], img.type.broadcastable[1],
                         False, False]
        return Apply(self, [img, topgrad] + height_width, [CudaNdarrayType(broadcastable)()])
Example #24
    def make_node(self, x, ilist):
        x_ = as_cuda_ndarray_variable(x)
        ilist_ = gpu_contiguous(T.cast(
            ilist, dtype=config.floatX))  # T.as_tensor_variable(ilist)
        #if ilist_.type.dtype[:3] not in ('int', 'uin'):
        #    raise TypeError('index must be integers')
        if ilist_.type.ndim != 1:
            raise TypeError('index must be vector')
        if x_.type.ndim == 0:
            raise TypeError('cannot index into a scalar')

        # # c code suppose it is int64
        # if x.ndim in [1, 2, 3] and ilist_.dtype in [
        #     'int8', 'int16', 'int32', 'uint8', 'uint16', 'uint32']:
        #     ilist_ = tensor.cast(ilist_, 'int64')

        bcast = (ilist_.broadcastable[0], ) + x_.broadcastable[1:]
        return theano.gof.Apply(
            self, [x_, ilist_],
            [CudaNdarrayType(dtype=x.dtype, broadcastable=bcast)()])
Example #25
    def make_node(self, img, topgrad, desc, h, w):
        img = as_cuda_ndarray_variable(img)
        topgrad = as_cuda_ndarray_variable(topgrad)
        if img.type.ndim != 4:
            raise TypeError('img must be 4D tensor')
        if topgrad.type.ndim != 4:
            raise TypeError('topgrad must be 4D tensor')

        if not isinstance(desc.type, CDataType) \
                or desc.type.ctype != 'cudnnConvolutionDescriptor_t':
            raise TypeError('desc must be cudnnConvolutionDescriptor_t')

        h = as_scalar(h)
        w = as_scalar(w)

        broadcastable = [topgrad.type.broadcastable[1],
                         img.type.broadcastable[1],
                         False, False]

        return Apply(self, [img, topgrad, desc, h, w],
                     [CudaNdarrayType(broadcastable)()])
Example #26
    def make_node(self, cond, ift, iff):
        if any(ift.broadcastable) or any(iff.broadcastable):
            raise ValueError(
                "GpuMaskedCAReduce cannot operate on "
                "broadcastable output arguments (ift %s, iff %s)."
                % (ift.broadcastable, iff.broadcastable))
        out_type = ift.dtype

        cond = as_cuda_ndarray_variable(T.cast(cond.flatten(), "float32"))
        ift = as_cuda_ndarray_variable(ift)
        iff = as_cuda_ndarray_variable(iff)
        # TODO check contiguous?

        assert ift.type.dtype == iff.type.dtype
        assert cond.ndim == 1, cond.ndim
        assert ift.ndim == iff.ndim

        out_bcast = ift.broadcastable[1:]
        return theano.gof.Apply(
            self, [cond, ift, iff],
            [CudaNdarrayType(broadcastable=out_bcast, dtype=out_type)()])
Example #28
def float32_shared_constructor(value,
                               name=None,
                               strict=False,
                               allow_downcast=None,
                               borrow=False,
                               broadcastable=None):
    """SharedVariable Constructor for CudaNdarrayType from numpy.ndarray or CudaNdarray"""

    # if value isn't a float32 ndarray, or a CudaNdarray then raise

    if not isinstance(value, (numpy.ndarray, theano.sandbox.cuda.CudaNdarray)):
        raise TypeError('ndarray or CudaNdarray required')
    if isinstance(
            value,
            numpy.ndarray) and value.dtype.num != CudaNdarrayType.typenum:
        raise TypeError('float32 ndarray required')

    if broadcastable is None:
        broadcastable = (False, ) * len(value.shape)
    type = CudaNdarrayType(broadcastable=broadcastable)
    get_value_return_ndarray = True
    if isinstance(value, theano.sandbox.cuda.CudaNdarray):
        get_value_return_ndarray = False
        if borrow:
            deviceval = value
        else:
            deviceval = value.copy()
    else:
        # type.broadcastable is guaranteed to be a tuple, which this next
        # function requires
        deviceval = type_support_filter(value, type.broadcastable, False, None)

    try:
        rval = CudaNdarraySharedVariable(type=type,
                                         value=deviceval,
                                         name=name,
                                         strict=strict)
    except Exception as e:
        print("ERROR", e)
        raise

    rval.get_value_return_ndarray = get_value_return_ndarray

    return rval
Example #29
def may_share_memory(a, b, raise_other_type=True):
    a_ndarray = isinstance(a, numpy.ndarray)
    b_ndarray = isinstance(b, numpy.ndarray)
    a_sparse = _is_sparse(a)
    b_sparse = _is_sparse(b)
    a_cuda = _is_cuda(a)
    b_cuda = _is_cuda(b)

    if (not (a_ndarray or a_sparse or a_cuda)
            or not (b_ndarray or b_sparse or b_cuda)):
        if raise_other_type:
            raise TypeError("may_share_memory support only ndarray"
                            " and scipy.sparse and CudaNdarray type")
        return False

    if a_ndarray and b_ndarray:
        return TensorType.may_share_memory(a, b)
    if a_cuda and b_cuda:
        from theano.sandbox.cuda.type import CudaNdarrayType
        return CudaNdarrayType.may_share_memory(a, b)
    if a_cuda or b_cuda:
        return False
    return SparseType.may_share_memory(a, b)
Example #30
    def output_type(self, inp):
        return CudaNdarrayType(broadcastable=[False] * inp.type.ndim)
Example #31
    def make_node(
            self,
            # model parameters and bookkeeping variables
            V,
            UT,
            Uinv,
            QT,
            omega,
            w_bar,
            # minibatch value inputs
            HT,
            KindexesT,
            # workspace
            work_d,
            work_m,
            #  minibatch gradient inputs
            grad_AT,
            grad_q,
            grad_s,
            # learning rate
            eta):
        """output will be: grad_HT """

        # The following are supposed to reside on the GPU
        V = as_cuda_ndarray_variable(V)
        UT = as_cuda_ndarray_variable(UT)
        Uinv = as_cuda_ndarray_variable(Uinv)
        QT = as_cuda_ndarray_variable(QT)
        omega = as_cuda_ndarray_variable(omega)
        w_bar = as_cuda_ndarray_variable(w_bar)

        HT = as_cuda_ndarray_variable(HT)

        # This is on CPU
        KindexesT = as_tensor_variable(KindexesT)

        # The following are supposed to reside on the GPU
        work_d = as_cuda_ndarray_variable(work_d)
        work_m = as_cuda_ndarray_variable(work_m)

        grad_AT = as_cuda_ndarray_variable(grad_AT)
        grad_q = as_cuda_ndarray_variable(grad_q)
        grad_s = as_cuda_ndarray_variable(grad_s)

        # This is on CPU
        eta = as_tensor_variable(eta)

        # parameter list
        params = [
            V, UT, Uinv, QT, omega, w_bar, HT, KindexesT, work_d, work_m,
            grad_AT, grad_q, grad_s, eta
        ]

        # make sure parameters are either all of dtype float32 or all of dtype float64 (except for Kindexes which are integers)
        elem_type = V.dtype
        if elem_type != "float32" and elem_type != "float64":
            raise TypeError(
                "GpuFactoredSphericalOp parameter V must have dtype of float32 or float64"
            )

        check_tensor_variables_ndim_and_dtype(0, elem_type, ["eta"], locals())
        check_tensor_variables_ndim_and_dtype(
            2, elem_type,
            ["V", "UT", "Uinv", "QT", "HT", "grad_AT", "work_d", "work_m"],
            locals())
        check_tensor_variables_ndim_and_dtype(
            1, elem_type, ["omega", "w_bar", "grad_q", "grad_s"], locals())
        check_tensor_variables_ndim_and_dtype(2, "int32", ["KindexesT"],
                                              locals())

        # Now properly set up outputs to compute: grad_HT
        outputs = [CudaNdarrayType(broadcastable=(False, False))()]

        return Apply(self, params, outputs)
Example #32
    def make_node(self,
                  V,
                  U,
                  UinvT,
                  Q,
                  H,
                  Y_indexes,
                  Y_values,
                  learning_rate,
                  use_qtilde=0,
                  use_lower=1,
                  invup_mode=1,
                  stabilize_period=10,
                  unfactorize_period=100,
                  debug_print=0):

        # The following are supposed to reside on the GPU
        V = as_cuda_ndarray_variable(V)
        U = as_cuda_ndarray_variable(U)
        UinvT = as_cuda_ndarray_variable(UinvT)
        Q = as_cuda_ndarray_variable(Q)
        H = as_cuda_ndarray_variable(H)

        # The following are on the CPU
        Y_indexes = as_tensor_variable(Y_indexes)
        Y_values = as_tensor_variable(Y_values)
        learning_rate = as_tensor_variable(learning_rate)
        use_qtilde = as_tensor_variable(use_qtilde)
        use_lower = as_tensor_variable(use_lower)
        invup_mode = as_tensor_variable(invup_mode)
        stabilize_period = as_tensor_variable(stabilize_period)
        unfactorize_period = as_tensor_variable(unfactorize_period)
        debug_print = as_tensor_variable(debug_print)

        # print "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@"
        # for k,v in locals().items():
        #     print k,':',type(v)
        # print "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@"

        params = [
            V, U, UinvT, Q, H, Y_indexes, Y_values, learning_rate, use_qtilde,
            use_lower, invup_mode, stabilize_period, unfactorize_period,
            debug_print
        ]

        # make sure parameters are either all of dtype float32 or all of dtype float64 (except for Y_indexes which are integers)
        elem_type = V.dtype
        if elem_type != "float32" and elem_type != "float64":
            raise TypeError(
                "LargeSparseTargets parameter V must have dtype of float32 or float64"
            )

        check_tensor_variables_ndim_and_dtype(0, elem_type, ["learning_rate"],
                                              locals())
        check_tensor_variables_ndim_and_dtype(
            2, elem_type, ["V", "U", "UinvT", "Q", "H", "Y_values"], locals())
        check_tensor_variables_ndim_and_dtype(2, "int32", ["Y_indexes"],
                                              locals())

        # T.matrix(elem_type)

        # Now properly set up outputs to compute
        if self.what_to_output == 0:  # output scalar cost
            outputs = [T.scalar(elem_type)]
        elif self.what_to_output == 1:  # output grad_H
            outputs = [CudaNdarrayType(broadcastable=(False, False))()]
        elif self.what_to_output == 2:  # output cost and grad_H
            outputs = [
                T.scalar(elem_type),
                CudaNdarrayType(broadcastable=(False, False))()
            ]
        else:
            raise ValueError(
                "Invalid value for what_to_output: must be 0,1, or 2")

        return Apply(self, params, outputs)
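
Depending on self.what_to_output, the node yields the scalar cost, grad_H, or both. A hedged sketch of how such an op might be applied; the constructor argument is assumed from the attribute used above and may not match the real class:

op = LargeSparseTargets(what_to_output=2)   # assumed constructor signature
cost, grad_H = op(V, U, UinvT, Q, H, Y_indexes, Y_values, learning_rate)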