Example #1
def local_gpua_advanced_incsubtensor(node, context_name):

    # This is disabled on non-cuda contexts
    if get_context(context_name).kind != 'cuda':
        return None

    x, y, ilist = node.inputs

    # Gpu Ops needs both inputs to have the same dtype
    if (x.type.dtype != y.type.dtype):
        dtype = scalar.upcast(x.type.dtype, y.type.dtype)
        if x.type.dtype != dtype:
            x = tensor.cast(x, dtype)
        if y.type.dtype != dtype:
            y = tensor.cast(y, dtype)

    set_instead_of_inc = node.op.set_instead_of_inc
    active_device_no = theano.sandbox.cuda.active_device_number()
    device_properties = theano.sandbox.cuda.device_properties

    compute_capability = device_properties(active_device_no)['major']

    if (compute_capability < 2 or x.ndim != 2 or y.ndim != 2):
        return GpuAdvancedIncSubtensor1(set_instead_of_inc=set_instead_of_inc)
    else:
        return GpuAdvancedIncSubtensor1_dev20(
            set_instead_of_inc=set_instead_of_inc)
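The rewrite above only has to guarantee that x and y share a dtype before the GPU op is built: scalar.upcast picks the common dtype and tensor.cast promotes whichever input needs it. Below is a minimal NumPy sketch of that promotion step (match_dtypes is a hypothetical helper, and numpy.promote_types is assumed to follow the same promotion rules as theano.scalar.upcast for these dtypes):

# A minimal sketch of the dtype-matching step above. match_dtypes is a
# hypothetical helper; numpy.promote_types stands in for theano.scalar.upcast.
import numpy as np

def match_dtypes(x, y):
    """Cast x and y to their common dtype, mimicking the rewrite above."""
    if x.dtype != y.dtype:
        common = np.promote_types(x.dtype, y.dtype)
        if x.dtype != common:
            x = x.astype(common)
        if y.dtype != common:
            y = y.astype(common)
    return x, y

x, y = match_dtypes(np.arange(4, dtype='int32'), np.ones(4, dtype='float64'))
print(x.dtype, y.dtype)  # float64 float64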
Example #2
    def test_prod_without_zeros_custom_dtype(self):
        """
        Test the ability to provide your own output dtype for a ProdWithoutZeros().
        """
        # We try multiple axis combinations even though axis should not matter.
        axes = [None, 0, 1, [0], [1], [0, 1]]
        idx = 0
        for input_dtype in imap(str, theano.scalar.all_types):
            x = tensor.matrix(dtype=input_dtype)
            for output_dtype in imap(str, theano.scalar.all_types):
                axis = axes[idx % len(axes)]
                # If output_dtype would force a downcast, we expect a TypeError
                # We always allow int/uint inputs with float/complex outputs.
                upcasted_dtype = scalar.upcast(input_dtype, output_dtype)
                if (output_dtype == upcasted_dtype
                        or (input_dtype in tensor.discrete_dtypes
                            and output_dtype in tensor.continuous_dtypes)):
                    prod_woz_var = ProdWithoutZeros(axis=axis,
                                                    dtype=output_dtype)(x)
                    assert prod_woz_var.dtype == output_dtype
                else:
                    self.assertRaises(
                        TypeError,
                        ProdWithoutZeros(axis=axis, dtype=output_dtype), x)

                idx += 1
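The test above encodes the acceptance rule used throughout these reductions: an output dtype is valid if it equals upcast(input_dtype, output_dtype) (no precision is lost), or if the input is discrete (int/uint/bool) and the output is continuous (float/complex). A small stand-alone sketch of that predicate, using numpy.promote_types as a stand-in for scalar.upcast (an assumption) and hypothetical DISCRETE/CONTINUOUS tuples in place of tensor.discrete_dtypes/continuous_dtypes:

import numpy as np

DISCRETE = ('bool', 'int8', 'int16', 'int32', 'int64',
            'uint8', 'uint16', 'uint32', 'uint64')
CONTINUOUS = ('float16', 'float32', 'float64', 'complex64', 'complex128')

def output_dtype_allowed(input_dtype, output_dtype):
    """True when reducing input_dtype into output_dtype never downcasts,
    or when going from an integer input to a float/complex output."""
    upcasted = np.promote_types(input_dtype, output_dtype).name
    return (output_dtype == upcasted or
            (input_dtype in DISCRETE and output_dtype in CONTINUOUS))

print(output_dtype_allowed('int32', 'float32'))   # True: int -> float is allowed
print(output_dtype_allowed('float64', 'float32')) # False: would downcast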
Example #3
    def make_node(self, a, val, offset):
        a = tensor.as_tensor_variable(a)
        val = tensor.as_tensor_variable(val)
        offset = tensor.as_tensor_variable(offset)
        if a.ndim != 2:
            raise TypeError('%s: first parameter must have exactly'
                            ' two dimensions' % self.__class__.__name__)
        elif val.ndim != 0:
            raise TypeError('%s: second parameter must be a scalar'\
                            % self.__class__.__name__)
        elif offset.ndim != 0:
            raise TypeError('%s: third parameter must be a scalar'\
                            % self.__class__.__name__)
        val = tensor.cast(val, dtype=scalar.upcast(a.dtype, val.dtype))
        if val.dtype != a.dtype:
            raise TypeError('%s: type of second parameter must be the same'
                            ' as the first\'s' % self.__class__.__name__)
        elif offset.dtype[:3] != 'int':
            raise TypeError('%s: type of third parameter must be as integer'
                            ' use theano.tensor.cast( input, \'int32/int64\')' \
                            % self.__class__.__name__)

        return gof.Apply(self, [a, val, offset], [a.type()])
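The dtype check above casts val up to upcast(a.dtype, val.dtype) and then requires the result to still equal a.dtype, which only holds when val can be represented in a's dtype without promotion. A quick illustration with numpy.promote_types standing in for scalar.upcast (an assumption; val_compatible is a hypothetical helper):

import numpy as np

def val_compatible(a_dtype, val_dtype):
    # val is first upcast against a; it is accepted only if that upcast
    # lands back on a's dtype, i.e. val never forces a promotion of a.
    return np.promote_types(a_dtype, val_dtype).name == a_dtype

print(val_compatible('float64', 'float32'))  # True: float32 fits in float64
print(val_compatible('float32', 'float64'))  # False: would need promotion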
Example #4
    def test_prod_custom_dtype(self):
        """
        Test the ability to provide your own output dtype for a prod.
        """
        # We try multiple axis combinations even though axis should not matter.
        axes = [None, 0, 1, [0], [1], [0, 1]]
        idx = 0
        for input_dtype in imap(str, theano.scalar.all_types):
            x = tensor.matrix(dtype=input_dtype)
            for output_dtype in imap(str, theano.scalar.all_types):
                axis = axes[idx % len(axes)]
                # If output_dtype would force a downcast, we expect a TypeError
                # We always allow int/uint inputs with float/complex outputs.
                upcasted_dtype = scalar.upcast(input_dtype, output_dtype)
                if (output_dtype == upcasted_dtype
                        or (input_dtype in tensor.discrete_dtypes
                            and output_dtype in tensor.continuous_dtypes)):
                    prod_var = x.prod(dtype=output_dtype, axis=axis)
                    assert prod_var.dtype == output_dtype

                    if "complex" in output_dtype:
                        continue
                    # Check that we can take the gradient
                    grad_var = tensor.grad(prod_var.sum(),
                                           x,
                                           disconnected_inputs='ignore')
                else:
                    self.assertRaises(TypeError,
                                      x.prod,
                                      dtype=output_dtype,
                                      axis=axis)

                idx += 1
Example #5
def local_gpua_advanced_incsubtensor(node, context_name):

    # This is disabled on non-cuda contexts
    if get_context(context_name).kind != "cuda":
        return None

    x, y, ilist = node.inputs

    # Gpu Ops needs both inputs to have the same dtype
    if x.type.dtype != y.type.dtype:
        dtype = scalar.upcast(x.type.dtype, y.type.dtype)
        if x.type.dtype != dtype:
            x = tensor.cast(x, dtype)
        if y.type.dtype != dtype:
            y = tensor.cast(y, dtype)

    set_instead_of_inc = node.op.set_instead_of_inc
    active_device_no = theano.sandbox.cuda.active_device_number()
    device_properties = theano.sandbox.cuda.device_properties

    compute_capability = device_properties(active_device_no)["major"]

    if compute_capability < 2 or x.ndim != 2 or y.ndim != 2:
        return GpuAdvancedIncSubtensor1(set_instead_of_inc=set_instead_of_inc)
    else:
        return GpuAdvancedIncSubtensor1_dev20(set_instead_of_inc=set_instead_of_inc)
Example #6
    def filter(self, data, strict=False, allow_downcast=None):
        if strict:
            if not isinstance(data, gpuarray.GpuArray):
                raise TypeError("%s expected a GpuArray object." % self,
                                data, type(data))
            if self.typecode != data.typecode:
                raise TypeError("%s expected typecode %d (dtype %s), "
                                "got %d (dtype %s)." %
                                (self, self.typecode, self.dtype,
                                 data.typecode, str(data.dtype)))
            # fallthrough to ndim check
        elif allow_downcast:
            data = gpuarray.array(data, dtype=self.typecode, copy=False,
                                  ndmin=len(self.broadcastable))
        else:
            up_dtype = scalar.upcast(self.dtype, data.dtype)
            if up_dtype == self.dtype:
                data = gpuarray.array(data, dtype=self.dtype, copy=False)
            else:
                raise TypeError("%s cannot store a value of dtype %s "
                                "without risking loss of precision." %
                                (self, data.dtype))

        if self.ndim != data.ndim:
            raise TypeError("Wrong number of dimensions: expected %s, "
                            "got %s with shape %s." % (self.ndim, data.ndim,
                                                       data.shape), data)
        shp = data.shape
        for i, b in enumerate(self.broadcastable):
            if b and shp[i] != 1:
                raise TypeError("Non-unit value on shape on a broadcastable"
                                " dimension.", shp, self.broadcastable)
        return data
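filter() above refuses to store data whose dtype would have to be silently downcast: a value is only accepted when upcast(self.dtype, data.dtype) lands back on self.dtype, unless allow_downcast is set. A simplified NumPy-only sketch of that rule (filter_value is a hypothetical helper; numpy.asarray and numpy.promote_types stand in for gpuarray.array and scalar.upcast):

import numpy as np

def filter_value(value, target_dtype, allow_downcast=False):
    value = np.asarray(value)
    if allow_downcast:
        return value.astype(target_dtype)
    # Only accept the value if casting it to target_dtype cannot lose precision.
    if np.promote_types(target_dtype, value.dtype) == np.dtype(target_dtype):
        return value.astype(target_dtype, copy=False)
    raise TypeError("cannot store %s as %s without risking loss of precision"
                    % (value.dtype, target_dtype))

print(filter_value(np.arange(3, dtype='float32'), 'float64').dtype)  # float64
try:
    filter_value(np.arange(3, dtype='float64'), 'float32')
except TypeError as e:
    print(e)  # refused: float64 -> float32 would lose precision
print(filter_value(np.arange(3, dtype='float64'), 'float32',
                   allow_downcast=True).dtype)  # float32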
Example #7
    def test_prod_without_zeros_custom_dtype(self):
        """
        Test the ability to provide your own output dtype for a ProdWithoutZeros().
        """
        # We try multiple axis combinations even though axis should not matter.
        axes = [None, 0, 1, [0], [1], [0, 1]]
        idx = 0
        for input_dtype in imap(str, theano.scalar.all_types):
            x = tensor.matrix(dtype=input_dtype)
            for output_dtype in imap(str, theano.scalar.all_types):
                axis = axes[idx % len(axes)]
                # If output_dtype would force a downcast, we expect a TypeError
                # We always allow int/uint inputs with float/complex outputs.
                upcasted_dtype = scalar.upcast(input_dtype, output_dtype)
                if (output_dtype == upcasted_dtype or
                        (input_dtype in tensor.discrete_dtypes and
                            output_dtype in tensor.continuous_dtypes)
                        ):
                    prod_woz_var = ProdWithoutZeros(
                            axis=axis, dtype=output_dtype)(x)
                    assert prod_woz_var.dtype == output_dtype
                else:
                    self.assertRaises(TypeError,
                            ProdWithoutZeros(axis=axis, dtype=output_dtype),
                            x)

                idx += 1
Example #8
    def test_sum_custom_dtype(self):
        """
        Test the ability to provide your own output dtype for a sum.
        """
        # We try multiple axis combinations even though axis should not matter.
        axes = [None, 0, 1, [0], [1], [0, 1]]
        idx = 0
        for input_dtype in imap(str, theano.scalar.all_types):
            x = tensor.matrix(dtype=input_dtype)
            for output_dtype in imap(str, theano.scalar.all_types):
                # If the output is a complex, the gradient of the sum will
                # cast the complex to the input dtype. We can't call the normal
                # cast on a complex to a not complex as this is ambiguous.
                if not input_dtype.startswith("complex") and output_dtype.startswith("complex"):
                    continue

                axis = axes[idx % len(axes)]
                # If output_dtype would force a downcast, we expect a TypeError
                # We always allow int/uint inputs with float/complex outputs.
                upcasted_dtype = scalar.upcast(input_dtype, output_dtype)
                if output_dtype == upcasted_dtype or (
                    input_dtype in tensor.discrete_dtypes and output_dtype in tensor.continuous_dtypes
                ):
                    sum_var = x.sum(dtype=output_dtype, axis=axis)
                    assert sum_var.dtype == output_dtype

                    if "complex" in input_dtype:
                        continue
                    # Check that we can take the gradient
                    grad_var = tensor.grad(sum_var.sum(), x, disconnected_inputs="ignore")
                else:
                    self.assertRaises(TypeError, x.sum, dtype=output_dtype, axis=axis)

                idx += 1
Example #9
def local_gpua_advanced_incsubtensor(node):

    # This optimization is disabled if cuda is not active
    if pygpu.get_default_context().kind != "cuda":
        return None

    x, y, ilist = node.inputs

    # Gpu Ops needs both inputs to have the same dtype
    if (x.type.dtype != y.type.dtype):
        dtype = scalar.upcast(x.type.dtype, y.type.dtype)
        if x.type.dtype != dtype:
            x = tensor.cast(x, dtype)
        if y.type.dtype != dtype:
            y = tensor.cast(y, dtype)

    set_instead_of_inc = node.op.set_instead_of_inc
    active_device_no = theano.sandbox.cuda.active_device_number()
    device_properties = theano.sandbox.cuda.device_properties

    compute_capability = device_properties(active_device_no)['major']

    if (compute_capability < 2 or x.ndim != 2 or y.ndim != 2):
        return [GpuAdvancedIncSubtensor1(
                set_instead_of_inc=set_instead_of_inc)(x, y, ilist)]
    else:
        return [GpuAdvancedIncSubtensor1_dev20(
                set_instead_of_inc=set_instead_of_inc)(x, y, ilist)]
Example #10
    def test_prod_custom_dtype(self):
        """
        Test the ability to provide your own output dtype for a prod.
        """
        # We try multiple axis combinations even though axis should not matter.
        axes = [None, 0, 1, [0], [1], [0, 1]]
        idx = 0
        for input_dtype in imap(str, theano.scalar.all_types):
            x = tensor.matrix(dtype=input_dtype)
            for output_dtype in imap(str, theano.scalar.all_types):
                axis = axes[idx % len(axes)]
                # If output_dtype would force a downcast, we expect a TypeError
                # We always allow int/uint inputs with float/complex outputs.
                upcasted_dtype = scalar.upcast(input_dtype, output_dtype)
                if (output_dtype == upcasted_dtype or
                        (input_dtype in tensor.discrete_dtypes and
                            output_dtype in tensor.continuous_dtypes)
                        ):
                    prod_var = x.prod(dtype=output_dtype, axis=axis)
                    assert prod_var.dtype == output_dtype

                    if "complex" in output_dtype:
                        continue
                    # Check that we can take the gradient
                    grad_var = tensor.grad(prod_var.sum(), x,
                            disconnected_inputs='ignore')
                else:
                    self.assertRaises(TypeError,
                            x.prod, dtype=output_dtype, axis=axis)

                idx += 1
Example #11
    def filter(self, data, strict=False, allow_downcast=None):
        if strict:
            if not isinstance(data, gpuarray.GpuArray):
                raise TypeError("%s expected a GpuArray object." % self,
                                data, type(data))
            if self.typecode != data.typecode:
                raise TypeError("%s expected typecode %d (dtype %s), "
                                "got %d (dtype %s)." %
                                (self, self.typecode, self.dtype,
                                 data.typecode, str(data.dtype)))
            # fallthrough to ndim check
        elif allow_downcast:
            data = gpuarray.array(data, dtype=self.typecode, copy=False,
                                  ndmin=len(self.broadcastable))
        else:
            up_dtype = scalar.upcast(self.dtype, data.dtype)
            if up_dtype == self.dtype:
                data = gpuarray.array(data, dtype=self.dtype, copy=False)
            else:
                raise TypeError("%s cannot store a value of dtype %s "
                                "without risking loss of precision." %
                                (self, data.dtype))

        if self.ndim != data.ndim:
            raise TypeError("Wrong number of dimensions: expected %s, "
                            "got %s with shape %s." % (self.ndim, data.ndim,
                                                       data.shape), data)
        shp = data.shape
        for i, b in enumerate(self.broadcastable):
            if b and shp[i] != 1:
                raise TypeError("Non-unit value on shape on a broadcastable"
                                " dimension.", shp, self.broadcastable)
        return data
Example #12
    def filter_inplace(self, data, old_data, strict=False, allow_downcast=None):
        if strict or allow_downcast or isinstance(data, cuda.CudaNdarray):
            return cuda.filter(data, self.broadcastable, strict, old_data)

        else:  # (not strict) and (not allow_downcast)
            # Check if data.dtype can be accurately cast to self.dtype
            if isinstance(data, numpy.ndarray):
                up_dtype = scal.upcast(self.dtype, data.dtype)
                if up_dtype == self.dtype:
                    return cuda.filter(data, self.broadcastable, strict, old_data)
                else:
                    raise TypeError(
                        "%s, with dtype %s, cannot store a value of "
                        "dtype %s without risking loss of precision."
                        "If you do not mind, please cast your data to %s." % (self, self.dtype, data.dtype, self.dtype),
                        data,
                    )
            else:
                converted_data = theano._asarray(data, self.dtype)

                if allow_downcast is None and type(data) is float and self.dtype == theano.config.floatX:
                    return cuda.filter(converted_data, self.broadcastable, strict, old_data)
                elif numpy.all(data == converted_data):
                    return cuda.filter(converted_data, self.broadcastable, strict, old_data)
                else:
                    raise TypeError(
                        "%s, with dtype %s, cannot store accurately value %s, "
                        "it would be represented as %s. If you do not mind, "
                        "you can cast your data to %s." % (self, self.dtype, data, converted_data, self.dtype),
                        data,
                    )
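filter_inplace() above carves out one exception to the no-downcast rule: a plain Python float may be stored into a floatX container even though that is formally a downcast, and any other value is accepted only if it round-trips exactly. A simplified sketch of that decision, with accept_scalar as a hypothetical helper and floatX assumed to be 'float32':

import numpy as np

FLOATX = 'float32'  # assumption: theano.config.floatX == 'float32'

def accept_scalar(data, target_dtype, allow_downcast=None):
    converted = np.asarray(data, dtype=target_dtype)
    if allow_downcast is None and isinstance(data, float) and target_dtype == FLOATX:
        return converted  # Python float into floatX is tolerated
    if np.all(np.asarray(data) == converted):
        return converted  # exact representation, nothing was lost
    raise TypeError("cannot store %r accurately as %s" % (data, target_dtype))

print(accept_scalar(0.1, FLOATX))  # accepted: Python float into floatX
print(accept_scalar(3, 'int8'))    # accepted: value is represented exactly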
Example #13
    def test_prod_without_zeros_custom_acc_dtype(self):
        """
        Test ability to provide your own acc_dtype for a ProdWithoutZeros().
        """
        # We try multiple axis combinations even though axis should not matter.
        axes = [None, 0, 1, [], [0], [1], [0, 1]]
        idx = 0
        for input_dtype in imap(str, theano.scalar.all_types):
            x = tensor.matrix(dtype=input_dtype)
            for acc_dtype in imap(str, theano.scalar.all_types):
                axis = axes[idx % len(axes)]
                # If acc_dtype would force a downcast, we expect a TypeError
                # We always allow int/uint inputs with float/complex outputs.
                upcasted_dtype = scalar.upcast(input_dtype, acc_dtype)
                if (acc_dtype == upcasted_dtype or
                        (input_dtype in tensor.discrete_dtypes and
                            acc_dtype in tensor.continuous_dtypes)
                        ):
                    prod_woz_var = ProdWithoutZeros(
                            axis=axis, acc_dtype=acc_dtype)(x)
                    assert prod_woz_var.owner.op.acc_dtype == acc_dtype

                    if (acc_dtype.startswith('complex') and
                        input_dtype != acc_dtype):
                        continue
                    f = theano.function([x], prod_woz_var)
                    data = numpy.random.rand(2, 3) * 3
                    data = data.astype(input_dtype)
                    f(data)
                else:
                    self.assertRaises(TypeError,
                            ProdWithoutZeros(axis=axis, acc_dtype=acc_dtype),
                            x)

                idx += 1
Example #14
    def test_reduce_custom_acc_dtype(self):
        # Test the ability to provide your own accumulator dtype for a reduce.

        # We try multiple axis combinations even though axis should not matter.
        idx = 0
        for method in self.methods:
            for input_dtype in self.dtypes:
                x = tensor.matrix(dtype=input_dtype)
                for acc_dtype in self.dtypes:
                    # If the accumulator is a complex, the gradient of the reduce will
                    # cast the complex to the input dtype. We can't call the normal
                    # cast on a complex to a not complex as this is ambiguous.
                    if not input_dtype.startswith("complex") and acc_dtype.startswith("complex"):
                        continue

                    axis = self.axes[idx % len(self.axes)]
                    # If output_dtype would force a downcast, we expect a TypeError
                    # We always allow int/uint inputs with float/complex outputs.
                    upcasted_dtype = scalar.upcast(input_dtype, acc_dtype)
                    if acc_dtype == upcasted_dtype or (
                        input_dtype in tensor.discrete_dtypes and acc_dtype in tensor.continuous_dtypes
                    ):
                        var = getattr(x, method)(acc_dtype=acc_dtype, axis=axis)
                        assert var.owner.op.acc_dtype == acc_dtype

                        if "complex" in input_dtype:
                            continue
                        # Check that we can take the gradient
                        tensor.grad(var.sum(), x, disconnected_inputs="ignore")
                    else:
                        self.assertRaises(TypeError, getattr(x, method), acc_dtype=acc_dtype, axis=axis)

                    idx += 1
Example #15
def local_gpua_advanced_incsubtensor(node, context_name):
    context = get_context(context_name)
    # This is disabled on non-cuda contexts
    if context.kind != 'cuda':
        return None

    x, y, ilist = node.inputs

    # Gpu Ops needs both inputs to have the same dtype
    if (x.type.dtype != y.type.dtype):
        dtype = scalar.upcast(x.type.dtype, y.type.dtype)
        if x.type.dtype != dtype:
            x = tensor.cast(x, dtype)
        if y.type.dtype != dtype:
            y = tensor.cast(y, dtype)

    set_instead_of_inc = node.op.set_instead_of_inc

    compute_capability = int(context.bin_id[-2])

    if (compute_capability < 2 or x.ndim != 2 or y.ndim != 2):
        return GpuAdvancedIncSubtensor1(
            set_instead_of_inc=set_instead_of_inc)
    else:
        return GpuAdvancedIncSubtensor1_dev20(
            set_instead_of_inc=set_instead_of_inc)
Example #16
    def filter(self, data, strict=False, allow_downcast=None):
        if (isinstance(data, gpuarray.GpuArray)
                and data.typecode == self.typecode):
            # This is just to make this condition not enter the
            # following branches
            pass
        elif strict:
            if not isinstance(data, gpuarray.GpuArray):
                raise TypeError("%s expected a GpuArray object." % self, data,
                                type(data))
            if self.typecode != data.typecode:
                raise TypeError("%s expected typecode %d (dtype %s), "
                                "got %d (dtype %s)." %
                                (self, self.typecode, self.dtype,
                                 data.typecode, str(data.dtype)))
            if self.context != data.context:
                raise TypeError("data context does not match type context")
            # fallthrough to ndim check
        elif (allow_downcast or (allow_downcast is None and type(data) == float
                                 and self.dtype == config.floatX)):
            data = gpuarray.array(data,
                                  dtype=self.typecode,
                                  copy=False,
                                  ndmin=len(self.broadcastable),
                                  context=self.context)
        else:
            if not hasattr(data, 'dtype'):
                converted_data = theano._asarray(data, self.dtype)
                # We use the `values_eq` static function from TensorType
                # to handle NaN values.
                if TensorType.values_eq(np.asarray(data),
                                        converted_data,
                                        force_same_dtype=False):
                    data = converted_data
                    data = gpuarray.array(data, context=self.context)

            up_dtype = scalar.upcast(self.dtype, data.dtype)
            if up_dtype == self.dtype:
                data = gpuarray.array(data,
                                      dtype=self.dtype,
                                      copy=False,
                                      context=self.context)
            else:
                raise TypeError("%s cannot store a value of dtype %s "
                                "without risking loss of precision." %
                                (self, data.dtype))

        if self.ndim != data.ndim:
            raise TypeError(
                "Wrong number of dimensions: expected %s, "
                "got %s with shape %s." % (self.ndim, data.ndim, data.shape),
                data)
        shp = data.shape
        for i, b in enumerate(self.broadcastable):
            if b and shp[i] != 1:
                raise TypeError(
                    "Non-unit value on shape on a broadcastable"
                    " dimension.", shp, self.broadcastable)
        return data
Example #17
    def filter(self, data, strict=False, allow_downcast=None):
        if (isinstance(data, gpuarray.GpuArray)
                and data.typecode == self.typecode):
            # This is just to make this condition not enter the
            # following branches
            pass
        elif strict:
            if not isinstance(data, gpuarray.GpuArray):
                raise TypeError("%s expected a GpuArray object." % self, data,
                                type(data))
            if self.typecode != data.typecode:
                raise TypeError("%s expected typecode %d (dtype %s), "
                                "got %d (dtype %s)." %
                                (self, self.typecode, self.dtype,
                                 data.typecode, str(data.dtype)))
            if self.context != data.context:
                raise TypeError("data context does not match type context")
            # fallthrough to ndim check
        elif (allow_downcast
              or (allow_downcast is None and type(data) == float
                  and self.dtype == config.floatX)):
            data = gpuarray.array(data,
                                  dtype=self.typecode,
                                  copy=False,
                                  ndmin=len(self.broadcastable),
                                  context=self.context)
        else:
            if not hasattr(data, 'dtype'):
                # This is to convert objects that don't have a dtype
                # (like lists).  We anticipate that the type below
                # will match and we pass copy=False so it won't make a
                # second object on the GPU.
                data = gpuarray.array(data, copy=False, context=self.context)

            up_dtype = scalar.upcast(self.dtype, data.dtype)
            if up_dtype == self.dtype:
                data = gpuarray.array(data,
                                      dtype=self.dtype,
                                      copy=False,
                                      context=self.context)
            else:
                raise TypeError("%s cannot store a value of dtype %s "
                                "without risking loss of precision." %
                                (self, data.dtype))

        if self.ndim != data.ndim:
            raise TypeError(
                "Wrong number of dimensions: expected %s, "
                "got %s with shape %s." % (self.ndim, data.ndim, data.shape),
                data)
        shp = data.shape
        for i, b in enumerate(self.broadcastable):
            if b and shp[i] != 1:
                raise TypeError(
                    "Non-unit value on shape on a broadcastable"
                    " dimension.", shp, self.broadcastable)
        return data
Example #18
 def make_node(self, A, b):
     A_ = tensor.as_tensor_variable(A)
     b_ = tensor.as_tensor_variable(b)
     if A_.broadcastable != (False, False):
         raise TypeError("A must be a matrix", A_.type)
     if b_.broadcastable not in ((False, ), (True, False), (False, False)):
         raise TypeError("b must be a matrix or vector", b_.type)
     odtype = scalar.upcast(A_.dtype, b_.dtype)
     otype = tensor.TensorType(broadcastable=b_.broadcastable, dtype=odtype)
     return gof.Apply(op=self, inputs=[A_, b_], outputs=[otype()])
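make_node above fixes the output dtype of the solve-style op to the upcast of A's and b's dtypes before building the Apply node. A quick check of that rule, with numpy.promote_types assumed to agree with theano.scalar.upcast for these dtypes (solve_output_dtype is a hypothetical helper):

import numpy as np

def solve_output_dtype(a_dtype, b_dtype):
    # The output inherits the common (upcast) dtype of the two inputs.
    return np.promote_types(a_dtype, b_dtype).name

print(solve_output_dtype('float32', 'float64'))  # float64
print(solve_output_dtype('int64', 'float32'))    # float64
print(solve_output_dtype('float32', 'float32'))  # float32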
Example #19
 def make_node(self, A, b):
     A_ = tensor.as_tensor_variable(A)
     b_ = tensor.as_tensor_variable(b)
     if A_.broadcastable != (False, False):
         raise TypeError("A must be a matrix", A_.type)
     if b_.broadcastable not in ((False,), (True, False), (False, False)):
         raise TypeError("b must be a matrix or vector", b_.type)
     odtype = scalar.upcast(A_.dtype, b_.dtype)
     otype = tensor.TensorType(broadcastable=b_.broadcastable, dtype=odtype)
     return gof.Apply(op=self, inputs=[A_, b_], outputs=[otype()])
Example #20
File: type.py Project: wgapl/Theano
    def filter(self, data, strict=False, allow_downcast=None):
        if (isinstance(data, gpuarray.GpuArray) and
                data.typecode == self.typecode):
            # This is just to make this condition not enter the
            # following branches
            pass
        elif strict:
            if not isinstance(data, gpuarray.GpuArray):
                raise TypeError("%s expected a GpuArray object." % self,
                                data, type(data))
            if self.typecode != data.typecode:
                raise TypeError("%s expected typecode %d (dtype %s), "
                                "got %d (dtype %s)." %
                                (self, self.typecode, self.dtype,
                                 data.typecode, str(data.dtype)))
            if self.context != data.context:
                raise TypeError("data context does not match type context")
            # fallthrough to ndim check
        elif (allow_downcast or
              (allow_downcast is None and
               type(data) == float and
               self.dtype == config.floatX)):
            data = gpuarray.array(data, dtype=self.typecode, copy=False,
                                  ndmin=len(self.broadcastable),
                                  context=self.context)
        else:
            if not hasattr(data, 'dtype'):
                converted_data = theano._asarray(data, self.dtype)
                # We use the `values_eq` static function from TensorType
                # to handle NaN values.
                if TensorType.values_eq(numpy.asarray(data),
                                        converted_data,
                                        force_same_dtype=False):
                    data = converted_data
                    data = gpuarray.array(data, context=self.context)

            up_dtype = scalar.upcast(self.dtype, data.dtype)
            if up_dtype == self.dtype:
                data = gpuarray.array(data, dtype=self.dtype, copy=False,
                                      context=self.context)
            else:
                raise TypeError("%s cannot store a value of dtype %s "
                                "without risking loss of precision." %
                                (self, data.dtype))

        if self.ndim != data.ndim:
            raise TypeError("Wrong number of dimensions: expected %s, "
                            "got %s with shape %s." % (self.ndim, data.ndim,
                                                       data.shape), data)
        shp = data.shape
        for i, b in enumerate(self.broadcastable):
            if b and shp[i] != 1:
                raise TypeError("Non-unit value on shape on a broadcastable"
                                " dimension.", shp, self.broadcastable)
        return data
Example #21
 def make_node(self, a, val):
     a = tensor.as_tensor_variable(a)
     val = tensor.as_tensor_variable(val)
     if a.ndim < 2:
         raise TypeError("%s: first parameter must have at least" " two dimensions" % self.__class__.__name__)
     elif val.ndim != 0:
         raise TypeError("%s: second parameter must be a scalar" % self.__class__.__name__)
     val = tensor.cast(val, dtype=scalar.upcast(a.dtype, val.dtype))
     if val.dtype != a.dtype:
         raise TypeError("%s: type of second parameter must be the same as" " the first's" % self.__class__.__name__)
     return gof.Apply(self, [a, val], [a.type()])
Example #22
    def make_node(self, x, y, p):
        x = tensor.as_tensor_variable(x)
        y = tensor.as_tensor_variable(y)

        if not _is_sparse_variable(p):
            raise TypeError(p)

        #TODO: use it.
        dtype_out = scalar.upcast(x.type.dtype, y.type.dtype, p.type.dtype)

        return gof.Apply(self, [x, y, p], [p.type()])
Example #23
    def make_node(self, x, y, pattern):
        if (_is_sparse_variable(x) or _is_sparse_variable(y)
                or _is_sparse_variable(pattern)):
            raise TypeError(x)

        x = tensor.as_tensor_variable(x)
        y = tensor.as_tensor_variable(y)
        pattern = tensor.as_tensor_variable(pattern)

        dtype_out = scalar.upcast(x.type.dtype, y.type.dtype,
                                  pattern.type.dtype)
        blas_out = scalar.upcast(x.type.dtype, y.type.dtype)

        # We call blas ?axpy function that take only param of the same type
        x = tensor.cast(x, blas_out)
        y = tensor.cast(y, blas_out)

        return gof.Apply(
            self, [x, y, pattern],
            [tensor.tensor(dtype=dtype_out, broadcastable=(False, False))])
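Here two different upcasts are computed: the output dtype folds in the pattern's dtype, while the dtype used for the BLAS ?axpy call only considers x and y, since the BLAS routine needs both operands in one type. A sketch of that plumbing (plan_dtypes is a hypothetical helper; numpy.promote_types stands in for scalar.upcast):

import numpy as np

def plan_dtypes(x_dtype, y_dtype, pattern_dtype):
    # Output dtype: upcast of all three operands.
    out_dtype = np.promote_types(np.promote_types(x_dtype, y_dtype),
                                 pattern_dtype).name
    # BLAS dtype: upcast of x and y only; ?axpy takes a single dtype.
    blas_dtype = np.promote_types(x_dtype, y_dtype).name
    return out_dtype, blas_dtype

print(plan_dtypes('float32', 'float64', 'float32'))  # ('float64', 'float64')
print(plan_dtypes('float32', 'float32', 'float64'))  # ('float64', 'float32')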
Example #24
    def make_node(self, x, y, p):
        x = tensor.as_tensor_variable(x)
        y = tensor.as_tensor_variable(y)

        if not _is_sparse_variable(p):
            raise TypeError(p)

        #TODO: use it.
        dtype_out = scalar.upcast(x.type.dtype, y.type.dtype, p.type.dtype)

        return gof.Apply(self, [x, y, p], [p.type()])
Example #25
    def filter(self, data, strict=False, allow_downcast=None):
        if (isinstance(data, gpuarray.GpuArray) and
                data.typecode == self.typecode):
            # This is just to make this condition not enter the
            # following branches
            pass
        elif strict:
            if not isinstance(data, gpuarray.GpuArray):
                raise TypeError("%s expected a GpuArray object." % self,
                                data, type(data))
            if self.typecode != data.typecode:
                raise TypeError("%s expected typecode %d (dtype %s), "
                                "got %d (dtype %s)." %
                                (self, self.typecode, self.dtype,
                                 data.typecode, str(data.dtype)))
            if self.context != data.context:
                raise TypeError("data context does not match type context")
            # fallthrough to ndim check
        elif (allow_downcast or
              (allow_downcast is None and
               type(data) == float and
               self.dtype == config.floatX)):
            data = gpuarray.array(data, dtype=self.typecode, copy=False,
                                  ndmin=len(self.broadcastable),
                                  context=self.context)
        else:
            if not hasattr(data, 'dtype'):
                # This is to convert objects that don't have a dtype
                # (like lists).  We anticipate that the type below
                # will match and we pass copy=False so it won't make a
                # second object on the GPU.
                data = gpuarray.array(data, copy=False, context=self.context)

            up_dtype = scalar.upcast(self.dtype, data.dtype)
            if up_dtype == self.dtype:
                data = gpuarray.array(data, dtype=self.dtype, copy=False,
                                      context=self.context)
            else:
                raise TypeError("%s cannot store a value of dtype %s "
                                "without risking loss of precision." %
                                (self, data.dtype))

        if self.ndim != data.ndim:
            raise TypeError("Wrong number of dimensions: expected %s, "
                            "got %s with shape %s." % (self.ndim, data.ndim,
                                                       data.shape), data)
        shp = data.shape
        for i, b in enumerate(self.broadcastable):
            if b and shp[i] != 1:
                raise TypeError("Non-unit value on shape on a broadcastable"
                                " dimension.", shp, self.broadcastable)
        return data
Example #26
    def make_node(self, x, y, p_data, p_ind, p_ptr, p_ncols):
        x = tensor.as_tensor_variable(x)
        y = tensor.as_tensor_variable(y)
        p_data = tensor.as_tensor_variable(p_data)
        p_ind = tensor.as_tensor_variable(p_ind)
        p_ptr = tensor.as_tensor_variable(p_ptr)
        p_ncols = tensor.as_tensor_variable(p_ncols)

        assert p_ncols.dtype == 'int32'

        dtype_out = scalar.upcast(x.type.dtype, y.type.dtype,
                                  p_data.type.dtype)
        dot_out = scalar.upcast(x.type.dtype, y.type.dtype)

        # We call blas ?dot function that take only param of the same type
        x = tensor.cast(x, dot_out)
        y = tensor.cast(y, dot_out)

        return gof.Apply(self, [x, y, p_data, p_ind, p_ptr, p_ncols], [
            tensor.tensor(dtype=dtype_out, broadcastable=(False,)),
            tensor.tensor(dtype=p_ind.type.dtype, broadcastable=(False,)),
            tensor.tensor(dtype=p_ptr.type.dtype, broadcastable=(False,))
        ])
Example #27
    def make_node(self, x, y, p_data, p_ind, p_ptr, p_ncols):
        x = tensor.as_tensor_variable(x)
        y = tensor.as_tensor_variable(y)
        p_data = tensor.as_tensor_variable(p_data)
        p_ind = tensor.as_tensor_variable(p_ind)
        p_ptr = tensor.as_tensor_variable(p_ptr)
        p_ncols = tensor.as_tensor_variable(p_ncols)

        assert p_ncols.dtype == 'int32'

        dtype_out = scalar.upcast(x.type.dtype, y.type.dtype,
                                  p_data.type.dtype)
        dot_out = scalar.upcast(x.type.dtype, y.type.dtype)

        # We call blas ?dot function that take only param of the same type
        x = tensor.cast(x, dot_out)
        y = tensor.cast(y, dot_out)

        return gof.Apply(self, [x, y, p_data, p_ind, p_ptr, p_ncols], [
            tensor.tensor(dtype=dtype_out, broadcastable=(False, )),
            tensor.tensor(dtype=p_ind.type.dtype, broadcastable=(False, )),
            tensor.tensor(dtype=p_ptr.type.dtype, broadcastable=(False, ))
        ])
Example #28
 def make_node(self, a, val):
     a = tensor.as_tensor_variable(a)
     val = tensor.as_tensor_variable(val)
     if a.ndim < 2:
         raise TypeError('%s: first parameter must have at least'
                         ' two dimensions' % self.__class__.__name__)
     elif val.ndim != 0:
         raise TypeError('%s: second parameter must be a scalar'
                         % self.__class__.__name__)
     val = tensor.cast(val, dtype=scalar.upcast(a.dtype, val.dtype))
     if val.dtype != a.dtype:
         raise TypeError('%s: type of second parameter must be the same as'
                         ' the first\'s' % self.__class__.__name__)
     return gof.Apply(self, [a, val], [a.type()])
Example #29
 def make_node(self, a, val):
     a = basic.as_tensor_variable(a)
     val = basic.as_tensor_variable(val)
     if a.ndim < 2:
         raise TypeError("%s: first parameter must have at least"
                         " two dimensions" % self.__class__.__name__)
     elif val.ndim != 0:
         raise TypeError("%s: second parameter must be a scalar" %
                         self.__class__.__name__)
     val = basic.cast(val, dtype=upcast(a.dtype, val.dtype))
     if val.dtype != a.dtype:
         raise TypeError("%s: type of second parameter must be the same as"
                         " the first's" % self.__class__.__name__)
     return Apply(self, [a, val], [a.type()])
Example #30
    def make_node(self, x, y, pattern):
        if (_is_sparse_variable(x) or _is_sparse_variable(y)
                or _is_sparse_variable(pattern)):
            raise TypeError(x)

        x = tensor.as_tensor_variable(x)
        y = tensor.as_tensor_variable(y)
        pattern = tensor.as_tensor_variable(pattern)

        dtype_out = scalar.upcast(x.type.dtype, y.type.dtype,
                                  pattern.type.dtype)

        return gof.Apply(
            self, [x, y, pattern],
            [tensor.tensor(dtype=dtype_out, broadcastable=(False, False))])
Example #31
    def make_node(self, a, val, offset):
        a = tensor.as_tensor_variable(a)
        val = tensor.as_tensor_variable(val)
        offset = tensor.as_tensor_variable(offset)
        if a.ndim != 2:
            raise TypeError("%s: first parameter must have exactly" " two dimensions" % self.__class__.__name__)
        elif val.ndim != 0:
            raise TypeError("%s: second parameter must be a scalar" % self.__class__.__name__)
        elif offset.ndim != 0:
            raise TypeError("%s: third parameter must be a scalar" % self.__class__.__name__)
        val = tensor.cast(val, dtype=scalar.upcast(a.dtype, val.dtype))
        if val.dtype != a.dtype:
            raise TypeError("%s: type of second parameter must be the same" " as the first's" % self.__class__.__name__)
        elif offset.dtype not in theano.tensor.integer_dtypes:
            raise TypeError(
                "%s: type of third parameter must be as integer"
                " use theano.tensor.cast( input, 'int32/int64')" % self.__class__.__name__
            )

        return gof.Apply(self, [a, val, offset], [a.type()])
Example #32
    def test_prod_custom_acc_dtype(self):
        """
        Test the ability to provide your own acc_dtype for a prod.
        """
        # We try multiple axis combinations even though axis should not matter.
        axes = [None, 0, 1, [], [0], [1], [0, 1]]
        idx = 0
        for input_dtype in imap(str, theano.scalar.all_types):
            x = tensor.matrix(dtype=input_dtype)
            for acc_dtype in imap(str, theano.scalar.all_types):
                axis = axes[idx % len(axes)]
                # If acc_dtype would force a downcast, we expect a TypeError
                # We always allow int/uint inputs with float/complex outputs.
                upcasted_dtype = scalar.upcast(input_dtype, acc_dtype)
                if (acc_dtype == upcasted_dtype
                        or (input_dtype in tensor.discrete_dtypes
                            and acc_dtype in tensor.continuous_dtypes)):
                    prod_var = x.prod(acc_dtype=acc_dtype, axis=axis)
                    assert prod_var.owner.op.acc_dtype == acc_dtype

                    if (acc_dtype.startswith('complex')
                            and input_dtype != acc_dtype):
                        continue
                    f = theano.function([x], prod_var)
                    data = numpy.random.rand(3, 4) * 10
                    data = data.astype(input_dtype)
                    f(data)

                    if "complex" in acc_dtype:
                        continue
                    # Check that we can take the gradient
                    tensor.grad(prod_var.sum(),
                                x,
                                disconnected_inputs='ignore')
                else:
                    self.assertRaises(TypeError,
                                      x.prod,
                                      acc_dtype=acc_dtype,
                                      axis=axis)

                idx += 1
Example #33
    def test_sum_custom_dtype(self):
        """
        Test the ability to provide your own output dtype for a sum.
        """
        # We try multiple axis combinations even though axis should not matter.
        axes = [None, 0, 1, [0], [1], [0, 1]]
        idx = 0
        for input_dtype in imap(str, theano.scalar.all_types):
            x = tensor.matrix(dtype=input_dtype)
            for output_dtype in imap(str, theano.scalar.all_types):
                # If the output is a complex, the gradient of the sum will
                # cast the complex to the input dtype. We can't call the normal
                # cast on a complex to a not complex as this is ambiguous.
                if (not input_dtype.startswith('complex')
                        and output_dtype.startswith('complex')):
                    continue

                axis = axes[idx % len(axes)]
                # If output_dtype would force a downcast, we expect a TypeError
                # We always allow int/uint inputs with float/complex outputs.
                upcasted_dtype = scalar.upcast(input_dtype, output_dtype)
                if (output_dtype == upcasted_dtype
                        or (input_dtype in tensor.discrete_dtypes
                            and output_dtype in tensor.continuous_dtypes)):
                    sum_var = x.sum(dtype=output_dtype, axis=axis)
                    assert sum_var.dtype == output_dtype

                    if "complex" in input_dtype:
                        continue
                    # Check that we can take the gradient
                    grad_var = tensor.grad(sum_var.sum(),
                                           x,
                                           disconnected_inputs='ignore')
                else:
                    self.assertRaises(TypeError,
                                      x.sum,
                                      dtype=output_dtype,
                                      axis=axis)

                idx += 1
Example #34
    def test_reduce_custom_acc_dtype(self):
        """
        Test the ability to provide your own accumulator dtype for a reduce.
        """
        # We try multiple axis combinations even though axis should not matter.
        idx = 0
        for method in self.methods:
            for input_dtype in self.dtypes:
                x = tensor.matrix(dtype=input_dtype)
                for acc_dtype in self.dtypes:
                    # If the accumulator is a complex, the gradient of the reduce will
                    # cast the complex to the input dtype. We can't call the normal
                    # cast on a complex to a not complex as this is ambiguous.
                    if (not input_dtype.startswith('complex') and
                        acc_dtype.startswith('complex')):
                        continue

                    axis = self.axes[idx % len(self.axes)]
                    # If output_dtype would force a downcast, we expect a TypeError
                    # We always allow int/uint inputs with float/complex outputs.
                    upcasted_dtype = scalar.upcast(input_dtype, acc_dtype)
                    if (acc_dtype == upcasted_dtype or
                        (input_dtype in tensor.discrete_dtypes and
                            acc_dtype in tensor.continuous_dtypes)
                        ):
                        var = getattr(x, method)(acc_dtype=acc_dtype,
                                                 axis=axis)
                        assert var.owner.op.acc_dtype == acc_dtype

                        if "complex" in input_dtype:
                            continue
                        # Check that we can take the gradient
                        tensor.grad(var.sum(), x,
                                    disconnected_inputs='ignore')
                    else:
                        self.assertRaises(TypeError,
                                          getattr(x, method),
                                          acc_dtype=acc_dtype, axis=axis)

                    idx += 1
Example #35
    def test_prod_custom_acc_dtype(self):
        """
        Test the ability to provide your own acc_dtype for a prod.
        """
        # We try multiple axis combinations even though axis should not matter.
        axes = [None, 0, 1, [], [0], [1], [0, 1]]
        idx = 0
        for input_dtype in imap(str, theano.scalar.all_types):
            x = tensor.matrix(dtype=input_dtype)
            for acc_dtype in imap(str, theano.scalar.all_types):
                axis = axes[idx % len(axes)]
                # If acc_dtype would force a downcast, we expect a TypeError
                # We always allow int/uint inputs with float/complex outputs.
                upcasted_dtype = scalar.upcast(input_dtype, acc_dtype)
                if (acc_dtype == upcasted_dtype or
                        (input_dtype in tensor.discrete_dtypes and
                            acc_dtype in tensor.continuous_dtypes)
                        ):
                    prod_var = x.prod(acc_dtype=acc_dtype, axis=axis)
                    assert prod_var.owner.op.acc_dtype == acc_dtype

                    if (acc_dtype.startswith('complex') and
                        input_dtype != acc_dtype):
                        continue
                    f = theano.function([x], prod_var)
                    data = numpy.random.rand(3, 4) * 10
                    data = data.astype(input_dtype)
                    f(data)

                    if "complex" in acc_dtype:
                        continue
                    # Check that we can take the gradient
                    tensor.grad(prod_var.sum(), x,
                                disconnected_inputs='ignore')
                else:
                    self.assertRaises(TypeError,
                            x.prod, acc_dtype=acc_dtype, axis=axis)

                idx += 1
Example #36
    def filter_inplace(self,
                       data,
                       old_data,
                       strict=False,
                       allow_downcast=None):
        if strict or allow_downcast or isinstance(data, cuda.CudaNdarray):
            return cuda.filter(data, self.broadcastable, strict, old_data)

        else:  # (not strict) and (not allow_downcast)
            # Check if data.dtype can be accurately cast to self.dtype
            if isinstance(data, numpy.ndarray):
                up_dtype = scal.upcast(self.dtype, data.dtype)
                if up_dtype == self.dtype:
                    return cuda.filter(data, self.broadcastable, strict,
                                       old_data)
                else:
                    raise TypeError(
                        '%s, with dtype %s, cannot store a value of '
                        'dtype %s without risking loss of precision.'
                        'If you do not mind, please cast your data to %s.' %
                        (self, self.dtype, data.dtype, self.dtype), data)
            else:
                converted_data = theano._asarray(data, self.dtype)

                if (allow_downcast is None and type(data) is float
                        and self.dtype == theano.config.floatX):
                    return cuda.filter(converted_data, self.broadcastable,
                                       strict, old_data)
                elif numpy.all(data == converted_data):
                    return cuda.filter(converted_data, self.broadcastable,
                                       strict, old_data)
                else:
                    raise TypeError(
                        '%s, with dtype %s, cannot store accurately value %s, '
                        'it would be represented as %s. If you do not mind, '
                        'you can cast your data to %s.' %
                        (self, self.dtype, data, converted_data, self.dtype),
                        data)
Example #37
    def make_node(self, a, val, offset):
        a = basic.as_tensor_variable(a)
        val = basic.as_tensor_variable(val)
        offset = basic.as_tensor_variable(offset)
        if a.ndim != 2:
            raise TypeError("%s: first parameter must have exactly"
                            " two dimensions" % self.__class__.__name__)
        elif val.ndim != 0:
            raise TypeError("%s: second parameter must be a scalar" %
                            self.__class__.__name__)
        elif offset.ndim != 0:
            raise TypeError("%s: third parameter must be a scalar" %
                            self.__class__.__name__)
        val = basic.cast(val, dtype=upcast(a.dtype, val.dtype))
        if val.dtype != a.dtype:
            raise TypeError("%s: type of second parameter must be the same"
                            " as the first's" % self.__class__.__name__)
        elif offset.dtype not in theano.tensor.integer_dtypes:
            raise TypeError("%s: type of third parameter must be as integer"
                            " use theano.tensor.cast( input, 'int32/int64')" %
                            self.__class__.__name__)

        return Apply(self, [a, val, offset], [a.type()])
Example #38
    def make_node(self, a, val, offset):
        a = tensor.as_tensor_variable(a)
        val = tensor.as_tensor_variable(val)
        offset = tensor.as_tensor_variable(offset)
        if a.ndim != 2:
            raise TypeError('%s: first parameter must have exactly'
                            ' two dimensions' % self.__class__.__name__)
        elif val.ndim != 0:
            raise TypeError('%s: second parameter must be a scalar'
                            % self.__class__.__name__)
        elif offset.ndim != 0:
            raise TypeError('%s: third parameter must be a scalar'
                            % self.__class__.__name__)
        val = tensor.cast(val, dtype=scalar.upcast(a.dtype, val.dtype))
        if val.dtype != a.dtype:
            raise TypeError('%s: type of second parameter must be the same'
                            ' as the first\'s' % self.__class__.__name__)
        elif offset.dtype[:3] != 'int':
            raise TypeError('%s: type of third parameter must be as integer'
                            ' use theano.tensor.cast( input, \'int32/int64\')'
                            % self.__class__.__name__)

        return gof.Apply(self, [a, val, offset], [a.type()])
Example #39
File: opt.py Project: npinto/Theano
def local_usmm_csx(node):
    """ usmm -> usmm_csc_dense """
    if node.op == usmm:
        alpha, x, y, z = node.inputs

        x_is_sparse_variable = _is_sparse_variable(x)
        y_is_sparse_variable = _is_sparse_variable(y)

        if x_is_sparse_variable and not y_is_sparse_variable:
            if x.type.format == 'csc':
                x_val, x_ind, x_ptr, x_shape = csm_properties(x)
                x_nsparse = x_shape[0]
                dtype_out = scalar.upcast(alpha.type.dtype, x.type.dtype,
                                          y.type.dtype, z.type.dtype)
                if dtype_out not in ('float32', 'float64'):
                    return False
                # Sparse cast is not implemented.
                if y.type.dtype != dtype_out:
                    return False

                return [usmm_csc_dense(alpha, x_val, x_ind, x_ptr,
                                       x_nsparse, y, z)]
    return False
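The optimization above only fires when the upcast of all four operand dtypes is float32 or float64 (the only dtypes the BLAS-backed kernel handles) and y already has that dtype, because a sparse cast is not implemented. A small sketch of that applicability test (usmm_csc_dense_applicable is a hypothetical helper; numpy.result_type stands in for scalar.upcast):

import numpy as np

def usmm_csc_dense_applicable(alpha_dtype, x_dtype, y_dtype, z_dtype):
    dtype_out = np.result_type(alpha_dtype, x_dtype, y_dtype, z_dtype).name
    if dtype_out not in ('float32', 'float64'):
        return False  # only real float BLAS kernels are available
    if y_dtype != dtype_out:
        return False  # y would need a cast, and sparse cast is not implemented
    return True

print(usmm_csc_dense_applicable('float32', 'float32', 'float32', 'float32'))  # True
print(usmm_csc_dense_applicable('float64', 'float32', 'float32', 'float32'))  # False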
Example #40
    def c_code(self, node, name, inputs, outputs, sub):
        x, y, p_data, p_ind, p_ptr, p_ncols = inputs
        z_data, z_ind, z_ptr = outputs
        if node.inputs[0].type.dtype in ('complex64', 'complex128'):
            raise NotImplementedError('Complex types are not supported for x')
        if node.inputs[1].type.dtype in ('complex64', 'complex128'):
            raise NotImplementedError('Complex types are not supported for y')
        if node.inputs[2].type.dtype in ('complex64', 'complex128'):
            raise NotImplementedError(
                'Complex types are not supported for pattern')

        dot_out = scalar.upcast(node.inputs[0].type.dtype,
                                node.inputs[1].type.dtype)

        if dot_out == "float32":
            conv_type = "float"
            cdot = "sdot_"
        else:
            conv_type = "double"
            cdot = "ddot_"

        # retrieve dtype number
        typenum_x = node.inputs[0].type.dtype_specs()[-1]
        typenum_y = node.inputs[1].type.dtype_specs()[-1]
        typenum_p = node.inputs[2].type.dtype_specs()[-1]
        typenum_zd = tensor.TensorType(node.outputs[0].dtype,
                                       []).dtype_specs()[-1]
        typenum_zi = tensor.TensorType(node.outputs[1].dtype,
                                       []).dtype_specs()[-1]
        typenum_zp = tensor.TensorType(node.outputs[2].dtype,
                                       []).dtype_specs()[-1]

        rval = """
        if (%(x)s->nd != 2) {
PyErr_SetString(PyExc_NotImplementedError, "rank(x) != 2"); %(fail)s;}
        if (%(y)s->nd != 2) {
PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 2"); %(fail)s;}

        if (%(x)s->descr->type_num != %(typenum_x)s) {
            PyErr_SetString(PyExc_NotImplementedError,
                            "Invalid type for x");
            %(fail)s;}

        if (%(y)s->descr->type_num != %(typenum_y)s) {
            PyErr_SetString(PyExc_NotImplementedError,
                            "Invalid type for y");
            %(fail)s;}

        if (%(p_data)s->descr->type_num != %(typenum_p)s) {
            PyErr_SetString(PyExc_NotImplementedError,
                            "Invalid type for pattern");
            %(fail)s;}

        if (%(x)s->dimensions[1] != %(y)s->dimensions[1]) {
            PyErr_SetString(PyExc_NotImplementedError,
              "x's number of columns doesn't match y's rows! Note: sampling_dot is different from dot because y is assumed to be transposed.");
            %(fail)s;}

        if (%(y)s->dimensions[0] != ((npy_int32 *)%(p_ncols)s->data)[0] ||
            %(x)s->dimensions[0] != (%(p_ptr)s->dimensions[0] - 1))
        {PyErr_SetString(PyExc_NotImplementedError,
        "The dimension of the pattern and the output must match"); %(fail)s;}

        // Allocate output
        if (!%(z_data)s
            || (%(z_data)s->dimensions[0] != %(p_data)s->dimensions[0])
            || (%(z_data)s->descr->type_num != %(typenum_zd)s)) {
            {Py_XDECREF(%(z_data)s);}
            npy_intp dims[] = {0};
            dims[0] = %(p_data)s->dimensions[0];
            %(z_data)s = (PyArrayObject*) PyArray_SimpleNew(1, dims,
                                                            %(typenum_zd)s);
        }
        if (!%(z_ind)s
            || (%(z_ind)s->dimensions[0] != %(p_ind)s->dimensions[0])
            || (%(z_ind)s->descr->type_num != %(typenum_zi)s)) {
            {Py_XDECREF(%(z_ind)s);}
            npy_intp dims[] = {0};
            dims[0] = %(p_ind)s->dimensions[0];
            %(z_ind)s = (PyArrayObject*) PyArray_SimpleNew(1, dims,
                                                           %(typenum_zi)s);
        }
        if (!%(z_ptr)s
            || (%(z_ptr)s->dimensions[0] != %(p_ptr)s->dimensions[0])
            || (%(z_ptr)s->descr->type_num != %(typenum_zp)s)) {
            {Py_XDECREF(%(z_ptr)s);}
            npy_intp dims[] = {0};
            dims[0] = %(p_ptr)s->dimensions[0];
            %(z_ptr)s = (PyArrayObject*) PyArray_SimpleNew(1, dims,
                                                           %(typenum_zp)s);
        }

        {
            // Product of MxK and NxK, output MxN
            npy_intp M = %(x)s->dimensions[0];
            npy_intp N = %(y)s->dimensions[0];
            npy_intp K = %(y)s->dimensions[1];

            // pointers to access actual data in the arrays passed as params.
            const dtype_%(x)s* __restrict__ Dx = (dtype_%(x)s*)%(x)s->data;
            const dtype_%(y)s* __restrict__ Dy = (dtype_%(y)s*)%(y)s->data;
            const dtype_%(p_data)s* __restrict__ Dpd = (dtype_%(p_data)s*)%(p_data)s->data;
            const dtype_%(p_ind)s* __restrict__ Dpi = (dtype_%(p_ind)s*)%(p_ind)s->data;
            const dtype_%(p_ptr)s* __restrict__ Dpp = (dtype_%(p_ptr)s*)%(p_ptr)s->data;
            dtype_%(z_data)s* __restrict__ Dzd = (dtype_%(z_data)s*)%(z_data)s->data;
            dtype_%(z_ind)s* __restrict__ Dzi = (dtype_%(z_ind)s*)%(z_ind)s->data;
            dtype_%(z_ptr)s* __restrict__ Dzp = (dtype_%(z_ptr)s*)%(z_ptr)s->data;
            
            const npy_intp Sdx = %(x)s->strides[1]/%(x)s->descr->elsize;
            const npy_intp Sdy = %(y)s->strides[1]/%(y)s->descr->elsize;
            const npy_intp Sdpd = %(p_data)s->strides[0] / %(p_data)s->descr->elsize;
            const npy_intp Sdpi = %(p_ind)s->strides[0] / %(p_ind)s->descr->elsize;
            const npy_intp Sdpp = %(p_ptr)s->strides[0] / %(p_ptr)s->descr->elsize;
            const npy_intp Sdzd = %(z_data)s->strides[0] / %(z_data)s->descr->elsize;
            const npy_intp Sdzi = %(z_ind)s->strides[0] / %(z_ind)s->descr->elsize;
            const npy_intp Sdzp = %(z_ptr)s->strides[0] / %(z_ptr)s->descr->elsize;
            
            memcpy(Dzi, Dpi, %(p_ind)s->dimensions[0]*sizeof(dtype_%(p_ind)s));
            memcpy(Dzp, Dpp, %(p_ptr)s->dimensions[0]*sizeof(dtype_%(p_ptr)s));
            
            for (npy_int32 m = 0; m < M; ++m) {
                for (npy_int32 n_idx = Dpp[m * Sdpp]; n_idx < Dpp[(m+1)*Sdpp]; ++n_idx) {
                    const npy_int32 n = Dpi[n_idx * Sdpi]; // column index of the nonzero pattern entry in row m
                    
                    const dtype_%(x)s* x_row = (dtype_%(x)s*)(%(x)s->data + %(x)s->strides[0] * m);
                    
                    const dtype_%(y)s* y_col = (dtype_%(y)s*)(%(y)s->data + %(y)s->strides[0] * n);
                    
                    Dzd[n_idx * Sdzd] = Dpd[n_idx * Sdpd] * %(cdot)s((int*)&K, (const %(conv_type)s*)x_row, (int*)&Sdx, (const %(conv_type)s*)y_col, (int*)&Sdy);
                }
            }
        }
        """ % dict(locals(), **sub)

        return rval
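A hypothetical helper (our illustration, not part of the Op above) mirroring the BLAS dispatch in this c_code: the dot symbol and C scalar type follow the upcast of the x and y dtypes.

from theano import scalar

def blas_dot_symbol(x_dtype, y_dtype):
    # float32 * float32 stays in single precision; any other dtype pair
    # that reaches this code path is handled in double precision.
    dot_out = scalar.upcast(x_dtype, y_dtype)
    if dot_out == "float32":
        return "sdot_", "float"
    return "ddot_", "double"

assert blas_dot_symbol('float32', 'float32') == ('sdot_', 'float')
assert blas_dot_symbol('float32', 'float64') == ('ddot_', 'double')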
Example #41
0
    def filter(self, data, strict=False, allow_downcast=None):
        """Convert `data` to something which can be associated to a
        `TensorVariable`.

        This function is not meant to be called in user code.  It is for
        `Linker` instances to use when running a compiled graph.
        """
        # Explicit error message when one accidentally uses a Variable as
        # input (typical mistake, especially with shared variables).
        if isinstance(data, Variable):
            raise TypeError(
                    'Expected an array-like object, but found a Variable: '
                    'maybe you are trying to call a function on a (possibly '
                    'shared) variable instead of a numeric array?')

        if ((type(data) is numpy.ndarray)
                and (data.dtype == self.numpy_dtype)):
            if data.dtype.num != self.numpy_dtype.num:
                data = theano._asarray(data, dtype=self.dtype)
            # -- now fall through to ndim check
        elif((type(data) is numpy.memmap)
                and (data.dtype == self.numpy_dtype)):
            # numpy.memmap is a "safe" subclass of ndarray,
            # so we can use it wherever we expect a base ndarray.
            # however, casting it would defeat the purpose of not
            # loading the whole data into memory
            pass
        elif strict:
            # If any of the two conditions above was not met,
            # we raise a meaningful TypeError.
            if not (type(data) is numpy.ndarray):
                raise TypeError("%s expected a ndarray object." % self,
                        data, type(data))
            if data.dtype != self.numpy_dtype:
                raise TypeError(("%s expected a ndarray object with "
                        "dtype = %s (got %s).") % (
                            self, self.numpy_dtype, data.dtype))
            assert False, "This point should never be reached."
        else:
            if allow_downcast:
                # Convert to self.dtype, regardless of the type of data
                data = theano._asarray(data, dtype=self.dtype)
                # TODO: consider padding the shape with ones to make it
                # consistent with self.broadcastable (e.g. vector -> row)
            else:
                if isinstance(data, numpy.ndarray):
                    # Check if self.dtype can accurately represent data
                    # (do not try to convert the data)
                    up_dtype = scal.upcast(self.dtype, data.dtype)
                    if up_dtype == self.dtype:
                        # Bug in the following line when data is a
                        # scalar array, see
                        # http://projects.scipy.org/numpy/ticket/1611
                        # data = data.astype(self.dtype)
                        data = theano._asarray(data, dtype=self.dtype)
                    if up_dtype != self.dtype:
                        err_msg = (
                            '%s cannot store a value of dtype %s without '
                            'risking loss of precision. If you do not mind '
                            'this loss, you can: '
                            '1) explicitly cast your data to %s, or '
                            '2) set "allow_input_downcast=True" when calling '
                            '"function".'
                            % (self, data.dtype, self.dtype))
                        raise TypeError(err_msg, data)
                elif (allow_downcast is None and
                        type(data) is float and
                        self.dtype == theano.config.floatX):
                    # Special case where we allow downcasting of Python float
                    # literals to floatX, even when floatX=='float32'
                    data = theano._asarray(data, self.dtype)
                else:
                    # data has to be converted.
                    # Check that this conversion is lossless
                    converted_data = theano._asarray(data, self.dtype)
                    # We use the `values_eq` static function from TensorType
                    # to handle NaN values.
                    if TensorType.values_eq(numpy.asarray(data),
                                            converted_data,
                                            force_same_dtype=False):
                        data = converted_data
                    else:
                        # Do not print an overly long description of data
                        # (numpy truncates the repr of an ndarray, but other
                        # types may not)
                        str_data = str(data)
                        if len(str_data) > 80:
                            str_data = str_data[:75] + '(...)'

                        err_msg = (
                            '%s cannot store accurately value %s, '
                            'it would be represented as %s. '
                            'If you do not mind this precision loss, you can: '
                            '1) explicitly convert your data to a numpy array '
                            'of dtype %s, or '
                            '2) set "allow_input_downcast=True" when calling '
                            '"function".'
                            % (self, data, converted_data, self.dtype))
                        raise TypeError(err_msg, data)

        if self.ndim != data.ndim:
            raise TypeError("Wrong number of dimensions: expected %s,"
                            " got %s with shape %s." % (self.ndim, data.ndim,
                                                        data.shape))
        if not data.flags.aligned:
            try:
                msg = "object buffer" + str(data.data)
            except AttributeError:
                msg = ""
            raise TypeError("The numpy.ndarray object is not aligned."
                            " Theano C code does not support that.",
                            msg,
                            "object shape", data.shape,
                            "object strides", data.strides,
                            "object dtype", data.dtype)

        i = 0
        for b in self.broadcastable:
            if b and data.shape[i] != 1:
                raise TypeError("Non-unit value on shape on a broadcastable"
                                " dimension.", data.shape, self.broadcastable)
            i += 1
        if (self.filter_checks_isfinite and
            not numpy.all(numpy.isfinite(data))):
            raise ValueError("non-finite elements not allowed")
        return data
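A sketch of the behaviour encoded above (assuming a standard Theano install): filter() refuses a silent float64 -> float32 downcast unless allow_downcast is requested.

import numpy
import theano.tensor as tensor

ttype = tensor.TensorType('float32', broadcastable=(False,))
data64 = numpy.zeros(3, dtype='float64')

try:
    ttype.filter(data64)  # upcast('float32', 'float64') != 'float32' -> rejected
except TypeError as e:
    print('rejected:', e.args[0])

ok = ttype.filter(data64, allow_downcast=True)  # explicit downcast is accepted
print(ok.dtype)  # float32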
Example #42
0
class SamplingDotDenseGrad(gof.Op):
    """
    Optimized gradient of SamplingDot when the pattern P is a dense matrix.
    """
    def __eq__(self, other):
        return type(self) == type(other)

    def __hash__(self):
        return hash(type(self))

    def __str__(self):
        return 'SamplingDotGrad{Dense}'

    def make_node(self, x, y, pattern):
        if (_is_sparse_variable(x) or _is_sparse_variable(y)
                or _is_sparse_variable(pattern)):
            raise TypeError(x)

        x = tensor.as_tensor_variable(x)
        y = tensor.as_tensor_variable(y)
        pattern = tensor.as_tensor_variable(pattern)

        dtype_out = scalar.upcast(x.type.dtype, y.type.dtype,
                                  pattern.type.dtype)
        blas_out = scalar.upcast(x.type.dtype, y.type.dtype)

        # We call the BLAS ?axpy routine, which requires both parameters
        # to have the same dtype.
        x = tensor.cast(x, blas_out)
        y = tensor.cast(y, blas_out)

        return gof.Apply(
            self, [x, y, pattern],
            [tensor.tensor(dtype=dtype_out, broadcastable=(False, False))])

    #def perform(self, node, (x, y, pattern), (out,)):
    #    raise NotImplementedError()

    def c_support_code(self):
        return blas.blas_header_text()

    def c_libraries(self):
        return blas.ldflags()

    def c_compile_args(self):
        return blas.ldflags(libs=False, flags=True)

    def c_lib_dirs(self):
        return blas.ldflags(libs=False, libs_dir=True)

    def c_header_dirs(self):
        return blas.ldflags(libs=False, include_dir=True)

    def c_code(self, node, name, (x, y, pattern), (z, ), sub):
        if node.inputs[0].type.dtype in ('complex64', 'complex128'):
            raise NotImplementedError('Complex types are not supported for x')
        if node.inputs[1].type.dtype in ('complex64', 'complex128'):
            raise NotImplementedError('Complex types are not supported for y')
        if node.inputs[2].type.dtype in ('complex64', 'complex128'):
            raise NotImplementedError(
                'Complex types are not supported for pattern')

        blas_out = scalar.upcast(node.inputs[0].type.dtype,
                                 node.inputs[1].type.dtype)
        if blas_out == "float32":
            conv_type = "float"
            axpy = "saxpy_"
        else:
            conv_type = "double"
            axpy = "daxpy_"

        typenum_x = node.inputs[0].type.dtype_specs()[
            -1]  # retrieve dtype number
        typenum_y = node.inputs[1].type.dtype_specs()[
            -1]  # retrieve dtype number
        typenum_pattern = node.inputs[2].type.dtype_specs()[
            -1]  # retrieve dtype number
        typenum_z = tensor.TensorType(
            node.outputs[0].dtype,
            []).dtype_specs()[-1]  # retrieve dtype number

        rval = """
        if (%(x)s->nd != 2) {PyErr_SetString(PyExc_NotImplementedError, "rank(x) != 2"); %(fail)s;}
        if (%(y)s->nd != 2) {PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 2"); %(fail)s;}
        if (%(pattern)s->nd != 2) {PyErr_SetString(PyExc_NotImplementedError, "rank(pattern) != 2"); %(fail)s;}

        if (%(x)s->descr->type_num != %(typenum_x)s) {
        PyErr_SetString(PyExc_NotImplementedError, "Invalid type for x"); %(fail)s;}

        if (%(y)s->descr->type_num != %(typenum_y)s) {
        PyErr_SetString(PyExc_NotImplementedError, "Invalid type for y"); %(fail)s;}

        if (%(pattern)s->descr->type_num != %(typenum_pattern)s) {
        PyErr_SetString(PyExc_NotImplementedError, "Invalid type for pattern"); %(fail)s;}

        if (%(x)s->dimensions[1] != %(y)s->dimensions[0])
        {PyErr_SetString(PyExc_NotImplementedError, "x's number of columns doesn't match y's rows"); %(fail)s;}

        if (%(pattern)s->dimensions[0] != %(x)s->dimensions[0] || %(pattern)s->dimensions[1] != %(x)s->dimensions[1])
        {PyErr_SetString(PyExc_NotImplementedError, "The dimension of x and the pattern must match"); %(fail)s;}

        if (!%(z)s
            || (%(z)s->dimensions[0] != %(x)s->dimensions[0])
            || (%(z)s->dimensions[1] != %(y)s->dimensions[1])
            )
        {
            {Py_XDECREF(%(z)s);}
            npy_intp dims[] = {0,0};
            dims[0] = %(x)s->dimensions[0];
            dims[1] = %(y)s->dimensions[1];
            %(z)s = (PyArrayObject*) PyArray_SimpleNew(2, dims, %(typenum_z)s);
        }

        {
            // x has size MxK, y has size KxN, output z is MxN
            npy_intp M = %(z)s->dimensions[0];
            npy_intp N = %(z)s->dimensions[1];
            npy_intp K = %(y)s->dimensions[0];

            // pointers to access actual data in the arrays passed as params.
            dtype_%(z)s* __restrict__ Dz   = (dtype_%(z)s*)%(z)s->data;
            const dtype_%(x)s* __restrict__ Dx = (dtype_%(x)s*)%(x)s->data;
            const dtype_%(y)s* __restrict__ Dy = (dtype_%(y)s*)%(y)s->data;
            const dtype_%(pattern)s* __restrict__ Dpattern = (dtype_%(pattern)s*)%(pattern)s->data;

            const npy_intp Sdz = %(z)s->strides[1]/%(z)s->descr->elsize;
            const npy_intp Sdx = %(x)s->strides[1]/%(x)s->descr->elsize;
            const npy_intp Sdy = %(y)s->strides[1]/%(y)s->descr->elsize;
            const npy_intp Sdp = %(pattern)s->strides[1]/%(pattern)s->descr->elsize;

            //clear the output array
            memset(Dz, 0, M*N*sizeof(dtype_%(z)s));

            for (npy_int32 m = 0; m < M; ++m) {
                // pointer to m-th row of the output matrix Z
                dtype_%(z)s* const __restrict__ z_row = (dtype_%(z)s*)(%(z)s->data + %(z)s->strides[0] * m);

                const dtype_%(pattern)s* p_row = (dtype_%(pattern)s*)(%(pattern)s->data + %(pattern)s->strides[0] * m);

                const dtype_%(x)s* x_row = (dtype_%(x)s*)(%(x)s->data + %(x)s->strides[0] * m);

                for (npy_int32 k = 0; k < K; ++k) {
                    if (*(p_row + k*Sdp) != 0) {
                        const dtype_%(y)s* y_row = (dtype_%(y)s*)(%(y)s->data + %(y)s->strides[0] * k);

                        %(axpy)s((int*)&N, (%(conv_type)s*)(x_row + k*Sdx), (%(conv_type)s*)y_row, (int*)&Sdy, (%(conv_type)s*)z_row, (int*)&Sdz);
                    }
                }
            }

        }
        """ % dict(locals(), **sub)

        return rval
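An illustrative sketch (the variable names are ours) of the dtype bookkeeping done in make_node above: the output dtype covers all three inputs, while the BLAS ?axpy call only needs x and y cast to their common float type.

from theano import scalar

x_dtype, y_dtype, p_dtype = 'float32', 'float64', 'int8'

dtype_out = scalar.upcast(x_dtype, y_dtype, p_dtype)  # 'float64': output dtype
blas_out = scalar.upcast(x_dtype, y_dtype)            # 'float64': dtype x and y are cast to
print(dtype_out, blas_out)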
Example #45
0
    def filter_inplace(self,
                       data,
                       old_data,
                       strict=False,
                       allow_downcast=None):
        if isinstance(data,
                      gpuarray.GpuArray) and data.typecode == self.typecode:
            # The data already matches this type and typecode; nothing to
            # convert, so skip the branches below.
            pass
        elif strict:
            if not isinstance(data, gpuarray.GpuArray):
                raise TypeError(f"{self} expected a GpuArray object.", data,
                                type(data))
            if self.typecode != data.typecode:
                raise TypeError(
                    f"{self} expected typecode {int(self.typecode)} (dtype {self.dtype}), "
                    f"got {int(data.typecode)} (dtype {data.dtype}).")
            if self.context != data.context:
                raise TypeError("data context does not match type context")
            # fallthrough to ndim check
        elif allow_downcast or (allow_downcast is None and type(data) == float
                                and self.dtype == config.floatX):
            if not isinstance(data, gpuarray.GpuArray):
                data = np.array(data,
                                dtype=self.dtype,
                                copy=False,
                                ndmin=len(self.broadcastable))
            else:
                data = gpuarray.array(
                    data,
                    dtype=self.typecode,
                    copy=False,
                    ndmin=len(self.broadcastable),
                    context=self.context,
                )
        else:
            if not hasattr(data, "dtype"):
                converted_data = _asarray(data, self.dtype)
                # We use the `values_eq` static function from TensorType
                # to handle NaN values.
                if TensorType.values_eq(np.asarray(data),
                                        converted_data,
                                        force_same_dtype=False):
                    data = converted_data

            up_dtype = scalar.upcast(self.dtype, data.dtype)
            if up_dtype == self.dtype:
                if not isinstance(data, gpuarray.GpuArray):
                    data = np.array(data, dtype=self.dtype, copy=False)
                else:
                    data = gpuarray.array(data, dtype=self.dtype, copy=False)
            else:
                raise TypeError(
                    f"{self} cannot store a value of dtype {data.dtype} "
                    "without risking loss of precision.")

        if self.ndim != data.ndim:
            raise TypeError(
                f"Wrong number of dimensions: expected {self.ndim}, "
                f"got {data.ndim} with shape {data.shape}.",
                data,
            )
        shp = data.shape
        for i, b in enumerate(self.broadcastable):
            if b and shp[i] != 1:
                raise TypeError(
                    "Non-unit value on shape on a broadcastable"
                    " dimension.",
                    shp,
                    self.broadcastable,
                )
        if not isinstance(data, gpuarray.GpuArray):
            if (old_data is not None and old_data.shape == data.shape and (
                    # write() only works if the destination is contiguous.
                    old_data.flags["C_CONTIGUOUS"]
                    or old_data.flags["F_CONTIGUOUS"])):
                old_data.write(data)
                data = old_data
            else:
                data = pygpu.array(data, context=self.context)
        return data
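A minimal sketch of the precision rule used above: a value can be stored without loss exactly when upcasting the target dtype with the data's dtype gives back the target dtype.

from theano import scalar

def can_store_losslessly(target_dtype, data_dtype):
    return scalar.upcast(target_dtype, data_dtype) == target_dtype

assert can_store_losslessly('float64', 'float32')
assert not can_store_losslessly('float32', 'float64')
assert can_store_losslessly('int64', 'int32')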