def local_gpua_advanced_incsubtensor(node, context_name):
    """Lift AdvancedIncSubtensor1 to its GPU variant on CUDA contexts.

    Returns None on non-CUDA contexts.  Picks the ``_dev20`` kernel when
    the device has compute capability >= 2 and both operands are 2d.
    """
    # This is disabled on non-cuda contexts
    if get_context(context_name).kind != 'cuda':
        return None
    x, y, ilist = node.inputs
    # Gpu Ops needs both inputs to have the same dtype
    if (x.type.dtype != y.type.dtype):
        dtype = scalar.upcast(x.type.dtype, y.type.dtype)
        if x.type.dtype != dtype:
            x = tensor.cast(x, dtype)
        if y.type.dtype != dtype:
            y = tensor.cast(y, dtype)
    # NOTE(review): the casted x/y are only used for the ndim test below;
    # the returned op is not applied to them here — confirm the caller
    # (op_lifter-style) re-applies the op to dtype-matched inputs.
    set_instead_of_inc = node.op.set_instead_of_inc
    active_device_no = theano.sandbox.cuda.active_device_number()
    device_properties = theano.sandbox.cuda.device_properties
    # 'major' is the device's compute-capability major number.
    compute_capability = device_properties(active_device_no)['major']
    if (compute_capability < 2 or x.ndim != 2 or y.ndim != 2):
        return GpuAdvancedIncSubtensor1(set_instead_of_inc=set_instead_of_inc)
    else:
        return GpuAdvancedIncSubtensor1_dev20(
            set_instead_of_inc=set_instead_of_inc)
def test_prod_without_zeros_custom_dtype(self): """ Test the ability to provide your own output dtype for a ProdWithoutZeros(). """ # We try multiple axis combinations even though axis should not matter. axes = [None, 0, 1, [0], [1], [0, 1]] idx = 0 for input_dtype in imap(str, theano.scalar.all_types): x = tensor.matrix(dtype=input_dtype) for output_dtype in imap(str, theano.scalar.all_types): axis = axes[idx % len(axes)] # If output_dtype would force a downcast, we expect a TypeError # We always allow int/uint inputs with float/complex outputs. upcasted_dtype = scalar.upcast(input_dtype, output_dtype) if (output_dtype == upcasted_dtype or (input_dtype in tensor.discrete_dtypes and output_dtype in tensor.continuous_dtypes)): prod_woz_var = ProdWithoutZeros(axis=axis, dtype=output_dtype)(x) assert prod_woz_var.dtype == output_dtype else: self.assertRaises( TypeError, ProdWithoutZeros(axis=axis, dtype=output_dtype), x) idx += 1
def make_node(self, a, val, offset):
    """Validate inputs and build the Apply node.

    `a` must be exactly 2-d, `val` a scalar whose dtype upcasts to
    `a.dtype` without loss, and `offset` an integer scalar.  The output
    has `a`'s type.
    """
    a = tensor.as_tensor_variable(a)
    val = tensor.as_tensor_variable(val)
    offset = tensor.as_tensor_variable(offset)
    if a.ndim != 2:
        raise TypeError('%s: first parameter must have exactly'
                        ' two dimensions' % self.__class__.__name__)
    elif val.ndim != 0:
        raise TypeError('%s: second parameter must be a scalar'
                        % self.__class__.__name__)
    elif offset.ndim != 0:
        raise TypeError('%s: third parameter must be a scalar'
                        % self.__class__.__name__)
    # Cast val up to a's dtype; reject combinations that would downcast.
    val = tensor.cast(val, dtype=scalar.upcast(a.dtype, val.dtype))
    if val.dtype != a.dtype:
        raise TypeError('%s: type of second parameter must be the same'
                        ' as the first\'s' % self.__class__.__name__)
    # NOTE(review): this prefix test rejects uint* dtypes; other versions
    # of this method use theano.tensor.integer_dtypes — confirm whether
    # unsigned offsets should be accepted here.
    elif offset.dtype[:3] != 'int':
        raise TypeError('%s: type of third parameter must be as integer'
                        ' use theano.tensor.cast( input, \'int32/int64\')'
                        % self.__class__.__name__)
    return gof.Apply(self, [a, val, offset], [a.type()])
def test_prod_custom_dtype(self): """ Test the ability to provide your own output dtype for a prod. """ # We try multiple axis combinations even though axis should not matter. axes = [None, 0, 1, [0], [1], [0, 1]] idx = 0 for input_dtype in imap(str, theano.scalar.all_types): x = tensor.matrix(dtype=input_dtype) for output_dtype in imap(str, theano.scalar.all_types): axis = axes[idx % len(axes)] # If output_dtype would force a downcast, we expect a TypeError # We always allow int/uint inputs with float/complex outputs. upcasted_dtype = scalar.upcast(input_dtype, output_dtype) if (output_dtype == upcasted_dtype or (input_dtype in tensor.discrete_dtypes and output_dtype in tensor.continuous_dtypes)): prod_var = x.prod(dtype=output_dtype, axis=axis) assert prod_var.dtype == output_dtype if "complex" in output_dtype: continue # Check that we can take the gradient grad_var = tensor.grad(prod_var.sum(), x, disconnected_inputs='ignore') else: self.assertRaises(TypeError, x.prod, dtype=output_dtype, axis=axis) idx += 1
def local_gpua_advanced_incsubtensor(node, context_name):
    """Select the GPU AdvancedIncSubtensor1 op variant for a CUDA context.

    Returns None on non-CUDA contexts.  The ``_dev20`` variant is chosen
    when the device compute capability is >= 2 and both operands are 2d.
    """
    if get_context(context_name).kind != "cuda":
        # Only CUDA contexts are supported by these ops.
        return None

    inp, upd, idx = node.inputs

    # The GPU ops need both operands to share a dtype; upcast as required.
    if inp.type.dtype != upd.type.dtype:
        common = scalar.upcast(inp.type.dtype, upd.type.dtype)
        if inp.type.dtype != common:
            inp = tensor.cast(inp, common)
        if upd.type.dtype != common:
            upd = tensor.cast(upd, common)

    set_flag = node.op.set_instead_of_inc
    dev = theano.sandbox.cuda.active_device_number()
    cc_major = theano.sandbox.cuda.device_properties(dev)["major"]

    use_dev20 = cc_major >= 2 and inp.ndim == 2 and upd.ndim == 2
    if use_dev20:
        return GpuAdvancedIncSubtensor1_dev20(set_instead_of_inc=set_flag)
    return GpuAdvancedIncSubtensor1(set_instead_of_inc=set_flag)
def filter(self, data, strict=False, allow_downcast=None):
    """Convert/check `data` for storage as this GpuArray type.

    Raises TypeError on typecode, precision, ndim, or broadcast-shape
    mismatches; otherwise returns the (possibly converted) GpuArray.
    """
    if strict:
        # Strict mode: only an exact GpuArray typecode match is accepted.
        if not isinstance(data, gpuarray.GpuArray):
            raise TypeError("%s expected a GpuArray object." % self,
                            data, type(data))
        if self.typecode != data.typecode:
            raise TypeError("%s expected typecode %d (dtype %s), "
                            "got %d (dtype %s)." %
                            (self, self.typecode, self.dtype,
                             data.typecode, str(data.dtype)))
        # fallthrough to ndim check
    elif allow_downcast:
        # Caller explicitly permits precision loss.
        data = gpuarray.array(data, dtype=self.typecode, copy=False,
                              ndmin=len(self.broadcastable))
    else:
        # Only allow the conversion if it cannot lose precision.
        # NOTE(review): unlike newer variants of this method, `data`
        # without a `.dtype` attribute (e.g. a list) reaches this line
        # and would raise AttributeError — confirm callers pre-convert.
        up_dtype = scalar.upcast(self.dtype, data.dtype)
        if up_dtype == self.dtype:
            data = gpuarray.array(data, dtype=self.dtype, copy=False)
        else:
            raise TypeError("%s cannot store a value of dtype %s "
                            "without risking loss of precision." %
                            (self, data.dtype))
    if self.ndim != data.ndim:
        raise TypeError("Wrong number of dimensions: expected %s, "
                        "got %s with shape %s." %
                        (self.ndim, data.ndim, data.shape), data)
    shp = data.shape
    for i, b in enumerate(self.broadcastable):
        # Broadcastable dimensions must have length exactly 1.
        if b and shp[i] != 1:
            raise TypeError("Non-unit value on shape on a broadcastable"
                            " dimension.", shp, self.broadcastable)
    return data
def test_prod_without_zeros_custom_dtype(self): """ Test the ability to provide your own output dtype for a ProdWithoutZeros(). """ # We try multiple axis combinations even though axis should not matter. axes = [None, 0, 1, [0], [1], [0, 1]] idx = 0 for input_dtype in imap(str, theano.scalar.all_types): x = tensor.matrix(dtype=input_dtype) for output_dtype in imap(str, theano.scalar.all_types): axis = axes[idx % len(axes)] # If output_dtype would force a downcast, we expect a TypeError # We always allow int/uint inputs with float/complex outputs. upcasted_dtype = scalar.upcast(input_dtype, output_dtype) if (output_dtype == upcasted_dtype or (input_dtype in tensor.discrete_dtypes and output_dtype in tensor.continuous_dtypes) ): prod_woz_var = ProdWithoutZeros( axis=axis, dtype=output_dtype)(x) assert prod_woz_var.dtype == output_dtype else: self.assertRaises(TypeError, ProdWithoutZeros(axis=axis, dtype=output_dtype), x) idx += 1
def test_sum_custom_dtype(self): """ Test the ability to provide your own output dtype for a sum. """ # We try multiple axis combinations even though axis should not matter. axes = [None, 0, 1, [0], [1], [0, 1]] idx = 0 for input_dtype in imap(str, theano.scalar.all_types): x = tensor.matrix(dtype=input_dtype) for output_dtype in imap(str, theano.scalar.all_types): # If the output is a complex, the gradient of the sum will # cast the complex to the input dtype. We can't call the normal # cast on a complex to a not complex as this is ambiguous. if not input_dtype.startswith("complex") and output_dtype.startswith("complex"): continue axis = axes[idx % len(axes)] # If output_dtype would force a downcast, we expect a TypeError # We always allow int/uint inputs with float/complex outputs. upcasted_dtype = scalar.upcast(input_dtype, output_dtype) if output_dtype == upcasted_dtype or ( input_dtype in tensor.discrete_dtypes and output_dtype in tensor.continuous_dtypes ): sum_var = x.sum(dtype=output_dtype, axis=axis) assert sum_var.dtype == output_dtype if "complex" in input_dtype: continue # Check that we can take the gradient grad_var = tensor.grad(sum_var.sum(), x, disconnected_inputs="ignore") else: self.assertRaises(TypeError, x.sum, dtype=output_dtype, axis=axis) idx += 1
def local_gpua_advanced_incsubtensor(node):
    """Replace AdvancedIncSubtensor1 with a GPU op applied to its inputs.

    Returns None when the default context is not CUDA; otherwise returns
    the single-element list of the replacement output, as local
    optimizers do.
    """
    # This optimization is disabled if cuda is not active
    if pygpu.get_default_context().kind != "cuda":
        return None
    x, y, ilist = node.inputs
    # Gpu Ops needs both inputs to have the same dtype
    if (x.type.dtype != y.type.dtype):
        dtype = scalar.upcast(x.type.dtype, y.type.dtype)
        if x.type.dtype != dtype:
            x = tensor.cast(x, dtype)
        if y.type.dtype != dtype:
            y = tensor.cast(y, dtype)
    set_instead_of_inc = node.op.set_instead_of_inc
    active_device_no = theano.sandbox.cuda.active_device_number()
    device_properties = theano.sandbox.cuda.device_properties
    # 'major' is the device's compute-capability major number.
    compute_capability = device_properties(active_device_no)['major']
    if (compute_capability < 2 or x.ndim != 2 or y.ndim != 2):
        return [GpuAdvancedIncSubtensor1(
            set_instead_of_inc=set_instead_of_inc)(x, y, ilist)]
    else:
        return [GpuAdvancedIncSubtensor1_dev20(
            set_instead_of_inc=set_instead_of_inc)(x, y, ilist)]
def test_prod_custom_dtype(self): """ Test the ability to provide your own output dtype for a prod. """ # We try multiple axis combinations even though axis should not matter. axes = [None, 0, 1, [0], [1], [0, 1]] idx = 0 for input_dtype in imap(str, theano.scalar.all_types): x = tensor.matrix(dtype=input_dtype) for output_dtype in imap(str, theano.scalar.all_types): axis = axes[idx % len(axes)] # If output_dtype would force a downcast, we expect a TypeError # We always allow int/uint inputs with float/complex outputs. upcasted_dtype = scalar.upcast(input_dtype, output_dtype) if (output_dtype == upcasted_dtype or (input_dtype in tensor.discrete_dtypes and output_dtype in tensor.continuous_dtypes) ): prod_var = x.prod(dtype=output_dtype, axis=axis) assert prod_var.dtype == output_dtype if "complex" in output_dtype: continue # Check that we can take the gradient grad_var = tensor.grad(prod_var.sum(), x, disconnected_inputs='ignore') else: self.assertRaises(TypeError, x.prod, dtype=output_dtype, axis=axis) idx += 1
def filter_inplace(self, data, old_data, strict=False, allow_downcast=None):
    """Filter `data` into CUDA storage, reusing `old_data` where possible.

    Delegates to `cuda.filter`; for non-ndarray, non-CudaNdarray values
    it first checks that the cast to ``self.dtype`` loses no precision.
    """
    if strict or allow_downcast or isinstance(data, cuda.CudaNdarray):
        # Strict/downcast-permitted/already-on-GPU: let cuda.filter decide.
        return cuda.filter(data, self.broadcastable, strict, old_data)
    else:  # (not strict) and (not allow_downcast)
        # Check if data.dtype can be accurately cast to self.dtype
        if isinstance(data, numpy.ndarray):
            up_dtype = scal.upcast(self.dtype, data.dtype)
            if up_dtype == self.dtype:
                return cuda.filter(data, self.broadcastable, strict, old_data)
            else:
                raise TypeError(
                    "%s, with dtype %s, cannot store a value of "
                    "dtype %s without risking loss of precision."
                    "If you do not mind, please cast your data to %s."
                    % (self, self.dtype, data.dtype, self.dtype),
                    data,
                )
        else:
            converted_data = theano._asarray(data, self.dtype)
            # Python floats may implicitly downcast to floatX when
            # allow_downcast is unspecified.
            if allow_downcast is None and type(data) is float and self.dtype == theano.config.floatX:
                return cuda.filter(converted_data, self.broadcastable, strict, old_data)
            elif numpy.all(data == converted_data):
                # Conversion is exact: safe to store.
                return cuda.filter(converted_data, self.broadcastable, strict, old_data)
            else:
                raise TypeError(
                    "%s, with dtype %s, cannot store accurately value %s, "
                    "it would be represented as %s. If you do not mind, "
                    "you can cast your data to %s."
                    % (self, self.dtype, data, converted_data, self.dtype),
                    data,
                )
def test_prod_without_zeros_custom_acc_dtype(self): """ Test ability to provide your own acc_dtype for a ProdWithoutZeros(). """ # We try multiple axis combinations even though axis should not matter. axes = [None, 0, 1, [], [0], [1], [0, 1]] idx = 0 for input_dtype in imap(str, theano.scalar.all_types): x = tensor.matrix(dtype=input_dtype) for acc_dtype in imap(str, theano.scalar.all_types): axis = axes[idx % len(axes)] # If acc_dtype would force a downcast, we expect a TypeError # We always allow int/uint inputs with float/complex outputs. upcasted_dtype = scalar.upcast(input_dtype, acc_dtype) if (acc_dtype == upcasted_dtype or (input_dtype in tensor.discrete_dtypes and acc_dtype in tensor.continuous_dtypes) ): prod_woz_var = ProdWithoutZeros( axis=axis, acc_dtype=acc_dtype)(x) assert prod_woz_var.owner.op.acc_dtype == acc_dtype if (acc_dtype.startswith('complex') and input_dtype != acc_dtype): continue f = theano.function([x], prod_woz_var) data = numpy.random.rand(2, 3) * 3 data = data.astype(input_dtype) f(data) else: self.assertRaises(TypeError, ProdWithoutZeros(axis=axis, acc_dtype=acc_dtype), x) idx += 1
def test_reduce_custom_acc_dtype(self):
    # Test the ability to provide your own accumulator dtype for a reduce.
    # We try multiple axis combinations even though axis should not matter.
    idx = 0
    for method in self.methods:
        for input_dtype in self.dtypes:
            x = tensor.matrix(dtype=input_dtype)
            for acc_dtype in self.dtypes:
                # If the accumulator is a complex, the gradient of the reduce will
                # cast the complex to the input dtype. We can't call the normal
                # cast on a complex to a not complex as this is ambiguous.
                if not input_dtype.startswith("complex") and acc_dtype.startswith("complex"):
                    continue
                # Cycle through the axis combinations across iterations.
                axis = self.axes[idx % len(self.axes)]
                # If output_dtype would force a downcast, we expect a TypeError
                # We always allow int/uint inputs with float/complex outputs.
                upcasted_dtype = scalar.upcast(input_dtype, acc_dtype)
                if acc_dtype == upcasted_dtype or (
                    input_dtype in tensor.discrete_dtypes and acc_dtype in tensor.continuous_dtypes
                ):
                    var = getattr(x, method)(acc_dtype=acc_dtype, axis=axis)
                    assert var.owner.op.acc_dtype == acc_dtype
                    if "complex" in input_dtype:
                        # NOTE(review): this `continue` also skips
                        # `idx += 1` — confirm intended.
                        continue
                    # Check that we can take the gradient
                    tensor.grad(var.sum(), x, disconnected_inputs="ignore")
                else:
                    self.assertRaises(TypeError, getattr(x, method), acc_dtype=acc_dtype, axis=axis)
                idx += 1
def local_gpua_advanced_incsubtensor(node, context_name): context = get_context(context_name) # This is disabled on non-cuda contexts if context.kind != 'cuda': return None x, y, ilist = node.inputs # Gpu Ops needs both inputs to have the same dtype if (x.type.dtype != y.type.dtype): dtype = scalar.upcast(x.type.dtype, y.type.dtype) if x.type.dtype != dtype: x = tensor.cast(x, dtype) if y.type.dtype != dtype: y = tensor.cast(y, dtype) set_instead_of_inc = node.op.set_instead_of_inc compute_capability = int(context.bin_id[-2]) if (compute_capability < 2 or x.ndim != 2 or y.ndim != 2): return GpuAdvancedIncSubtensor1( set_instead_of_inc=set_instead_of_inc) else: return GpuAdvancedIncSubtensor1_dev20( set_instead_of_inc=set_instead_of_inc)
def filter(self, data, strict=False, allow_downcast=None):
    """Convert/check `data` for storage as this GpuArrayType.

    Raises TypeError on typecode, context, precision, ndim, or
    broadcast-shape mismatches; otherwise returns the (possibly
    converted and GPU-transferred) data.
    """
    if (isinstance(data, gpuarray.GpuArray) and
            data.typecode == self.typecode):
        # This is just to make this condition not enter the
        # following branches
        pass
    elif strict:
        # Strict mode: only an exact GpuArray match is accepted.
        if not isinstance(data, gpuarray.GpuArray):
            raise TypeError("%s expected a GpuArray object." % self,
                            data, type(data))
        if self.typecode != data.typecode:
            raise TypeError("%s expected typecode %d (dtype %s), "
                            "got %d (dtype %s)." %
                            (self, self.typecode, self.dtype,
                             data.typecode, str(data.dtype)))
        if self.context != data.context:
            raise TypeError("data context does not match type context")
        # fallthrough to ndim check
    elif (allow_downcast or
          (allow_downcast is None and
           type(data) == float and
           self.dtype == config.floatX)):
        # Downcast permitted (explicitly, or implicitly for Python
        # floats going to floatX).
        data = gpuarray.array(data, dtype=self.typecode, copy=False,
                              ndmin=len(self.broadcastable),
                              context=self.context)
    else:
        if not hasattr(data, 'dtype'):
            converted_data = theano._asarray(data, self.dtype)
            # We use the `values_eq` static function from TensorType
            # to handle NaN values.
            if TensorType.values_eq(np.asarray(data), converted_data,
                                    force_same_dtype=False):
                data = converted_data
            data = gpuarray.array(data, context=self.context)
        # Only allow the conversion if it cannot lose precision.
        up_dtype = scalar.upcast(self.dtype, data.dtype)
        if up_dtype == self.dtype:
            data = gpuarray.array(data, dtype=self.dtype, copy=False,
                                  context=self.context)
        else:
            raise TypeError("%s cannot store a value of dtype %s "
                            "without risking loss of precision." %
                            (self, data.dtype))
    if self.ndim != data.ndim:
        raise TypeError(
            "Wrong number of dimensions: expected %s, "
            "got %s with shape %s." % (self.ndim, data.ndim, data.shape),
            data)
    shp = data.shape
    for i, b in enumerate(self.broadcastable):
        # Broadcastable dimensions must have length exactly 1.
        if b and shp[i] != 1:
            raise TypeError(
                "Non-unit value on shape on a broadcastable"
                " dimension.", shp, self.broadcastable)
    return data
def filter(self, data, strict=False, allow_downcast=None):
    """Convert/check `data` for storage as this GpuArrayType.

    Raises TypeError on typecode, context, precision, ndim, or
    broadcast-shape mismatches; otherwise returns the (possibly
    converted and GPU-transferred) data.
    """
    if (isinstance(data, gpuarray.GpuArray) and
            data.typecode == self.typecode):
        # This is just to make this condition not enter the
        # following branches
        pass
    elif strict:
        # Strict mode: only an exact GpuArray match is accepted.
        if not isinstance(data, gpuarray.GpuArray):
            raise TypeError("%s expected a GpuArray object." % self,
                            data, type(data))
        if self.typecode != data.typecode:
            raise TypeError("%s expected typecode %d (dtype %s), "
                            "got %d (dtype %s)." %
                            (self, self.typecode, self.dtype,
                             data.typecode, str(data.dtype)))
        if self.context != data.context:
            raise TypeError("data context does not match type context")
        # fallthrough to ndim check
    elif (allow_downcast or
          (allow_downcast is None and
           type(data) == float and
           self.dtype == config.floatX)):
        # Downcast permitted (explicitly, or implicitly for Python
        # floats going to floatX).
        data = gpuarray.array(data, dtype=self.typecode, copy=False,
                              ndmin=len(self.broadcastable),
                              context=self.context)
    else:
        if not hasattr(data, 'dtype'):
            # This is to convert objects that don't have a dtype
            # (like lists). We anticipate that the type below
            # will match and we pass copy=False so it won't make a
            # second object on the GPU.
            data = gpuarray.array(data, copy=False,
                                  context=self.context)
        # Only allow the conversion if it cannot lose precision.
        up_dtype = scalar.upcast(self.dtype, data.dtype)
        if up_dtype == self.dtype:
            data = gpuarray.array(data, dtype=self.dtype, copy=False,
                                  context=self.context)
        else:
            raise TypeError("%s cannot store a value of dtype %s "
                            "without risking loss of precision." %
                            (self, data.dtype))
    if self.ndim != data.ndim:
        raise TypeError(
            "Wrong number of dimensions: expected %s, "
            "got %s with shape %s." % (self.ndim, data.ndim, data.shape),
            data)
    shp = data.shape
    for i, b in enumerate(self.broadcastable):
        # Broadcastable dimensions must have length exactly 1.
        if b and shp[i] != 1:
            raise TypeError(
                "Non-unit value on shape on a broadcastable"
                " dimension.", shp, self.broadcastable)
    return data
def make_node(self, A, b):
    """Build the Apply node for this op on matrix `A` and vector/matrix `b`.

    The output shares `b`'s broadcast pattern and has the upcast of the
    two input dtypes.
    """
    mat = tensor.as_tensor_variable(A)
    rhs = tensor.as_tensor_variable(b)
    if mat.broadcastable != (False, False):
        raise TypeError("A must be a matrix", mat.type)
    # b may be a vector, a row, or a full matrix.
    legal_rhs = ((False, ), (True, False), (False, False))
    if rhs.broadcastable not in legal_rhs:
        raise TypeError("b must be a matrix or vector", rhs.type)
    out_dtype = scalar.upcast(mat.dtype, rhs.dtype)
    out_type = tensor.TensorType(dtype=out_dtype,
                                 broadcastable=rhs.broadcastable)
    return gof.Apply(op=self, inputs=[mat, rhs], outputs=[out_type()])
def make_node(self, A, b):
    """Build the Apply node for this op on matrix `A` and vector/matrix `b`.

    The output shares `b`'s broadcast pattern and has the upcast of the
    two input dtypes.

    Raises TypeError if `A` is not a matrix or `b` is not a vector,
    row, or matrix.
    """
    A_ = tensor.as_tensor_variable(A)
    b_ = tensor.as_tensor_variable(b)
    if A_.broadcastable != (False, False):
        raise TypeError("A must be a matrix", A_.type)
    if b_.broadcastable not in ((False,), (True, False), (False, False)):
        raise TypeError("b must be a matrix or vector", b_.type)
    odtype = scalar.upcast(A_.dtype, b_.dtype)
    otype = tensor.TensorType(broadcastable=b_.broadcastable, dtype=odtype)
    # BUG FIX: previously passed inputs=[A, B] — `B` is undefined
    # (NameError at runtime) and `A` is the raw, unconverted argument.
    # Use the converted variables, as the sibling implementation does.
    return gof.Apply(op=self, inputs=[A_, b_], outputs=[otype()])
def filter(self, data, strict=False, allow_downcast=None):
    """Convert/check `data` for storage as this GpuArrayType.

    Raises TypeError on typecode, context, precision, ndim, or
    broadcast-shape mismatches; otherwise returns the (possibly
    converted and GPU-transferred) data.
    """
    if (isinstance(data, gpuarray.GpuArray) and
            data.typecode == self.typecode):
        # This is just to make this condition not enter the
        # following branches
        pass
    elif strict:
        # Strict mode: only an exact GpuArray match is accepted.
        if not isinstance(data, gpuarray.GpuArray):
            raise TypeError("%s expected a GpuArray object." % self,
                            data, type(data))
        if self.typecode != data.typecode:
            raise TypeError("%s expected typecode %d (dtype %s), "
                            "got %d (dtype %s)." %
                            (self, self.typecode, self.dtype,
                             data.typecode, str(data.dtype)))
        if self.context != data.context:
            raise TypeError("data context does not match type context")
        # fallthrough to ndim check
    elif (allow_downcast or
          (allow_downcast is None and
           type(data) == float and
           self.dtype == config.floatX)):
        # Downcast permitted (explicitly, or implicitly for Python
        # floats going to floatX).
        data = gpuarray.array(data, dtype=self.typecode, copy=False,
                              ndmin=len(self.broadcastable),
                              context=self.context)
    else:
        if not hasattr(data, 'dtype'):
            converted_data = theano._asarray(data, self.dtype)
            # We use the `values_eq` static function from TensorType
            # to handle NaN values.
            if TensorType.values_eq(numpy.asarray(data), converted_data,
                                    force_same_dtype=False):
                data = converted_data
            data = gpuarray.array(data, context=self.context)
        # Only allow the conversion if it cannot lose precision.
        up_dtype = scalar.upcast(self.dtype, data.dtype)
        if up_dtype == self.dtype:
            data = gpuarray.array(data, dtype=self.dtype, copy=False,
                                  context=self.context)
        else:
            raise TypeError("%s cannot store a value of dtype %s "
                            "without risking loss of precision." %
                            (self, data.dtype))
    if self.ndim != data.ndim:
        raise TypeError("Wrong number of dimensions: expected %s, "
                        "got %s with shape %s." %
                        (self.ndim, data.ndim, data.shape), data)
    shp = data.shape
    for i, b in enumerate(self.broadcastable):
        # Broadcastable dimensions must have length exactly 1.
        if b and shp[i] != 1:
            raise TypeError("Non-unit value on shape on a broadcastable"
                            " dimension.", shp, self.broadcastable)
    return data
def make_node(self, a, val):
    """Check shapes/dtypes and create the Apply node.

    `a` must be at least 2-d; `val` must be a scalar whose dtype can be
    upcast to `a.dtype` without loss.  The output has `a`'s type.
    """
    arr = tensor.as_tensor_variable(a)
    fill = tensor.as_tensor_variable(val)
    cls_name = self.__class__.__name__
    if arr.ndim < 2:
        raise TypeError("%s: first parameter must have at least"
                        " two dimensions" % cls_name)
    if fill.ndim != 0:
        raise TypeError("%s: second parameter must be a scalar" % cls_name)
    # Cast the scalar up to the array dtype; reject lossy combinations.
    fill = tensor.cast(fill, dtype=scalar.upcast(arr.dtype, fill.dtype))
    if fill.dtype != arr.dtype:
        raise TypeError("%s: type of second parameter must be the same as"
                        " the first's" % cls_name)
    return gof.Apply(self, [arr, fill], [arr.type()])
def make_node(self, x, y, p):
    """Build the Apply node for dense `x`, dense `y` and sparse pattern `p`.

    The output takes `p`'s type.
    """
    dense_x = tensor.as_tensor_variable(x)
    dense_y = tensor.as_tensor_variable(y)
    if not _is_sparse_variable(p):
        raise TypeError(p)
    # TODO: use it — the upcast dtype is computed but not yet used for
    # the output type.
    dtype_out = scalar.upcast(dense_x.type.dtype, dense_y.type.dtype,
                              p.type.dtype)
    return gof.Apply(self, [dense_x, dense_y, p], [p.type()])
def make_node(self, x, y, pattern):
    """Build the Apply node for dense inputs `x`, `y` and dense `pattern`.

    All three inputs must be dense; `x` and `y` are cast to a common
    dtype for the BLAS call, and the output dtype is the upcast of all
    three inputs.
    """
    if (_is_sparse_variable(x) or
            _is_sparse_variable(y) or
            _is_sparse_variable(pattern)):
        raise TypeError(x)

    x = tensor.as_tensor_variable(x)
    y = tensor.as_tensor_variable(y)
    pattern = tensor.as_tensor_variable(pattern)

    dtype_out = scalar.upcast(x.type.dtype, y.type.dtype,
                              pattern.type.dtype)
    blas_out = scalar.upcast(x.type.dtype, y.type.dtype)

    # We call blas ?axpy function that take only param of the same type
    x = tensor.cast(x, blas_out)
    y = tensor.cast(y, blas_out)

    return gof.Apply(
        self,
        [x, y, pattern],
        [tensor.tensor(dtype=dtype_out, broadcastable=(False, False))])
def filter(self, data, strict=False, allow_downcast=None):
    """Convert/check `data` for storage as this GpuArrayType.

    Raises TypeError on typecode, context, precision, ndim, or
    broadcast-shape mismatches; otherwise returns the (possibly
    converted and GPU-transferred) data.
    """
    if (isinstance(data, gpuarray.GpuArray) and
            data.typecode == self.typecode):
        # This is just to make this condition not enter the
        # following branches
        pass
    elif strict:
        # Strict mode: only an exact GpuArray match is accepted.
        if not isinstance(data, gpuarray.GpuArray):
            raise TypeError("%s expected a GpuArray object." % self,
                            data, type(data))
        if self.typecode != data.typecode:
            raise TypeError("%s expected typecode %d (dtype %s), "
                            "got %d (dtype %s)." %
                            (self, self.typecode, self.dtype,
                             data.typecode, str(data.dtype)))
        if self.context != data.context:
            raise TypeError("data context does not match type context")
        # fallthrough to ndim check
    elif (allow_downcast or
          (allow_downcast is None and
           type(data) == float and
           self.dtype == config.floatX)):
        # Downcast permitted (explicitly, or implicitly for Python
        # floats going to floatX).
        data = gpuarray.array(data, dtype=self.typecode, copy=False,
                              ndmin=len(self.broadcastable),
                              context=self.context)
    else:
        if not hasattr(data, 'dtype'):
            # This is to convert objects that don't have a dtype
            # (like lists). We anticipate that the type below
            # will match and we pass copy=False so it won't make a
            # second object on the GPU.
            data = gpuarray.array(data, copy=False,
                                  context=self.context)
        # Only allow the conversion if it cannot lose precision.
        up_dtype = scalar.upcast(self.dtype, data.dtype)
        if up_dtype == self.dtype:
            data = gpuarray.array(data, dtype=self.dtype, copy=False,
                                  context=self.context)
        else:
            raise TypeError("%s cannot store a value of dtype %s "
                            "without risking loss of precision." %
                            (self, data.dtype))
    if self.ndim != data.ndim:
        raise TypeError("Wrong number of dimensions: expected %s, "
                        "got %s with shape %s." %
                        (self.ndim, data.ndim, data.shape), data)
    shp = data.shape
    for i, b in enumerate(self.broadcastable):
        # Broadcastable dimensions must have length exactly 1.
        if b and shp[i] != 1:
            raise TypeError("Non-unit value on shape on a broadcastable"
                            " dimension.", shp, self.broadcastable)
    return data
def make_node(self, x, y, p_data, p_ind, p_ptr, p_ncols):
    """Build the Apply node given dense `x`, `y` and an explicit CSR
    pattern (`p_data`, `p_ind`, `p_ptr`, `p_ncols`).

    `x` and `y` are cast to a common dtype for the BLAS dot call; the
    first output dtype is the upcast of `x`, `y` and `p_data`.
    """
    x = tensor.as_tensor_variable(x)
    y = tensor.as_tensor_variable(y)
    p_data = tensor.as_tensor_variable(p_data)
    p_ind = tensor.as_tensor_variable(p_ind)
    p_ptr = tensor.as_tensor_variable(p_ptr)
    p_ncols = tensor.as_tensor_variable(p_ncols)
    assert p_ncols.dtype == 'int32'

    dtype_out = scalar.upcast(x.type.dtype, y.type.dtype,
                              p_data.type.dtype)
    dot_out = scalar.upcast(x.type.dtype, y.type.dtype)

    # We call blas ?dot function that take only param of the same type
    x = tensor.cast(x, dot_out)
    y = tensor.cast(y, dot_out)

    # Outputs: new data vector plus (unchanged dtype) ind and ptr vectors.
    return gof.Apply(self, [x, y, p_data, p_ind, p_ptr, p_ncols], [
        tensor.tensor(dtype=dtype_out, broadcastable=(False,)),
        tensor.tensor(dtype=p_ind.type.dtype, broadcastable=(False,)),
        tensor.tensor(dtype=p_ptr.type.dtype, broadcastable=(False,))
    ])
def make_node(self, x, y, p_data, p_ind, p_ptr, p_ncols):
    """Build the Apply node given dense `x`, `y` and an explicit CSR
    pattern (`p_data`, `p_ind`, `p_ptr`, `p_ncols`).

    `x` and `y` are cast to a common dtype for the BLAS dot call; the
    first output dtype is the upcast of `x`, `y` and `p_data`.
    """
    x = tensor.as_tensor_variable(x)
    y = tensor.as_tensor_variable(y)
    p_data = tensor.as_tensor_variable(p_data)
    p_ind = tensor.as_tensor_variable(p_ind)
    p_ptr = tensor.as_tensor_variable(p_ptr)
    p_ncols = tensor.as_tensor_variable(p_ncols)
    assert p_ncols.dtype == 'int32'

    dtype_out = scalar.upcast(x.type.dtype, y.type.dtype,
                              p_data.type.dtype)
    dot_out = scalar.upcast(x.type.dtype, y.type.dtype)

    # We call blas ?dot function that take only param of the same type
    x = tensor.cast(x, dot_out)
    y = tensor.cast(y, dot_out)

    # Outputs: new data vector plus (unchanged dtype) ind and ptr vectors.
    return gof.Apply(self, [x, y, p_data, p_ind, p_ptr, p_ncols], [
        tensor.tensor(dtype=dtype_out, broadcastable=(False, )),
        tensor.tensor(dtype=p_ind.type.dtype, broadcastable=(False, )),
        tensor.tensor(dtype=p_ptr.type.dtype, broadcastable=(False, ))
    ])
def make_node(self, a, val):
    """Check shapes/dtypes and create the Apply node.

    `a`: at least 2-d tensor.  `val`: scalar, upcast-compatible with
    `a.dtype`.  Output has `a`'s type.
    """
    arr = tensor.as_tensor_variable(a)
    fill = tensor.as_tensor_variable(val)
    cls_name = self.__class__.__name__
    if arr.ndim < 2:
        raise TypeError('%s: first parameter must have at least'
                        ' two dimensions' % cls_name)
    if fill.ndim != 0:
        raise TypeError('%s: second parameter must be a scalar' % cls_name)
    # Cast the scalar up to the array dtype; reject lossy combinations.
    fill = tensor.cast(fill, dtype=scalar.upcast(arr.dtype, fill.dtype))
    if fill.dtype != arr.dtype:
        raise TypeError('%s: type of second parameter must be the same as'
                        " the first's" % cls_name)
    return gof.Apply(self, [arr, fill], [arr.type()])
def make_node(self, a, val):
    """Validate inputs and build the Apply node.

    `a` must be at least 2-d and `val` a scalar whose dtype upcasts to
    `a.dtype` without loss.  The output has `a`'s type.
    """
    a = basic.as_tensor_variable(a)
    val = basic.as_tensor_variable(val)
    if a.ndim < 2:
        raise TypeError("%s: first parameter must have at least"
                        " two dimensions" % self.__class__.__name__)
    elif val.ndim != 0:
        raise TypeError("%s: second parameter must be a scalar"
                        % self.__class__.__name__)
    # Cast val up to a's dtype; reject combinations that would downcast.
    val = basic.cast(val, dtype=upcast(a.dtype, val.dtype))
    if val.dtype != a.dtype:
        raise TypeError("%s: type of second parameter must be the same as"
                        " the first's" % self.__class__.__name__)
    return Apply(self, [a, val], [a.type()])
def make_node(self, x, y, pattern):
    """Build the Apply node for dense inputs `x`, `y` and dense `pattern`.

    All three inputs must be dense; the output is a matrix whose dtype
    is the upcast of the three input dtypes.
    """
    # Reject sparse inputs up front (checked in the same order as given).
    if any(_is_sparse_variable(v) for v in (x, y, pattern)):
        raise TypeError(x)

    dense_x = tensor.as_tensor_variable(x)
    dense_y = tensor.as_tensor_variable(y)
    dense_p = tensor.as_tensor_variable(pattern)

    dtype_out = scalar.upcast(dense_x.type.dtype, dense_y.type.dtype,
                              dense_p.type.dtype)
    out = tensor.tensor(dtype=dtype_out, broadcastable=(False, False))
    return gof.Apply(self, [dense_x, dense_y, dense_p], [out])
def make_node(self, a, val, offset):
    """Validate inputs and build the Apply node.

    `a` must be exactly 2-d, `val` a scalar whose dtype upcasts to
    `a.dtype` without loss, and `offset` an integer scalar.  The output
    has `a`'s type.
    """
    a = tensor.as_tensor_variable(a)
    val = tensor.as_tensor_variable(val)
    offset = tensor.as_tensor_variable(offset)
    if a.ndim != 2:
        raise TypeError("%s: first parameter must have exactly"
                        " two dimensions" % self.__class__.__name__)
    elif val.ndim != 0:
        raise TypeError("%s: second parameter must be a scalar"
                        % self.__class__.__name__)
    elif offset.ndim != 0:
        raise TypeError("%s: third parameter must be a scalar"
                        % self.__class__.__name__)
    # Cast val up to a's dtype; reject combinations that would downcast.
    val = tensor.cast(val, dtype=scalar.upcast(a.dtype, val.dtype))
    if val.dtype != a.dtype:
        raise TypeError("%s: type of second parameter must be the same"
                        " as the first's" % self.__class__.__name__)
    elif offset.dtype not in theano.tensor.integer_dtypes:
        raise TypeError(
            "%s: type of third parameter must be as integer"
            " use theano.tensor.cast( input, 'int32/int64')"
            % self.__class__.__name__
        )
    return gof.Apply(self, [a, val, offset], [a.type()])
def test_prod_custom_acc_dtype(self): """ Test the ability to provide your own acc_dtype for a prod. """ # We try multiple axis combinations even though axis should not matter. axes = [None, 0, 1, [], [0], [1], [0, 1]] idx = 0 for input_dtype in imap(str, theano.scalar.all_types): x = tensor.matrix(dtype=input_dtype) for acc_dtype in imap(str, theano.scalar.all_types): axis = axes[idx % len(axes)] # If acc_dtype would force a downcast, we expect a TypeError # We always allow int/uint inputs with float/complex outputs. upcasted_dtype = scalar.upcast(input_dtype, acc_dtype) if (acc_dtype == upcasted_dtype or (input_dtype in tensor.discrete_dtypes and acc_dtype in tensor.continuous_dtypes)): prod_var = x.prod(acc_dtype=acc_dtype, axis=axis) assert prod_var.owner.op.acc_dtype == acc_dtype if (acc_dtype.startswith('complex') and input_dtype != acc_dtype): continue f = theano.function([x], prod_var) data = numpy.random.rand(3, 4) * 10 data = data.astype(input_dtype) f(data) if "complex" in acc_dtype: continue # Check that we can take the gradient tensor.grad(prod_var.sum(), x, disconnected_inputs='ignore') else: self.assertRaises(TypeError, x.prod, acc_dtype=acc_dtype, axis=axis) idx += 1
def test_sum_custom_dtype(self): """ Test the ability to provide your own output dtype for a sum. """ # We try multiple axis combinations even though axis should not matter. axes = [None, 0, 1, [0], [1], [0, 1]] idx = 0 for input_dtype in imap(str, theano.scalar.all_types): x = tensor.matrix(dtype=input_dtype) for output_dtype in imap(str, theano.scalar.all_types): # If the output is a complex, the gradient of the sum will # cast the complex to the input dtype. We can't call the normal # cast on a complex to a not complex as this is ambiguous. if (not input_dtype.startswith('complex') and output_dtype.startswith('complex')): continue axis = axes[idx % len(axes)] # If output_dtype would force a downcast, we expect a TypeError # We always allow int/uint inputs with float/complex outputs. upcasted_dtype = scalar.upcast(input_dtype, output_dtype) if (output_dtype == upcasted_dtype or (input_dtype in tensor.discrete_dtypes and output_dtype in tensor.continuous_dtypes)): sum_var = x.sum(dtype=output_dtype, axis=axis) assert sum_var.dtype == output_dtype if "complex" in input_dtype: continue # Check that we can take the gradient grad_var = tensor.grad(sum_var.sum(), x, disconnected_inputs='ignore') else: self.assertRaises(TypeError, x.sum, dtype=output_dtype, axis=axis) idx += 1
def test_reduce_custom_acc_dtype(self): """ Test the ability to provide your own accumulator dtype for a reduce. """ # We try multiple axis combinations even though axis should not matter. idx = 0 for method in self.methods: for input_dtype in self.dtypes: x = tensor.matrix(dtype=input_dtype) for acc_dtype in self.dtypes: # If the accumulator is a complex, the gradient of the reduce will # cast the complex to the input dtype. We can't call the normal # cast on a complex to a not complex as this is ambiguous. if (not input_dtype.startswith('complex') and acc_dtype.startswith('complex')): continue axis = self.axes[idx % len(self.axes)] # If output_dtype would force a downcast, we expect a TypeError # We always allow int/uint inputs with float/complex outputs. upcasted_dtype = scalar.upcast(input_dtype, acc_dtype) if (acc_dtype == upcasted_dtype or (input_dtype in tensor.discrete_dtypes and acc_dtype in tensor.continuous_dtypes) ): var = getattr(x, method)(acc_dtype=acc_dtype, axis=axis) assert var.owner.op.acc_dtype == acc_dtype if "complex" in input_dtype: continue # Check that we can take the gradient tensor.grad(var.sum(), x, disconnected_inputs='ignore') else: self.assertRaises(TypeError, getattr(x, method), acc_dtype=acc_dtype, axis=axis) idx += 1
def test_prod_custom_acc_dtype(self): """ Test the ability to provide your own acc_dtype for a prod. """ # We try multiple axis combinations even though axis should not matter. axes = [None, 0, 1, [], [0], [1], [0, 1]] idx = 0 for input_dtype in imap(str, theano.scalar.all_types): x = tensor.matrix(dtype=input_dtype) for acc_dtype in imap(str, theano.scalar.all_types): axis = axes[idx % len(axes)] # If acc_dtype would force a downcast, we expect a TypeError # We always allow int/uint inputs with float/complex outputs. upcasted_dtype = scalar.upcast(input_dtype, acc_dtype) if (acc_dtype == upcasted_dtype or (input_dtype in tensor.discrete_dtypes and acc_dtype in tensor.continuous_dtypes) ): prod_var = x.prod(acc_dtype=acc_dtype, axis=axis) assert prod_var.owner.op.acc_dtype == acc_dtype if (acc_dtype.startswith('complex') and input_dtype != acc_dtype): continue f = theano.function([x], prod_var) data = numpy.random.rand(3, 4) * 10 data = data.astype(input_dtype) f(data) if "complex" in acc_dtype: continue # Check that we can take the gradient tensor.grad(prod_var.sum(), x, disconnected_inputs='ignore') else: self.assertRaises(TypeError, x.prod, acc_dtype=acc_dtype, axis=axis) idx += 1
def filter_inplace(self, data, old_data, strict=False, allow_downcast=None):
    """
    Filter `data` for this CUDA type, possibly reusing `old_data` storage.

    All conversion work is delegated to ``cuda.filter``; in the
    non-strict, non-downcast case we first verify that storing `data`
    at ``self.dtype`` loses no precision, and raise TypeError otherwise.
    """
    # Strict mode, an explicit downcast permission, or data already on
    # the GPU: hand straight to cuda.filter.
    if strict or allow_downcast or isinstance(data, cuda.CudaNdarray):
        return cuda.filter(data, self.broadcastable, strict, old_data)

    # (not strict) and (not allow_downcast)
    if isinstance(data, numpy.ndarray):
        # Check if data.dtype can be accurately cast to self.dtype
        if scal.upcast(self.dtype, data.dtype) == self.dtype:
            return cuda.filter(data, self.broadcastable, strict, old_data)
        raise TypeError(
            '%s, with dtype %s, cannot store a value of '
            'dtype %s without risking loss of precision.'
            'If you do not mind, please cast your data to %s.'
            % (self, self.dtype, data.dtype, self.dtype), data)

    converted_data = theano._asarray(data, self.dtype)
    # A plain Python float may always be silently downcast to floatX;
    # anything else must round-trip losslessly.
    is_plain_float = (allow_downcast is None and
                      type(data) is float and
                      self.dtype == theano.config.floatX)
    if is_plain_float or numpy.all(data == converted_data):
        return cuda.filter(converted_data, self.broadcastable,
                           strict, old_data)

    raise TypeError(
        '%s, with dtype %s, cannot store accurately value %s, '
        'it would be represented as %s. If you do not mind, '
        'you can cast your data to %s.'
        % (self, self.dtype, data, converted_data, self.dtype), data)
def make_node(self, a, val, offset):
    """
    Validate the inputs and build the Apply node.

    `a` must be a matrix, `val` a scalar storable in `a`'s dtype
    without precision loss, and `offset` an integer scalar.  The
    output has the same type as `a`.
    """
    a = basic.as_tensor_variable(a)
    val = basic.as_tensor_variable(val)
    offset = basic.as_tensor_variable(offset)
    cls_name = self.__class__.__name__
    if a.ndim != 2:
        raise TypeError("%s: first parameter must have exactly"
                        " two dimensions" % cls_name)
    if val.ndim != 0:
        raise TypeError("%s: second parameter must be a scalar"
                        % cls_name)
    if offset.ndim != 0:
        raise TypeError("%s: third parameter must be a scalar"
                        % cls_name)
    # Upcast val toward a's dtype; if the upcast goes past a.dtype the
    # value cannot be stored in `a` without loss.
    val = basic.cast(val, dtype=upcast(a.dtype, val.dtype))
    if val.dtype != a.dtype:
        raise TypeError("%s: type of second parameter must be the same"
                        " as the first's" % cls_name)
    if offset.dtype not in theano.tensor.integer_dtypes:
        raise TypeError("%s: type of third parameter must be as integer"
                        " use theano.tensor.cast( input, 'int32/int64')"
                        % cls_name)
    return Apply(self, [a, val, offset], [a.type()])
def make_node(self, a, val, offset):
    """
    Validate the inputs and build the Apply node.

    `a` must be a matrix, `val` a scalar storable in `a`'s dtype
    without precision loss, and `offset` an integer scalar.  The
    output has the same type as `a`.
    """
    a = tensor.as_tensor_variable(a)
    val = tensor.as_tensor_variable(val)
    offset = tensor.as_tensor_variable(offset)
    if a.ndim != 2:
        raise TypeError('%s: first parameter must have exactly'
                        ' two dimensions' % self.__class__.__name__)
    elif val.ndim != 0:
        raise TypeError('%s: second parameter must be a scalar'
                        % self.__class__.__name__)
    elif offset.ndim != 0:
        raise TypeError('%s: third parameter must be a scalar'
                        % self.__class__.__name__)

    # Upcast val toward a's dtype; if the upcast goes past a.dtype the
    # value cannot be stored in `a` without loss.
    val = tensor.cast(val, dtype=scalar.upcast(a.dtype, val.dtype))
    if val.dtype != a.dtype:
        raise TypeError('%s: type of second parameter must be the same'
                        ' as the first\'s' % self.__class__.__name__)
    # BUG FIX: the previous check (offset.dtype[:3] != 'int') rejected
    # unsigned integer offsets ('uint8'[:3] == 'uin'); test membership
    # in the full set of integer dtypes instead, matching the other
    # make_node implementation in this file.
    elif offset.dtype not in tensor.integer_dtypes:
        raise TypeError('%s: type of third parameter must be as integer'
                        ' use theano.tensor.cast( input, \'int32/int64\')'
                        % self.__class__.__name__)

    return gof.Apply(self, [a, val, offset], [a.type()])
def local_usmm_csx(node):
    """
    usmm -> usmm_csc_dense

    Rewrites a usmm node whose first operand is a CSC sparse variable
    and whose second is dense into the specialized usmm_csc_dense op.
    Returns False when the rewrite does not apply.
    """
    if node.op != usmm:
        return False

    alpha, x, y, z = node.inputs
    # Only handle sparse-x times dense-y.
    if not _is_sparse_variable(x) or _is_sparse_variable(y):
        return False
    if x.type.format != 'csc':
        return False

    x_val, x_ind, x_ptr, x_shape = csm_properties(x)
    x_nsparse = x_shape[0]
    dtype_out = scalar.upcast(alpha.type.dtype, x.type.dtype,
                              y.type.dtype, z.type.dtype)
    if dtype_out not in ('float32', 'float64'):
        return False
    # Sparse cast is not implemented.
    if y.type.dtype != dtype_out:
        return False

    return [usmm_csc_dense(alpha, x_val, x_ind, x_ptr,
                           x_nsparse, y, z)]
def c_code(self, node, name, inputs, outputs, sub):
    """Generate C code for sampling-dot with a CSR sparsity pattern.

    For each stored entry (m, n) of the pattern, the generated code
    computes ``p[m, n] * dot(x[m, :], y[n, :])`` through a BLAS
    sdot_/ddot_ call (y is used transposed — see the error message in
    the generated code).  The pattern's indices and indptr arrays are
    copied unchanged to the output, so the output shares the pattern's
    sparsity structure.

    Raises
    ------
    NotImplementedError
        If x, y or the pattern is complex (only real BLAS dot is used).
    """
    x, y, p_data, p_ind, p_ptr, p_ncols = inputs
    z_data, z_ind, z_ptr = outputs
    if node.inputs[0].type.dtype in ('complex64', 'complex128'):
        raise NotImplementedError('Complex types are not supported for x')
    if node.inputs[1].type.dtype in ('complex64', 'complex128'):
        raise NotImplementedError('Complex types are not supported for y')
    if node.inputs[2].type.dtype in ('complex64', 'complex128'):
        raise NotImplementedError(
            'Complex types are not supported for pattern')

    # Choose the BLAS dot flavour from the upcast of x's and y's dtypes:
    # single precision only when the upcast is exactly float32.
    dot_out = scalar.upcast(node.inputs[0].type.dtype,
                            node.inputs[1].type.dtype)
    if dot_out == "float32":
        conv_type = "float"
        cdot = "sdot_"
    else:
        conv_type = "double"
        cdot = "ddot_"

    # retrieve dtype number
    typenum_x = node.inputs[0].type.dtype_specs()[-1]
    typenum_y = node.inputs[1].type.dtype_specs()[-1]
    typenum_p = node.inputs[2].type.dtype_specs()[-1]
    typenum_zd = tensor.TensorType(node.outputs[0].dtype,
                                   []).dtype_specs()[-1]
    typenum_zi = tensor.TensorType(node.outputs[1].dtype,
                                   []).dtype_specs()[-1]
    typenum_zp = tensor.TensorType(node.outputs[2].dtype,
                                   []).dtype_specs()[-1]

    rval = """
if (%(x)s->nd != 2) {
PyErr_SetString(PyExc_NotImplementedError, "rank(x) != 2");
%(fail)s;}
if (%(y)s->nd != 2) {
PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 2");
%(fail)s;}

if (%(x)s->descr->type_num != %(typenum_x)s) {
PyErr_SetString(PyExc_NotImplementedError, "Invalid type for x");
%(fail)s;}

if (%(y)s->descr->type_num != %(typenum_y)s) {
PyErr_SetString(PyExc_NotImplementedError, "Invalid type for y");
%(fail)s;}

if (%(p_data)s->descr->type_num != %(typenum_p)s) {
PyErr_SetString(PyExc_NotImplementedError, "Invalid type for pattern");
%(fail)s;}

if (%(x)s->dimensions[1] != %(y)s->dimensions[1]) {
PyErr_SetString(PyExc_NotImplementedError, "x's number of columns doesn't match y's rows! Note: sampling_dot is different from dot because y is assumed to be transposed.");
%(fail)s;}

if (%(y)s->dimensions[0] != ((npy_int32 *)%(p_ncols)s->data)[0] ||
    %(x)s->dimensions[0] != (%(p_ptr)s->dimensions[0] - 1))
{PyErr_SetString(PyExc_NotImplementedError,
"The dimension of the pattern and the output must match");
%(fail)s;}

// Allocate output
if (!%(z_data)s
    || (%(z_data)s->dimensions[0] != %(p_data)s->dimensions[0])
    || (%(z_data)s->descr->type_num != %(typenum_zd)s)) {
    {Py_XDECREF(%(z_data)s);}
    npy_intp dims[] = {0};
    dims[0] = %(p_data)s->dimensions[0];
    %(z_data)s = (PyArrayObject*) PyArray_SimpleNew(1, dims,
                                                    %(typenum_zd)s);
}
if (!%(z_ind)s
    || (%(z_ind)s->dimensions[0] != %(p_ind)s->dimensions[0])
    || (%(z_ind)s->descr->type_num != %(typenum_zi)s)) {
    {Py_XDECREF(%(z_ind)s);}
    npy_intp dims[] = {0};
    dims[0] = %(p_ind)s->dimensions[0];
    %(z_ind)s = (PyArrayObject*) PyArray_SimpleNew(1, dims,
                                                   %(typenum_zi)s);
}
if (!%(z_ptr)s
    || (%(z_ptr)s->dimensions[0] != %(p_ptr)s->dimensions[0])
    || (%(z_ptr)s->descr->type_num != %(typenum_zp)s)) {
    {Py_XDECREF(%(z_ptr)s);}
    npy_intp dims[] = {0};
    dims[0] = %(p_ptr)s->dimensions[0];
    %(z_ptr)s = (PyArrayObject*) PyArray_SimpleNew(1, dims,
                                                   %(typenum_zp)s);
}

{
    // Product of MxK and NxK, output MxN
    npy_intp M = %(x)s->dimensions[0];
    npy_intp N = %(y)s->dimensions[0];
    npy_intp K = %(y)s->dimensions[1];

    // pointers to access actual data in the arrays passed as params.
    const dtype_%(x)s* __restrict__ Dx = (dtype_%(x)s*)%(x)s->data;
    const dtype_%(y)s* __restrict__ Dy = (dtype_%(y)s*)%(y)s->data;
    const dtype_%(p_data)s* __restrict__ Dpd = (dtype_%(p_data)s*)%(p_data)s->data;
    const dtype_%(p_ind)s* __restrict__ Dpi = (dtype_%(p_ind)s*)%(p_ind)s->data;
    const dtype_%(p_ptr)s* __restrict__ Dpp = (dtype_%(p_ptr)s*)%(p_ptr)s->data;
    dtype_%(z_data)s* __restrict__ Dzd = (dtype_%(z_data)s*)%(z_data)s->data;
    dtype_%(z_ind)s* __restrict__ Dzi = (dtype_%(z_ind)s*)%(z_ind)s->data;
    dtype_%(z_ptr)s* __restrict__ Dzp = (dtype_%(z_ptr)s*)%(z_ptr)s->data;

    const npy_intp Sdx = %(x)s->strides[1]/%(x)s->descr->elsize;
    const npy_intp Sdy = %(y)s->strides[1]/%(y)s->descr->elsize;
    const npy_intp Sdpd = %(p_data)s->strides[0] / %(p_data)s->descr->elsize;
    const npy_intp Sdpi = %(p_ind)s->strides[0] / %(p_ind)s->descr->elsize;
    const npy_intp Sdpp = %(p_ptr)s->strides[0] / %(p_ptr)s->descr->elsize;
    const npy_intp Sdzd = %(z_data)s->strides[0] / %(z_data)s->descr->elsize;
    const npy_intp Sdzi = %(z_ind)s->strides[0] / %(z_ind)s->descr->elsize;
    const npy_intp Sdzp = %(z_ptr)s->strides[0] / %(z_ptr)s->descr->elsize;

    memcpy(Dzi, Dpi, %(p_ind)s->dimensions[0]*sizeof(dtype_%(p_ind)s));
    memcpy(Dzp, Dpp, %(p_ptr)s->dimensions[0]*sizeof(dtype_%(p_ptr)s));

    for (npy_int32 m = 0; m < M; ++m) {
        for (npy_int32 n_idx = Dpp[m * Sdpp]; n_idx < Dpp[(m+1)*Sdpp]; ++n_idx) {
            const npy_int32 n = Dpi[n_idx * Sdpi]; // row index of non-null value for column K

            const dtype_%(x)s* x_row = (dtype_%(x)s*)(%(x)s->data + %(x)s->strides[0] * m);

            const dtype_%(y)s* y_col = (dtype_%(y)s*)(%(y)s->data + %(y)s->strides[0] * n);

            Dzd[n_idx * Sdzd] = Dpd[n_idx * Sdpd] * %(cdot)s((int*)&K, (const %(conv_type)s*)x_row, (int*)&Sdx, (const %(conv_type)s*)y_col, (int*)&Sdy);
        }
    }
}
""" % dict(locals(), **sub)

    return rval
def filter(self, data, strict=False, allow_downcast=None):
    """Convert `data` to something which can be associated to a
    `TensorVariable`.

    This function is not meant to be called in user code. It is for
    `Linker` instances to use when running a compiled graph.

    Parameters
    ----------
    data : array-like
        The value to check/convert.
    strict : bool
        If True, `data` must already be an ndarray (or memmap) of
        exactly `self.numpy_dtype`; no conversion is attempted.
    allow_downcast : bool or None
        If True, always convert to `self.dtype`.  If None, only a
        Python `float` may be silently downcast to floatX; any other
        lossy conversion raises a TypeError.

    Returns
    -------
    numpy.ndarray
        Array of dtype `self.dtype`, `self.ndim` dimensions, aligned,
        with unit extent on every broadcastable dimension.

    Raises
    ------
    TypeError
        On a Variable input, dtype/ndim mismatch, lossy conversion
        without permission, unaligned data, or a non-unit broadcastable
        dimension.
    ValueError
        If `self.filter_checks_isfinite` and `data` contains
        non-finite elements.
    """
    # Explicit error message when one accidentally uses a Variable as
    # input (typical mistake, especially with shared variables).
    if isinstance(data, Variable):
        raise TypeError(
            'Expected an array-like object, but found a Variable: '
            'maybe you are trying to call a function on a (possibly '
            'shared) variable instead of a numeric array?')

    if ((type(data) is numpy.ndarray) and
            (data.dtype == self.numpy_dtype)):
        # Same dtype but possibly a different type number (e.g. a
        # platform alias): normalize in that case.
        if data.dtype.num != self.numpy_dtype.num:
            data = theano._asarray(data, dtype=self.dtype)
        # -- now fall through to ndim check
    elif((type(data) is numpy.memmap) and
         (data.dtype == self.numpy_dtype)):
        # numpy.memmap is a "safe" subclass of ndarray,
        # so we can use it whereever we expect a base ndarray.
        # however, casting it would defeat the purpose of not
        # loading the whole data into memory
        pass
    elif strict:
        # If any of the two conditions above was not met,
        # we raise a meaningful TypeError.
        if not (type(data) is numpy.ndarray):
            raise TypeError("%s expected a ndarray object." % self,
                            data, type(data))
        if data.dtype != self.numpy_dtype:
            raise TypeError(("%s expected a ndarray object with "
                             "dtype = %s (got %s).") % (
                self, self.numpy_dtype, data.dtype))
        assert False, "This point should never be reached."
    else:
        if allow_downcast:
            # Convert to self.dtype, regardless of the type of data
            data = theano._asarray(data, dtype=self.dtype)
            # TODO: consider to pad shape with ones to make it consistent
            # with self.broadcastable... like vector->row type thing
        else:
            if isinstance(data, numpy.ndarray):
                # Check if self.dtype can accurately represent data
                # (do not try to convert the data)
                up_dtype = scal.upcast(self.dtype, data.dtype)
                if up_dtype == self.dtype:
                    # Bug in the following line when data is a
                    # scalar array, see
                    # http://projects.scipy.org/numpy/ticket/1611
                    # data = data.astype(self.dtype)
                    data = theano._asarray(data, dtype=self.dtype)
                if up_dtype != self.dtype:
                    err_msg = (
                        '%s cannot store a value of dtype %s without '
                        'risking loss of precision. If you do not mind '
                        'this loss, you can: '
                        '1) explicitly cast your data to %s, or '
                        '2) set "allow_input_downcast=True" when calling '
                        '"function".' % (self, data.dtype, self.dtype))
                    raise TypeError(err_msg, data)
            elif (allow_downcast is None and
                    type(data) is float and
                    self.dtype == theano.config.floatX):
                # Special case where we allow downcasting of Python float
                # literals to floatX, even when floatX=='float32'
                data = theano._asarray(data, self.dtype)
            else:
                # data has to be converted.
                # Check that this conversion is lossless
                converted_data = theano._asarray(data, self.dtype)
                # We use the `values_eq` static function from TensorType
                # to handle NaN values.
                if TensorType.values_eq(numpy.asarray(data),
                                        converted_data,
                                        force_same_dtype=False):
                    data = converted_data
                else:
                    # Do not print a too long description of data
                    # (ndarray truncates it, but it's not sure for data)
                    str_data = str(data)
                    if len(str_data) > 80:
                        str_data = str_data[:75] + '(...)'

                    err_msg = (
                        '%s cannot store accurately value %s, '
                        'it would be represented as %s. '
                        'If you do not mind this precision loss, you can: '
                        '1) explicitly convert your data to a numpy array '
                        'of dtype %s, or '
                        '2) set "allow_input_downcast=True" when calling '
                        '"function".'
                        % (self, data, converted_data, self.dtype))
                    raise TypeError(err_msg, data)

    if self.ndim != data.ndim:
        raise TypeError("Wrong number of dimensions: expected %s,"
                        " got %s with shape %s."
                        % (self.ndim, data.ndim, data.shape))
    if not data.flags.aligned:
        # Theano's generated C code reads the buffer directly and
        # requires aligned memory.
        try:
            msg = "object buffer" + str(data.data)
        except AttributeError:
            msg = ""
        raise TypeError("The numpy.ndarray object is not aligned."
                        " Theano C code does not support that.",
                        msg,
                        "object shape", data.shape,
                        "object strides", data.strides,
                        "object dtype", data.dtype)

    # A broadcastable dimension must have extent exactly 1.
    i = 0
    for b in self.broadcastable:
        if b and data.shape[i] != 1:
            raise TypeError("Non-unit value on shape on a broadcastable"
                            " dimension.", data.shape,
                            self.broadcastable)
        i += 1
    if (self.filter_checks_isfinite and
            not numpy.all(numpy.isfinite(data))):
        raise ValueError("non-finite elements not allowed")
    return data
class SamplingDotDenseGrad(gof.Op):
    """
    Optimized gradient of SamplingDot when the pattern P is a dense matrix.

    The generated C code accumulates, for every non-zero pattern entry
    (m, k), ``x[m, k] * y[k, :]`` into row m of the output via a BLAS
    ?axpy call.
    """
    def __eq__(self, other):
        return type(self) == type(other)

    def __hash__(self):
        return hash(type(self))

    def __str__(self):
        return 'SamplingDotGrad{Dense}'

    def make_node(self, x, y, pattern):
        """Build the Apply node; all three inputs must be dense."""
        if (_is_sparse_variable(x) or _is_sparse_variable(y) or
                _is_sparse_variable(pattern)):
            raise TypeError(x)

        x = tensor.as_tensor_variable(x)
        y = tensor.as_tensor_variable(y)
        pattern = tensor.as_tensor_variable(pattern)

        dtype_out = scalar.upcast(x.type.dtype, y.type.dtype,
                                  pattern.type.dtype)
        blas_out = scalar.upcast(x.type.dtype, y.type.dtype)

        # We call blas ?axpy function that take only param of the same type
        x = tensor.cast(x, blas_out)
        y = tensor.cast(y, blas_out)

        return gof.Apply(
            self, [x, y, pattern],
            [tensor.tensor(dtype=dtype_out,
                           broadcastable=(False, False))])

    def c_support_code(self):
        return blas.blas_header_text()

    def c_libraries(self):
        return blas.ldflags()

    def c_compile_args(self):
        return blas.ldflags(libs=False, flags=True)

    def c_lib_dirs(self):
        return blas.ldflags(libs=False, libs_dir=True)

    def c_header_dirs(self):
        return blas.ldflags(libs=False, include_dir=True)

    # BUG FIX: the signature previously used Python-2-only tuple
    # parameters (node, name, (x, y, pattern), (z, ), sub); unpack the
    # sequences in the body instead, matching the other c_code methods
    # in this file.
    def c_code(self, node, name, inputs, outputs, sub):
        """Generate C code computing the dense-pattern gradient."""
        x, y, pattern = inputs
        z, = outputs
        if node.inputs[0].type.dtype in ('complex64', 'complex128'):
            raise NotImplementedError('Complex types are not supported for x')
        if node.inputs[1].type.dtype in ('complex64', 'complex128'):
            raise NotImplementedError('Complex types are not supported for y')
        if node.inputs[2].type.dtype in ('complex64', 'complex128'):
            raise NotImplementedError(
                'Complex types are not supported for pattern')

        # BUG FIX: previously upcast inputs[0] with itself; the BLAS
        # flavour must be chosen from the upcast of x's AND y's dtypes
        # (they are equal after make_node's casts, but the intent is
        # clearly inputs 0 and 1, as in make_node).
        blas_out = scalar.upcast(node.inputs[0].type.dtype,
                                 node.inputs[1].type.dtype)
        if blas_out == "float32":
            conv_type = "float"
            axpy = "saxpy_"
        else:
            conv_type = "double"
            axpy = "daxpy_"

        # retrieve dtype number
        typenum_x = node.inputs[0].type.dtype_specs()[-1]
        typenum_y = node.inputs[1].type.dtype_specs()[-1]
        typenum_pattern = node.inputs[2].type.dtype_specs()[-1]
        typenum_z = tensor.TensorType(
            node.outputs[0].dtype, []).dtype_specs()[-1]

        rval = """
if (%(x)s->nd != 2) {PyErr_SetString(PyExc_NotImplementedError, "rank(x) != 2"); %(fail)s;}
if (%(y)s->nd != 2) {PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 2"); %(fail)s;}
if (%(pattern)s->nd != 2) {PyErr_SetString(PyExc_NotImplementedError, "rank(pattern) != 2"); %(fail)s;}

if (%(x)s->descr->type_num != %(typenum_x)s) {
PyErr_SetString(PyExc_NotImplementedError, "Invalid type for x");
%(fail)s;}

if (%(y)s->descr->type_num != %(typenum_y)s) {
PyErr_SetString(PyExc_NotImplementedError, "Invalid type for y");
%(fail)s;}

if (%(pattern)s->descr->type_num != %(typenum_pattern)s) {
PyErr_SetString(PyExc_NotImplementedError, "Invalid type for pattern");
%(fail)s;}

if (%(x)s->dimensions[1] != %(y)s->dimensions[0])
{PyErr_SetString(PyExc_NotImplementedError, "x's number of columns doesn't match y's rows"); %(fail)s;}

if (%(pattern)s->dimensions[0] != %(x)s->dimensions[0] ||
    %(pattern)s->dimensions[1] != %(x)s->dimensions[1])
{PyErr_SetString(PyExc_NotImplementedError, "The dimension of x and the pattern must match"); %(fail)s;}

if (!%(z)s
    || (%(z)s->dimensions[0] != %(x)s->dimensions[0])
    || (%(z)s->dimensions[1] != %(y)s->dimensions[1])
    )
{
    {Py_XDECREF(%(z)s);}
    npy_intp dims[] = {0,0};
    dims[0] = %(x)s->dimensions[0];
    dims[1] = %(y)s->dimensions[1];
    %(z)s = (PyArrayObject*) PyArray_SimpleNew(2, dims, %(typenum_z)s);
}

{
    // sparse array has size MxK, dense KxN, output MxN
    npy_intp M = %(z)s->dimensions[0];
    npy_intp N = %(z)s->dimensions[1];
    npy_intp K = %(y)s->dimensions[0];

    // pointers to access actual data in the arrays passed as params.
    dtype_%(z)s* __restrict__ Dz = (dtype_%(z)s*)%(z)s->data;
    const dtype_%(x)s* __restrict__ Dx = (dtype_%(x)s*)%(x)s->data;
    const dtype_%(y)s* __restrict__ Dy = (dtype_%(y)s*)%(y)s->data;
    const dtype_%(pattern)s* __restrict__ Dpattern = (dtype_%(pattern)s*)%(pattern)s->data;

    const npy_intp Sdz = %(z)s->strides[1]/%(z)s->descr->elsize;
    const npy_intp Sdx = %(x)s->strides[1]/%(x)s->descr->elsize;
    const npy_intp Sdy = %(y)s->strides[1]/%(y)s->descr->elsize;
    const npy_intp Sdp = %(pattern)s->strides[1]/%(pattern)s->descr->elsize;

    //clear the output array
    memset(Dz, 0, M*N*sizeof(dtype_%(z)s));

    for (npy_int32 m = 0; m < M; ++m) {
        // pointer to m-th row of the output matrix Z
        dtype_%(z)s* const __restrict__ z_row = (dtype_%(z)s*)(%(z)s->data + %(z)s->strides[0] * m);
        const dtype_%(pattern)s* p_row = (dtype_%(pattern)s*)(%(pattern)s->data + %(pattern)s->strides[0] * m);
        const dtype_%(x)s* x_row = (dtype_%(x)s*)(%(x)s->data + %(x)s->strides[0] * m);
        for (npy_int32 k = 0; k < K; ++k) {
            if (*(p_row + k*Sdp) != 0) {
                // BUG FIX: y_row was declared const dtype_x* but cast
                // from y's data; declare it with y's dtype.
                const dtype_%(y)s* y_row = (dtype_%(y)s*)(%(y)s->data + %(y)s->strides[0] * k);
                %(axpy)s((int*)&N, (%(conv_type)s*)(x_row + k*Sdx), (%(conv_type)s*)y_row, (int*)&Sdy, (%(conv_type)s*)z_row, (int*)&Sdz);
            }
        }
    }
}
""" % dict(locals(), **sub)

        return rval
def filter_inplace(self, data, old_data, strict=False, allow_downcast=None):
    """Filter `data` for this GpuArray type, reusing `old_data` when possible.

    Converts/validates `data` into a GpuArray (or an ndarray to be
    uploaded) of this type's dtype, ndim and broadcastable pattern.
    When `old_data` has a matching shape and contiguous layout, the
    value is written into it in place instead of allocating new GPU
    memory.

    Raises
    ------
    TypeError
        On type/typecode/context mismatch in strict mode, on lossy
        dtype conversion without permission, on wrong ndim, or on a
        non-unit broadcastable dimension.
    """
    if isinstance(data, gpuarray.GpuArray) and data.typecode == self.typecode:
        # This is just to make this condition not enter the
        # following branches
        pass
    elif strict:
        if not isinstance(data, gpuarray.GpuArray):
            raise TypeError(f"{self} expected a GpuArray object.",
                            data, type(data))
        if self.typecode != data.typecode:
            raise TypeError(
                f"{self} expected typecode {int(self.typecode)} (dtype {self.dtype}), "
                f"got {int(data.typecode)} (dtype {data.dtype}).")
        if self.context != data.context:
            raise TypeError("data context does not match type context")
        # fallthrough to ndim check
    elif allow_downcast or (allow_downcast is None and
                            type(data) == float and
                            self.dtype == config.floatX):
        # Downcast explicitly allowed, or a plain Python float going
        # to floatX: convert unconditionally.
        if not isinstance(data, gpuarray.GpuArray):
            data = np.array(data, dtype=self.dtype, copy=False,
                            ndmin=len(self.broadcastable))
        else:
            data = gpuarray.array(
                data,
                dtype=self.typecode,
                copy=False,
                ndmin=len(self.broadcastable),
                context=self.context,
            )
    else:
        if not hasattr(data, "dtype"):
            converted_data = _asarray(data, self.dtype)
            # We use the `values_eq` static function from TensorType
            # to handle NaN values.
            # NOTE(review): if values_eq fails here, `data` is left
            # unconverted and still has no `dtype`, so the `data.dtype`
            # access below would raise AttributeError rather than the
            # precision TypeError — confirm this is intended.
            if TensorType.values_eq(np.asarray(data),
                                    converted_data,
                                    force_same_dtype=False):
                data = converted_data
        # Reject any conversion that could lose precision.
        up_dtype = scalar.upcast(self.dtype, data.dtype)
        if up_dtype == self.dtype:
            if not isinstance(data, gpuarray.GpuArray):
                data = np.array(data, dtype=self.dtype, copy=False)
            else:
                data = gpuarray.array(data, dtype=self.dtype,
                                      copy=False)
        else:
            raise TypeError(
                f"{self} cannot store a value of dtype {data.dtype} "
                "without risking loss of precision.")

    if self.ndim != data.ndim:
        raise TypeError(
            f"Wrong number of dimensions: expected {self.ndim}, "
            f"got {data.ndim} with shape {data.shape}.",
            data,
        )

    # A broadcastable dimension must have extent exactly 1.
    shp = data.shape
    for i, b in enumerate(self.broadcastable):
        if b and shp[i] != 1:
            raise TypeError(
                "Non-unit value on shape on a broadcastable"
                " dimension.",
                shp,
                self.broadcastable,
            )

    if not isinstance(data, gpuarray.GpuArray):
        # Host data: reuse old GPU storage in place when shape and
        # layout allow it, otherwise allocate a fresh GPU array.
        if (old_data is not None and
                old_data.shape == data.shape and (
                    # write() only work if the destination is contiguous.
                    old_data.flags["C_CONTIGUOUS"] or
                    old_data.flags["F_CONTIGUOUS"])):
            old_data.write(data)
            data = old_data
        else:
            data = pygpu.array(data, context=self.context)

    return data