def make_node(self, images, hid_grads, output_shape):
    if not isinstance(images.type, CudaNdarrayType):
        raise TypeError("WeightActs: expected images.type "
                        "to be CudaNdarrayType, "
                        "got " + str(images.type))
    if not isinstance(hid_grads.type, CudaNdarrayType):
        raise TypeError("WeightActs: expected hid_grads.type "
                        "to be CudaNdarrayType, "
                        "got " + str(hid_grads.type))
    assert images.ndim == 4
    assert hid_grads.ndim == 4

    input_channels_broadcastable = images.type.broadcastable[0]
    # We don't know anything about filter_rows or filter_cols at compile
    # time, so we assume they're not broadcastable.
    filter_rows_broadcastable = False
    filter_cols_broadcastable = False
    output_channels_broadcastable = hid_grads.type.broadcastable[0]

    weights_grads_type = CudaNdarrayType(
        (input_channels_broadcastable,
         filter_rows_broadcastable,
         filter_cols_broadcastable,
         output_channels_broadcastable))
    partial_sums_type = CudaNdarrayType((False,) * 5)
    weights_grads = weights_grads_type()
    partial_sums = partial_sums_type()
    return Apply(self, [images, hid_grads, output_shape],
                 [weights_grads, partial_sums])
def make_node(self, images, top_down):
    """
    .. todo::

        WRITEME
    """
    images = as_cuda_ndarray_variable(images)
    top_down = as_cuda_ndarray_variable(top_down)
    assert images.ndim == 4
    assert top_down.ndim == 4

    channels_broadcastable = images.type.broadcastable[0]
    batch_broadcastable = images.type.broadcastable[3]
    rows_broadcastable = False
    cols_broadcastable = False

    houtput_broadcastable = (channels_broadcastable, rows_broadcastable,
                             cols_broadcastable, batch_broadcastable)
    houtput_type = CudaNdarrayType(broadcastable=houtput_broadcastable)
    houtput = houtput_type()
    poutput_broadcastable = (channels_broadcastable, rows_broadcastable,
                             cols_broadcastable, batch_broadcastable)
    poutput_type = CudaNdarrayType(broadcastable=poutput_broadcastable)
    poutput = poutput_type()
    return Apply(self, [images, top_down], [houtput, poutput])
def test_float32_shared_constructor():
    npy_row = numpy.zeros((1, 10), dtype='float32')

    def eq(a, b):
        return a == b

    # Test that we can create a CudaNdarray.
    assert f32sc(npy_row).type == CudaNdarrayType((False, False))

    # Test that the broadcastable arg is accepted, and that it
    # doesn't strictly have to be a tuple.
    assert eq(
        f32sc(npy_row, broadcastable=(True, False)).type,
        CudaNdarrayType((True, False)))
    assert eq(
        f32sc(npy_row, broadcastable=[True, False]).type,
        CudaNdarrayType((True, False)))
    assert eq(
        f32sc(npy_row, broadcastable=numpy.array([True, False])).type,
        CudaNdarrayType([True, False]))

    # Test that we can make non-matrix shared vars.
    assert eq(
        f32sc(numpy.zeros((2, 3, 4, 5), dtype='float32')).type,
        CudaNdarrayType((False,) * 4))
def tensor_type(cls, dtype, ndim):
    # noinspection PyUnresolvedReferences,PyPackageRequirements
    from theano.sandbox.cuda import CudaNdarrayType
    if dtype != "float32":
        print("%s: WARNING: cannot handle type %r, will use float32 instead"
              % ("GpuNativeOp", dtype))
        dtype = "float32"
    return CudaNdarrayType(dtype=dtype, broadcastable=(False,) * ndim)
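# Usage sketch (an illustration, not from the original source): the dtype
# fallback above exists because CudaNdarrayType only supports float32
# storage. `owner_cls` stands in for whatever class defines tensor_type.
def _demo_tensor_type(owner_cls):
    t = owner_cls.tensor_type("float64", 3)  # prints the warning, falls back
    assert t.dtype == "float32"
    assert t.broadcastable == (False, False, False)
    return t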
def make_node(self, pvals):
    assert pvals.dtype == 'float32'
    if not isinstance(pvals.type, CudaNdarrayType):
        raise TypeError('pvals must be cudandarray', pvals)
    if self.odtype == 'auto':
        odtype = pvals.dtype
    else:
        odtype = self.odtype
    if odtype != pvals.dtype:
        raise NotImplementedError('GpuKArgmax works only if '
                                  'self.odtype == pvals.dtype',
                                  odtype, pvals.dtype)
    br = (pvals.broadcastable[0], pvals.broadcastable[1])
    vals = CudaNdarrayType(broadcastable=br)()
    indx = CudaNdarrayType(broadcastable=br)()
    return Apply(self, [pvals], [vals, indx])
def make_node(self, images, acts, denoms, dout):
    """
    .. todo::

        WRITEME
    """
    inputs = images, acts, denoms, dout
    names = "images", "acts", "denoms", "dout"
    for name, var in zip(names, inputs):
        if not isinstance(var.type, CudaNdarrayType):
            raise TypeError("CrossMapNormUndo: expected %s.type "
                            "to be CudaNdarrayType, "
                            "got %s" % (name, str(var.type)))
    assert images.ndim == 4
    assert acts.ndim == 4
    assert denoms.ndim == 4
    assert dout.ndim == 4
    # Not strictly necessary, I don't think.
    assert images.type.broadcastable == acts.type.broadcastable
    assert images.type.broadcastable == denoms.type.broadcastable
    assert images.type.broadcastable == dout.type.broadcastable

    targets_broadcastable = tuple(images.type.broadcastable)
    targets_type = CudaNdarrayType(broadcastable=targets_broadcastable)
    targets = targets_type()
    out_acts = targets_type()
    return Apply(self, [images, acts, denoms, dout], [targets, out_acts])
def make_node(self, x):
    from theano.sandbox.cuda import CudaNdarrayType
    if not isinstance(x.type, GpuArrayType):
        raise TypeError(x)
    if x.type.dtype != 'float32':
        raise TypeError(x)
    return Apply(self, [x],
                 [CudaNdarrayType(broadcastable=x.broadcastable)()])
def make_node(self, hid_acts, filters):
    if not isinstance(hid_acts.type, CudaNdarrayType):
        raise TypeError("ImageActs: expected hid_acts.type to be CudaNdarrayType, "
                        "got " + str(hid_acts.type))
    if not isinstance(filters.type, CudaNdarrayType):
        raise TypeError("ImageActs: expected filters.type to be CudaNdarrayType, "
                        "got " + str(filters.type))
    assert hid_acts.ndim == 4
    assert filters.ndim == 4

    channels_broadcastable = filters.type.broadcastable[3]
    batch_broadcastable = hid_acts.type.broadcastable[3]
    # Computing whether the rows and columns are broadcastable requires
    # doing arithmetic on quantities that are known only at runtime, like
    # the specific shapes of the image and kernel.
    rows_broadcastable = False
    cols_broadcastable = False

    targets_broadcastable = (channels_broadcastable, rows_broadcastable,
                             cols_broadcastable, batch_broadcastable)
    targets_type = CudaNdarrayType(broadcastable=targets_broadcastable)
    targets = targets_type()
    return Apply(self, [hid_acts, filters], [targets])
def test_optimization():
    op = CrossMapNorm(16, 15. / 16., 1, True)
    x_ = theano.tensor.TensorVariable(CudaNdarrayType([False] * 4))
    f = theano.function([x_], theano.grad(op(x_)[0].sum(), x_))
    nodes = [x for x in f.maker.fgraph.apply_nodes
             if type(x.op) == CrossMapNormUndo]
    assert len(nodes) == 1
    assert nodes[0].op.inplace
def make_node(self, mask_idx, image):
    mask_idx = contiguouse(mask_idx)
    image = contiguouse(image)
    assert mask_idx.dtype == "float32"
    assert image.dtype == "float32"
    output_type = CudaNdarrayType(broadcastable=(False,) * 5)
    return theano.Apply(
        self, [mask_idx, image],
        [output_type(), output_type(), output_type()])
def values_eq_approx_high_tol(a, b):
    """Compare `a` and `b` with a relaxed relative tolerance.

    This is needed so that DebugMode does not raise spurious errors caused
    by rounding: with large inputs, the order of operations can change,
    which perturbs the result slightly.
    """
    rtol = None
    if a.size > 100000:
        # For float32 the default rtol is 1e-5.
        rtol = 5e-5
    return CudaNdarrayType.values_eq_approx(a, b, rtol=rtol)
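# Usage sketch (an assumption about intended use, not from the original
# source): DebugMode consults `variable.tag.values_eq_approx` when it is
# set, so a graph optimization that introduces a rewritten output can
# relax the comparison like this. `new_out` is a hypothetical variable
# produced by the rewrite.
def _attach_high_tol(new_out):
    new_out.tag.values_eq_approx = values_eq_approx_high_tol
    return new_out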
def make_node(self, W, b, d, H, RShape=None):
    W_ = as_cuda_ndarray_variable(W)
    b_ = as_cuda_ndarray_variable(b)
    d_ = T.as_tensor_variable(d)
    H_ = as_cuda_ndarray_variable(H)
    if RShape:
        RShape_ = T.as_tensor_variable(RShape)
    else:
        RShape_ = T.as_tensor_variable([-1, -1, -1])
    return theano.Apply(self,
                        inputs=[W_, b_, d_, H_, RShape_],
                        outputs=[CudaNdarrayType(dtype=H_.dtype,
                                                 broadcastable=(False,) * 5)()])
def make_node(self, ten4, neib_shape, neib_step):
    assert ten4.dtype == 'float32'
    if not isinstance(ten4.type, CudaNdarrayType):
        raise TypeError('ten4 must be cudandarray', ten4)
    assert ten4.ndim == 4
    assert neib_shape.ndim == 1
    assert neib_step.ndim == 1
    return Apply(self, [ten4, neib_shape, neib_step],
                 [CudaNdarrayType(broadcastable=(False, False),
                                  dtype=ten4.type.dtype)()])
def make_node(self, images):
    if not isinstance(images.type, CudaNdarrayType):
        raise TypeError("CrossMapNorm: expected images.type to be CudaNdarrayType, "
                        "got " + str(images.type))
    assert images.ndim == 4
    targets_broadcastable = images.type.broadcastable
    targets_type = CudaNdarrayType(broadcastable=targets_broadcastable)
    denoms = targets_type()
    targets = targets_type()
    return Apply(self, [images], [targets, denoms])
def make_node(self, V, d, WShape, dCdH):
    """
    :param V: visible
    :param d: strides
    :param WShape: shape of the weights -> shape of this op's output
    :param dCdH: the other input, with which V will be convolved
    """
    V_ = as_cuda_ndarray_variable(V)
    d_ = T.as_tensor_variable(d)
    WShape_ = T.as_tensor_variable(WShape)
    dCdH_ = as_cuda_ndarray_variable(dCdH)
    return theano.Apply(self,
                        inputs=[V_, d_, WShape_, dCdH_],
                        outputs=[CudaNdarrayType(dtype=V_.dtype,
                                                 broadcastable=(False,) * 5)()])
def make_node(self, ten4, neib_shape, neib_step):
    ten4 = as_cuda_ndarray_variable(ten4)
    neib_shape = tensor.as_tensor_variable(neib_shape)
    neib_step = tensor.as_tensor_variable(neib_step)

    assert ten4.ndim == 4
    assert ten4.dtype == 'float32'
    assert neib_shape.ndim == 1
    assert neib_step.ndim == 1
    assert "int" in neib_shape.dtype
    assert "int" in neib_step.dtype

    return Apply(self, [ten4, neib_shape, neib_step],
                 [CudaNdarrayType(broadcastable=(False, False),
                                  dtype=ten4.type.dtype)()])
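# Shape sketch (assumption: this make_node belongs to a GPU images2neibs
# Op, which is why the output type above is rank 2). Each row of the
# output holds one flattened patch, so a (1, 1, 4, 4) input with
# neib_shape=(2, 2) and neib_step=(2, 2) yields a (4, 4) matrix.
def _demo_neib_output_shape(rows=4, cols=4, nr=2, nc=2, sr=2, sc=2):
    n_patches = ((rows - nr) // sr + 1) * ((cols - nc) // sc + 1)
    return (n_patches, nr * nc)  # (4, 4) for the defaults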
def make_node(self, images, filters):
    ibcast = images.broadcastable
    fbcast = filters.broadcastable
    igroups, icolors_per_group, irows, icols, icount = ibcast
    fmodulesR, fmodulesC, fcolors, frows, fcols = fbcast[:-2]
    fgroups, filters_per_group = fbcast[-2:]
    hbcast = (fgroups, filters_per_group, fmodulesR, fmodulesC, icount)
    if not isinstance(images.type, CudaNdarrayType):
        raise TypeError('gpu_filter_acts requires CudaNdarray images',
                        images)
    if not isinstance(filters.type, CudaNdarrayType):
        raise TypeError('gpu_filter_acts requires CudaNdarray filters',
                        filters)
    htype = CudaNdarrayType(broadcastable=hbcast)
    return theano.gof.Apply(self, [images, filters], [htype()])
def make_node(self, V, W, b, d):
    """
    :param V: visible unit, input
    :param W: weights, filter
    :param b: bias
    :param d: strides when moving the filter over the input
    """
    V_ = as_cuda_ndarray_variable(V)
    W_ = as_cuda_ndarray_variable(W)
    b_ = as_cuda_ndarray_variable(b)
    d_ = T.as_tensor_variable(d)
    broad = (V_.broadcastable[0], W_.broadcastable[0], False, False, False)
    return theano.Apply(
        self,
        inputs=[V_, W_, b_, d_],
        outputs=[CudaNdarrayType(dtype=V_.dtype, broadcastable=broad)()])
def make_node(self, *inputs):
    assert self.nout == 1
    assert len(inputs) == 2  # TODO remove
    _inputs = [gpu_contiguous(as_cuda_ndarray_variable(i)) for i in inputs]
    if self.nin > 0 and len(_inputs) != self.nin:
        raise TypeError('Wrong argument count', (self.nin, len(_inputs)))
    for i in _inputs[1:]:
        if i.type.ndim != inputs[0].type.ndim:
            raise TypeError('different ranks among inputs')
    if any(any(i.type.broadcastable) for i in inputs):
        raise Exception("PyCUDA doesn't support broadcastable dimensions")
    otype = CudaNdarrayType(broadcastable=[False] * _inputs[0].type.ndim)
    out_node = Apply(self, _inputs, [otype() for o in xrange(self.nout)])
    return out_node
def make_node(self, hid_acts, filters, output_shape=None):
    """
    .. todo::

        WRITEME

    Parameters
    ----------
    hid_acts : WRITEME
    filters : WRITEME
    output_shape : 2-element TensorVariable, optional
        The spatial shape of the image
    """
    if not isinstance(hid_acts.type, CudaNdarrayType):
        raise TypeError("ImageActs: expected hid_acts.type to be CudaNdarrayType, "
                        "got " + str(hid_acts.type))
    if not isinstance(filters.type, CudaNdarrayType):
        raise TypeError("ImageActs: expected filters.type to be CudaNdarrayType, "
                        "got " + str(filters.type))
    if output_shape is None:
        if self.stride != 1:
            raise ValueError("You must specify an output_shape for ImageActs "
                             "if the stride is not 1.")
        hid_shape = hid_acts.shape[1:3]
        kernel_shape = filters.shape[1:3]
        output_shape = hid_shape + kernel_shape - 2 * self.pad - 1
    assert hid_acts.ndim == 4
    assert filters.ndim == 4

    channels_broadcastable = filters.type.broadcastable[3]
    batch_broadcastable = hid_acts.type.broadcastable[3]
    # Computing whether the rows and columns are broadcastable requires
    # doing arithmetic on quantities that are known only at runtime, like
    # the specific shapes of the image and kernel.
    rows_broadcastable = False
    cols_broadcastable = False

    targets_broadcastable = (channels_broadcastable, rows_broadcastable,
                             cols_broadcastable, batch_broadcastable)
    targets_type = CudaNdarrayType(broadcastable=targets_broadcastable)
    targets = targets_type()
    return Apply(self, [hid_acts, filters, output_shape], [targets])
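# Worked example (illustration only, not from the original source): the
# default output_shape above inverts the "valid" convolution shape rule,
# img = hid + kernel - 2*pad - 1.
def _demo_image_acts_shape(hid_rows=10, kernel_rows=5, pad=0):
    img_rows = hid_rows + kernel_rows - 2 * pad - 1  # 14 for the defaults
    # Round trip: a "valid" convolution of the reconstructed image with
    # the same kernel and padding recovers the hidden spatial size.
    assert img_rows - kernel_rows + 2 * pad + 1 == hid_rows
    return img_rows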
def make_node(self, pvals, unis):
    assert pvals.dtype == 'float32'
    assert unis.dtype == 'float32'
    if not isinstance(pvals.type, CudaNdarrayType):
        raise TypeError('pvals must be cudandarray', pvals)
    if not isinstance(unis.type, CudaNdarrayType):
        raise TypeError('unis must be cudandarray', unis)
    if self.odtype == 'auto':
        odtype = pvals.dtype
    else:
        odtype = self.odtype
    if odtype != pvals.dtype:
        raise NotImplementedError('GpuMultinomialFromUniform2 works only if '
                                  'self.odtype == pvals.dtype',
                                  odtype, pvals.dtype)
    br = (unis.broadcastable[0], unis.broadcastable[1])
    out = CudaNdarrayType(broadcastable=br)()
    return Apply(self, [pvals, unis], [out])
def make_node(self, mask_idx, image, og_sum, og_pow):
    mask_idx = contiguouse(mask_idx)
    image = contiguouse(image)
    inputs = [mask_idx, image]
    if str(og_sum) == "<DisconnectedType>" and \
            str(og_pow) == "<DisconnectedType>":
        raise ValueError("At least one of the sum or pow gradients "
                         "must be provided")
    if str(og_sum) != "<DisconnectedType>":
        og_sum = contiguouse(og_sum)
        inputs.append(og_sum)
    if str(og_pow) != "<DisconnectedType>":
        og_pow = contiguouse(og_pow)
        inputs.append(og_pow)
    output_type = CudaNdarrayType(broadcastable=(False,) * 4)
    return theano.Apply(self, inputs, [output_type()])
def make_node(self, images):
    images = as_cuda_ndarray_variable(images)
    assert images.ndim == 4

    channels_broadcastable = images.type.broadcastable[0]
    batch_broadcastable = images.type.broadcastable[3]
    rows_broadcastable = False
    cols_broadcastable = False

    targets_broadcastable = (channels_broadcastable, rows_broadcastable,
                             cols_broadcastable, batch_broadcastable)
    targets_type = CudaNdarrayType(broadcastable=targets_broadcastable)
    targets = targets_type()
    return Apply(self, [images], [targets])
def new_auto_update(cls, generator, ndim, dtype, size, seed):
    """
    Return a symbolic sample from generator.

    cls dictates the random variable (e.g. uniform, normal).
    """
    v_size = theano.tensor.as_tensor_variable(size)
    if ndim is None:
        ndim = get_vector_length(v_size)
    self = cls(output_type=CudaNdarrayType((False,) * ndim),
               seed=seed, destructive=False)
    o_gen, sample = self(generator, cast(v_size, 'int32'))
    sample.generator = generator        # for the user
    sample.update = (generator, o_gen)  # for CURAND_RandomStreams
    generator.default_update = o_gen    # pfunc uses this attribute
    return sample
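# Usage sketch (hedged; `uniform_cls` and `gen` are hypothetical stand-ins
# for a concrete CURAND random Op class and a symbolic generator state):
def _demo_sample(uniform_cls, gen):
    sample = uniform_cls.new_auto_update(gen, ndim=None, dtype='float32',
                                         size=(100,), seed=42)
    # `sample.update` can be passed to theano.function's updates so the
    # generator state advances on every call; pfunc picks this up
    # automatically through `gen.default_update`.
    return sample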
def make_node(self, *inputs):
    _inputs = [gpu_contiguous(as_cuda_ndarray_variable(i)) for i in inputs]
    if self.nin > 0 and len(_inputs) != self.nin:
        raise TypeError('Wrong argument count', (self.nin, len(_inputs)))
    for i in _inputs[1:]:
        if i.type.ndim != inputs[0].type.ndim:
            raise TypeError('different ranks among inputs')
    if any(any(i.type.broadcastable) for i in inputs):
        raise Exception("PyCUDA doesn't support broadcastable dimensions")
    assert len(inputs) == 2  # TODO remove
    assert self.nout == 1

    otype = CudaNdarrayType(broadcastable=[False] * _inputs[0].type.ndim)
    fct_name = "pycuda_elemwise_%s" % str(self.scalar_op)
    out_node = Apply(self, _inputs, [otype() for o in xrange(self.nout)])
    in_name = ["i" + str(id) for id in range(len(inputs))]
    out_name = ["o" + str(id) for id in range(self.nout)]
    c_code = self.scalar_op.c_code(out_node, "some_name",
                                   tuple([n + "[i]" for n in in_name]),
                                   tuple(n + "[i]" for n in out_name),
                                   {})
    c_code_param = ", ".join(
        [var.type.dtype_specs()[1] + " *" + name
         for var, name in (zip(inputs, in_name) +
                           zip(out_node.outputs, out_name))] +
        ["int size"])
    mod = SourceModule("""
  #include <Python.h>
  #include <numpy/arrayobject.h>

  __global__ void %s(%s)
  {
    int i = (blockIdx.x + blockIdx.y * gridDim.x) * (blockDim.x * blockDim.y);
    i += threadIdx.x + threadIdx.y * blockDim.x;
    if (i < size) {
        %s
    }
  }
  """ % (fct_name, c_code_param, c_code))
    self.pycuda_fct = mod.get_function(fct_name)
    return out_node
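# Launch-dimension sketch (illustration only, not part of the Op): the
# kernel above linearizes a 2D grid and 2D block into one global index,
# so a launch only needs enough threads to cover `size` elements, e.g.:
def _demo_launch_dims(size, threads_per_block=512):
    import math
    n_blocks = int(math.ceil(size / float(threads_per_block)))
    return (n_blocks, 1), (threads_per_block, 1, 1)  # grid, block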
def make_node(self, images):
    """
    .. todo::

        WRITEME
    """
    images = as_cuda_ndarray_variable(images)
    assert images.ndim == 4

    channels_broadcastable = images.type.broadcastable[0]
    batch_broadcastable = images.type.broadcastable[3]
    rows_broadcastable = False
    cols_broadcastable = False

    targets_broadcastable = (channels_broadcastable, rows_broadcastable,
                             cols_broadcastable, batch_broadcastable)
    targets_type = CudaNdarrayType(broadcastable=targets_broadcastable)
    targets = targets_type()

    seed = self.seed_state
    seed = as_cuda_ndarray_variable(seed)
    return Apply(self, [images, seed], [targets])
def make_node(self, V, d, WShape, dCdH):
    """
    Parameters
    ----------
    V
        Visible.
    d
        Strides.
    WShape
        Shape of the weights -> shape of this op's output.
    dCdH
        The other input, with which V will be convolved.
    """
    V_ = as_cuda_ndarray_variable(V)
    d_ = T.as_tensor_variable(d)
    WShape_ = T.as_tensor_variable(WShape)
    dCdH_ = as_cuda_ndarray_variable(dCdH)
    broad = (False,) * 5
    return theano.Apply(self,
                        inputs=[V_, d_, WShape_, dCdH_],
                        outputs=[CudaNdarrayType(dtype=V_.dtype,
                                                 broadcastable=broad)()])
def output_type(self, inp):
    return CudaNdarrayType(broadcastable=[False] * inp.type.ndim)
def output_type(self, inp):
    # Add one extra dim for real/imag.
    return CudaNdarrayType(broadcastable=[False] * (inp.type.ndim + 1))
def output_type(self, inp):
    # Remove the extra real/imag dim.
    return CudaNdarrayType(broadcastable=[False] * (inp.type.ndim - 1))
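# Rank bookkeeping sketch (assumption: the three output_type variants
# above belong to FFT-style ops, where the trailing dimension of size 2
# holds the real and imaginary parts). `fwd_op` and `inv_op` are
# hypothetical instances of the second and third variants.
def _demo_fft_output_ranks(fwd_op, inv_op, x):
    y_type = fwd_op.output_type(x)         # rank ndim(x) + 1
    z_type = inv_op.output_type(y_type())  # back to rank ndim(x)
    assert len(y_type.broadcastable) == len(x.type.broadcastable) + 1
    assert len(z_type.broadcastable) == len(x.type.broadcastable)
    return y_type, z_type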