def make_node(self, A):
    ctx_name = infer_context_name(A)
    A = as_gpuarray_variable(A, ctx_name)
    if A.ndim != 2:
        raise LinAlgError("Matrix rank error")
    assert A.dtype == 'float32'
    if self.compute_uv:
        return theano.Apply(
            self,
            [A],
            # return S, U, VT
            [GpuArrayType(A.dtype, broadcastable=[False],
                          context_name=ctx_name)(),
             A.type(),
             A.type()])
    else:
        return theano.Apply(
            self,
            [A],
            # return only S
            [GpuArrayType(A.dtype, broadcastable=[False],
                          context_name=ctx_name)()])
def make_node(self, images, top_down):
    """
    .. todo::

        WRITEME
    """
    images = as_cuda_ndarray_variable(images)
    top_down = as_cuda_ndarray_variable(top_down)

    assert images.ndim == 4
    assert top_down.ndim == 4

    channels_broadcastable = images.type.broadcastable[0]
    batch_broadcastable = images.type.broadcastable[3]
    rows_broadcastable = False
    cols_broadcastable = False

    houtput_broadcastable = (channels_broadcastable, rows_broadcastable,
                             cols_broadcastable, batch_broadcastable)
    # cuda-convnet kernels only support float32
    houtput_type = GpuArrayType('float32',
                                broadcastable=houtput_broadcastable)
    houtput = houtput_type()

    poutput_broadcastable = (channels_broadcastable, rows_broadcastable,
                             cols_broadcastable, batch_broadcastable)
    poutput_type = GpuArrayType('float32',
                                broadcastable=poutput_broadcastable)
    poutput = poutput_type()

    return Apply(self, [images, top_down], [houtput, poutput])
def make_node(self, A):
    ctx_name = infer_context_name(A)
    A = as_gpuarray_variable(A, ctx_name)
    A = gpu_contiguous(A)
    if A.ndim != 2:
        raise LinAlgError("Matrix rank error")
    if A.dtype != "float32":
        raise TypeError("only `float32` is supported for now")
    if self.compute_uv:
        return theano.Apply(
            self,
            [A],
            # return S, U, VT
            [
                GpuArrayType(
                    A.dtype, broadcastable=[False], context_name=ctx_name
                )(),
                A.type(),
                A.type(),
            ],
        )
    else:
        return theano.Apply(
            self,
            [A],
            # return only S
            [GpuArrayType(A.dtype, broadcastable=[False], context_name=ctx_name)()],
        )
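# Usage sketch for the SVD make_node above -- a minimal illustration, not a
# definitive API. It assumes the enclosing class is a GPU-backed SVD op taking
# a `compute_uv` flag (the name `GpuSVD` below is hypothetical) and that a GPU
# context is configured.
import theano
import theano.tensor as T

A = T.matrix(dtype='float32')
# With compute_uv set, make_node declares three outputs in the order S, U, VT:
# a 1-D vector of singular values plus two matrices of A's type.
S, U, VT = GpuSVD(compute_uv=True)(A)
f = theano.function([A], [S, U, VT])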
def make_node(self, inp1, inp2):
    if not cublas_available:
        raise RuntimeError(
            "CUBLAS is not available and "
            "GpuCublasTriangularSolve Op "
            "can not be constructed."
        )
    context_name = infer_context_name(inp1, inp2)

    inp1 = as_gpuarray_variable(inp1, context_name)
    inp2 = as_gpuarray_variable(inp2, context_name)

    inp1 = gpu_contiguous(inp1)
    inp2 = gpu_contiguous(inp2)

    assert inp1.ndim == 2
    assert inp2.ndim in [1, 2]
    assert inp1.dtype == inp2.dtype

    return theano.Apply(
        self,
        [inp1, inp2],
        [
            GpuArrayType(
                inp1.dtype,
                broadcastable=inp2.broadcastable,
                context_name=context_name,
            )()
        ],
    )
def make_node(self, inp):
    if not cusolver_available:
        raise RuntimeError('CUSOLVER is not available and '
                           'GpuLU Op can not be constructed.')
    if skcuda.__version__ <= '0.5.1':
        warnings.warn(
            'The GpuLU op requires scikit-cuda > 0.5.1 to work with CUDA 8'
        )
    if not pygpu_available:
        raise RuntimeError('Missing pygpu or triu/tril functions. '
                           'Install or update libgpuarray.')
    context_name = infer_context_name(inp)
    inp = as_gpuarray_variable(inp, context_name)
    inp = gpu_contiguous(inp)

    # this op can only operate on float32 matrices
    # because of the current implementation of triu/tril.
    # TODO: support float64
    assert inp.ndim == 2
    assert inp.dtype == 'float32'

    # outputs LU in a single matrix, and a pivots array
    pivots_type = GpuArrayType('int32',
                               broadcastable=inp[0].broadcastable,
                               context_name=context_name)()
    return theano.Apply(self, [inp], [inp.type(), pivots_type])
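# Sketch of driving the LU op above, under the assumptions stated in its
# guards (CUSOLVER, scikit-cuda > 0.5.1 and pygpu installed); the class name
# `GpuLU` is taken from the error message and may differ in practice. The op
# returns L and U packed into a single matrix plus an int32 pivot array.
import numpy as np
import theano
import theano.tensor as T

A = T.matrix(dtype='float32')
LU, pivots = GpuLU()(A)
f = theano.function([A], [LU, pivots])
lu_packed, piv = f(np.eye(4, dtype='float32'))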
def make_node(self, inp1, inp2):
    if not cusolver_available:
        raise RuntimeError('CUSOLVER is not available and '
                           'GpuCusolverSolve Op can not be constructed.')
    if skcuda.__version__ <= '0.5.1':
        warnings.warn(
            'The GpuSolve op requires scikit-cuda > 0.5.1 to work with CUDA 8'
        )
    context_name = infer_context_name(inp1, inp2)
    inp1 = as_gpuarray_variable(inp1, context_name)
    inp2 = as_gpuarray_variable(inp2, context_name)
    inp1 = gpu_contiguous(inp1)
    inp2 = gpu_contiguous(inp2)

    # this op can only operate on float32 matrices
    assert inp1.ndim == 2
    assert inp2.ndim == 2
    assert inp1.dtype == 'float32'
    assert inp2.dtype == 'float32'

    return theano.Apply(self, [inp1, inp2], [
        GpuArrayType('float32',
                     broadcastable=inp1.broadcastable,
                     context_name=context_name)()
    ])
def make_node(self, images, acts, denoms, dout):
    """
    .. todo::

        WRITEME
    """
    inputs = images, acts, denoms, dout
    names = "images", "acts", "denoms", "dout"
    for name, var in zip(names, inputs):
        if not isinstance(var.type, GpuArrayType):
            raise TypeError("CrossMapNormUndo: expected %s.type "
                            "to be GpuArrayType, "
                            "got %s" % (name, str(var.type)))

    assert images.ndim == 4
    assert acts.ndim == 4
    assert denoms.ndim == 4
    assert dout.ndim == 4

    # Not strictly necessary I don't think
    assert images.type.broadcastable == acts.type.broadcastable
    assert images.type.broadcastable == denoms.type.broadcastable
    assert images.type.broadcastable == dout.type.broadcastable

    targets_broadcastable = tuple(images.type.broadcastable)
    # cuda-convnet kernels only support float32
    targets_type = GpuArrayType('float32',
                                broadcastable=targets_broadcastable)
    targets = targets_type()
    out_acts = targets_type()

    return Apply(self, [images, acts, denoms, dout], [targets, out_acts])
def make_node(self, inp1, inp2):
    if not cusolver_available:
        raise RuntimeError(
            "CUSOLVER is not available and "
            "GpuCusolverSolve Op can not be constructed."
        )
    if skcuda.__version__ <= "0.5.1":
        warnings.warn(
            "The GpuSolve op requires scikit-cuda > 0.5.1 to work with CUDA 8"
        )
    context_name = infer_context_name(inp1, inp2)

    inp1 = as_gpuarray_variable(inp1, context_name)
    inp2 = as_gpuarray_variable(inp2, context_name)

    inp1 = gpu_contiguous(inp1)
    inp2 = gpu_contiguous(inp2)

    assert inp1.ndim == 2
    assert inp2.ndim == 2
    assert inp1.dtype == inp2.dtype

    return theano.Apply(
        self,
        [inp1, inp2],
        [
            GpuArrayType(
                inp1.dtype,
                broadcastable=inp1.broadcastable,
                context_name=context_name,
            )()
        ],
    )
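# Minimal sketch of solving A x = b with the solve op above (assumes the
# enclosing class is the cuSOLVER-backed `GpuCusolverSolve` named in its error
# message; any constructor options are not shown in this snippet).
import theano
import theano.tensor as T

A = T.matrix(dtype='float32')
b = T.matrix(dtype='float32')     # the rhs must also be 2-D per make_node
x = GpuCusolverSolve()(A, b)
solve = theano.function([A, b], x)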
def make_node(self, images, filters):
    """
    .. todo::

        WRITEME
    """
    if not isinstance(images.type, GpuArrayType):
        raise TypeError(
            "FilterActs: expected images.type to be GpuArrayType, "
            "got " + str(images.type))

    if not isinstance(filters.type, GpuArrayType):
        raise TypeError(
            "FilterActs: expected filters.type to be GpuArrayType, "
            "got " + str(filters.type))

    assert images.ndim == 4
    assert filters.ndim == 4

    channels_broadcastable = filters.type.broadcastable[3]
    batch_broadcastable = images.type.broadcastable[3]
    # Computing whether the rows and columns are broadcastable requires
    # doing arithmetic on quantities that are known only at runtime, like
    # the specific shape of the image and kernel
    rows_broadcastable = False
    cols_broadcastable = False

    targets_broadcastable = (channels_broadcastable, rows_broadcastable,
                             cols_broadcastable, batch_broadcastable)
    # cuda-convnet kernels only support float32
    targets_type = GpuArrayType('float32',
                                broadcastable=targets_broadcastable)
    targets = targets_type()

    return Apply(self, [images, filters], [targets])
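# Shape sketch for FilterActs' make_node: the broadcastable indexing above
# implies the cuda-convnet "c01b" layout (an assumption; the snippet itself
# only fixes which axes carry channels and batch).
#   images  : (input channels, image rows,  image cols,  batch)
#   filters : (input channels, filter rows, filter cols, num filters)
channels, rows, cols, batch = 3, 32, 32, 128
f_rows, f_cols, num_filters = 5, 5, 64
out_rows = rows - f_rows + 1      # 'valid'-style convolution, stride 1
out_cols = cols - f_cols + 1
# The output mirrors the broadcastable tuple built in make_node:
out_shape = (num_filters, out_rows, out_cols, batch)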
def test_optimization():
    op = CrossMapNorm(16, 15. / 16., 1, True)
    x_ = theano.tensor.TensorVariable(GpuArrayType('float32', [False] * 4))
    f = theano.function([x_], theano.grad(op(x_)[0].sum(), x_))
    nodes = [x for x in f.maker.fgraph.apply_nodes
             if type(x.op) == CrossMapNormUndo]
    assert len(nodes) == 1
    assert nodes[0].op.inplace
def make_node(self, images, hid_grads, output_shape):
    """
    .. todo::

        WRITEME
    """
    if not isinstance(images.type, GpuArrayType):
        raise TypeError("WeightActs: expected images.type "
                        "to be GpuArrayType, "
                        "got " + str(images.type))
    if not isinstance(hid_grads.type, GpuArrayType):
        raise TypeError("WeightActs: expected hid_grads.type "
                        "to be GpuArrayType, "
                        "got " + str(hid_grads.type))

    assert images.ndim == 4
    assert hid_grads.ndim == 4

    input_channels_broadcastable = images.type.broadcastable[0]
    # We don't know anything about filter_rows or filter_cols at compile
    # time, so we assume they're not broadcastable.
    filter_rows_broadcastable = False
    filter_cols_broadcastable = False
    output_channels_broadcastable = hid_grads.type.broadcastable[0]

    # cuda-convnet kernels only support float32
    weights_grads_type = GpuArrayType(
        'float32',
        (input_channels_broadcastable,
         filter_rows_broadcastable,
         filter_cols_broadcastable,
         output_channels_broadcastable))
    partial_sums_type = GpuArrayType('float32', (False,) * 5)
    weights_grads = weights_grads_type()
    partial_sums = partial_sums_type()

    return Apply(self, [images, hid_grads, output_shape],
                 [weights_grads, partial_sums])
def make_node(self, hid_acts, filters, output_shape=None):
    """
    .. todo::

        WRITEME

    Parameters
    ----------
    hid_acts : WRITEME
    filters : WRITEME
    output_shape : 2-element TensorVariable, optional
        The spatial shape of the image
    """
    if not isinstance(hid_acts.type, GpuArrayType):
        raise TypeError("ImageActs: expected hid_acts.type to be "
                        "GpuArrayType, got " + str(hid_acts.type))

    if not isinstance(filters.type, GpuArrayType):
        raise TypeError("ImageActs: expected filters.type to be "
                        "GpuArrayType, got " + str(filters.type))

    if output_shape is None:
        if self.stride != 1:
            raise ValueError("You must specify an output_shape for "
                             "ImageActs if the stride is not 1.")
        hid_shape = hid_acts.shape[1:3]
        kernel_shape = filters.shape[1:3]
        output_shape = hid_shape + kernel_shape - 2 * self.pad - 1

    assert hid_acts.ndim == 4
    assert filters.ndim == 4

    channels_broadcastable = filters.type.broadcastable[3]
    batch_broadcastable = hid_acts.type.broadcastable[3]
    # Computing whether the rows and columns are broadcastable requires
    # doing arithmetic on quantities that are known only at runtime, like
    # the specific shape of the image and kernel
    rows_broadcastable = False
    cols_broadcastable = False

    targets_broadcastable = (channels_broadcastable, rows_broadcastable,
                             cols_broadcastable, batch_broadcastable)
    # cuda-convnet kernels only support float32
    targets_type = GpuArrayType('float32',
                                broadcastable=targets_broadcastable)
    targets = targets_type()

    return Apply(self, [hid_acts, filters, output_shape], [targets])
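# Worked example of the default output_shape arithmetic above (only reached
# when stride == 1): with 8x8 hidden activations, 5x5 filters and pad == 0,
# the transposed ("full") convolution recovers a 12x12 image.
hid_rows, kernel_rows, pad = 8, 5, 0
out_rows = hid_rows + kernel_rows - 2 * pad - 1      # == 12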
def make_node(self, images):
    """
    .. todo::

        WRITEME
    """
    if not isinstance(images.type, GpuArrayType):
        raise TypeError(
            "CrossMapNorm: expected images.type to be GpuArrayType, "
            "got " + str(images.type))

    assert images.ndim == 4

    targets_broadcastable = images.type.broadcastable
    # cuda-convnet kernels only support float32
    targets_type = GpuArrayType('float32',
                                broadcastable=targets_broadcastable)
    denoms = targets_type()
    targets = targets_type()

    return Apply(self, [images], [targets, denoms])
def make_node(self, images, filters):
    """
    .. todo::

        WRITEME
    """
    if not isinstance(images.type, GpuArrayType):
        raise TypeError('gpu_filter_acts requires GpuArray images', images)
    if not isinstance(filters.type, GpuArrayType):
        raise TypeError('gpu_filter_acts requires GpuArray filters', filters)

    ibcast = images.broadcastable
    fbcast = filters.broadcastable
    igroups, icolors_per_group, irows, icols, icount = ibcast
    fmodulesR, fmodulesC, fcolors, frows, fcols = fbcast[:-2]
    fgroups, filters_per_group = fbcast[-2:]
    hbcast = (fgroups, filters_per_group, fmodulesR, fmodulesC, icount)

    htype = GpuArrayType('float32', broadcastable=hbcast)
    return theano.gof.Apply(self, [images, filters], [htype()])
def make_node(self, inp1, inp2):
    self.context = basic_ops.infer_context_name(inp1, inp2)

    inp1 = basic_ops.as_gpuarray_variable(inp1, self.context)
    inp2 = basic_ops.as_gpuarray_variable(inp2, self.context)

    inp1 = basic_ops.gpu_contiguous(inp1)
    inp2 = basic_ops.gpu_contiguous(inp2)

    # this op can only operate on float32 matrices
    assert inp1.ndim == 2
    assert inp2.ndim == 2
    assert inp1.dtype == 'float32'
    assert inp2.dtype == 'float32'

    return theano.Apply(self, [inp1, inp2], [
        GpuArrayType('float32',
                     broadcastable=inp1.broadcastable,
                     context_name=self.context)()
    ])
def make_node(self, images):
    """
    .. todo::

        WRITEME
    """
    images = as_cuda_ndarray_variable(images)

    assert images.ndim == 4

    channels_broadcastable = images.type.broadcastable[0]
    batch_broadcastable = images.type.broadcastable[3]
    rows_broadcastable = False
    cols_broadcastable = False

    targets_broadcastable = (channels_broadcastable, rows_broadcastable,
                             cols_broadcastable, batch_broadcastable)
    # cuda-convnet kernels only support float32
    targets_type = GpuArrayType('float32',
                                broadcastable=targets_broadcastable)
    targets = targets_type()

    return Apply(self, [images], [targets])
def make_node(self, inp1, inp2):
    if not cublas_available:
        raise RuntimeError('CUBLAS is not available and '
                           'GpuCublasTriangularSolve Op '
                           'can not be constructed.')
    context_name = infer_context_name(inp1, inp2)
    inp1 = as_gpuarray_variable(inp1, context_name)
    inp2 = as_gpuarray_variable(inp2, context_name)
    inp1 = gpu_contiguous(inp1)
    inp2 = gpu_contiguous(inp2)

    # this op can only operate on float32 matrices
    assert inp1.ndim == 2
    assert inp2.ndim in [1, 2]
    assert inp1.dtype == 'float32'
    assert inp2.dtype == 'float32'

    return theano.Apply(self, [inp1, inp2],
                        [GpuArrayType('float32',
                                      broadcastable=inp2.broadcastable,
                                      context_name=context_name)()])
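# Usage sketch for the triangular solve above -- illustrative only. It assumes
# CUBLAS is available and that the op takes a `lower` option at construction,
# which this snippet does not show.
import theano
import theano.tensor as T

A = T.matrix(dtype='float32')     # triangular system matrix
b = T.vector(dtype='float32')     # make_node also accepts a 2-D rhs
x = GpuCublasTriangularSolve(lower=True)(A, b)
solve = theano.function([A, b], x)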
def output_type(self, inp):
    return GpuArrayType(inp.dtype,
                        broadcastable=[False] * inp.type.ndim,
                        context_name=inp.type.context_name)
def test_cross_map_norm_simple():
    op = CrossMapNorm(16, 15. / 16., 1., True)
    x = CudaNdarray(numpy.ones((16, 2, 2, 2), dtype='float32'))
    x_ = theano.tensor.TensorVariable(GpuArrayType('float32', [False] * 4))
    f = theano.function([x_], op(x_)[0])
    numpy.testing.assert_allclose(f(x), 0.0625)
def get_gpu_tensor(self):
    broadcastable = (False,) * self.tensor_size
    return GpuArrayType(self.dtype, broadcastable)()
def output_type(self, inp):
    # add one extra dim for real/imag
    return GpuArrayType(inp.dtype,
                        broadcastable=[False] * (inp.type.ndim + 1),
                        context_name=inp.type.context_name)
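# Shape sketch for the output_type above: the extra trailing axis of length 2
# holds the real and imaginary parts, cuFFT-style. For a real-to-complex FFT
# of a (batch, n) float32 input, the output would plausibly be (batch, k, 2)
# with k = n // 2 + 1 -- an assumption, since output_type only fixes the
# number of dimensions, not the sizes.
batch, n = 16, 256
k = n // 2 + 1                    # rfft output length (assumed convention)
out_shape = (batch, k, 2)         # matches the ndim + 1 declared above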