def make_node(self, x, y, ilist):
    """Build the Apply node for inc/set of `x[ilist]` by `y` on the GPU.

    `x` and `y` are moved to a common GPU context; `ilist` stays a CPU
    integer vector.  The single output has the same type as `x`.
    """
    ctx = infer_context_name(x, y)
    x_gpu = as_gpuarray_variable(x, ctx)
    y_gpu = as_gpuarray_variable(y, ctx)
    idx = tt.as_tensor_variable(ilist)
    assert x_gpu.type.ndim >= y_gpu.type.ndim
    if idx.type.dtype not in tt.integer_dtypes:
        raise TypeError("index must be integers")
    if idx.type.ndim != 1:
        raise TypeError("index must be vector")
    if x_gpu.type.ndim == 0:
        raise TypeError("cannot index into a scalar")
    if y_gpu.type.ndim > x_gpu.type.ndim:
        opname = "set" if self.set_instead_of_inc else "increment"
        raise TypeError(
            "cannot %s x subtensor with ndim=%s by y with ndim=%s "
            % (opname, x_gpu.type.ndim, y_gpu.type.ndim)
        )
    return gof.Apply(self, [x_gpu, y_gpu, idx], [x_gpu.type()])
def make_node(self, x, y, *inputs):
    """Delegate index validation to IncSubtensor.make_node, then rebuild
    the Apply with GPU-typed `x`/`y` and an output of `x`'s GPU type."""
    ctx = infer_context_name(x, y)
    x = as_gpuarray_variable(x, ctx)
    y = as_gpuarray_variable(y, ctx)
    base = IncSubtensor.make_node(self, x, y, *inputs)
    # Keep the validated index inputs, swap in the GPU variables.
    return gof.Apply(self, [x, y] + base.inputs[2:], [x.type()])
def make_node(self, x, y, ilist):
    """
    It differs from GpuAdvancedIncSubtensor1 in that the index vector is
    also transferred to the GPU, so that the indexes are of type long.
    """
    ctx = infer_context_name(x, y, ilist)
    x_gpu = as_gpuarray_variable(x, ctx)
    # Align y's dtype with x before the transfer.
    y_gpu = as_gpuarray_variable(y.astype(x.dtype), ctx)
    idx = as_gpuarray_variable(ilist, ctx)
    assert x_gpu.type.ndim >= y_gpu.type.ndim
    if idx.type.dtype not in tt.integer_dtypes:
        raise TypeError("index must be integers")
    if idx.type.ndim != 1:
        raise TypeError("index must be vector")
    if x_gpu.type.ndim == 0:
        raise TypeError("cannot index into a scalar")
    if y_gpu.type.ndim > x_gpu.type.ndim:
        opname = "set" if self.set_instead_of_inc else "increment"
        raise TypeError(
            "cannot %s x subtensor with ndim=%s by y with ndim=%s "
            % (opname, x_gpu.type.ndim, y_gpu.type.ndim)
        )
    return gof.Apply(self, [x_gpu, y_gpu, idx], [x_gpu.type()])
def make_node(self, inp, s=None):
    """Build the Apply node for the inverse cuFFT.

    A shape parameter is expected as an input; for now it only manages
    odd transform sizes.  Padding and truncation (numpy-style) would
    have to be done inside the op, since cuFFT expects arrays matching
    the plan's shape; the effect of padding on gradients has yet to be
    investigated.
    """
    # Fail fast when any of the required runtime libraries is absent.
    for available, lib in ((skcuda_available, "skcuda"),
                           (pygpu_available, "pygpu"),
                           (pycuda_available, "pycuda")):
        if not available:
            raise RuntimeError("%s is needed for CuIFFTOp" % lib)
    inp = gpu_contiguous(as_gpuarray_variable(inp, infer_context_name(inp)))
    if s is None:
        # No shape given: assume an even-sized real transform.
        s = inp.shape[1:-1]
        s = tt.set_subtensor(s[-1], (s[-1] - 1) * 2)
    s = tt.as_tensor_variable(s)
    assert inp.dtype == "float32"
    assert s.ndim == 1
    return theano.Apply(self, [inp, s], [self.output_type(inp)()])
def make_node(self, inp1, inp2):
    """Apply node for a cuSOLVER dense solve of `inp1 @ x = inp2`.

    Both 2d inputs are moved to a common GPU context, made C-contiguous
    and must share a dtype; the output takes `inp1`'s broadcastable
    pattern.
    """
    if not cusolver_available:
        raise RuntimeError(
            "CUSOLVER is not available and "
            "GpuCusolverSolve Op can not be constructed."
        )
    if skcuda.__version__ <= "0.5.1":
        warnings.warn(
            "The GpuSolve op requires scikit-cuda > 0.5.1 to work with CUDA 8"
        )
    ctx = infer_context_name(inp1, inp2)
    A = gpu_contiguous(as_gpuarray_variable(inp1, ctx))
    b = gpu_contiguous(as_gpuarray_variable(inp2, ctx))
    assert A.ndim == 2
    assert b.ndim == 2
    assert A.dtype == b.dtype
    out_type = GpuArrayType(
        A.dtype, broadcastable=A.broadcastable, context_name=ctx
    )
    return theano.Apply(self, [A, b], [out_type()])
def make_node(self, inp1, inp2):
    """Apply node for a float32 cuSOLVER dense solve.

    Inputs are transferred to a shared GPU context and made
    C-contiguous; the output takes `inp1`'s broadcastable pattern.
    """
    if not cusolver_available:
        raise RuntimeError('CUSOLVER is not available and '
                           'GpuCusolverSolve Op can not be constructed.')
    if skcuda.__version__ <= '0.5.1':
        warnings.warn(
            'The GpuSolve op requires scikit-cuda > 0.5.1 to work with CUDA 8'
        )
    ctx = basic_ops.infer_context_name(inp1, inp2)
    A = basic_ops.gpu_contiguous(basic_ops.as_gpuarray_variable(inp1, ctx))
    b = basic_ops.gpu_contiguous(basic_ops.as_gpuarray_variable(inp2, ctx))
    # this op can only operate on float32 matrices
    for m in (A, b):
        assert m.ndim == 2
        assert m.dtype == 'float32'
    out_type = GpuArrayType('float32',
                            broadcastable=A.broadcastable,
                            context_name=ctx)
    return theano.Apply(self, [A, b], [out_type()])
def make_node(self, ten4, neib_shape, neib_step=None):
    """Apply node extracting neighbourhoods from a 4d GPU tensor.

    `neib_shape` and `neib_step` are 1d integer CPU tensors; when no
    step is given it defaults to the neighbourhood shape (non-
    overlapping tiles).  The output is a 2d GPU array with `ten4`'s
    dtype and context.
    """
    ten4 = as_gpuarray_variable(ten4, infer_context_name(ten4))
    neib_shape = tt.as_tensor_variable(neib_shape)
    neib_step = (
        neib_shape if neib_step is None else tt.as_tensor_variable(neib_step)
    )
    assert ten4.ndim == 4
    assert neib_shape.ndim == 1
    assert neib_step.ndim == 1
    assert neib_shape.dtype in tt.integer_dtypes
    assert neib_step.dtype in tt.integer_dtypes
    out_type = GpuArrayType(
        broadcastable=(False, False),
        dtype=ten4.type.dtype,
        context_name=ten4.type.context_name,
    )
    return Apply(self, [ten4, neib_shape, neib_step], [out_type()])
def make_node(self, inp):
    """Build the Apply node for the LU factorization of `inp`.

    Outputs the LU factors packed in a single matrix (same type as
    `inp`) plus an int32 pivots array.
    """
    # Fail early if any runtime dependency of this op is missing.
    if not cusolver_available:
        raise RuntimeError('CUSOLVER is not available and '
                           'GpuLU Op can not be constructed.')
    if skcuda.__version__ <= '0.5.1':
        warnings.warn(
            'The GpuLU op requires scikit-cuda > 0.5.1 to work with CUDA 8'
        )
    if not pygpu_available:
        raise RuntimeError('Missing pygpu or triu/tril functions.'
                           'Install or update libgpuarray.')
    context_name = infer_context_name(inp)
    inp = as_gpuarray_variable(inp, context_name)
    inp = gpu_contiguous(inp)
    # this op can only operate on float32 matrices
    # because of current implementation of triu/tril.
    # TODO: support float64
    assert inp.ndim == 2
    assert inp.dtype == 'float32'
    # outputs LU in a single matrix, and a pivots array
    # NOTE(review): the pivots type borrows inp[0].broadcastable (the
    # pattern of one matrix row) to get a 1d non-broadcastable type —
    # confirm this is intentional rather than inp.broadcastable[:1].
    pivots_type = GpuArrayType('int32',
                               broadcastable=inp[0].broadcastable,
                               context_name=context_name)()
    return theano.Apply(self, [inp], [inp.type(), pivots_type])
def make_node(self, inp1, inp2):
    """Apply node for a float32 cuSOLVER solve (duplicate variant).

    Moves both matrices to one GPU context, forces C-contiguity and
    emits one float32 output with `inp1`'s broadcastable pattern.
    """
    if not cusolver_available:
        raise RuntimeError('CUSOLVER is not available and '
                           'GpuCusolverSolve Op can not be constructed.')
    if skcuda.__version__ <= '0.5.1':
        warnings.warn('The GpuSolve op requires scikit-cuda > 0.5.1 to work with CUDA 8')
    ctx = basic_ops.infer_context_name(inp1, inp2)
    A = basic_ops.gpu_contiguous(basic_ops.as_gpuarray_variable(inp1, ctx))
    b = basic_ops.gpu_contiguous(basic_ops.as_gpuarray_variable(inp2, ctx))
    # this op can only operate on float32 matrices
    assert A.ndim == 2
    assert b.ndim == 2
    assert A.dtype == 'float32'
    assert b.dtype == 'float32'
    out = GpuArrayType('float32',
                       broadcastable=A.broadcastable,
                       context_name=ctx)()
    return theano.Apply(self, [A, b], [out])
def make_node(self, inp1, inp2):
    """Apply node solving a triangular system with cuBLAS.

    `inp1` is the 2d triangular matrix; `inp2` the right-hand side
    (vector or matrix).  The output mirrors `inp2`'s broadcastable
    pattern and shares the inputs' dtype.
    """
    if not cublas_available:
        raise RuntimeError(
            "CUBLAS is not available and "
            "GpuCublasTriangularSolve Op "
            "can not be constructed."
        )
    ctx = infer_context_name(inp1, inp2)
    A = gpu_contiguous(as_gpuarray_variable(inp1, ctx))
    rhs = gpu_contiguous(as_gpuarray_variable(inp2, ctx))
    assert A.ndim == 2
    assert rhs.ndim in [1, 2]
    assert A.dtype == rhs.dtype
    out_type = GpuArrayType(
        A.dtype, broadcastable=rhs.broadcastable, context_name=ctx
    )
    return theano.Apply(self, [A, rhs], [out_type()])
def make_node(self, A):
    """Apply node for the SVD of a float32 GPU matrix.

    Outputs (S, U, VT) when self.compute_uv is set, otherwise only the
    singular values S (a 1d array).
    """
    ctx_name = infer_context_name(A)
    A = gpu_contiguous(as_gpuarray_variable(A, ctx_name))
    if A.ndim != 2:
        raise LinAlgError("Matrix rank error")
    if A.dtype != "float32":
        raise TypeError("only `float32` is supported for now")
    s_type = GpuArrayType(A.dtype, broadcastable=[False],
                          context_name=ctx_name)
    if self.compute_uv:
        # S, U, VT
        outputs = [s_type(), A.type(), A.type()]
    else:
        # S only
        outputs = [s_type()]
    return theano.Apply(self, [A], outputs)
def make_node(self, inp, s=None):
    """Build the Apply node for the forward cuFFT.

    A shape parameter `s` can be provided as an input; for now this is
    only used to manage odd transform sizes.  Padding and truncation
    (numpy-style) would have to be done inside the op, since cuFFT
    expects arrays matching the plan's shape; the effect of padding on
    gradients has yet to be investigated.
    """
    for available, lib in ((scikits_cuda_available, "skcuda"),
                           (pygpu_available, "pygpu"),
                           (pycuda_available, "pycuda")):
        if not available:
            raise RuntimeError("%s is needed for CuFFTOp" % lib)
    inp = basic_ops.gpu_contiguous(
        basic_ops.as_gpuarray_variable(inp, basic_ops.infer_context_name(inp)))
    if s is None:
        # No shape given: default to the input's data shape.
        s = inp.shape[1:]
    s = T.as_tensor_variable(s)
    assert inp.dtype == "float32"
    assert s.ndim == 1
    assert 'int' in s.dtype
    return theano.Apply(self, [inp, s], [self.output_type(inp)()])
def make_node(self, x, ilist):
    """Apply node taking rows of `x` selected by an int64 GPU index vector.

    The indices are cast to int64 on the CPU if needed, then moved to
    the GPU and made contiguous.  The output's leading axis follows the
    index vector; remaining axes follow `x`.
    """
    ctx_name = infer_context_name(x, ilist)
    x_gpu = as_gpuarray_variable(x, ctx_name)
    idx_cpu = tt.as_tensor_variable(ilist)
    if idx_cpu.type.dtype not in tt.integer_dtypes:
        raise TypeError("index must be integers")
    if idx_cpu.type.dtype != "int64":
        # The GPU kernel only accepts 64-bit indices.
        idx_cpu = tt.cast(idx_cpu, "int64")
    idx = gpu_contiguous(as_gpuarray_variable(idx_cpu, ctx_name))
    if idx.type.dtype != "int64":
        raise TypeError("index must be int64")
    if idx.type.ndim != 1:
        raise TypeError("index must be a vector")
    if x_gpu.type.ndim == 0:
        raise TypeError("cannot index into a scalar")
    bcast = idx.broadcastable + x_gpu.broadcastable[1:]
    out_type = GpuArrayType(dtype=x.dtype, context_name=ctx_name,
                            broadcastable=bcast)
    return gof.Apply(self, [x_gpu, idx], [out_type()])
def make_node(self, A):
    """Apply node over a float32 GPU matrix; the output keeps `A`'s type."""
    ctx = infer_context_name(A)
    mat = gpu_contiguous(as_gpuarray_variable(A, ctx))
    if mat.ndim != 2:
        raise LinAlgError("Matrix rank error")
    if mat.dtype != "float32":
        raise TypeError("only `float32` is supported for now")
    return theano.Apply(self, [mat], [mat.type()])
def make_node(self, x, *inputs):
    """Wrap AdvancedSubtensor.make_node, retyping input and output for
    the GPU while reusing its index validation."""
    ctx_name = infer_context_name(x)
    base = AdvancedSubtensor.make_node(self, x, *inputs)
    cpu_out = base.outputs[0].type
    gpu_out = GpuArrayType(
        dtype=cpu_out.dtype,
        broadcastable=cpu_out.broadcastable,
        context_name=ctx_name,
    )
    x = as_gpuarray_variable(x, ctx_name)
    return gof.Apply(self, [x] + base.inputs[1:], [gpu_out()])
def deconv(X, w, subsample=(1, 1), border_mode=(0, 0), conv_mode='conv'):
    """Transposed convolution of `X` with kernels `w`, built from the
    cuDNN gradient-of-inputs op.

    The output buffer's spatial dimensions are the input's multiplied
    by the subsample factors.
    """
    img = gpu_contiguous(T.cast(X, 'float32'))
    kerns = gpu_contiguous(T.cast(w, 'float32'))
    conv_desc = GpuDnnConvDesc(
        border_mode=border_mode,
        subsample=subsample,
        conv_mode=conv_mode,
    )(kerns.shape)
    out_buf = GpuAllocEmpty(dtype='float32',
                            context_name=infer_context_name(X))(
        img.shape[0],
        kerns.shape[1],
        img.shape[2] * subsample[0],
        img.shape[3] * subsample[1],
    )
    return GpuDnnConvGradI()(kerns, img, out_buf, conv_desc)
def make_node(self, x, ilist):
    """Apply node selecting rows of `x` by a GPU integer index vector;
    the output keeps `x`'s GPU type."""
    ctx_name = infer_context_name(x, ilist)
    x_gpu = as_gpuarray_variable(x, ctx_name)
    idx = as_gpuarray_variable(ilist, ctx_name)
    if idx.type.dtype not in tensor.integer_dtypes:
        raise TypeError('index must be integers')
    if idx.type.ndim != 1:
        raise TypeError('index must be vector')
    if x_gpu.type.ndim == 0:
        raise TypeError('cannot index into a scalar')
    return gof.Apply(self, [x_gpu, idx], [x_gpu.type()])
def make_node(self, _x):
    """Apply node extracting a diagonal along (self.axis1, self.axis2).

    Both diagonal axes are removed from the broadcastable pattern and a
    new non-broadcastable trailing axis holding the diagonal is added.
    """
    x = as_gpuarray_variable(_x, infer_context_name(_x))
    if x.ndim < 2:
        raise ValueError("Diagonal needs an input with 2 or more "
                         "dimensions", x)
    lo, hi = sorted((self.axis1, self.axis2))
    bcast = list(x.broadcastable[:lo])
    bcast += x.broadcastable[lo + 1:hi]
    bcast += x.broadcastable[hi + 1:]
    bcast.append(False)
    return gof.Apply(
        self, [x], [x.type.clone(broadcastable=tuple(bcast))()]
    )
def make_node(self, inp1, inp2):
    """Apply node over two float32 2d GPU arrays; the output is a 2d
    float32 GPU array in the same context."""
    ctx = infer_context_name(inp1, inp2)
    a = gpu_ops.as_gpuarray_variable(inp1, ctx)
    b = gpu_ops.as_gpuarray_variable(inp2, ctx)
    for m in (a, b):
        assert m.dtype == "float32"
        assert m.ndim == 2
    out_type = GpuArrayType(dtype=a.dtype,
                            broadcastable=(False, False),
                            context_name=ctx)
    return Apply(self, [a, b], [out_type()])
def make_node(self, activations, labels, input_lengths):
    """Apply node for the GPU CTC loss.

    `activations` must be a float32 3d array (moved to the GPU and made
    C-contiguous); `labels` (2d) and `input_lengths` (1d) stay on the
    CPU as int32 tensors.  Outputs the per-sequence costs, plus the
    gradient w.r.t. the activations when self.compute_grad is set.
    """
    ctx = infer_context_name(activations)
    # Ensure activations array is C-contiguous on the GPU.
    t_activations = gpu_contiguous(
        as_gpuarray_variable(activations, context_name=ctx)
    )
    # Labels and input lengths are always on the CPU.
    t_labels = tt.as_tensor_variable(labels)
    t_input_lengths = tt.as_tensor_variable(input_lengths)

    if t_activations.type.dtype != "float32":
        raise TypeError("activations must use the float32 type.")
    if t_activations.ndim != 3:
        raise ValueError("activations must have 3 dimensions.")
    if t_labels.type.dtype != "int32":
        raise TypeError("labels must use the int32 type.")
    if t_labels.ndim != 2:
        raise ValueError("labels must have 2 dimensions.")
    if t_input_lengths.type.dtype != "int32":
        raise TypeError("input_lengths must use the int32 type.")
    if t_input_lengths.ndim != 1:
        raise ValueError("input_lengths must have 1 dimension.")

    outputs = [
        GpuArrayType(dtype="float32", broadcastable=(False,),
                     context_name=ctx)()
    ]
    if self.compute_grad:
        outputs.append(
            GpuArrayType(
                dtype="float32",
                broadcastable=(False, False, False),
                context_name=ctx,
            )()
        )
    return theano.Apply(
        self,
        inputs=[t_activations, t_labels, t_input_lengths],
        outputs=outputs,
    )
def make_node(self, x, truth):
    """Build the Apply node comparing `x` against `truth`.

    Returns a single scalar output, or — when self.return_extras is
    set — a scalar, an int32 vector, and three more scalars.
    """
    # NOTE(review): context_name is assigned but never used below.
    # infer_context_name also validates that a GPU context can be
    # inferred from the inputs, so the call may be intentional —
    # confirm before removing.
    context_name = basic_ops.infer_context_name(x, truth)
    x = basic_ops.gpu_contiguous(x)
    truth = basic_ops.gpu_contiguous(truth)
    if self.return_extras:
        return theano.Apply(self, [x, truth], [
            T.scalar(),
            T.vector('int32'),
            T.scalar(),
            T.scalar(),
            T.scalar()
        ])
    else:
        return theano.Apply(self, [x, truth], [T.scalar()])
def make_node(self, diag):
    """Apply node allocating an array with `diag` placed on its diagonal;
    the output gains one extra, non-broadcastable dimension."""
    ctx_name = infer_context_name(diag)
    diag = as_gpuarray_variable(diag, ctx_name)
    if diag.type.ndim < 1:
        raise ValueError(
            "AllocDiag needs an input with 1 or more " "dimensions", diag.type
        )
    out_type = diag.type.__class__(
        dtype=diag.dtype,
        broadcastable=[False] * (diag.ndim + 1),
    )
    return gof.Apply(self, [diag], [out_type()])
def local_abstractconv_cudnn(node):
    """Graph optimizer: replace an AbstractConv2d/AbstractConv3d node by
    an equivalent cuDNN convolution graph when the node is eligible.

    Returns the replacement outputs, or None/nothing to decline.
    """
    ctx = infer_context_name(*node.inputs)
    # Only rewrite nodes already operating on GPU arrays.
    if not isinstance(node.inputs[0].type, GpuArrayType):
        return
    # Unshared (locally connected) convolutions are not handled here.
    if node.op.unshared:
        return None
    if isinstance(node.op.border_mode, tuple) and any(
        isinstance(p, tuple) for p in node.op.border_mode):
        # Asymmetric padding not yet supported
        return None
    if isinstance(node.op, AbstractConv2d):
        # Preserve the stack trace of the replaced outputs.
        with inherit_stack_trace(node.outputs):
            return local_abstractconv_cudnn_graph(node.op, ctx, node.inputs,
                                                  node.outputs)
    elif isinstance(node.op, AbstractConv3d):
        with inherit_stack_trace(node.outputs):
            return local_abstractconv3d_cudnn_graph(node.op, ctx, node.inputs,
                                                    node.outputs)
def make_node(self, inp):
    """Apply node for the GPU Cholesky factorization of a 2d array;
    the output keeps the input's type."""
    if not cusolver_available:
        raise RuntimeError("CUSOLVER is not available and "
                           "GpuCholesky Op can not be constructed.")
    if skcuda.__version__ <= "0.5.1":
        warnings.warn("The GpuCholesky op requires scikit-cuda > "
                      "0.5.1 to work with CUDA 8")
    if not pygpu_available:
        raise RuntimeError("Missing pygpu or triu/tril functions."
                           "Install or update libgpuarray.")
    ctx = infer_context_name(inp)
    mat = gpu_contiguous(as_gpuarray_variable(inp, ctx))
    assert mat.ndim == 2
    return theano.Apply(self, [mat], [mat.type()])
def make_node(self, inp1, inp2):
    """Apply node over two float32 2d GPU matrices.

    Both inputs are moved to a common GPU context and made
    C-contiguous; the output takes `inp1`'s broadcastable pattern.
    """
    # NOTE(review): storing the context on self from make_node makes the
    # op instance stateful — other methods presumably read self.context,
    # so it is kept; confirm before refactoring to a local variable.
    self.context = basic_ops.infer_context_name(inp1, inp2)
    inp1 = basic_ops.as_gpuarray_variable(inp1, self.context)
    inp2 = basic_ops.as_gpuarray_variable(inp2, self.context)
    inp1 = basic_ops.gpu_contiguous(inp1)
    inp2 = basic_ops.gpu_contiguous(inp2)
    # this op can only operate on float32 matrices
    assert inp1.ndim == 2
    assert inp2.ndim == 2
    assert inp1.dtype == 'float32'
    assert inp2.dtype == 'float32'
    return theano.Apply(self, [inp1, inp2], [
        GpuArrayType('float32',
                     broadcastable=inp1.broadcastable,
                     context_name=self.context)()
    ])
def make_node(self, inp1, inp2):
    """Apply node over two float32 2d GPU matrices.

    Inputs are transferred to a shared GPU context and made
    C-contiguous; the single output mirrors `inp1`'s broadcastable
    pattern.
    """
    # NOTE(review): self.context set inside make_node makes the op
    # stateful; kept as-is since sibling methods may depend on it.
    self.context = basic_ops.infer_context_name(inp1, inp2)
    inp1 = basic_ops.as_gpuarray_variable(inp1, self.context)
    inp2 = basic_ops.as_gpuarray_variable(inp2, self.context)
    inp1 = basic_ops.gpu_contiguous(inp1)
    inp2 = basic_ops.gpu_contiguous(inp2)
    # this op can only operate on float32 matrices
    assert inp1.ndim == 2
    assert inp2.ndim == 2
    assert inp1.dtype == 'float32'
    assert inp2.dtype == 'float32'
    return theano.Apply(
        self,
        [inp1, inp2],
        [GpuArrayType('float32',
                      broadcastable=inp1.broadcastable,
                      context_name=self.context)()])
def make_node(self, inp):
    """Apply node for the Cholesky factorization of a float32 GPU matrix;
    the output keeps the input's type."""
    if not cusolver_available:
        raise RuntimeError('CUSOLVER is not available and '
                           'GpuCholesky Op can not be constructed.')
    if skcuda.__version__ <= '0.5.1':
        warnings.warn('The GpuSolve op requires scikit-cuda > 0.5.1 to work with CUDA 8')
    if not pygpu_available:
        raise RuntimeError('Missing pygpu or triu/tril functions.'
                           'Install or update libgpuarray.')
    context_name = basic_ops.infer_context_name(inp)
    mat = basic_ops.gpu_contiguous(
        basic_ops.as_gpuarray_variable(inp, context_name))
    # this op can only operate on float32 matrices
    # because of current implementation of triu/tril.
    # TODO: support float64 for triu/tril in GpuArray and for GpuCholesky/GpuCusolverSolve in Theano.
    assert mat.ndim == 2
    assert mat.dtype == 'float32'
    return theano.Apply(self, [mat], [mat.type()])
def make_node(self, inp):
    """Apply node for the float32 GPU Cholesky factorization.

    The input is moved to its inferred GPU context and made
    C-contiguous; the output reuses the input's type.
    """
    if not cusolver_available:
        raise RuntimeError('CUSOLVER is not available and '
                           'GpuCholesky Op can not be constructed.')
    if skcuda.__version__ <= '0.5.1':
        warnings.warn(
            'The GpuSolve op requires scikit-cuda > 0.5.1 to work with CUDA 8'
        )
    if not pygpu_available:
        raise RuntimeError('Missing pygpu or triu/tril functions.'
                           'Install or update libgpuarray.')
    ctx = basic_ops.infer_context_name(inp)
    mat = basic_ops.as_gpuarray_variable(inp, ctx)
    mat = basic_ops.gpu_contiguous(mat)
    # this op can only operate on float32 matrices
    # because of current implementation of triu/tril.
    # TODO: support float64 for triu/tril in GpuArray and for GpuCholesky/GpuCusolverSolve in Theano.
    assert mat.ndim == 2
    assert mat.dtype == 'float32'
    return theano.Apply(self, [mat], [mat.type()])
def make_node(self, x):
    """Apply node whose single output has the same GPU type as `x`."""
    gx = as_gpuarray_variable(x, infer_context_name(x))
    return Apply(self, [gx], [gx.type()])
def local_abstractconv_cudnn_alt(node):
    """Graph optimizer: implement a 2d convolution via the "other"
    cuDNN direction.

    A forward conv is expressed through dnn_conv with a bprop direction
    hint; gradWeights is expressed as a forward conv on transposed
    operands; gradInputs as a full-mode forward conv with the kernels
    transposed.  Returns [replacement] or None/nothing to decline.
    """
    if not isinstance(node.op, (AbstractConv2d,
                                AbstractConv2d_gradWeights,
                                AbstractConv2d_gradInputs)):
        return
    # Dilation needs cuDNN >= 6.
    if version(raises=False) < 6000 and node.op.filter_dilation != (1, 1):
        return None
    if node.op.unshared:
        return None
    if isinstance(node.op.border_mode, tuple) and any(
            isinstance(p, tuple) for p in node.op.border_mode):
        # Asymmetric padding not yet supported
        return None
    inp1 = node.inputs[0]
    inp2 = node.inputs[1]
    if not dnn_available(inp1.type.context_name):
        return
    op = node.op
    border_mode = node.op.border_mode
    subsample = node.op.subsample
    filter_dilation = node.op.filter_dilation
    num_groups = node.op.num_groups
    precision, _ = get_precision(None, [inp1, inp2])
    # filter_flip decides between true convolution and cross-correlation.
    if node.op.filter_flip:
        conv_mode = "conv"
    else:
        conv_mode = "cross"
    if isinstance(op, AbstractConv2d):
        # Forward conv: only unstrided, ungrouped, non-"half" cases.
        if border_mode == "half" or subsample != (1, 1) or num_groups != 1:
            return None
        if border_mode == "full":
            direction_hint = "bprop inputs"
        elif border_mode == "valid" and filter_dilation == (1, 1):
            direction_hint = "bprop weights"
        else:
            return None
        rval = dnn_conv(
            inp1,
            inp2,
            border_mode=border_mode,
            subsample=subsample,
            dilation=filter_dilation,
            direction_hint=direction_hint,
            conv_mode=conv_mode,
            num_groups=num_groups,
        )
    elif isinstance(op, AbstractConv2d_gradWeights):
        if (border_mode == "valid" and subsample == (1, 1)
                and filter_dilation == (1, 1) and num_groups == 1):
            img = gpu_contiguous(inp1)
            topgrad = gpu_contiguous(inp2)
            ctx_name = infer_context_name(img, topgrad)
            # Swap batch and channel axes so the weight gradient becomes
            # a forward cross-correlation.
            img = gpu_contiguous(img.dimshuffle(1, 0, 2, 3))
            topgrad = gpu_contiguous(topgrad.dimshuffle(1, 0, 2, 3))
            ishape = [shape_i_op(i)(img) for i in range(img.ndim)]
            tshape = [shape_i_op(i)(topgrad) for i in range(topgrad.ndim)]
            out_shp = get_conv_output_shape(
                ishape,
                tshape,
                border_mode=border_mode,
                subsample=subsample,
                filter_dilation=filter_dilation,
            )
            out_shp = assert_conv_shape(out_shp)
            out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*out_shp)
            desc = GpuDnnConvDesc(
                border_mode=border_mode,
                subsample=subsample,
                dilation=filter_dilation,
                conv_mode="cross",
                precision=precision,
            )(out.shape)
            conv = GpuDnnConv(algo=None, num_groups=num_groups)(img, topgrad,
                                                                out, desc)
            # Undo the implicit filter flip when a true conv was requested.
            if conv_mode == "conv":
                conv = conv[:, :, ::-1, ::-1]
            rval = as_gpuarray_variable(conv.dimshuffle(1, 0, 2, 3), ctx_name)
        else:
            return None
    elif isinstance(op, AbstractConv2d_gradInputs):
        if border_mode == "valid" and subsample == (1, 1) and num_groups == 1:
            # Transpose kernels (in/out channels) for the full-mode conv.
            kerns = gpu_contiguous(inp1.dimshuffle(1, 0, 2, 3))
            topgrad = gpu_contiguous(inp2)
            ctx_name = infer_context_name(kerns, topgrad)
            # The input gradient uses the opposite correlation mode.
            conv_mode = "cross" if conv_mode == "conv" else "conv"
            desc = GpuDnnConvDesc(
                border_mode="full",
                subsample=subsample,
                dilation=filter_dilation,
                conv_mode=conv_mode,
                precision=precision,
            )(kerns.shape)
            tshape = [shape_i_op(i)(topgrad) for i in range(topgrad.ndim)]
            kshape = [shape_i_op(i)(kerns) for i in range(kerns.ndim)]
            shape = get_conv_output_shape(
                tshape,
                kshape,
                border_mode="full",
                subsample=subsample,
                filter_dilation=filter_dilation,
            )
            shape = assert_conv_shape(shape)
            out = GpuAllocEmpty(dtype=topgrad.dtype,
                                context_name=ctx_name)(*shape)
            rval = GpuDnnConv(algo=None, num_groups=num_groups)(topgrad, kerns,
                                                                out, desc)
        else:
            return None
    return [rval]
def make_node(self, x):
    """Apply node whose output shares the GPU type of the input."""
    gx = as_gpuarray_variable(x, infer_context_name(x))
    return theano.Apply(self, [gx], [gx.type()])