def make_node(self, A):
    """Build the Apply node computing the SVD of ``A`` on the GPU.

    ``A`` is converted to a GPU array variable in the context inferred
    from it and passed through ``gpu_contiguous``.  The node has the
    outputs ``S, U, VT`` when ``self.compute_uv`` is true, and only ``S``
    otherwise.

    Raises
    ------
    LinAlgError
        If ``A`` does not have exactly 2 dimensions.
    TypeError
        If the dtype of ``A`` is not ``float32``.
    """
    context = infer_context_name(A)
    A = gpu_contiguous(as_gpuarray_variable(A, context))

    if A.ndim != 2:
        raise LinAlgError("Matrix rank error")
    if A.dtype != "float32":
        raise TypeError("only `float32` is supported for now")

    # S is a non-broadcastable vector living in the same context as A.
    singular_values = GpuArrayType(
        A.dtype, broadcastable=[False], context_name=context
    )()
    outputs = [singular_values]
    if self.compute_uv:
        # U and VT reuse the type of the input matrix.
        outputs.extend([A.type(), A.type()])
    return Apply(self, [A], outputs)
def test_filter_variable():
    """filter_variable must accept variables with a more restrictive broadcast pattern."""
    float_x = aesara.config.floatX
    row_type = GpuArrayType(dtype=float_x, broadcastable=(True, False))
    matrix_type = GpuArrayType(dtype=float_x, broadcastable=(False, False))

    # First a row that lives on the GPU, then a row that lives on the CPU.
    for make_row in (row_type, row):
        filtered = matrix_type.filter_variable(make_row())
        assert filtered.type == matrix_type
def make_node(self, activations, labels, input_lengths):
    """Build the Apply node from activations, labels and input lengths.

    The node always outputs a 1-d ``float32`` GPU array of costs; when
    ``self.compute_grad`` is true it additionally outputs a 3-d
    ``float32`` GPU array of gradients.

    Raises
    ------
    TypeError
        If activations are not ``float32`` or if labels / input lengths
        are not ``int32``.
    ValueError
        If activations, labels or input lengths do not have 3, 2 and 1
        dimensions respectively.
    """
    ctx = infer_context_name(activations)

    # Activations go to the GPU and must be C-contiguous.
    gpu_activations = gpu_contiguous(
        as_gpuarray_variable(activations, context_name=ctx)
    )
    # Labels and input lengths are always on the CPU.
    cpu_labels = as_tensor_variable(labels)
    cpu_lengths = as_tensor_variable(input_lengths)

    # (violated?, exception class, message), checked in this exact order.
    requirements = (
        (
            gpu_activations.type.dtype != "float32",
            TypeError,
            "activations must use the float32 type.",
        ),
        (
            gpu_activations.ndim != 3,
            ValueError,
            "activations must have 3 dimensions.",
        ),
        (
            cpu_labels.type.dtype != "int32",
            TypeError,
            "labels must use the int32 type.",
        ),
        (
            cpu_labels.ndim != 2,
            ValueError,
            "labels must have 2 dimensions.",
        ),
        (
            cpu_lengths.type.dtype != "int32",
            TypeError,
            "input_lengths must use the int32 type.",
        ),
        (
            cpu_lengths.ndim != 1,
            ValueError,
            "input_lengths must have 1 dimension.",
        ),
    )
    for violated, error_cls, message in requirements:
        if violated:
            raise error_cls(message)

    def float32_output(ndim):
        # Fresh non-broadcastable float32 GPU variable with `ndim` dimensions.
        return GpuArrayType(
            dtype="float32", broadcastable=(False,) * ndim, context_name=ctx
        )()

    outputs = [float32_output(1)]  # costs
    if self.compute_grad:
        outputs.append(float32_output(3))  # gradients

    return Apply(
        self,
        inputs=[gpu_activations, cpu_labels, cpu_lengths],
        outputs=outputs,
    )
def make_node(self, inp1, inp2):
    """Build the Apply node for the cuBLAS triangular solve of ``inp1`` and ``inp2``.

    Both inputs are converted to GPU array variables in their common
    inferred context and passed through ``gpu_contiguous``.  The single
    output has the dtype of ``inp1`` and the broadcast pattern of
    ``inp2``.

    Raises
    ------
    RuntimeError
        If ``cublas_available`` is false.
    AssertionError
        If ``inp1`` is not 2-d, ``inp2`` is neither 1-d nor 2-d, or the
        dtypes of the two inputs differ.
    """
    if not cublas_available:
        raise RuntimeError(
            "CUBLAS is not available and "
            "GpuCublasTriangularSolve Op "
            "can not be constructed."
        )

    ctx = infer_context_name(inp1, inp2)
    inp1, inp2 = [as_gpuarray_variable(var, ctx) for var in (inp1, inp2)]
    inp1, inp2 = [gpu_contiguous(var) for var in (inp1, inp2)]

    assert inp1.ndim == 2
    assert inp2.ndim in (1, 2)
    assert inp1.dtype == inp2.dtype

    solution_type = GpuArrayType(
        inp1.dtype,
        broadcastable=inp2.broadcastable,
        context_name=ctx,
    )
    return Apply(self, [inp1, inp2], [solution_type()])
def output_type(self, inp):
    """Return the GpuArrayType of the output for the input variable ``inp``.

    The type keeps the dtype and context of ``inp`` and has one more
    dimension than ``inp`` (for real/imag); no dimension is
    broadcastable.
    """
    n_out_dims = inp.type.ndim + 1
    return GpuArrayType(
        inp.dtype,
        broadcastable=[False for _ in range(n_out_dims)],
        context_name=inp.type.context_name,
    )
def make_node(self, inp1, inp2):
    """Build the Apply node for the cuSOLVER solve of ``inp1`` and ``inp2``.

    Both inputs are converted to GPU array variables in their common
    inferred context and passed through ``gpu_contiguous``.  The single
    output has the dtype and the broadcast pattern of ``inp1``.

    A warning is emitted when the installed scikit-cuda version is
    0.5.1 or older.  The version is compared numerically, component by
    component, using the leading digits of each dot-separated part; if
    no numeric component can be parsed, no warning is emitted.

    Raises
    ------
    RuntimeError
        If ``cusolver_available`` is false.
    AssertionError
        If either input is not 2-d or the dtypes of the inputs differ.
    """
    import re

    if not cusolver_available:
        raise RuntimeError(
            "CUSOLVER is not available and "
            "GpuCusolverSolve Op can not be constructed."
        )

    # A plain string comparison is lexicographic and would treat e.g.
    # "0.10.0" as older than "0.5.1"; compare integer tuples instead.
    installed = []
    for component in str(skcuda.__version__).split("."):
        leading_digits = re.match(r"\d+", component)
        if leading_digits is None:
            # Stop at the first non-numeric component (e.g. "dev0").
            break
        installed.append(int(leading_digits.group()))
    if installed and tuple(installed) <= (0, 5, 1):
        warnings.warn(
            "The GpuSolve op requires scikit-cuda > 0.5.1 to work with CUDA 8"
        )

    context_name = infer_context_name(inp1, inp2)

    inp1 = as_gpuarray_variable(inp1, context_name)
    inp2 = as_gpuarray_variable(inp2, context_name)

    inp1 = gpu_contiguous(inp1)
    inp2 = gpu_contiguous(inp2)

    assert inp1.ndim == 2
    assert inp2.ndim == 2
    assert inp1.dtype == inp2.dtype

    return Apply(
        self,
        [inp1, inp2],
        [
            GpuArrayType(
                inp1.dtype,
                broadcastable=inp1.broadcastable,
                context_name=context_name,
            )()
        ],
    )
def make_node(self, ten4, neib_shape, neib_step=None):
    """Build the Apply node from a 4-d GPU input, a neighbourhood shape and a step.

    ``ten4`` is converted to a GPU array variable in its inferred
    context; ``neib_shape`` and ``neib_step`` become tensor variables.
    When ``neib_step`` is ``None`` the shape variable is used as the
    step.  The single output is a non-broadcastable 2-d GPU array with
    the dtype and context of ``ten4``.

    Raises
    ------
    AssertionError
        If ``ten4`` is not 4-d, or if the shape / step variables are not
        1-d with an integer dtype.
    """
    ten4 = as_gpuarray_variable(ten4, infer_context_name(ten4))
    neib_shape = tt.as_tensor_variable(neib_shape)
    # Without an explicit step, the step equals the neighbourhood shape.
    neib_step = (
        neib_shape if neib_step is None else tt.as_tensor_variable(neib_step)
    )

    assert ten4.ndim == 4
    for vector in (neib_shape, neib_step):
        assert vector.ndim == 1
    for vector in (neib_shape, neib_step):
        assert vector.dtype in tt.integer_dtypes

    neibs_type = GpuArrayType(
        broadcastable=(False, False),
        dtype=ten4.type.dtype,
        context_name=ten4.type.context_name,
    )
    return Apply(self, [ten4, neib_shape, neib_step], [neibs_type()])
def make_node(self, x, b, y_idx):
    """Build the Apply node with the outputs ``nll``, ``sm`` and ``am``.

    All three inputs are converted to GPU array variables in their
    common inferred context.  ``nll`` has the dtype of ``x`` and the
    broadcast pattern of ``y_idx``; ``sm`` has the type of ``x``; ``am``
    has the type of ``y_idx``.
    """
    ctx = infer_context_name(x, b, y_idx)
    x, b, y_idx = [as_gpuarray_variable(var, ctx) for var in (x, b, y_idx)]

    outputs = [
        # nll
        GpuArrayType(x.type.dtype, y_idx.type.broadcastable, context_name=ctx)(),
        # sm
        x.type(),
        # am
        y_idx.type(),
    ]
    return Apply(self, [x, b, y_idx], outputs)
def test_rebroadcast():
    """Rebroadcast must stay in the compiled graph and return the input value."""
    for dt in ("float16", "float32"):
        value = rand_gpuarray(1, dtype=dt)
        gpu_vec = GpuArrayType(dtype=dt, broadcastable=(False,))("g")

        fn = aesara.function([gpu_vec], Rebroadcast((0, True))(gpu_vec))

        first_node = fn.maker.fgraph.toposort()[0]
        assert isinstance(first_node.op, Rebroadcast)

        out = fn(value)
        assert GpuArrayType.values_eq(out, value)
def test_deep_copy():
    """An identity function on a GPU vector must compile to a DeepCopyOp."""
    for dt in ("float16", "float32"):
        value = rand_gpuarray(20, dtype=dt)
        gpu_vec = GpuArrayType(dtype=dt, broadcastable=(False,))("g")

        fn = aesara.function([gpu_vec], gpu_vec)

        first_node = fn.maker.fgraph.toposort()[0]
        assert isinstance(first_node.op, DeepCopyOp)

        out = fn(value)
        assert GpuArrayType.values_eq(out, value)
def make_node(self, n, m):
    """Build the Apply node from the two scalar inputs ``n`` and ``m``.

    The single output is a non-broadcastable 2-d GPU array whose dtype
    and context come from ``self.dtype`` and ``self.context_name``.

    Raises
    ------
    AssertionError
        If ``n`` or ``m`` is not 0-dimensional.
    """
    n, m = [tensor.as_tensor_variable(var) for var in (n, m)]
    assert n.ndim == 0
    assert m.ndim == 0

    out = GpuArrayType(
        dtype=self.dtype,
        broadcastable=(False, False),
        context_name=self.context_name,
    )()
    return Apply(self, [n, m], [out])
def test_view():
    """With "local_view_op" excluded, ViewOp must stay in the graph and return the input value."""
    for dt in ("float16", "float32"):
        value = rand_gpuarray(20, dtype=dt)
        gpu_vec = GpuArrayType(dtype=dt, broadcastable=(False,))("g")

        # Exclude the rewrite named "local_view_op" from the default mode.
        mode = aesara.compile.get_default_mode().excluding("local_view_op")
        fn = aesara.function([gpu_vec], ViewOp()(gpu_vec), mode=mode)

        first_node = fn.maker.fgraph.toposort()[0]
        assert isinstance(first_node.op, ViewOp)

        out = fn(value)
        assert GpuArrayType.values_eq(out, value)
def test_transfer_cpu_gpu():
    """Round-trip a float32 matrix: host -> GPU via GpuFromHost, GPU -> host via host_from_gpu."""
    host_var = tt.fmatrix("a")
    gpu_var = GpuArrayType(dtype="float32", broadcastable=(False, False))("g")

    host_value = np.asarray(rng.rand(5, 4), dtype="float32")
    gpu_value = gpuarray.array(host_value, context=get_context(test_ctx_name))

    # Host to GPU.
    to_gpu = aesara.function([host_var], GpuFromHost(test_ctx_name)(host_var))
    assert GpuArrayType.values_eq(to_gpu(host_value), gpu_value)

    # GPU to host.
    to_host = aesara.function([gpu_var], host_from_gpu(gpu_var))
    assert np.all(to_host(gpu_value) == host_value)
def test_transfer_gpu_gpu():
    """GpuToGpu must be the only node in the graph and return the input value."""
    gpu_var = GpuArrayType(
        dtype="float32", broadcastable=(False, False), context_name=test_ctx_name
    )()

    host_value = np.asarray(rng.rand(5, 4), dtype="float32")
    gpu_value = gpuarray.array(host_value, context=get_context(test_ctx_name))

    # Compile without the two excluded transfer-cutting rewrites.
    mode = mode_with_gpu.excluding(
        "cut_gpua_host_transfers", "local_cut_gpua_host_gpua"
    )
    fn = aesara.function([gpu_var], GpuToGpu(test_ctx_name)(gpu_var), mode=mode)

    nodes = fn.maker.fgraph.toposort()
    assert len(nodes) == 1
    assert isinstance(nodes[0].op, GpuToGpu)

    out = fn(gpu_value)
    assert GpuArrayType.values_eq(out, gpu_value)
def result(inp):
    """Apply ``op`` to the GPU array ``inp`` through a compiled, cached function.

    One function is compiled per ``(dtype, context name)`` pair and kept
    in ``result.cache``; later calls with the same pair reuse it.
    """
    cache_key = (inp.dtype, _name_for_ctx(inp.context))
    dtype, ctx_name = cache_key

    compiled = result.cache.get(cache_key)
    if compiled is not None:
        return compiled(inp)

    # Nothing cached for this dtype/context yet: compile on a 1-d
    # placeholder variable and remember the function.
    placeholder = GpuArrayType(str(dtype), (False,), context_name=ctx_name)()
    gpu_mode = get_mode("FAST_RUN").including("gpuarray")
    compiled = aesara.function(
        [placeholder], op(placeholder), mode=gpu_mode, profile=False
    )
    result.cache[cache_key] = compiled
    return compiled(inp)
def test_dump_load():
    """A GpuArraySharedVariable must keep its name and value across dump/load.

    The pickle is written inside a temporary directory rather than to a
    file named "test" in the current working directory, so the test
    leaves no artifact behind, does not need a writable working
    directory and cannot collide with another run.
    """
    import os
    import tempfile

    x = GpuArraySharedVariable(
        "x",
        GpuArrayType("float32", (1, 1), name="x", context_name=test_ctx_name),
        [[1]],
        False,
    )

    with tempfile.TemporaryDirectory() as tmp_dir:
        path = os.path.join(tmp_dir, "test")

        with open(path, "wb") as f:
            dump(x, f)

        with open(path, "rb") as f:
            x = load(f)

        # Checked while the file still exists on disk, as in the
        # original test, before the temporary directory is removed.
        assert x.name == "x"
        np.testing.assert_allclose(x.get_value(), [[1]])
def test_shape():
    """Check ``x.shape`` of a 3-d GPU variable, with and without "local_shape_to_shape_i"."""
    gpu_var = GpuArrayType(dtype="float32", broadcastable=[False, False, False])()
    value = gpuarray.zeros(
        (3, 4, 5), dtype="float32", context=get_context(test_ctx_name)
    )
    expected = (3, 4, 5)

    # Default mode: outside FAST_COMPILE the graph must be three Shape_i
    # nodes followed by a MakeVector.
    shape_fn = aesara.function([gpu_var], gpu_var.shape)
    nodes = shape_fn.maker.fgraph.toposort()
    assert np.all(shape_fn(value) == expected)
    if aesara.config.mode != "FAST_COMPILE":
        assert len(nodes) == 4
        for node in nodes[:3]:
            assert isinstance(node.op, tt.opt.Shape_i)
        assert isinstance(nodes[3].op, tt.opt.MakeVector)

    # With "local_shape_to_shape_i" excluded a single Shape node remains.
    no_shape_i = mode_with_gpu.excluding("local_shape_to_shape_i")
    shape_fn = aesara.function([gpu_var], gpu_var.shape, mode=no_shape_i)
    nodes = shape_fn.maker.fgraph.toposort()
    assert np.all(shape_fn(value) == expected)
    assert len(nodes) == 1
    assert isinstance(nodes[0].op, tt.Shape)
def make_node(self, pvals, unis):
    """Build the Apply node from ``pvals`` (2-d) and ``unis`` (1-d).

    Both inputs are converted to GPU array variables in their common
    inferred context.  The output dtype is the dtype of ``pvals`` when
    ``self.odtype`` is ``"auto"`` and ``self.odtype`` otherwise; the
    output broadcast pattern is that of ``pvals`` with its two entries
    swapped.

    Raises
    ------
    AssertionError
        If either input dtype is not float16, float32 or float64.
    NotImplementedError
        If ``pvals`` is not 2-d or ``unis`` is not 1-d.
    """
    ctx = infer_context_name(pvals, unis)
    pvals = as_gpuarray_variable(pvals, ctx)
    unis = as_gpuarray_variable(unis, ctx)

    float_dtypes = ["float32", "float16", "float64"]
    assert pvals.dtype in float_dtypes
    assert unis.dtype in float_dtypes

    if pvals.ndim != 2:
        raise NotImplementedError("pvals ndim should be 2", pvals.ndim)
    if unis.ndim != 1:
        raise NotImplementedError("unis ndim should be 1", unis.ndim)

    odtype = pvals.dtype if self.odtype == "auto" else self.odtype

    # pvals is known to be 2-d here, so its pattern has exactly two entries.
    first_bcast, second_bcast = pvals.broadcastable
    out_type = GpuArrayType(
        broadcastable=(second_bcast, first_bcast),
        dtype=odtype,
        context_name=ctx,
    )
    return Apply(self, [pvals, unis], [out_type()])
def test_transfer_strided():
    """Transfer strided (every second column) float32 matrices between host and GPU."""
    # This only ensures that strided transfers work through aesara;
    # libgpuarray has a much more comprehensive suite of tests to ensure
    # correctness.
    host_var = tt.fmatrix("a")
    gpu_var = GpuArrayType(dtype="float32", broadcastable=(False, False))("g")

    full_host = np.asarray(rng.rand(5, 8), dtype="float32")
    full_gpu = gpuarray.array(full_host, context=get_context(test_ctx_name))

    # Keep every second column of both arrays.
    host_value = full_host[:, ::2]
    gpu_value = full_gpu[:, ::2]

    # Host to GPU.
    to_gpu = aesara.function([host_var], GpuFromHost(test_ctx_name)(host_var))
    assert GpuArrayType.values_eq(to_gpu(host_value), gpu_value)

    # GPU to host.
    to_host = aesara.function([gpu_var], host_from_gpu(gpu_var))
    assert np.all(to_host(gpu_value) == host_value)
def test_specify_shape():
    """specify_shape on a GPU vector must compile and run for float16 and float32."""
    for dt in ("float16", "float32"):
        value = rand_gpuarray(20, dtype=dt)
        gpu_vec = GpuArrayType(dtype=dt, broadcastable=(False,))("g")
        fn = aesara.function([gpu_vec], specify_shape(gpu_vec, [20]))
        # Only checks that the call runs; the result is not inspected.
        fn(value)
GpuDnnConv, GpuDnnConvGradI, GpuDnnConvGradW, dnn_available, ) from aesara.gpuarray.type import GpuArrayType, get_context, gpuarray_shared_constructor from tests.gpuarray.config import mode_with_gpu, test_ctx_name from tests.tensor.nnet.test_abstract_conv import ( BaseTestConv2d, BaseTestConv3d, TestConv2dTranspose, TestConvTypes, ) gpu_ftensor4 = GpuArrayType(dtype="float32", broadcastable=(False,) * 4) class TestDnnConv2d(BaseTestConv2d): @classmethod def setup_class(cls): super().setup_class() cls.shared = staticmethod(gpuarray_shared_constructor) # provide_shape is not used by the cuDNN impementation cls.provide_shape = [False] @pytest.mark.skipif(dnn_available(test_ctx_name), reason=dnn_available.msg) def run_test_case(self, i, f, s, b, flip, provide_shape, fd=(1, 1)): mode = mode_with_gpu if fd != (1, 1):
def new(cls, rstate, ndim, dtype, size):
    """Instantiate the op for the requested output type and apply it.

    ``size`` is converted to a tensor variable.  When ``ndim`` is
    ``None`` it is taken from ``get_vector_length`` of that variable.
    The op is built with a non-broadcastable ``GpuArrayType`` of
    ``dtype`` with ``ndim`` dimensions, then called on ``rstate`` and
    the size variable; the result of that call is returned.
    """
    size_var = as_tensor_variable(size)
    n_dims = get_vector_length(size_var) if ndim is None else ndim
    out_type = GpuArrayType(dtype, (False,) * n_dims)
    return cls(out_type)(rstate, size_var)