def test_multinomial_0():
    # This tests the MultinomialFromUniform Op directly, not going through the
    # multinomial() call in GPU random generation.
    p = fmatrix()
    u = fvector()

    m = MultinomialFromUniform("auto")(p, u)

    # the m*2 allows the multinomial to reuse output
    f = function([p, u], m * 2, allow_input_downcast=True)

    # test that both first and second samples can be drawn
    utt.assert_allclose(f([[1, 0], [0, 1]], [0.1, 0.1]), [[2, 0], [0, 2]])

    # test that both second labels can be drawn
    r = f([[0.2, 0.8], [0.3, 0.7]], [0.31, 0.31])
    utt.assert_allclose(r, [[0, 2], [0, 2]])

    # test that both first labels can be drawn
    r = f([[0.2, 0.8], [0.3, 0.7]], [0.21, 0.21])
    utt.assert_allclose(r, [[0, 2], [2, 0]])

    # change the size to make sure output gets reallocated ok
    # and also make sure that the GPU version doesn't screw up the
    # transposed-ness
    r = f([[0.2, 0.8]], [0.25])
    utt.assert_allclose(r, [[0, 2]])

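
# The expected outputs in test_multinomial_0 follow the inverse-CDF view of
# multinomial sampling: a uniform draw u selects the first category whose
# cumulative probability exceeds u. The helper below is an illustrative
# sketch of that idea only (a hypothetical reference, not the
# MultinomialFromUniform implementation); it assumes numpy imported as np.
def _one_hot_from_uniform_sketch(p_row, u):
    # Index of the first bin whose cumulative probability exceeds u.
    idx = int(np.searchsorted(np.cumsum(p_row), u, side="right"))
    out = np.zeros_like(p_row)
    out[min(idx, len(p_row) - 1)] = 1.0
    return out


# e.g. _one_hot_from_uniform_sketch(np.array([0.3, 0.7]), 0.21) -> [1., 0.],
# which is why u = 0.21 draws the first label for the second row above,
# while the same u draws the second label for p = [0.2, 0.8].
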
def test_sparseblockgemvF(self):
    # Test the fortran order for W (which can happen in the grad for some
    # graphs).
    b = fmatrix()
    W = ftensor4()
    h = ftensor3()
    iIdx = imatrix()
    oIdx = imatrix()

    o = self.gemv_op(
        b.take(oIdx, axis=0),
        DimShuffle((False, False, False, False), (0, 1, 3, 2))(
            aet.as_tensor_variable(W)
        ),
        h,
        iIdx,
        oIdx,
    )

    f = aesara.function([W, h, iIdx, b, oIdx], o, mode=self.mode)

    W_val, h_val, iIdx_val, b_val, oIdx_val = self.gemv_data()

    th_out = f(np.swapaxes(W_val, 2, 3), h_val, iIdx_val, b_val, oIdx_val)
    ref_out = self.gemv_numpy(
        b_val.take(oIdx_val, axis=0), W_val, h_val, iIdx_val, oIdx_val
    )

    utt.assert_allclose(ref_out, th_out)

def test_gpu_eigh_opt(self):
    A = fmatrix("A")
    fn = aesara.function([A], eigh(A), mode=mode_with_gpu)
    assert any(
        [isinstance(node.op, GpuMagmaEigh) for node in fn.maker.fgraph.toposort()]
    )

def test_gpu_matrix_inverse_inplace_opt(self):
    A = fmatrix("A")
    fn = aesara.function([A], matrix_inverse(A), mode=mode_with_gpu)
    assert any(
        [
            node.op.inplace
            for node in fn.maker.fgraph.toposort()
            if isinstance(node.op, GpuMagmaMatrixInverse)
        ]
    )

def test_gpu_cholesky_inplace_opt(self):
    A = fmatrix("A")
    fn = aesara.function([A], GpuMagmaCholesky()(A), mode=mode_with_gpu)
    assert any(
        [
            node.op.inplace
            for node in fn.maker.fgraph.toposort()
            if isinstance(node.op, GpuMagmaCholesky)
        ]
    )

def test_select_proportional_to_weight(self):
    # Tests that multinomial_wo_replacement selects elements, on average,
    # in proportion to their probabilities.
    th_rng = RandomStream(12345)

    p = fmatrix()
    n = iscalar()
    m = th_rng.choice(size=n, p=p, replace=False)

    f = function([p, n], m, allow_input_downcast=True)

    n_elements = 100
    n_selected = 10
    mean_rtol = 0.0005

    rng = np.random.default_rng(12345)
    pvals = rng.integers(1, 100, (1, n_elements)).astype(config.floatX)
    pvals /= pvals.sum(1)

    avg_pvals = np.zeros((n_elements,), dtype=config.floatX)

    for rep in range(10000):
        res = f(pvals, n_selected)
        res = np.squeeze(res)
        avg_pvals[res] += 1
    avg_pvals /= avg_pvals.sum()
    avg_diff = np.mean(abs(avg_pvals - pvals))
    assert avg_diff < mean_rtol

def test_Strides2D(self, mode):
    np_func = dict(add=np.cumsum, mul=np.cumprod)[mode]
    op_class = partial(self.op_class, mode=mode)
    x = fmatrix("x")

    for axis in [0, 1, None, -1, -2]:
        a = np.random.random((42, 30)).astype("float32")
        cumop_function = aesara.function(
            [x], op_class(axis=axis)(x), mode=self.mode
        )

        slicings = [
            slice(None, None, None),  # Normal strides
            slice(None, None, 2),  # Stepped strides
            slice(None, None, -1),  # Negative strides
        ]

        # Cartesian product of all slicings to test.
        for slicing in product(slicings, repeat=x.ndim):
            f = aesara.function(
                [x], op_class(axis=axis)(x[slicing]), mode=self.mode
            )
            assert [
                n for n in f.maker.fgraph.toposort() if isinstance(n.op, GpuCumOp)
            ]
            utt.assert_allclose(np_func(a[slicing], axis=axis), f(a))
            utt.assert_allclose(
                np_func(a[slicing], axis=axis), cumop_function(a[slicing])
            )

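
# Illustration only (not used by the tests): what the Cartesian product of
# slicings above produces for a 2-d array. The helper name is hypothetical;
# it assumes numpy as np and itertools.product, both already imported by this
# module.
def _strided_views_sketch():
    a = np.arange(12, dtype="float32").reshape(3, 4)
    slicings = [slice(None), slice(None, None, 2), slice(None, None, -1)]
    views = []
    for s in product(slicings, repeat=a.ndim):
        view = a[s]
        # Stepped and negative slices yield non-contiguous views with unusual
        # strides, which is exactly what the GpuCumOp test exercises.
        views.append((s, view.strides, view.flags["C_CONTIGUOUS"]))
    return views
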
def test_select_proportional_to_weight(self):
    # Tests that ChoiceFromUniform selects elements, on average, in
    # proportion to their probabilities.
    p = fmatrix()
    u = fvector()
    n = iscalar()
    m = multinomial.ChoiceFromUniform(odtype="auto")(p, u, n)

    f = function([p, u, n], m, allow_input_downcast=True)

    n_elements = 100
    n_selected = 10
    mean_rtol = 0.0005

    rng = np.random.default_rng(12345)
    pvals = rng.integers(1, 100, (1, n_elements)).astype(config.floatX)
    pvals /= pvals.sum(1)

    avg_pvals = np.zeros((n_elements,), dtype=config.floatX)

    for rep in range(10000):
        uni = rng.random(n_selected).astype(config.floatX)
        res = f(pvals, uni, n_selected)
        res = np.squeeze(res)
        avg_pvals[res] += 1
    avg_pvals /= avg_pvals.sum()
    avg_diff = np.mean(abs(avg_pvals - pvals))
    assert avg_diff < mean_rtol, avg_diff

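
# The same empirical check can be written against NumPy's own weighted
# sampling without replacement, which is what "selects elements in proportion
# to their probabilities" means here (approximately, for a small draw size).
# Illustrative sketch only, not the ChoiceFromUniform implementation; the
# helper name is hypothetical and numpy is assumed imported as np.
def _numpy_choice_frequency_sketch(n_elements=100, n_selected=10, n_reps=10000):
    rng = np.random.default_rng(12345)
    pvals = rng.integers(1, 100, n_elements).astype("float64")
    pvals /= pvals.sum()
    counts = np.zeros(n_elements)
    for _ in range(n_reps):
        # Distinct indices drawn with probability proportional to pvals.
        counts[rng.choice(n_elements, size=n_selected, replace=False, p=pvals)] += 1
    # Mean absolute gap between empirical selection frequency and pvals.
    return np.mean(np.abs(counts / counts.sum() - pvals))
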
def test_hostfromgpu_shape_i():
    # Test that the shape is lifted over hostfromgpu

    m = mode_with_gpu.including(
        "local_dot_to_dot22", "local_dot22_to_dot22scalar", "specialize"
    )
    a = fmatrix("a")
    ca = aesara.gpuarray.type.GpuArrayType("float32", (False, False))()
    av = np.asarray(np.random.rand(5, 4), dtype="float32")
    cv = gpuarray.asarray(
        np.random.rand(5, 4), dtype="float32", context=get_context(test_ctx_name)
    )

    f = aesara.function([a], GpuFromHost(test_ctx_name)(a), mode=m)
    assert any(isinstance(x.op, GpuFromHost) for x in f.maker.fgraph.toposort())
    f = aesara.function([a], GpuFromHost(test_ctx_name)(a).shape, mode=m)
    topo = f.maker.fgraph.toposort()
    assert isinstance(topo[0].op, Shape_i)
    assert isinstance(topo[1].op, Shape_i)
    assert isinstance(topo[2].op, MakeVector)
    assert tuple(f(av)) == (5, 4)

    f = aesara.function([ca], host_from_gpu(ca), mode=m)
    assert host_from_gpu in [x.op for x in f.maker.fgraph.toposort()]
    f = aesara.function([ca], host_from_gpu(ca).shape, mode=m)
    topo = f.maker.fgraph.toposort()
    assert isinstance(topo[0].op, Shape_i)
    assert isinstance(topo[1].op, Shape_i)
    assert isinstance(topo[2].op, MakeVector)
    assert tuple(f(cv)) == (5, 4)

def test_gpu_qr_incomplete_opt(self):
    A = fmatrix("A")
    fn = aesara.function([A], qr(A, mode="r"), mode=mode_with_gpu)
    assert any(
        [
            isinstance(node.op, GpuMagmaQR) and not node.op.complete
            for node in fn.maker.fgraph.toposort()
        ]
    )

def test_softmax_shape_0(self):
    x = fmatrix("x")
    z = aesara.tensor.nnet.softmax_legacy

    f, f_gpu = self._test_softmax(x, x, z, z, self._cmp)
    # Aesara can handle that case, but cudnn can't
    self._cmp(0, 10, f, f_gpu)

def test_softmax(self):
    x = fmatrix("x")
    z = aesara.tensor.nnet.softmax_legacy

    f, f_gpu = self._test_softmax(x, x, z, z, self._cmp)
    self._cmp(2 << 15, 5, f, f_gpu)

def run_gpu_svd(self, A_val, full_matrices=True, compute_uv=True):
    A = fmatrix("A")
    f = aesara.function(
        [A],
        gpu_svd(A, full_matrices=full_matrices, compute_uv=compute_uv),
        mode=mode_with_gpu,
    )
    return f(A_val)

def run_gpu_cholesky(self, A_val, lower=True):
    A = fmatrix("A")
    f = aesara.function(
        [A],
        GpuMagmaCholesky(lower=lower)(A),
        mode=mode_with_gpu.excluding("cusolver"),
    )
    return f(A_val)

def test_GpuCrossentropySoftmaxArgmax1HotWithBias():
    # This is a basic test for GpuCrossentropySoftmaxArgmax1HotWithBias.
    # We check that we loop when there are too many threads.

    n_in = 1000
    batch_size = 4097
    n_out = 1250

    if not isinstance(mode_with_gpu, aesara.compile.debugmode.DebugMode):
        n_in = 4098
        n_out = 4099

    y = lvector("y")

    b = fvector("b")

    # We precompute the dot product with a big shape beforehand so that the
    # test of GpuCrossentropySoftmax1HotWithBiasDx does not fail with the
    # error "the launch timed out and was terminated" on GPU cards that are
    # not powerful enough. We need the big shape to check for a corner case.
    dot_result = fmatrix("dot_result")

    xx = np.asarray(np.random.rand(batch_size, n_in), dtype=np.float32)
    yy = np.ones((batch_size,), dtype="int32")
    b_values = np.zeros((n_out,), dtype="float32")

    W_values = np.asarray(np.random.rand(n_in, n_out), dtype="float32")
    dot_value = np.asarray(np.dot(xx, W_values), dtype="float32")
    del W_values
    p_y_given_x = aesara.tensor.nnet.softmax(dot_result + b)
    y_pred = argmax(p_y_given_x, axis=-1)
    loss = -mean(log(p_y_given_x)[aet.arange(y.shape[0]), y])
    dW = grad(loss, dot_result)
    classify = aesara.function(
        inputs=[y, b, dot_result], outputs=[loss, y_pred, dW], mode=mode_without_gpu
    )
    classify_gpu = aesara.function(
        inputs=[y, b, dot_result], outputs=[loss, y_pred, dW], mode=mode_with_gpu
    )

    assert any(
        [
            isinstance(
                node.op, aesara.tensor.nnet.CrossentropySoftmaxArgmax1HotWithBias
            )
            for node in classify.maker.fgraph.toposort()
        ]
    )
    assert any(
        [
            isinstance(node.op, GpuCrossentropySoftmaxArgmax1HotWithBias)
            for node in classify_gpu.maker.fgraph.toposort()
        ]
    )

    out = classify(yy, b_values, dot_value)
    gout = classify_gpu(yy, b_values, dot_value)

    assert len(out) == len(gout) == 3
    utt.assert_allclose(out[0], gout[0])
    utt.assert_allclose(out[2], gout[2], atol=3e-6)
    utt.assert_allclose(out[1], gout[1])

def test_gpu_matrix_inverse(self):
    A = fmatrix("A")

    fn = aesara.function([A], gpu_matrix_inverse(A), mode=mode_with_gpu)
    N = 1000
    test_rng = np.random.default_rng(seed=1)
    # Copied from tests.tensor.utils.random.
    A_val = test_rng.random((N, N)).astype("float32") * 2 - 1
    A_val_inv = fn(A_val)
    utt.assert_allclose(np.eye(N), np.dot(A_val_inv, A_val), atol=1e-2)

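
# The identity check above mirrors plain NumPy: for a random float32 matrix
# of this size the residual of inv(A) @ A against the identity is well above
# float64 round-off, hence the loose atol=1e-2. Illustrative sketch only
# (hypothetical helper, assumes numpy as np):
def _inverse_residual_sketch(n=1000, seed=1):
    rng = np.random.default_rng(seed)
    a = rng.random((n, n)).astype("float32") * 2 - 1
    # Largest entry-wise deviation of inv(a) @ a from the identity.
    return np.max(np.abs(np.dot(np.linalg.inv(a), a) - np.eye(n)))
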
def test_incorrect_type(self):
    x = vector("x")
    with pytest.raises(TypeError):
        # Incorrect shape for test value
        x.tag.test_value = np.empty((2, 2))

    x = fmatrix("x")
    with pytest.raises(TypeError):
        # Incorrect dtype (float64) for test value
        x.tag.test_value = np.random.random((3, 4))

def test_gemv_infershape(self):
    b = fmatrix()
    W = ftensor4()
    h = ftensor3()
    iIdx = imatrix()
    oIdx = imatrix()
    self._compile_and_check(
        [W, h, iIdx, b, oIdx],
        [self.gemv_op(b.take(oIdx, axis=0), W, h, iIdx, oIdx)],
        self.gemv_data(),
        self.gemv_class,
    )

def test_cpu_contiguous():
    a = fmatrix("a")
    i = iscalar("i")
    a_val = np.asarray(np.random.rand(4, 5), dtype="float32")
    f = aesara.function([a, i], cpu_contiguous(a.reshape((5, 4))[::i]))
    topo = f.maker.fgraph.toposort()
    assert any([isinstance(node.op, CpuContiguous) for node in topo])
    assert f(a_val, 1).flags["C_CONTIGUOUS"]
    assert f(a_val, 2).flags["C_CONTIGUOUS"]
    assert f(a_val, 3).flags["C_CONTIGUOUS"]
    # Test the grad:
    utt.verify_grad(cpu_contiguous, [np.random.rand(5, 7, 2)])

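
# What the C_CONTIGUOUS assertions check can be seen with plain NumPy: a
# negative-step slice is a valid view but not C-contiguous, and copying it
# into row-major order restores the flag. Illustrative sketch only
# (hypothetical helper, assumes numpy as np):
def _contiguity_sketch():
    a = np.arange(20, dtype="float32").reshape(4, 5)
    view = a[::-1]
    assert not view.flags["C_CONTIGUOUS"]
    assert np.ascontiguousarray(view).flags["C_CONTIGUOUS"]
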
def test_dot_infershape(self):
    b = fmatrix()
    W = ftensor4()
    h = ftensor3()
    iIdx = imatrix()
    oIdx = imatrix()
    self._compile_and_check(
        [W, h, iIdx, b, oIdx],
        [sparse_block_dot(W, h, iIdx, b, oIdx)],
        self.gemv_data(),
        self.gemv_class,
    )

def test_n_samples_1():
    p = fmatrix()
    u = fvector()
    n = iscalar()
    m = MultinomialFromUniform("auto")(p, u, n)

    f = function([p, u, n], m, allow_input_downcast=True)

    np.random.seed(12345)
    for i in [1, 5, 10, 100, 1000, 10000]:
        uni = np.random.rand(2 * i).astype(config.floatX)
        res = f([[1.0, 0.0], [0.0, 1.0]], uni, i)
        utt.assert_allclose(res, [[i * 1.0, 0.0], [0.0, i * 1.0]])

def test_transfer_cpu_gpu():
    a = fmatrix("a")
    g = GpuArrayType(dtype="float32", broadcastable=(False, False))("g")

    av = np.asarray(rng.rand(5, 4), dtype="float32")
    gv = gpuarray.array(av, context=get_context(test_ctx_name))

    f = aesara.function([a], GpuFromHost(test_ctx_name)(a))
    fv = f(av)
    assert GpuArrayType.values_eq(fv, gv)

    f = aesara.function([g], host_from_gpu(g))
    fv = f(gv)
    assert np.all(fv == av)

def test_complex(self):
    rng = np.random.default_rng(2333)
    m = fmatrix()
    c = at_complex(m[0], m[1])
    assert c.type == cvector
    r, i = [real(c), imag(c)]
    assert r.type == fvector
    assert i.type == fvector
    f = aesara.function([m], [r, i])

    mval = np.asarray(rng.standard_normal((2, 5)), dtype="float32")
    rval, ival = f(mval)
    assert np.all(rval == mval[0]), (rval, mval[0])
    assert np.all(ival == mval[1]), (ival, mval[1])

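
# The expected values match building a complex vector from two float32 rows
# directly in NumPy: the real and imaginary parts round-trip exactly.
# Illustrative sketch only (hypothetical helper, assumes numpy as np):
def _complex_roundtrip_sketch(mval):
    c = mval[0] + 1j * mval[1]  # complex64 when mval is float32
    return np.real(c), np.imag(c)
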
def test_gpu_singular_values(self):
    A = fmatrix("A")
    f_cpu = aesara.function(
        [A], aesara.tensor.nlinalg.svd(A, compute_uv=False), mode=mode_without_gpu
    )
    f_gpu = aesara.function([A], gpu_svd(A, compute_uv=False), mode=mode_with_gpu)

    A_val = random(50, 100).astype("float32")
    utt.assert_allclose(f_cpu(A_val), f_gpu(A_val))

    A_val = random(100, 50).astype("float32")
    utt.assert_allclose(f_cpu(A_val), f_gpu(A_val))

def test_gpu_contiguous():
    a = fmatrix("a")
    i = iscalar("i")
    a_val = np.asarray(np.random.rand(4, 5), dtype="float32")
    # The reshape is needed; otherwise the subtensor would be done on the CPU
    # to transfer less data.
    f = aesara.function(
        [a, i], gpu_contiguous(a.reshape((5, 4))[::i]), mode=mode_with_gpu
    )
    topo = f.maker.fgraph.toposort()
    assert any([isinstance(node.op, GpuSubtensor) for node in topo])
    assert any([isinstance(node.op, GpuContiguous) for node in topo])
    assert f(a_val, 1).flags.c_contiguous
    assert f(a_val, 2).flags.c_contiguous
    assert f(a_val, 3).flags.c_contiguous

def test_blocksparse_inplace_gemv_opt():
    b = fmatrix()
    W = ftensor4()
    h = ftensor3()
    iIdx = lmatrix()
    oIdx = lmatrix()

    o = sparse_block_dot(W, h, iIdx, b, oIdx)

    f = aesara.function([W, h, iIdx, b, oIdx], o)

    if aesara.config.mode == "FAST_COMPILE":
        assert not f.maker.fgraph.toposort()[-1].op.inplace
        assert check_stack_trace(f, ops_to_check=[sparse_block_gemv])
    else:
        assert f.maker.fgraph.toposort()[-1].op.inplace
        assert check_stack_trace(f, ops_to_check=[sparse_block_gemv_inplace])

def test_blocksparse_inplace_outer_opt():
    b = fmatrix()
    W = ftensor4()
    h = ftensor3()
    iIdx = lmatrix()
    oIdx = lmatrix()

    o = sparse_block_dot(W, h, iIdx, b, oIdx)

    f = aesara.function(
        [W, h, iIdx, b, oIdx], [o, aesara.gradient.grad(o.sum(), wrt=W)]
    )

    if aesara.config.mode == "FAST_COMPILE":
        assert not f.maker.fgraph.toposort()[-1].op.inplace
        assert check_stack_trace(f, ops_to_check=sparse_block_outer)
    else:
        assert f.maker.fgraph.toposort()[-1].op.inplace
        assert check_stack_trace(f, ops_to_check=sparse_block_outer_inplace)

def test_GpuCrossentropySoftmax1HotWithBiasDx():
    # This is a basic test for GpuCrossentropySoftmax1HotWithBiasDx.
    # We check that we loop when there are too many threads.

    batch_size = 4097
    n_out = 1250

    if not isinstance(mode_with_gpu, aesara.compile.debugmode.DebugMode):
        n_out = 4099

    # Seed numpy.random with config.unittests__rseed
    utt.seed_rng()

    softmax_output_value = np.random.rand(batch_size, n_out).astype("float32")
    dnll_value = np.asarray(np.random.rand(batch_size), dtype="float32")
    y_idx_value = np.random.randint(low=0, high=5, size=batch_size)

    softmax_output = fmatrix()
    # Normalize each row so that it is a valid softmax output.
    softmax_output /= softmax_output.sum(axis=1).reshape(
        (softmax_output.shape[0], 1)
    )
    op = crossentropy_softmax_1hot_with_bias_dx(
        dnll_value, softmax_output, y_idx_value
    )

    cpu_f = aesara.function([softmax_output], op, mode=mode_without_gpu)
    gpu_f = aesara.function([softmax_output], op, mode=mode_with_gpu)
    # aesara.printing.debugprint(cpu_f)
    # aesara.printing.debugprint(gpu_f)

    assert any(
        [
            isinstance(node.op, aesara.tensor.nnet.CrossentropySoftmax1HotWithBiasDx)
            for node in cpu_f.maker.fgraph.toposort()
        ]
    )
    assert any(
        [
            isinstance(node.op, GpuCrossentropySoftmax1HotWithBiasDx)
            for node in gpu_f.maker.fgraph.toposort()
        ]
    )

    cpu_out = cpu_f(softmax_output_value)
    gpu_out = gpu_f(softmax_output_value)

    rtol = 1e-5
    atol = 1e-6
    utt.assert_allclose(cpu_out, gpu_out, rtol=rtol, atol=atol)

def test_fail_select_alot(self):
    # Tests that multinomial_wo_replacement fails when asked to sample more
    # elements than the actual number of elements.
    th_rng = RandomStream(12345)

    p = fmatrix()
    n = iscalar()
    m = th_rng.multinomial_wo_replacement(pvals=p, n=n)

    f = function([p, n], m, allow_input_downcast=True)

    n_elements = 100
    n_selected = 200
    np.random.seed(12345)
    pvals = np.random.randint(1, 100, (1, n_elements)).astype(config.floatX)
    pvals /= pvals.sum(1)
    with pytest.raises(ValueError):
        f(pvals, n_selected)

def test_fail_select_alot(self):
    # Tests that ChoiceFromUniform fails when asked to sample more elements
    # than the actual number of elements.
    p = fmatrix()
    u = fvector()
    n = iscalar()
    m = multinomial.ChoiceFromUniform(odtype="auto")(p, u, n)

    f = function([p, u, n], m, allow_input_downcast=True)

    n_elements = 100
    n_selected = 200
    np.random.seed(12345)
    uni = np.random.rand(n_selected).astype(config.floatX)
    pvals = np.random.randint(1, 100, (1, n_elements)).astype(config.floatX)
    pvals /= pvals.sum(1)
    with pytest.raises(ValueError):
        f(pvals, uni, n_selected)