def test_param_allow_downcast_vector_floatX(self):
    # Exercise the per-input `allow_downcast` flag of `In` on three float
    # vector inputs, one for each setting used here (True, False, None).
    vec_true = tensor.fvector("a")
    vec_false = tensor.fvector("b")
    vec_none = tensor.fvector("c")
    wrapped_inputs = [
        In(vec_true, allow_downcast=True),
        In(vec_false, allow_downcast=False),
        In(vec_none, allow_downcast=None),
    ]
    summed = pfunc(wrapped_inputs, vec_true + vec_false + vec_none)

    exact = [0]

    # Values that can be represented accurately are accepted by every input.
    assert np.all(summed(exact, exact, exact) == 0)

    # allow_downcast=True: the inexact value 0.1 is accepted as well.
    assert np.allclose(summed([0.1], exact, exact), 0.1)

    # allow_downcast=False: the inexact value is rejected.
    with pytest.raises(TypeError):
        summed(exact, [0.1], exact)

    # allow_downcast=None: for a vector input this behaves like False.
    with pytest.raises(TypeError):
        summed(exact, exact, [0.1])
def test_multinomial_dtypes():
    # Check the symbolic output dtype of MultinomialFromUniform for each
    # (input dtype, requested output dtype) combination listed below.
    cases = (
        # (pvals constructor, uniform constructor, odtype, expected dtype)
        (tensor.dmatrix, tensor.dvector, "auto", "float64"),
        (tensor.fmatrix, tensor.fvector, "auto", "float32"),
        (tensor.fmatrix, tensor.fvector, "float64", "float64"),
    )
    for make_pvals, make_unis, odtype, expected in cases:
        pvals = make_pvals()
        unis = make_unis()
        sample = multinomial.MultinomialFromUniform(odtype)(pvals, unis)
        assert sample.dtype == expected, sample.dtype
def test_GpuCrossentropySoftmaxArgmax1HotWithBias():
    # Basic test for GpuCrossentropySoftmaxArgmax1HotWithBias.
    # The shapes are chosen to check that the op loops when there are too
    # many threads.
    batch_size = 4097
    if isinstance(mode_with_gpu, aesara.compile.DebugMode):
        # Smaller problem when running under DebugMode.
        n_in, n_out = 1000, 1250
    else:
        n_in, n_out = 4098, 4099

    y = tt.lvector("y")
    b = tt.fvector("b")

    # The dot product with the big shape is precomputed so that the test of
    # GpuCrossentropySoftmax1HotWithBiasDx does not fail with the error
    # "the launch timed out and was terminated" on GPU cards that are not
    # powerful enough. The big shape is needed to check the corner case.
    dot_result = tt.fmatrix("dot_result")

    # Seed numpy.random with config.unittests.rseed
    utt.seed_rng()

    inputs_val = np.asarray(np.random.rand(batch_size, n_in), dtype=np.float32)
    labels_val = np.ones((batch_size,), dtype="int32")
    bias_val = np.zeros((n_out,), dtype="float32")
    weights_val = np.asarray(np.random.rand(n_in, n_out), dtype="float32")

    dot_value = np.asarray(inputs_val.dot(weights_val), dtype="float32")
    # The weights are only needed to build `dot_value`.
    del weights_val

    p_y_given_x = tt.nnet.softmax(dot_result + b)
    y_pred = tt.argmax(p_y_given_x, axis=-1)
    loss = -tt.mean(tt.log(p_y_given_x)[tt.arange(y.shape[0]), y])
    dW = tt.grad(loss, dot_result)

    def compile_with(mode):
        # Same graph for both modes; only the compilation mode differs.
        return aesara.function(
            inputs=[y, b, dot_result],
            outputs=[loss, y_pred, dW],
            mode=mode,
        )

    classify = compile_with(mode_without_gpu)
    classify_gpu = compile_with(mode_with_gpu)

    # Each compiled graph must contain the op expected for its mode.
    assert any(
        isinstance(node.op, tt.nnet.CrossentropySoftmaxArgmax1HotWithBias)
        for node in classify.maker.fgraph.toposort()
    )
    assert any(
        isinstance(node.op, GpuCrossentropySoftmaxArgmax1HotWithBias)
        for node in classify_gpu.maker.fgraph.toposort()
    )

    cpu_out = classify(labels_val, bias_val, dot_value)
    gpu_out = classify_gpu(labels_val, bias_val, dot_value)

    assert len(cpu_out) == len(gpu_out) == 3
    cpu_loss, cpu_pred, cpu_grad = cpu_out
    gpu_loss, gpu_pred, gpu_grad = gpu_out
    utt.assert_allclose(cpu_loss, gpu_loss)
    utt.assert_allclose(cpu_grad, gpu_grad, atol=3e-6)
    utt.assert_allclose(cpu_pred, gpu_pred)
def make_node(self, activations, labels, input_lengths):
    """Build the Apply node for this op after validating its three inputs.

    Each argument is converted with ``tt.as_tensor_variable``; the
    activations are additionally passed through ``cpu_contiguous``.

    Raises
    ------
    TypeError
        If activations are not float32, or labels / input_lengths are not
        int32.
    ValueError
        If activations are not 3-dimensional, labels are not 2-dimensional,
        or input_lengths is not 1-dimensional.

    The node always has a float vector output named ``ctc_cost``; when
    ``self.compute_grad`` is true a float 3-tensor named ``ctc_grad`` is
    appended as a second output.
    """
    # Ensure activations array is C-contiguous
    acts_var = cpu_contiguous(tt.as_tensor_variable(activations))
    labels_var = tt.as_tensor_variable(labels)
    lengths_var = tt.as_tensor_variable(input_lengths)

    # (variable, required dtype, dtype error, required ndim, ndim error);
    # checked in this order so the first violated rule is the one reported.
    requirements = (
        (
            acts_var,
            "float32",
            "activations must use the float32 type!",
            3,
            "activations must have 3 dimensions.",
        ),
        (
            labels_var,
            "int32",
            "labels must use the int32 type!",
            2,
            "labels must have 2 dimensions.",
        ),
        (
            lengths_var,
            "int32",
            "input_lengths must use the int32 type!",
            1,
            "input_lengths must have 1 dimension.",
        ),
    )
    for var, dtype, dtype_error, ndim, ndim_error in requirements:
        if var.type.dtype != dtype:
            raise TypeError(dtype_error)
        if var.ndim != ndim:
            raise ValueError(ndim_error)

    node_outputs = [tt.fvector(name="ctc_cost")]
    if self.compute_grad:
        node_outputs.append(tt.ftensor3(name="ctc_grad"))

    return gof.Apply(
        self,
        inputs=[acts_var, labels_var, lengths_var],
        outputs=node_outputs,
    )
def test_multinomial_large():
    # DEBUG_MODE will test this on GPU
    pvals_var = tensor.fmatrix()
    unis_var = tensor.fvector()
    sample = aesara.sandbox.multinomial.MultinomialFromUniform("auto")(
        pvals_var, unis_var
    )
    doubled = function(
        [pvals_var, unis_var],
        sample * 2,
        allow_input_downcast=True,
        mode=mode_with_gpu,
    )
    # The compiled graph must use the GPU implementation of the op.
    assert any(
        type(node.op) is GPUAMultinomialFromUniform
        for node in doubled.maker.fgraph.toposort()
    )

    # 10000 rows of 4 increasing weights, each row normalized to sum to 1.
    n_rows, n_cols = 10000, 4
    pval = np.arange(n_rows * n_cols, dtype="float32").reshape((n_rows, n_cols))
    pval = pval + 0.1
    pval = pval / pval.sum(axis=1)[:, np.newaxis]
    uval = np.ones_like(pval[:, 0]) * 0.5

    mval = doubled(pval, uval)
    assert mval.shape == pval.shape

    # The expected output dtype depends on the configured cast policy.
    policy = config.cast_policy
    if policy not in ("custom", "numpy+floatX", "numpy"):
        raise NotImplementedError(policy)
    if policy == "custom":
        assert mval.dtype == pval.dtype
    elif policy == "numpy+floatX":
        assert mval.dtype == config.floatX
    else:
        assert mval.dtype == "float64"

    utt.assert_allclose(mval.sum(axis=1), 2)
    # Same expected row for every sample: broadcast it over all rows.
    expected = np.asarray([0, 0, 2, 0]) + 0 * pval
    utt.assert_allclose(mval, expected)
def test_select_proportional_to_weight(self):
    # Tests that ChoiceFromUniform selects elements, on average,
    # proportional to their probabilities
    pvals_var = tensor.fmatrix()
    unis_var = tensor.fvector()
    count_var = tensor.iscalar()
    choice = multinomial.ChoiceFromUniform(odtype="auto")(
        pvals_var, unis_var, count_var
    )
    draw = function(
        [pvals_var, unis_var, count_var], choice, allow_input_downcast=True
    )

    n_elements = 100
    n_selected = 10
    n_repeats = 10000
    mean_rtol = 0.0005

    np.random.seed(12345)
    pvals = np.random.randint(1, 100, (1, n_elements)).astype(config.floatX)
    pvals /= pvals.sum(1)

    # Count how often each index is selected over all repetitions.
    selection_freq = np.zeros((n_elements,), dtype=config.floatX)
    for _ in range(n_repeats):
        uni = np.random.rand(n_selected).astype(config.floatX)
        picked = np.squeeze(draw(pvals, uni, n_selected))
        selection_freq[picked] += 1

    # Turn the counts into empirical probabilities and compare with pvals.
    selection_freq /= selection_freq.sum()
    avg_diff = np.mean(abs(selection_freq - pvals))
    assert avg_diff < mean_rtol, avg_diff
def test_gpu_opt():
    # Does have some overlap with test_multinomial_0
    # We test the case where we put the op on the gpu when the output
    # is moved to the gpu.
    p = tensor.fmatrix()
    u = tensor.fvector()

    def compile_and_run(pvals_var, n_rows):
        # Build the op on `pvals_var`, check that it was moved to the GPU
        # and run it once on `n_rows` rows of normalized weights.
        sample = aesara.sandbox.multinomial.MultinomialFromUniform("auto")(
            pvals_var, u
        )
        assert sample.dtype == "float32", sample.dtype

        fn = function(
            [pvals_var, u], sample, allow_input_downcast=True, mode=mode_with_gpu
        )
        assert any(
            type(node.op) is GPUAMultinomialFromUniform
            for node in fn.maker.fgraph.toposort()
        )

        pval = np.arange(n_rows * 4, dtype="float32").reshape((n_rows, 4)) + 0.1
        pval = pval / pval.sum(axis=1)[:, None]
        uval = np.ones_like(pval[:, 0]) * 0.5
        fn(pval, uval)

    compile_and_run(p, 10000)

    # Test with a row, it was failing in the past.
    compile_and_run(tensor.frow(), 1)
def test_multinomial_0():
    # This tests the MultinomialFromUniform Op directly, not going through the
    # multinomial() call in GPU random generation.
    pvals_var = tensor.fmatrix()
    unis_var = tensor.fvector()
    sample = multinomial.MultinomialFromUniform("auto")(pvals_var, unis_var)

    # the sample * 2 allows the multinomial to reuse output
    doubled = function(
        [pvals_var, unis_var], sample * 2, allow_input_downcast=True
    )

    # (pvals, uniform samples, expected doubled one-hot output)
    cases = (
        # both the first and the second label can be drawn
        ([[1, 0], [0, 1]], [0.1, 0.1], [[2, 0], [0, 2]]),
        # the second label is drawn in both rows
        ([[0.2, 0.8], [0.3, 0.7]], [0.31, 0.31], [[0, 2], [0, 2]]),
        # the second label in the first row, the first label in the second
        ([[0.2, 0.8], [0.3, 0.7]], [0.21, 0.21], [[0, 2], [2, 0]]),
        # change the size to make sure output gets reallocated ok
        # and also make sure that the GPU version doesn't screw up the
        # transposed-ness
        ([[0.2, 0.8]], [0.25], [[0, 2]]),
    )
    for pvals, unis, expected in cases:
        utt.assert_allclose(doubled(pvals, unis), expected)
def test_Strides1D(self, mode):
    # Compare the cumulative op against NumPy on a 1-d input accessed
    # through normal, stepped and negative strides, for several axis values.
    make_op = partial(self.op_class, mode=mode)
    reference = {"add": np.cumsum, "mul": np.cumprod}[mode]

    x = tt.fvector("x")

    stride_variants = (
        slice(None, None, None),  # Normal strides
        slice(None, None, 2),  # Stepped strides
        slice(None, None, -1),  # Negative strides
    )

    for axis in (0, None, -1):
        data = np.random.random((42,)).astype("float32")
        # Function that applies the op to whatever array it is given.
        cumop_function = aesara.function(
            [x], make_op(axis=axis)(x), mode=self.mode
        )

        # Cartesian product of all slicings to test.
        for slicing in product(stride_variants, repeat=x.ndim):
            # Function that slices symbolically before applying the op.
            sliced_fn = aesara.function(
                [x], make_op(axis=axis)(x[slicing]), mode=self.mode
            )
            assert any(
                isinstance(node.op, GpuCumOp)
                for node in sliced_fn.maker.fgraph.toposort()
            )

            expected = reference(data[slicing], axis=axis)
            utt.assert_allclose(expected, sliced_fn(data))
            utt.assert_allclose(expected, cumop_function(data[slicing]))
def test_allow_downcast_floatX(self):
    # Check how the function-level `allow_input_downcast` flag (True, False,
    # None) treats a float scalar input and a float vector input.
    a = tensor.fscalar("a")
    b = tensor.fvector("b")
    settings = (True, False, None)
    add_with = {
        flag: pfunc([a, b], a + b, allow_input_downcast=flag)
        for flag in settings
    }

    # If the values can be accurately represented, OK
    for flag in settings:
        assert np.all(add_with[flag](0, [0]) == 0)

    # For the vector: OK iff allow_input_downcast is True
    assert np.allclose(add_with[True](0, [0.1]), 0.1)
    for flag in (False, None):
        with pytest.raises(TypeError):
            add_with[flag](0, [0.1])

    # For the scalar: OK if allow_input_downcast is True,
    # or None and floatX==float32
    assert np.allclose(add_with[True](0.1, [0]), 0.1)
    with pytest.raises(TypeError):
        add_with[False](0.1, [0])
    if config.floatX != "float32":
        with pytest.raises(TypeError):
            add_with[None](0.1, [0])
    else:
        assert np.allclose(add_with[None](0.1, [0]), 0.1)
def test_profiling(self):
    # Regression test on the memory figures reported by the profiler summary
    # for a small fixed graph. The profiling flags are switched on for the
    # duration of the test and restored afterwards.
    saved_profile = aesara.config.profile
    saved_profile_memory = aesara.config.profile_memory
    saved_min_peak_memory = aesara.config.profiling.min_peak_memory
    try:
        aesara.config.profile = True
        aesara.config.profile_memory = True
        aesara.config.profiling.min_peak_memory = True

        x = [tt.fvector("val%i" % i) for i in range(3)]
        neighbours = list(zip(x[:-1], x[1:]))

        z = [tt.outer(left, right).sum(axis=1) for left, right in neighbours]
        z.extend(left + right for left, right in neighbours)

        stats = aesara.ProfileStats(False, gpu_checks=False)

        # These modes are replaced by FAST_RUN; otherwise the default is used.
        overridden_modes = ("DebugMode", "DEBUG_MODE", "FAST_COMPILE")
        run_mode = "FAST_RUN" if aesara.config.mode in overridden_modes else None

        f = aesara.function(
            x, z, profile=stats, name="test_profiling", mode=run_mode
        )

        inp = [np.arange(1024, dtype="float32") + 1 for _ in x]
        f(*inp)

        buf = StringIO()
        f.profile.summary(buf)

        # regression testing for future algo speed up
        the_string = buf.getvalue()
        report_lines = the_string.split("\n")
        # Lines shown in the assertion message to help diagnose a failure.
        lines1 = [line for line in report_lines if "Max if linker" in line]
        lines2 = [line for line in report_lines if "Minimum peak" in line]

        if aesara.config.device == "cpu":
            expected_fragments = (
                "CPU: 4112KB (4104KB)",
                "CPU: 8204KB (8196KB)",
                "CPU: 8208KB",
                "Minimum peak from all valid apply node order is 4104KB",
            )
        else:
            expected_fragments = (
                "CPU: 16KB (16KB)",
                "GPU: 8204KB (8204KB)",
                "GPU: 12300KB (12300KB)",
                "GPU: 8212KB",
                "Minimum peak from all valid apply node order is 4116KB",
            )
        for fragment in expected_fragments:
            assert fragment in the_string, (lines1, lines2)
    finally:
        # Always restore the configuration changed above.
        aesara.config.profile = saved_profile
        aesara.config.profile_memory = saved_profile_memory
        aesara.config.profiling.min_peak_memory = saved_min_peak_memory
def test_n_samples_1():
    # With one-hot probability rows, all `n` samples of a row must fall on
    # the single element that has probability 1.
    pvals_var = tensor.fmatrix()
    unis_var = tensor.fvector()
    count_var = tensor.iscalar()
    sample = multinomial.MultinomialFromUniform("auto")(
        pvals_var, unis_var, count_var
    )
    draw = function(
        [pvals_var, unis_var, count_var], sample, allow_input_downcast=True
    )

    one_hot_rows = [[1.0, 0.0], [0.0, 1.0]]

    np.random.seed(12345)
    for n_samples in (1, 5, 10, 100, 1000, 10000):
        # One uniform value per row and per sample.
        uni = np.random.rand(2 * n_samples).astype(config.floatX)
        res = draw(one_hot_rows, uni, n_samples)
        total = float(n_samples)
        utt.assert_allclose(res, [[total, 0.0], [0.0, total]])
def test_gpu_opt_dtypes():
    # Test if the returned samples are of the datatype specified
    n_rows = 10000
    pval = np.arange(n_rows * 4, dtype="float32").reshape((n_rows, 4)) + 0.1
    pval = pval / pval.sum(axis=1)[:, None]
    uval = np.ones_like(pval[:, 0]) * 0.5

    for dtype in ("uint32", "float32", "int64", "float64"):
        pvals_var = tensor.fmatrix()
        unis_var = tensor.fvector()
        sample = aesara.sandbox.multinomial.MultinomialFromUniform(dtype)(
            pvals_var, unis_var
        )
        fn = function(
            [pvals_var, unis_var],
            sample,
            allow_input_downcast=True,
            mode=mode_with_gpu,
        )
        # The op must have been replaced by its GPU version.
        assert any(
            type(node.op) is GPUAMultinomialFromUniform
            for node in fn.maker.fgraph.toposort()
        )

        samples = fn(pval, uval)
        assert samples.dtype == dtype, "{} != {}".format(samples.dtype, dtype)
def test_gpu_opt_wor():
    # We test the case where we put the op on the gpu when the output
    # is moved to the gpu.

    def runs_on_gpu(fn):
        # True when the compiled graph contains the GPU choice op.
        return any(
            type(node.op) is GPUAChoiceFromUniform
            for node in fn.maker.fgraph.toposort()
        )

    def normalized_pvals(n_rows):
        # `n_rows` rows of 4 increasing weights, each row summing to 1.
        raw = np.arange(n_rows * 4, dtype="float32").reshape((n_rows, 4)) + 0.1
        return raw / raw.sum(axis=1)[:, None]

    p = tensor.fmatrix()
    u = tensor.fvector()
    n = tensor.iscalar()
    for replace in (False, True):
        choice = multinomial.ChoiceFromUniform(odtype="auto", replace=replace)(
            p, u, n
        )
        assert choice.dtype == "int64", choice.dtype

        fn = function(
            [p, u, n], choice, allow_input_downcast=True, mode=mode_with_gpu
        )
        assert runs_on_gpu(fn)

        n_samples = 3
        pval = normalized_pvals(10000)
        uval = np.ones(pval.shape[0] * n_samples) * 0.5
        fn(pval, uval, n_samples)

        # Test with a row, it was failing in the past.
        r = tensor.frow()
        choice = multinomial.ChoiceFromUniform("auto", replace=replace)(r, u, n)
        assert choice.dtype == "int64", choice.dtype

        fn = function(
            [r, u, n], choice, allow_input_downcast=True, mode=mode_with_gpu
        )
        assert runs_on_gpu(fn)

        pval = normalized_pvals(1)
        uval = np.ones_like(pval[:, 0]) * 0.5
        fn(pval, uval, 1)
def test_fail_select_alot(self):
    # Tests that ChoiceFromUniform fails when asked to sample more
    # elements than the actual number of elements
    pvals_var = tensor.fmatrix()
    unis_var = tensor.fvector()
    count_var = tensor.iscalar()
    choice = multinomial.ChoiceFromUniform(odtype="auto")(
        pvals_var, unis_var, count_var
    )
    draw = function(
        [pvals_var, unis_var, count_var], choice, allow_input_downcast=True
    )

    n_elements = 100
    # Twice as many draws as there are elements to pick from.
    n_selected = 200

    np.random.seed(12345)
    uni = np.random.rand(n_selected).astype(config.floatX)
    weights = np.random.randint(1, 100, (1, n_elements)).astype(config.floatX)
    weights /= weights.sum(1)

    with pytest.raises(ValueError):
        draw(weights, uni, n_selected)
def test_GpuCumOp1D(self, mode):
    # Compare the 1-d cumulative op against the matching NumPy function on
    # inputs of increasing size.
    reference = {"add": np.cumsum, "mul": np.cumprod}[mode]
    make_op = partial(self.op_class, mode=mode)
    block_max_size = self.max_threads_dim0 * 2

    x = tt.fvector("x")
    gpu_cumop = aesara.function([x], make_op(axis=0)(x), mode=self.mode)
    assert any(
        isinstance(node.op, GpuCumOp)
        for node in gpu_cumop.maker.fgraph.toposort()
    )

    def check(values):
        utt.assert_allclose(reference(values), gpu_cumop(values))

    # Extensive testing for the first 1025 sizes
    data = np.random.random(1025).astype("float32")
    for size in range(len(data)):
        check(data[:size])

    # Use multiple GPU threadblocks
    check(np.random.random((block_max_size + 2,)).astype("float32"))

    # Use recursive cumop
    check(np.ones((block_max_size * (block_max_size + 1) + 2,), dtype="float32"))
def test_select_distinct(self):
    # Tests that ChoiceFromUniform always selects distinct elements
    p = tensor.fmatrix()
    u = tensor.fvector()
    n = tensor.iscalar()
    m = multinomial.ChoiceFromUniform(odtype="auto")(p, u, n)

    f = function([p, u, n], m, allow_input_downcast=True)

    n_elements = 1000
    all_indices = np.arange(n_elements)
    np.random.seed(12345)
    for i in [5, 10, 50, 100, 500, n_elements]:
        uni = np.random.rand(i).astype(config.floatX)
        pvals = np.random.randint(1, 100, (1, n_elements)).astype(config.floatX)
        pvals /= pvals.sum(1)
        res = f(pvals, uni, i)
        res = np.squeeze(res)
        # The requested number of indices is returned ...
        assert len(res) == i, res
        selected = np.unique(res)
        # ... none of them is repeated (this is the property the test is
        # named after; checking only the count and the range would let
        # duplicated indices go unnoticed) ...
        assert len(selected) == i, res
        # ... and every one of them is a valid index into pvals.
        assert np.all(np.isin(selected, all_indices)), res
def test_multinomial_large():
    # Run MultinomialFromUniform on a large (10000 x 4) probability matrix
    # and check the shape, dtype and content of the (doubled) samples.
    pvals_var = tensor.fmatrix()
    unis_var = tensor.fvector()
    sample = multinomial.MultinomialFromUniform("auto")(pvals_var, unis_var)
    doubled = function(
        [pvals_var, unis_var], sample * 2, allow_input_downcast=True
    )

    # 10000 rows of 4 increasing weights, each row normalized to sum to 1.
    n_rows, n_cols = 10000, 4
    pval = np.arange(n_rows * n_cols, dtype="float32").reshape((n_rows, n_cols))
    pval = pval + 0.1
    pval = pval / pval.sum(axis=1)[:, np.newaxis]
    uval = np.ones_like(pval[:, 0]) * 0.5

    mval = doubled(pval, uval)
    assert mval.shape == pval.shape

    # The expected output dtype depends on the configured cast policy.
    policy = config.cast_policy
    if policy not in ("custom", "numpy+floatX", "numpy"):
        raise NotImplementedError(policy)
    if policy == "custom":
        assert mval.dtype == pval.dtype
    elif policy == "numpy+floatX":
        assert mval.dtype == config.floatX
    else:
        assert mval.dtype == "float64"

    utt.assert_allclose(mval.sum(axis=1), 2)
    # Same expected row for every sample: broadcast it over all rows.
    expected = np.asarray([0, 0, 2, 0]) + 0 * pval
    utt.assert_allclose(mval, expected)
def test_n_samples_2():
    # The total count over a single row of probabilities must equal the
    # number of requested samples, for a medium and a very large row.
    pvals_var = tensor.fmatrix()
    unis_var = tensor.fvector()
    count_var = tensor.iscalar()
    sample = multinomial.MultinomialFromUniform("auto")(
        pvals_var, unis_var, count_var
    )
    draw = function(
        [pvals_var, unis_var, count_var], sample, allow_input_downcast=True
    )

    sample_counts = (1, 5, 10, 100, 1000)

    np.random.seed(12345)
    # The row length is also used as the exclusive upper bound of the
    # random integer weights.
    for row_length in (1000, 1000000):
        for n_samples in sample_counts:
            uni = np.random.rand(n_samples).astype(config.floatX)
            weights = np.random.randint(1, row_length, (1, row_length))
            weights = weights.astype(config.floatX)
            weights /= weights.sum(1)
            res = draw(weights, uni, n_samples)
            assert res.sum() == n_samples
def test_multinomial_output_dtype():
    # This tests the MultinomialFromUniform Op directly, not going through the
    # multinomial() call in GPU random generation.
    pvals_var = tensor.fmatrix()
    unis_var = tensor.fvector()

    # (pvals, uniform samples, expected doubled one-hot output)
    cases = (
        # both the first and the second label can be drawn
        ([[1, 0], [0, 1]], [0.1, 0.1], [[2, 0], [0, 2]]),
        # the second label is drawn in both rows
        ([[0.2, 0.8], [0.3, 0.7]], [0.31, 0.31], [[0, 2], [0, 2]]),
        # the second label in the first row, the first label in the second
        ([[0.2, 0.8], [0.3, 0.7]], [0.21, 0.21], [[0, 2], [2, 0]]),
        # change the size to make sure output gets reallocated ok
        # and also make sure that the GPU version doesn't screw up the
        # transposed-ness
        ([[0.2, 0.8]], [0.25], [[0, 2]]),
    )

    for dtype in ("int64", "float32", "float16", "float64", "int32", "auto"):
        sample = aesara.sandbox.multinomial.MultinomialFromUniform(dtype)(
            pvals_var, unis_var
        )
        # the sample * 2 allows the multinomial to reuse output
        doubled = function(
            [pvals_var, unis_var],
            sample * 2,
            allow_input_downcast=True,
            mode=mode_with_gpu,
        )
        assert any(
            type(node.op) is GPUAMultinomialFromUniform
            for node in doubled.maker.fgraph.toposort()
        )

        for pvals, unis, expected in cases:
            utt.assert_allclose(doubled(pvals, unis), expected)
def test_select_distinct(self):
    # Tests that ChoiceFromUniform always selects distinct elements
    p = tensor.fmatrix()
    u = tensor.fvector()
    n = tensor.iscalar()
    m = multinomial.ChoiceFromUniform(odtype="auto")(p, u, n)

    f = function([p, u, n], m, allow_input_downcast=True)

    n_elements = 1000
    all_indices = np.arange(n_elements)
    np.random.seed(12345)
    # Reference selections for the first three sample counts (5, 10 and 50)
    # with the seed set above.
    expected = [
        np.asarray([[931, 318, 185, 209, 559]]),
        np.asarray([[477, 887, 2, 717, 333, 665, 159, 559, 348, 136]]),
        np.asarray([[
            546, 28, 79, 665, 295, 779, 433, 531, 411, 716,
            244, 234, 70, 88, 612, 639, 383, 335, 451, 100,
            175, 492, 848, 771, 559, 214, 568, 596, 370, 486,
            855, 925, 138, 300, 528, 507, 730, 199, 882, 357,
            58, 195, 705, 900, 66, 468, 513, 410, 816, 672,
        ]]),
    ]
    for i in [5, 10, 50, 100, 500, n_elements]:
        uni = np.random.rand(i).astype(config.floatX)
        pvals = np.random.randint(1, 100, (1, n_elements)).astype(config.floatX)
        pvals /= pvals.sum(1)
        res = f(pvals, uni, i)
        # Compare against the reference of the same shape, when there is one.
        for reference in expected:
            if reference.shape == res.shape:
                assert (reference == res).all()
        res = np.squeeze(res)
        # The requested number of indices is returned ...
        assert len(res) == i
        selected = np.unique(res)
        # ... none of them is repeated (this is the property the test is
        # named after; checking only the count and the range would let
        # duplicated indices go unnoticed) ...
        assert len(selected) == i, res
        # ... and every one of them is a valid index into pvals.
        assert np.all(np.isin(selected, all_indices)), res