def test_param_allow_downcast_vector_floatX(self):
    """Exercise the per-input ``allow_downcast`` flag of ``In`` on ``fvector`` inputs.

    One input is declared with ``allow_downcast=True``, one with ``False``
    and one with ``None``; the compiled function returns their sum.
    """
    downcast_on = fvector("a")
    downcast_off = fvector("b")
    downcast_unset = fvector("c")
    declared_inputs = [
        In(downcast_on, allow_downcast=True),
        In(downcast_off, allow_downcast=False),
        In(downcast_unset, allow_downcast=None),
    ]
    total = pfunc(declared_inputs, (downcast_on + downcast_off + downcast_unset))

    zeros = [0]

    # Values that can be accurately represented are accepted everywhere.
    assert np.all(total(zeros, zeros, zeros) == 0)

    # With allow_downcast=True the value 0.1 is accepted as well.
    assert np.allclose(total([0.1], zeros, zeros), 0.1)

    # With allow_downcast=False it is rejected.
    with pytest.raises(TypeError):
        total(zeros, [0.1], zeros)

    # allow_downcast=None behaves like False here.
    with pytest.raises(TypeError):
        total(zeros, zeros, [0.1])
def test_multinomial_dtypes():
    """Check the output dtype of ``MultinomialFromUniform`` for several setups.

    Each case lists the constructors of the two inputs, the dtype argument
    given to the Op and the dtype expected on the resulting variable.
    """
    cases = (
        (dmatrix, dvector, "auto", "float64"),
        (fmatrix, fvector, "auto", "float32"),
        (fmatrix, fvector, "float64", "float64"),
    )
    for make_pvals, make_unis, odtype, expected in cases:
        pvals = make_pvals()
        unis = make_unis()
        sample = MultinomialFromUniform(odtype)(pvals, unis)
        assert sample.dtype == expected, sample.dtype
def test_profiling(self):
    """Compile a small profiled function and check figures in its summary.

    Three profiling-related config flags are switched on for the duration
    of the test and restored in the ``finally`` clause, even on failure.
    """
    saved_profile = config.profile
    saved_profile_memory = config.profile_memory
    saved_min_peak = config.profiling__min_peak_memory
    try:
        config.profile = True
        config.profile_memory = True
        config.profiling__min_peak_memory = True

        x = [fvector(f"val{i}") for i in range(3)]
        # Consecutive pairs (x[0], x[1]) and (x[1], x[2]).
        neighbours = list(zip(x, x[1:]))

        z = [aet.outer(left, right).sum(axis=1) for left, right in neighbours]
        z += [left + right for left, right in neighbours]

        stats = ProfileStats(False, gpu_checks=False)

        # These configured modes are replaced by FAST_RUN for this test.
        overridden_modes = ["DebugMode", "DEBUG_MODE", "FAST_COMPILE"]
        run_mode = "FAST_RUN" if config.mode in overridden_modes else None

        f = function(x, z, profile=stats, name="test_profiling", mode=run_mode)

        inp = [np.arange(1024, dtype="float32") + 1 for _ in x]
        f(*inp)

        buf = StringIO()
        f.profile.summary(buf)

        # regression testing for future algo speed up
        the_string = buf.getvalue()
        report_lines = the_string.split("\n")
        lines1 = [line for line in report_lines if "Max if linker" in line]
        lines2 = [line for line in report_lines if "Minimum peak" in line]

        if config.device == "cpu":
            expected_fragments = (
                "CPU: 4112KB (4104KB)",
                "CPU: 8204KB (8196KB)",
                "CPU: 8208KB",
                "Minimum peak from all valid apply node order is 4104KB",
            )
        else:
            expected_fragments = (
                "CPU: 16KB (16KB)",
                "GPU: 8204KB (8204KB)",
                "GPU: 12300KB (12300KB)",
                "GPU: 8212KB",
                "Minimum peak from all valid apply node order is 4116KB",
            )
        for fragment in expected_fragments:
            assert fragment in the_string, (lines1, lines2)
    finally:
        config.profile = saved_profile
        config.profile_memory = saved_profile_memory
        config.profiling__min_peak_memory = saved_min_peak
def test_Strides1D(self, mode):
    """Compare the cumulative Op against NumPy on differently strided 1D views.

    ``mode`` selects the NumPy reference (``"add"`` -> ``np.cumsum``,
    ``"mul"`` -> ``np.cumprod``) and is forwarded to ``self.op_class``.
    """
    op_class = partial(self.op_class, mode=mode)
    np_func = {"add": np.cumsum, "mul": np.cumprod}[mode]

    x = fvector("x")

    slicings = [
        slice(None, None, None),  # Normal strides
        slice(None, None, 2),  # Stepped strides
        slice(None, None, -1),  # Negative strides
    ]

    for axis in (0, None, -1):
        a = np.random.random((42,)).astype("float32")
        cumop_function = aesara.function(
            [x], op_class(axis=axis)(x), mode=self.mode
        )

        # Cartesian product of all slicings to test.
        for slicing in product(slicings, repeat=x.ndim):
            f = aesara.function(
                [x], op_class(axis=axis)(x[slicing]), mode=self.mode
            )
            # The compiled graph must contain at least one GpuCumOp node.
            assert any(
                isinstance(node.op, GpuCumOp)
                for node in f.maker.fgraph.toposort()
            )

            expected = np_func(a[slicing], axis=axis)
            # Slicing done symbolically inside the graph ...
            utt.assert_allclose(expected, f(a))
            # ... and slicing done in NumPy before the call.
            utt.assert_allclose(expected, cumop_function(a[slicing]))
def test_cast_float16(self):
    """Compile several ``astype`` casts with ``mode_with_gpu`` and compare with NumPy."""
    half_in = vector(dtype="float16")
    single_in = fvector()
    byte_in = bvector()

    casts = [
        half_in.astype("float32"),
        single_in.astype("float16"),
        single_in.astype("float64"),
        half_in.astype("int8"),
        single_in.astype("int8"),
        byte_in.astype("float16"),
        byte_in.astype("float32"),
    ]
    f = aesara.function([half_in, single_in, byte_in], casts, mode=mode_with_gpu)

    d1 = (np.random.rand(4) * 10).astype("float16")
    d2 = (np.random.rand(5) * 10).astype("float32")
    d3 = (np.random.rand(6) * 10).astype("int8")
    # Input data keyed by source dtype; anything else falls back to d3.
    data_by_dtype = {"float16": d1, "float32": d2}

    res = f(d1, d2, d3)

    for position, out in enumerate(f.outputs):
        target_dtype = out.variable.dtype
        assert res[position].dtype == target_dtype
        source_var = out.variable.owner.inputs[0]
        source_data = data_by_dtype.get(source_var.dtype, d3)
        assert_allclose(source_data.astype(target_dtype), res[position])
def test_composite_elemwise_float16(self):
    """Check that two float16 elementwise graphs compile with ``mode_with_gpu``.

    Only compilation is exercised; the compiled functions are never called.
    """
    # First graph: mixed int8 / float16 / float32 inputs with several casts.
    byte_vec = bvector()
    half_vec = vector(dtype="float16")
    single_vec = fvector()

    cz = tanh(half_vec + aet.cast(single_vec, "float16"))
    expr = cz - cz ** 2
    expr = expr + aet.cast(half_vec, "int16")
    expr = expr + aet.cast(half_vec, "float32")
    expr = expr + aet.cast(byte_vec, "float16")
    expr = expr - aet.constant(np.float16(1.0))

    aesara.function([byte_vec, half_vec, single_vec], expr, mode=mode_with_gpu)

    # Second graph: a switch between a three-way product and a plain input.
    cond = vector(dtype="uint8")
    w = vector(dtype="float16")
    x = vector(dtype="float16")
    y = vector(dtype="float16")
    z = vector(dtype="float16")

    selected = aet.switch(cond, mul(w, x, y), z)
    aesara.function([cond, w, x, y, z], selected, mode=mode_with_gpu)
def test_output_broadcast_tensor(self):
    """Compile and run ``VecAsRowAndCol`` on a length-5 float32 vector.

    No value is asserted; the test passes when the call completes.
    """
    vec = fvector("v")
    col, row = VecAsRowAndCol()(vec)
    compiled = function([vec], [col, row])

    sample = self.rng.standard_normal(5).astype("float32")
    compiled(sample)
def test_output_broadcast_tensor(self):
    """Compile and run ``VecAsRowAndCol`` on a length-5 float32 vector.

    No value is asserted; the test passes when the call completes.
    """
    vec = fvector("v")
    col, row = VecAsRowAndCol()(vec)
    compiled = aesara.function([vec], [col, row])

    sample = self.rng.randn(5).astype("float32")
    compiled(sample)
def test_select_proportional_to_weight(self):
    """Check that ``ChoiceFromUniform`` selects elements, on average,
    proportionally to their probabilities.

    Selection counts are accumulated over 10000 draws, normalised, and
    compared with the input probabilities.
    """
    p = fmatrix()
    u = fvector()
    n = iscalar()
    chosen = multinomial.ChoiceFromUniform(odtype="auto")(p, u, n)
    f = function([p, u, n], chosen, allow_input_downcast=True)

    n_elements = 100
    n_selected = 10
    mean_rtol = 0.0005
    n_repetitions = 10000

    np.random.seed(12345)
    pvals = np.random.randint(1, 100, (1, n_elements)).astype(config.floatX)
    pvals /= pvals.sum(1)

    # Number of times each element was selected.
    avg_pvals = np.zeros((n_elements,), dtype=config.floatX)
    for _ in range(n_repetitions):
        uni = np.random.rand(n_selected).astype(config.floatX)
        picked = np.squeeze(f(pvals, uni, n_selected))
        avg_pvals[picked] += 1
    avg_pvals /= avg_pvals.sum()

    avg_diff = np.mean(np.abs(avg_pvals - pvals))
    assert avg_diff < mean_rtol, avg_diff
def test_multinomial_0():
    """Exercise the ``MultinomialFromUniform`` Op directly.

    This does not go through the multinomial() call in GPU random
    generation.
    """
    p = fmatrix()
    u = fvector()
    m = MultinomialFromUniform("auto")(p, u)

    # the m*2 allows the multinomial to reuse output
    f = function([p, u], m * 2, allow_input_downcast=True)

    # (pvals, uniform samples, expected output), evaluated in this order.
    cases = [
        # both first and second samples can be drawn
        ([[1, 0], [0, 1]], [0.1, 0.1], [[2, 0], [0, 2]]),
        # both second labels can be drawn
        ([[0.2, 0.8], [0.3, 0.7]], [0.31, 0.31], [[0, 2], [0, 2]]),
        # both first labels can be drawn
        ([[0.2, 0.8], [0.3, 0.7]], [0.21, 0.21], [[0, 2], [2, 0]]),
        # change the size to make sure output gets reallocated ok
        # and also make sure that the GPU version doesn't screw up the
        # transposed-ness
        ([[0.2, 0.8]], [0.25], [[0, 2]]),
    ]
    for pvals, unis, expected in cases:
        utt.assert_allclose(f(pvals, unis), expected)
def test_allow_downcast_floatX(self):
    """Exercise the function-wide ``allow_input_downcast`` flag of ``pfunc``.

    The same ``a + b`` graph (``fscalar`` plus ``fvector``) is compiled with
    the flag set to ``True``, ``False`` and ``None``.
    """
    a = fscalar("a")
    b = fvector("b")
    permissive, strict, unset = [
        pfunc([a, b], (a + b), allow_input_downcast=flag)
        for flag in (True, False, None)
    ]

    # If the values can be accurately represented, OK
    for compiled in (permissive, strict, unset):
        assert np.all(compiled(0, [0]) == 0)

    # For the vector: OK iff allow_input_downcast is True
    assert np.allclose(permissive(0, [0.1]), 0.1)
    for compiled in (strict, unset):
        with pytest.raises(TypeError):
            compiled(0, [0.1])

    # For the scalar: OK if allow_input_downcast is True,
    # or None and floatX==float32
    assert np.allclose(permissive(0.1, [0]), 0.1)
    with pytest.raises(TypeError):
        strict(0.1, [0])
    if config.floatX == "float32":
        assert np.allclose(unset(0.1, [0]), 0.1)
    else:
        with pytest.raises(TypeError):
            unset(0.1, [0])
def make_node(self, activations, labels, input_lengths):
    """Validate the three inputs and build the ``Apply`` node for this Op.

    Parameters
    ----------
    activations
        Converted to a tensor variable and passed through
        ``cpu_contiguous``; must be float32 with 3 dimensions.
    labels
        Converted to a tensor variable; must be int32 with 2 dimensions.
    input_lengths
        Converted to a tensor variable; must be int32 with 1 dimension.

    Returns
    -------
    Apply
        Node with a ``ctc_cost`` vector output and, when
        ``self.compute_grad`` is true, an additional ``ctc_grad`` 3D output.

    Raises
    ------
    TypeError
        If an input does not have the required dtype.
    ValueError
        If an input does not have the required number of dimensions.
    """
    # Ensure activations array is C-contiguous
    t_activations = cpu_contiguous(at.as_tensor_variable(activations))
    t_labels = at.as_tensor_variable(labels)
    t_input_lengths = at.as_tensor_variable(input_lengths)

    # (variable, required dtype, dtype error, required ndim, ndim error),
    # checked in this order so the first failing check is reported.
    requirements = (
        (
            t_activations,
            "float32",
            "activations must use the float32 type!",
            3,
            "activations must have 3 dimensions.",
        ),
        (
            t_labels,
            "int32",
            "labels must use the int32 type!",
            2,
            "labels must have 2 dimensions.",
        ),
        (
            t_input_lengths,
            "int32",
            "input_lengths must use the int32 type!",
            1,
            "input_lengths must have 1 dimension.",
        ),
    )
    for variable, dtype, dtype_error, ndim, ndim_error in requirements:
        if variable.type.dtype != dtype:
            raise TypeError(dtype_error)
        if variable.ndim != ndim:
            raise ValueError(ndim_error)

    outputs = [fvector(name="ctc_cost")]
    if self.compute_grad:
        outputs.append(ftensor3(name="ctc_grad"))

    return Apply(
        self,
        inputs=[t_activations, t_labels, t_input_lengths],
        outputs=outputs,
    )
def test_multinomial_large():
    """Run ``MultinomialFromUniform`` on a 10000 x 4 input with ``mode_with_gpu``.

    DEBUG_MODE will test this on GPU.
    """
    p = fmatrix()
    u = fvector()
    m = aesara.sandbox.multinomial.MultinomialFromUniform("auto")(p, u)
    f = function([p, u], m * 2, allow_input_downcast=True, mode=mode_with_gpu)

    # The compiled graph must use the GPU implementation of the Op.
    assert any(
        type(node.op) is GPUAMultinomialFromUniform
        for node in f.maker.fgraph.toposort()
    )

    pval = np.arange(10000 * 4, dtype="float32").reshape((10000, 4)) + 0.1
    pval = pval / pval.sum(axis=1)[:, None]
    uval = np.ones_like(pval[:, 0]) * 0.5
    mval = f(pval, uval)

    assert mval.shape == pval.shape

    # Expected output dtype for each supported cast policy.
    expected_dtype = {
        "custom": pval.dtype,
        "numpy+floatX": config.floatX,
        "numpy": "float64",
    }
    if config.cast_policy not in expected_dtype:
        raise NotImplementedError(config.cast_policy)
    assert mval.dtype == expected_dtype[config.cast_policy]

    utt.assert_allclose(mval.sum(axis=1), 2)
    reference = np.asarray([0, 0, 2, 0]) + 0 * pval
    utt.assert_allclose(mval, reference)  # broadcast over all rows
def test_one_sequence_one_output_weights_gpu2(self):
    """Run a one-sequence, one-output scan compiled with ``mode_with_gpu``.

    The scan result is compared against a NumPy recurrence, then the
    compiled graph is inspected: transfer-op counts in the outer graph, a
    single ``Scan`` node, and no host/GPU transfer inside the scan's inner
    function.
    """
    # ``scan`` is called as a function below, so the Scan Op class cannot be
    # reached through attribute access on it (the previous ``scan.op.Scan``
    # lookup); import the class explicitly instead.
    from aesara.scan.op import Scan

    def f_rnn(u_t, x_tm1, W_in, W):
        return u_t * W_in + x_tm1 * W

    u = fvector("u")
    x0 = fscalar("x0")
    W_in = fscalar("win")
    W = fscalar("w")
    output, updates = scan(
        f_rnn,
        u,
        x0,
        [W_in, W],
        n_steps=None,
        truncate_gradient=-1,
        go_backwards=False,
        mode=mode_with_gpu,
    )

    f2 = aesara.function(
        [u, x0, W_in, W],
        output,
        updates=updates,
        allow_input_downcast=True,
        mode=mode_with_gpu,
    )

    # get random initial values (drawn in the same order as before; the
    # numeric values get their own names instead of shadowing the symbolic
    # ``W`` / ``W_in`` variables)
    rng = np.random.default_rng(utt.fetch_seed())
    v_u = rng.uniform(size=(4,), low=-5.0, high=5.0)
    v_x0 = rng.uniform()
    v_W = rng.uniform()
    v_W_in = rng.uniform()

    # compute the output in numpy
    v_out = np.zeros((4,))
    v_out[0] = v_u[0] * v_W_in + v_x0 * v_W
    for step in range(1, 4):
        v_out[step] = v_u[step] * v_W_in + v_out[step - 1] * v_W

    aesara_values = f2(v_u, v_x0, v_W_in, v_W)
    utt.assert_allclose(aesara_values, v_out)

    # Transfer ops expected in the outer graph.
    topo = f2.maker.fgraph.toposort()
    assert sum(isinstance(node.op, HostFromGpu) for node in topo) == 1
    assert sum(isinstance(node.op, GpuFromHost) for node in topo) == 4

    scan_nodes = [node for node in topo if isinstance(node.op, Scan)]
    assert len(scan_nodes) == 1
    scan_node = scan_nodes[0]
    scan_node_topo = scan_node.op.fn.maker.fgraph.toposort()

    # check that there is no gpu transfer in the inner loop.
    assert any(isinstance(node.op, GpuElemwise) for node in scan_node_topo)
    assert not any(isinstance(node.op, HostFromGpu) for node in scan_node_topo)
    assert not any(isinstance(node.op, GpuFromHost) for node in scan_node_topo)
def test_GpuCrossentropySoftmaxArgmax1HotWithBias():
    """Basic test for ``GpuCrossentropySoftmaxArgmax1HotWithBias``.

    The same graph is compiled with ``mode_without_gpu`` and
    ``mode_with_gpu`` and the three outputs are compared. Large shapes are
    used so that the looping over threads is exercised.
    """
    batch_size = 4097
    in_debug_mode = isinstance(mode_with_gpu, aesara.compile.debugmode.DebugMode)
    # Smaller sizes are used only when mode_with_gpu is a DebugMode.
    n_in, n_out = (1000, 1250) if in_debug_mode else (4098, 4099)

    y = lvector("y")
    b = fvector("b")

    # The dot product is precomputed with NumPy and fed in as an input. This
    # keeps GpuCrossentropySoftmax1HotWithBiasDx from failing with "the
    # launch timed out and was terminated" on GPU cards that are not
    # powerful enough, while still using the big shape needed for the
    # corner case.
    dot_result = fmatrix("dot_result")

    xx = np.asarray(np.random.rand(batch_size, n_in), dtype=np.float32)
    yy = np.ones((batch_size,), dtype="int32")
    b_values = np.zeros((n_out,), dtype="float32")
    W_values = np.asarray(np.random.rand(n_in, n_out), dtype="float32")
    dot_value = np.asarray(np.dot(xx, W_values), dtype="float32")
    del W_values

    p_y_given_x = aesara.tensor.nnet.softmax(dot_result + b)
    y_pred = argmax(p_y_given_x, axis=-1)
    loss = -mean(log(p_y_given_x)[aet.arange(y.shape[0]), y])
    dW = grad(loss, dot_result)

    def compile_with(mode):
        return aesara.function(
            inputs=[y, b, dot_result], outputs=[loss, y_pred, dW], mode=mode
        )

    classify = compile_with(mode_without_gpu)
    classify_gpu = compile_with(mode_with_gpu)

    assert any(
        isinstance(
            node.op, aesara.tensor.nnet.CrossentropySoftmaxArgmax1HotWithBias
        )
        for node in classify.maker.fgraph.toposort()
    )
    assert any(
        isinstance(node.op, GpuCrossentropySoftmaxArgmax1HotWithBias)
        for node in classify_gpu.maker.fgraph.toposort()
    )

    out = classify(yy, b_values, dot_value)
    gout = classify_gpu(yy, b_values, dot_value)

    assert len(out) == len(gout) == 3
    utt.assert_allclose(out[0], gout[0])
    utt.assert_allclose(out[2], gout[2], atol=3e-6)
    utt.assert_allclose(out[1], gout[1])
def test_subgraph_grad():
    """Check that ``grad`` with no known_grads matches successive
    ``subgraph_grad`` calls.

    The gradients of a two-layer cost with respect to both weight matrices
    are computed in one ``grad`` call and, separately, stage by stage with
    ``subgraph_grad``; the two results must agree to within 1e-5 (summed
    absolute difference).
    """
    x = fvector("x")
    t = fvector("t")
    w1 = aesara.shared(np.random.randn(3, 4))
    w2 = aesara.shared(np.random.randn(4, 2))
    a1 = tanh(dot(x, w1))
    a2 = tanh(dot(a1, w2))
    cost2 = sqr(a2 - t).sum()
    cost2 += sqr(w2.sum())
    cost1 = sqr(w1.sum())

    # One entry per subgraph_grad stage, processed in this order.
    params = [[w2], [w1]]
    costs = [cost2, cost1]
    grad_ends = [[a1], [x]]

    inputs = [t, x]
    rng = np.random.RandomState([2012, 11, 15])
    raw_values = [rng.randn(2), rng.randn(3)]
    # ``np.cast`` was removed in NumPy 2.0; ``np.asarray(..., dtype=...)``
    # performs the same conversion to each input's dtype.
    values = [
        np.asarray(value, dtype=ipt.dtype)
        for ipt, value in zip(inputs, raw_values)
    ]

    # Reference: a single grad() call over the total cost.
    wrt = [w2, w1]
    cost = cost2 + cost1
    true_grad_fn = aesara.function(inputs, grad(cost, wrt))
    true_grads = true_grad_fn(*values)

    # Same gradients obtained stage by stage; each stage starts from the
    # gradients the previous stage produced for its end variables.
    next_grad = None
    param_grads = []
    for stage_params, stage_ends, stage_cost in zip(params, grad_ends, costs):
        param_grad, end_grads = subgraph_grad(
            wrt=stage_params, end=stage_ends, start=next_grad, cost=stage_cost
        )
        next_grad = OrderedDict(zip(stage_ends, end_grads))
        param_grads.extend(param_grad)

    pgrad_fn = aesara.function(inputs, param_grads)
    pgrads = pgrad_fn(*values)

    for true_grad, pgrad in zip(true_grads, pgrads):
        assert np.sum(np.abs(true_grad - pgrad)) < 0.00001
def test_gpu3_mixture_dtype_outputs(self):
    """Run a scan with a float output and an int64 output using ``self.mode_with_gpu``.

    Both outputs are compared with a NumPy reference, then the single
    ``Scan`` node of the compiled graph is checked with
    ``self.is_scan_on_gpu``.
    """

    def f_rnn(u_t, x_tm1, W_in, W):
        return (u_t * W_in + x_tm1 * W, aet.cast(u_t + x_tm1, "int64"))

    u = fvector("u")
    x0 = fscalar("x0")
    W_in = fscalar("win")
    W = fscalar("w")
    output, updates = scan(
        f_rnn,
        u,
        [x0, None],
        [W_in, W],
        n_steps=None,
        truncate_gradient=-1,
        go_backwards=False,
        mode=self.mode_with_gpu,
    )

    f2 = aesara.function(
        [u, x0, W_in, W],
        output,
        updates=updates,
        allow_input_downcast=True,
        mode=self.mode_with_gpu,
    )

    # get random initial values
    rng = np.random.RandomState(utt.fetch_seed())
    v_u = rng.uniform(size=(4,), low=-5.0, high=5.0)
    v_x0 = rng.uniform()
    v_W = rng.uniform()
    v_W_in = rng.uniform()

    # compute the output in numpy
    n_steps = 4
    v_out1 = np.zeros((n_steps,))
    v_out2 = np.zeros((n_steps,), dtype="int64")
    v_out1[0] = v_u[0] * v_W_in + v_x0 * v_W
    v_out2[0] = v_u[0] + v_x0
    for step in range(1, n_steps):
        previous = v_out1[step - 1]
        v_out1[step] = v_u[step] * v_W_in + previous * v_W
        v_out2[step] = np.int64(v_u[step] + previous)

    aesara_out1, aesara_out2 = f2(v_u, v_x0, v_W_in, v_W)
    utt.assert_allclose(aesara_out1, v_out1)
    utt.assert_allclose(aesara_out2, v_out2)

    scan_nodes = [
        node for node in f2.maker.fgraph.toposort() if isinstance(node.op, Scan)
    ]
    assert len(scan_nodes) == 1
    assert self.is_scan_on_gpu(scan_nodes[0])
def test_n_samples_1():
    """Draw ``i`` samples per row from two one-hot distributions.

    With probabilities ``[1, 0]`` and ``[0, 1]`` every one of the ``i``
    samples of a row must land in that row's single non-zero column.
    """
    p = fmatrix()
    u = fvector()
    n = iscalar()
    m = MultinomialFromUniform("auto")(p, u, n)
    f = function([p, u, n], m, allow_input_downcast=True)

    one_hot_pvals = [[1.0, 0.0], [0.0, 1.0]]

    np.random.seed(12345)
    for n_samples in [1, 5, 10, 100, 1000, 10000]:
        uni = np.random.rand(2 * n_samples).astype(config.floatX)
        res = f(one_hot_pvals, uni, n_samples)
        expected = [[n_samples * 1.0, 0.0], [0.0, n_samples * 1.0]]
        utt.assert_allclose(res, expected)
def test_cloning_replace_not_strict_not_copy_inputs(self):
    """Check ``clone_replace`` with ``rebuild_strict=False`` and ``copy_inputs_over=False``.

    This has nothing to do with scan, but it refers to the clone function
    that scan uses internally, that pfunc uses now, and that users might
    want to use.
    """
    x = vector("x")
    y = fvector("y")
    y2 = dvector("y2")
    z = shared(0.25)

    f1 = z * (x + y) ** 2 + 5
    f2 = clone_replace(
        f1,
        replace=[(y, y2)],
        rebuild_strict=False,
        copy_inputs_over=False,
    )
    f2_inp = graph_inputs([f2])

    # None of the original variables may appear among the clone's inputs.
    for original in (z, x, y2):
        assert original not in f2_inp
def test_fail_select_alot(self):
    """Check that ``ChoiceFromUniform`` fails when asked to sample more
    elements than the actual number of elements.

    200 selections are requested from 100 elements and a ``ValueError`` is
    expected.
    """
    n_elements = 100
    n_selected = 200

    p = fmatrix()
    u = fvector()
    n = iscalar()
    chosen = multinomial.ChoiceFromUniform(odtype="auto")(p, u, n)
    f = function([p, u, n], chosen, allow_input_downcast=True)

    np.random.seed(12345)
    uni = np.random.rand(n_selected).astype(config.floatX)
    pvals = np.random.randint(1, 100, (1, n_elements)).astype(config.floatX)
    pvals /= pvals.sum(1)

    with pytest.raises(ValueError):
        f(pvals, uni, n_selected)
def test_gpu_opt_dtypes():
    """Check that the returned samples are of the datatype specified.

    For each requested dtype the graph is compiled with ``mode_with_gpu``
    and must contain a ``GPUAMultinomialFromUniform`` node.
    """
    for dtype in ["uint32", "float32", "int64", "float64"]:
        p = fmatrix()
        u = fvector()
        m = aesara.sandbox.multinomial.MultinomialFromUniform(dtype)(p, u)

        f = function([p, u], m, allow_input_downcast=True, mode=mode_with_gpu)
        assert any(
            type(node.op) is GPUAMultinomialFromUniform
            for node in f.maker.fgraph.toposort()
        )

        pval = np.arange(10000 * 4, dtype="float32").reshape((10000, 4)) + 0.1
        pval = pval / pval.sum(axis=1)[:, None]
        uval = np.ones_like(pval[:, 0]) * 0.5

        samples = f(pval, uval)
        assert samples.dtype == dtype, f"{samples.dtype} != {dtype}"
def test_select_distinct(self):
    """Intended to test that ``ChoiceFromUniform`` always selects distinct elements.

    For several selection sizes the test checks that the right number of
    indices is returned and that every returned index lies in
    ``range(n_elements)``.

    NOTE(review): the assertions do not compare the number of unique
    indices with the number requested, so distinctness itself is not
    verified here -- confirm whether that is intended.
    """
    p = fmatrix()
    u = fvector()
    n = iscalar()
    chosen = multinomial.ChoiceFromUniform(odtype="auto")(p, u, n)
    f = function([p, u, n], chosen, allow_input_downcast=True)

    n_elements = 1000
    all_indices = range(n_elements)
    selection_sizes = [5, 10, 50, 100, 500, n_elements]

    np.random.seed(12345)
    for n_selected in selection_sizes:
        uni = np.random.rand(n_selected).astype(config.floatX)
        pvals = np.random.randint(1, 100, (1, n_elements)).astype(config.floatX)
        pvals /= pvals.sum(1)

        res = np.squeeze(f(pvals, uni, n_selected))
        assert len(res) == n_selected, res
        assert np.all(np.in1d(np.unique(res), all_indices)), res
def test_GpuCumOp1D(self, mode):
    """Compare the 1D cumulative Op with NumPy over a range of input sizes.

    ``mode`` selects the NumPy reference (``"add"`` -> ``np.cumsum``,
    ``"mul"`` -> ``np.cumprod``) and is forwarded to ``self.op_class``.
    """
    np_func = {"add": np.cumsum, "mul": np.cumprod}[mode]
    op_class = partial(self.op_class, mode=mode)
    block_max_size = self.max_threads_dim0 * 2

    x = fvector("x")
    f = aesara.function([x], op_class(axis=0)(x), mode=self.mode)
    # The compiled graph must contain at least one GpuCumOp node.
    assert any(
        isinstance(node.op, GpuCumOp) for node in f.maker.fgraph.toposort()
    )

    def check(values):
        utt.assert_allclose(np_func(values), f(values))

    # Extensive testing for the first 1025 sizes
    a = np.random.random(1025).astype("float32")
    for size in range(a.shape[0]):
        check(a[:size])

    # Use multiple GPU threadblocks
    check(np.random.random((block_max_size + 2,)).astype("float32"))

    # Use recursive cumop
    check(np.ones((block_max_size * (block_max_size + 1) + 2,), dtype="float32"))
def test_asymptotic_32():
    """Make sure the crossentropy/softmax functions behave sensibly when
    huge values are present.

    Runs once with float32 inputs and once with float64 inputs.
    """
    # TODO: consider adding the optimization of crossentropy into the current
    # mode for the purpose of running this test

    n_iterations = 100
    for dtype in "float32", "float64":
        make_matrix, make_vector = (
            (fmatrix, fvector) if dtype == "float32" else (dmatrix, dvector)
        )
        x = make_matrix()
        x2 = make_vector()
        y = lvector()

        c = categorical_crossentropy(softmax(x + x2), y)
        f = aesara.function(
            [x, y, x2], [c.sum(), grad(c.sum(), x)], mode="FAST_RUN"
        )

        # Repeatedly step against the gradient, starting from zeros.
        xval = np.zeros((5, 5), dtype=dtype).astype(dtype)
        x2val = np.zeros(5, dtype=xval.dtype).astype(dtype)
        for _ in range(n_iterations):
            cval, gxval = f(xval, np.arange(5), x2val)
            xval -= 100.3 * gxval
        assert cval == 0  # no problem going to zero error

        # what about when x gets really big?
        xval = np.zeros((5, 5), dtype=dtype)
        x2val = np.zeros(5, dtype=xval.dtype)
        for _ in range(n_iterations):
            cval, gxval = f(xval, np.arange(5), x2val)
            xval += 100000.3 * gxval

        assert cval > 61750000
        assert gxval[0, 0] == -1.0
        assert gxval[0, 1] == 0.25
def test_gpu_opt_wor():
    """Check that ``ChoiceFromUniform`` is put on the GPU when the output
    is moved to the GPU.

    Both ``replace=False`` and ``replace=True`` are covered, each with a
    matrix input and with a row input.
    """

    def uses_gpu_choice(compiled):
        return any(
            type(node.op) is GPUAChoiceFromUniform
            for node in compiled.maker.fgraph.toposort()
        )

    p = fmatrix()
    u = fvector()
    n = iscalar()
    for replace in [False, True]:
        m = multinomial.ChoiceFromUniform(odtype="auto", replace=replace)(p, u, n)
        assert m.dtype == "int64", m.dtype

        f = function([p, u, n], m, allow_input_downcast=True, mode=mode_with_gpu)
        assert uses_gpu_choice(f)

        n_samples = 3
        pval = np.arange(10000 * 4, dtype="float32").reshape((10000, 4)) + 0.1
        pval = pval / pval.sum(axis=1)[:, None]
        uval = np.ones(pval.shape[0] * n_samples) * 0.5
        f(pval, uval, n_samples)

        # Test with a row, it was failing in the past.
        r = frow()
        m = multinomial.ChoiceFromUniform("auto", replace=replace)(r, u, n)
        assert m.dtype == "int64", m.dtype

        f = function([r, u, n], m, allow_input_downcast=True, mode=mode_with_gpu)
        assert uses_gpu_choice(f)

        pval = np.arange(1 * 4, dtype="float32").reshape((1, 4)) + 0.1
        pval = pval / pval.sum(axis=1)[:, None]
        uval = np.ones_like(pval[:, 0]) * 0.5
        f(pval, uval, 1)
def test_gpu_opt():
    """Check that ``MultinomialFromUniform`` is put on the GPU when the
    output is moved to the GPU.

    Does have some overlap with test_multinomial_0. Covers a matrix input
    and a row input.
    """

    def uses_gpu_multinomial(compiled):
        return any(
            type(node.op) is GPUAMultinomialFromUniform
            for node in compiled.maker.fgraph.toposort()
        )

    p = fmatrix()
    u = fvector()
    m = aesara.sandbox.multinomial.MultinomialFromUniform("auto")(p, u)
    assert m.dtype == "float32", m.dtype

    f = function([p, u], m, allow_input_downcast=True, mode=mode_with_gpu)
    assert uses_gpu_multinomial(f)

    pval = np.arange(10000 * 4, dtype="float32").reshape((10000, 4)) + 0.1
    pval = pval / pval.sum(axis=1)[:, None]
    uval = np.ones_like(pval[:, 0]) * 0.5
    f(pval, uval)

    # Test with a row, it was failing in the past.
    r = frow()
    m = aesara.sandbox.multinomial.MultinomialFromUniform("auto")(r, u)
    assert m.dtype == "float32", m.dtype

    f = function([r, u], m, allow_input_downcast=True, mode=mode_with_gpu)
    assert uses_gpu_multinomial(f)

    pval = np.arange(1 * 4, dtype="float32").reshape((1, 4)) + 0.1
    pval = pval / pval.sum(axis=1)[:, None]
    uval = np.ones_like(pval[:, 0]) * 0.5
    f(pval, uval)
def test_n_samples_2():
    """Check that the sampled counts sum to the number of requested samples.

    The check is run for several sample counts, first with 1000 outcomes
    and then with 1000000 outcomes.
    """
    p = fmatrix()
    u = fvector()
    n = iscalar()
    m = MultinomialFromUniform("auto")(p, u, n)
    f = function([p, u, n], m, allow_input_downcast=True)

    sample_counts = [1, 5, 10, 100, 1000]

    np.random.seed(12345)
    # The same number is used both as the exclusive upper bound of the
    # random weights and as the number of outcomes.
    for n_outcomes in (1000, 1000000):
        for n_samples in sample_counts:
            uni = np.random.rand(n_samples).astype(config.floatX)
            pvals = np.random.randint(1, n_outcomes, (1, n_outcomes)).astype(
                config.floatX
            )
            pvals /= pvals.sum(1)
            res = f(pvals, uni, n_samples)
            assert res.sum() == n_samples
def test_multinomial_large():
    """Run ``MultinomialFromUniform`` on a 10000 x 4 probability matrix.

    Checks the output shape, its dtype under the active cast policy, and
    that every row has the doubled sample in the third column.
    """
    p = fmatrix()
    u = fvector()
    m = MultinomialFromUniform("auto")(p, u)
    f = function([p, u], m * 2, allow_input_downcast=True)

    pval = np.arange(10000 * 4, dtype="float32").reshape((10000, 4)) + 0.1
    pval = pval / pval.sum(axis=1)[:, None]
    uval = np.ones_like(pval[:, 0]) * 0.5
    mval = f(pval, uval)

    assert mval.shape == pval.shape

    policy = config.cast_policy
    if policy not in ("custom", "numpy+floatX", "numpy"):
        raise NotImplementedError(config.cast_policy)
    if policy == "custom":
        assert mval.dtype == pval.dtype
    elif policy == "numpy+floatX":
        assert mval.dtype == config.floatX
    else:
        assert mval.dtype == "float64"

    utt.assert_allclose(mval.sum(axis=1), 2)
    reference = np.asarray([0, 0, 2, 0]) + 0 * pval
    utt.assert_allclose(mval, reference)  # broadcast over all rows
def test_multinomial_output_dtype():
    """Exercise the ``MultinomialFromUniform`` Op directly for several output dtypes.

    This does not go through the multinomial() call in GPU random
    generation. Each dtype is compiled with ``mode_with_gpu`` and must
    yield a ``GPUAMultinomialFromUniform`` node.
    """
    p = fmatrix()
    u = fvector()

    # (pvals, uniform samples, expected output), evaluated in this order.
    cases = [
        # both first and second samples can be drawn
        ([[1, 0], [0, 1]], [0.1, 0.1], [[2, 0], [0, 2]]),
        # both second labels can be drawn
        ([[0.2, 0.8], [0.3, 0.7]], [0.31, 0.31], [[0, 2], [0, 2]]),
        # both first labels can be drawn
        ([[0.2, 0.8], [0.3, 0.7]], [0.21, 0.21], [[0, 2], [2, 0]]),
        # change the size to make sure output gets reallocated ok
        # and also make sure that the GPU version doesn't screw up the
        # transposed-ness
        ([[0.2, 0.8]], [0.25], [[0, 2]]),
    ]

    for dtype in ["int64", "float32", "float16", "float64", "int32", "auto"]:
        m = aesara.sandbox.multinomial.MultinomialFromUniform(dtype)(p, u)

        # the m*2 allows the multinomial to reuse output
        f = function([p, u], m * 2, allow_input_downcast=True, mode=mode_with_gpu)

        assert any(
            type(node.op) is GPUAMultinomialFromUniform
            for node in f.maker.fgraph.toposort()
        )

        for pvals, unis, expected in cases:
            utt.assert_allclose(f(pvals, unis), expected)
def test_ravel_multi_index(self):
    """Compare ``ravel_multi_index`` with ``np.ravel_multi_index``.

    Every combination of mode ("raise", "wrap", "clip"), order ("C", "F")
    and index dimensionality (0, 1, 2) is checked for 1D, 2D and 3D shapes,
    followed by a few invalid calls that must raise ``TypeError``.
    """

    def check(shape, index_ndim, mode, order):
        # ``np.product`` was removed in NumPy 2.0; ``np.prod`` is the
        # supported spelling and returns the same value.
        multi_index = np.unravel_index(
            np.arange(np.prod(shape)), shape, order=order
        )
        # create some invalid indices to test the mode
        if mode in ("wrap", "clip"):
            multi_index = (multi_index[0] - 1,) + multi_index[1:]
        # test with scalars and higher-dimensional indices
        if index_ndim == 0:
            multi_index = tuple(i[-1] for i in multi_index)
        elif index_ndim == 2:
            multi_index = tuple(i[:, np.newaxis] for i in multi_index)
        multi_index_symb = [aesara.shared(i) for i in multi_index]

        # reference result
        ref = np.ravel_multi_index(multi_index, shape, mode, order)

        def fn(mi, s):
            return function([], ravel_multi_index(mi, s, mode, order))

        # shape given as a tuple
        f_array_tuple = fn(multi_index, shape)
        f_symb_tuple = fn(multi_index_symb, shape)
        np.testing.assert_equal(ref, f_array_tuple())
        np.testing.assert_equal(ref, f_symb_tuple())

        # shape given as an array
        shape_array = np.array(shape)
        f_array_array = fn(multi_index, shape_array)
        np.testing.assert_equal(ref, f_array_array())

        # shape given as an Aesara variable
        shape_symb = aesara.shared(shape_array)
        f_array_symb = fn(multi_index, shape_symb)
        np.testing.assert_equal(ref, f_array_symb())

        # shape testing
        self._compile_and_check(
            [],
            [ravel_multi_index(multi_index, shape_symb, mode, order)],
            [],
            RavelMultiIndex,
        )

    for mode in ("raise", "wrap", "clip"):
        for order in ("C", "F"):
            for index_ndim in (0, 1, 2):
                check((3,), index_ndim, mode, order)
                check((3, 4), index_ndim, mode, order)
                check((3, 4, 5), index_ndim, mode, order)

    # must provide integers
    with pytest.raises(TypeError):
        ravel_multi_index((fvector(), ivector()), (3, 4))
    with pytest.raises(TypeError):
        ravel_multi_index(((3, 4), ivector()), (3.4, 3.2))

    # dims must be a 1D sequence
    with pytest.raises(TypeError):
        ravel_multi_index(((3, 4),), ((3, 4),))