def test_no_reuse():
    x = lvector()
    y = lvector()
    f = aesara.function([x, y], x + y)

    # provide both inputs in the first call
    f(np.ones(10, dtype="int64"), np.ones(10, dtype="int64"))

    try:
        f(np.ones(10))
    except TypeError:
        return
    assert not "should not get here"
def test_xent_thing_int32(self):
    x = matrix("x")
    y = lvector("y")
    yi = aet.cast(y, "int32")
    expressions = [
        aet_sum(-log(softmax(x)[aet.arange(yi.shape[0]), yi])),
        -aet_sum(log(softmax(x)[aet.arange(yi.shape[0]), yi])),
        -aet_sum(log(softmax(x))[aet.arange(yi.shape[0]), yi]),
        aet_sum(-log(softmax(x))[aet.arange(yi.shape[0]), yi]),
    ]

    for expr in expressions:
        fgraph = FunctionGraph([x, y], [expr])
        optdb.query(OPT_FAST_RUN).optimize(fgraph)

        ops = [node.op for node in fgraph.toposort()]
        assert len(ops) == 5
        assert crossentropy_softmax_argmax_1hot_with_bias in ops
        assert not [1 for o in ops if isinstance(o, AdvancedSubtensor)]

        # Also verify the gradient wrt x
        fgraph = FunctionGraph([x, y], [grad(expr, x)])
        optdb.query(OPT_FAST_RUN).optimize(fgraph)

        ops = [node.op for node in fgraph.toposort()]
        assert len(ops) == 3
        assert crossentropy_softmax_1hot_with_bias_dx in ops
        assert softmax_legacy in ops
        assert softmax_grad_legacy not in ops
def test_GpuCrossentropySoftmaxArgmax1HotWithBias():
    # Basic test for GpuCrossentropySoftmaxArgmax1HotWithBias.
    # We check that the kernel loops when there are more rows than threads.
    n_in = 1000
    batch_size = 4097
    n_out = 1250

    if not isinstance(mode_with_gpu, aesara.compile.debugmode.DebugMode):
        n_in = 4098
        n_out = 4099

    y = lvector("y")
    b = fvector("b")

    # We precompute the dot product with a big shape beforehand so that the
    # test of GpuCrossentropySoftmax1HotWithBiasDx does not fail with
    # "the launch timed out and was terminated" on less powerful GPU cards.
    # The big shape is needed to exercise the corner case.
    dot_result = fmatrix("dot_result")

    xx = np.asarray(np.random.rand(batch_size, n_in), dtype=np.float32)
    yy = np.ones((batch_size,), dtype="int32")
    b_values = np.zeros((n_out,), dtype="float32")

    W_values = np.asarray(np.random.rand(n_in, n_out), dtype="float32")
    dot_value = np.asarray(np.dot(xx, W_values), dtype="float32")
    del W_values

    p_y_given_x = aesara.tensor.nnet.softmax(dot_result + b)
    y_pred = argmax(p_y_given_x, axis=-1)
    loss = -mean(log(p_y_given_x)[aet.arange(y.shape[0]), y])
    dW = grad(loss, dot_result)

    classify = aesara.function(
        inputs=[y, b, dot_result], outputs=[loss, y_pred, dW], mode=mode_without_gpu
    )
    classify_gpu = aesara.function(
        inputs=[y, b, dot_result], outputs=[loss, y_pred, dW], mode=mode_with_gpu
    )

    assert any(
        isinstance(node.op, aesara.tensor.nnet.CrossentropySoftmaxArgmax1HotWithBias)
        for node in classify.maker.fgraph.toposort()
    )
    assert any(
        isinstance(node.op, GpuCrossentropySoftmaxArgmax1HotWithBias)
        for node in classify_gpu.maker.fgraph.toposort()
    )

    out = classify(yy, b_values, dot_value)
    gout = classify_gpu(yy, b_values, dot_value)

    assert len(out) == len(gout) == 3
    utt.assert_allclose(out[0], gout[0])
    utt.assert_allclose(out[2], gout[2], atol=3e-6)
    utt.assert_allclose(out[1], gout[1])
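
# Note on the tolerances above (our reading, not stated by the original test):
# the loss out[0] and the argmax out[1] are compared at the default tolerance,
# while the gradient out[2] gets a looser atol=3e-6 because float32 reductions
# can accumulate in a different order on the GPU than on the CPU.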
def test_infer_shape(self):
    admat = matrix()
    alvec = lvector()
    rng = np.random.default_rng(utt.fetch_seed())
    admat_val = rng.random((3, 2)).astype(config.floatX)
    alvec_val = [0, 1, 0]
    self._compile_and_check(
        [admat, alvec],
        [CrossentropyCategorical1Hot()(admat, alvec)],
        [admat_val, alvec_val],
        CrossentropyCategorical1Hot,
    )
def test_neg_idx(self):
    admat = matrix()
    advec = vector()
    alvec = lvector()
    rng = np.random.default_rng(utt.fetch_seed())
    admat_val = rng.random((3, 5)).astype(config.floatX)
    advec_val = rng.random((5,)).astype(config.floatX)
    alvec_val = rng.integers(low=0, high=5, size=3)
    alvec_val[1] = -1
    out = CrossentropySoftmaxArgmax1HotWithBias()(admat, advec, alvec)
    f = aesara.function([admat, advec, alvec], out)
    with pytest.raises(ValueError):
        f(admat_val, advec_val, alvec_val)
def test_softmax_optimizations(self):
    x = matrix("x")
    one_of_n = lvector("one_of_n")
    op = crossentropy_categorical_1hot
    # xe = op(x, one_of_n)

    fgraph = FunctionGraph([x, one_of_n], [op(softmax_legacy(x), one_of_n)])
    assert fgraph.outputs[0].owner.op == op

    optdb.query(OPT_FAST_RUN).optimize(fgraph)

    assert fgraph.outputs[0].owner.op == crossentropy_softmax_argmax_1hot_with_bias
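
# The rewrite exercised above, informally (our sketch, not the rewrite's
# exact implementation):
#     crossentropy_categorical_1hot(softmax(x), one_of_n)
#       -> crossentropy_softmax_argmax_1hot_with_bias(x, b, one_of_n)[0]
# with a zero bias substituted for b, i.e. the softmax and the cross-entropy
# collapse into a single fused op that also exposes the softmax output and
# the argmax prediction as extra outputs.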
def test_infer_shape(self):
    admat = matrix()
    advec = vector()
    alvec = lvector()
    rng = np.random.default_rng(utt.fetch_seed())
    admat_val = rng.random((3, 5)).astype(config.floatX)
    advec_val = rng.random((5,)).astype(config.floatX)
    alvec_val = rng.integers(low=0, high=5, size=3)
    self._compile_and_check(
        [admat, advec, alvec],
        CrossentropySoftmaxArgmax1HotWithBias()(admat, advec, alvec),
        [admat_val, advec_val, alvec_val],
        CrossentropySoftmaxArgmax1HotWithBias,
    )
def test_infer_shape(self):
    admat = matrix()
    advec = vector()
    alvec = lvector()
    rng = np.random.default_rng(utt.fetch_seed())
    admat_val = rng.random((10, 5)).astype(config.floatX)
    admat_val /= admat_val.sum(axis=1).reshape(10, 1)
    advec_val = rng.random((10,)).astype(config.floatX)
    alvec_val = rng.integers(low=0, high=5, size=10)
    self._compile_and_check(
        [advec, admat, alvec],
        [CrossentropySoftmax1HotWithBiasDx()(advec, admat, alvec)],
        [advec_val, admat_val, alvec_val],
        CrossentropySoftmax1HotWithBiasDx,
    )
def test_grad(self):
    x = matrix("x")
    one_of_n = lvector("one_of_n")
    op = crossentropy_categorical_1hot
    xe = op(x, one_of_n)
    f = aesara.function([x, one_of_n], xe)
    x_val = np.asarray([[0.4, 0.6, 0.0], [0.1, 0.8, 0.1]], dtype=config.floatX)
    xe_val = f(x_val, [0, 1])
    assert np.allclose(xe_val, -np.log([0.4, 0.8]))

    def oplike(x):
        return op(x, [0, 1])

    utt.verify_grad(oplike, [x_val], rng=np.random)
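
# A minimal NumPy reference for what crossentropy_categorical_1hot computes,
# added only as an illustration (the helper name is ours, not the library's):
# xe[i] = -log(coding_dist[i, true_idx[i]]).
def _reference_crossentropy_1hot(coding_dist, true_idx):
    rows = np.arange(len(true_idx))
    return -np.log(np.asarray(coding_dist)[rows, true_idx])

# e.g. _reference_crossentropy_1hot([[0.4, 0.6, 0.0], [0.1, 0.8, 0.1]], [0, 1])
# returns -log([0.4, 0.8]), matching the assertion in test_grad above.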
def test_softmax_optimizations_w_bias2(self):
    x = matrix("x")
    b = vector("b")
    c = vector("c")
    one_of_n = lvector("one_of_n")
    op = crossentropy_categorical_1hot

    fgraph = FunctionGraph(
        [x, b, c, one_of_n], [op(softmax_legacy(add(x, b, c)), one_of_n)]
    )
    assert fgraph.outputs[0].owner.op == op

    optdb.query(OPT_FAST_RUN).optimize(fgraph)

    assert len(fgraph.toposort()) == 2
    assert fgraph.outputs[0].owner.op == crossentropy_softmax_argmax_1hot_with_bias
def test_op(self):
    n = lscalar()
    f = aesara.function([self.p, n], multinomial(n, self.p))

    _n = 5
    tested = f(self._p, _n)
    assert tested.shape == self._p.shape
    assert np.allclose(np.floor(tested.todense()), tested.todense())
    assert tested[2, 1] == _n

    n = lvector()
    f = aesara.function([self.p, n], multinomial(n, self.p))

    _n = np.asarray([1, 2, 3, 4], dtype="int64")
    tested = f(self._p, _n)
    assert tested.shape == self._p.shape
    assert np.allclose(np.floor(tested.todense()), tested.todense())
    assert tested[2, 1] == _n[2]
def test_asymptotic_32():
    # This test makes sure that our functions behave sensibly when
    # huge values are present.

    # TODO: consider adding the optimization of crossentropy into the current
    # mode for the purpose of running this test

    for dtype in "float32", "float64":
        if dtype == "float32":
            x = fmatrix()
            x2 = fvector()
        else:
            x = dmatrix()
            x2 = dvector()
        y = lvector()

        c = categorical_crossentropy(softmax(x + x2), y)
        f = aesara.function([x, y, x2], [c.sum(), grad(c.sum(), x)], mode="FAST_RUN")

        xval = np.zeros((5, 5), dtype=dtype)
        x2val = np.zeros(5, dtype=xval.dtype)
        for i in range(100):
            cval, gxval = f(xval, np.arange(5), x2val)
            xval -= 100.3 * gxval

        assert cval == 0  # no problem going to zero error

        # what about when x gets really big?
        xval = np.zeros((5, 5), dtype=dtype)
        x2val = np.zeros(5, dtype=xval.dtype)
        for i in range(100):
            cval, gxval = f(xval, np.arange(5), x2val)
            xval += 100000.3 * gxval

        assert cval > 61750000
        assert gxval[0, 0] == -1.0
        assert gxval[0, 1] == 0.25
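
# Why the asserted gradient values hold (our sanity sketch, not part of the
# test): the gradient of summed cross-entropy w.r.t. the logits is
# softmax(x) - onehot(y). The ascent loop drives each row's target logit
# toward -inf, so the softmax spreads ~1/4 of the mass over each of the four
# non-target classes; the gradient therefore settles at -1.0 on the target
# entry and 0.25 elsewhere, which is exactly what gxval[0, 0] and
# gxval[0, 1] check.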
def test_softmax_grad_optimizations(self):
    x = matrix("x")
    one_of_n = lvector("one_of_n")
    op = crossentropy_categorical_1hot
    xe = op(softmax_legacy(x), one_of_n)
    sum_xe = aet_sum(xe)
    g_x = grad(sum_xe, x)
    fgraph = FunctionGraph([x, one_of_n], [g_x])
    assert check_stack_trace(
        fgraph,
        ops_to_check=[crossentropy_softmax_1hot_with_bias_dx, softmax_legacy],
    )

    optdb.query(OPT_FAST_RUN).optimize(fgraph)

    ops = {node.op for node in fgraph.toposort()}
    assert crossentropy_softmax_argmax_1hot_with_bias not in ops
    assert crossentropy_softmax_1hot_with_bias_dx in ops
    assert softmax_legacy in ops
    assert softmax_grad_legacy not in ops
class TestBinomial(utt.InferShapeTester):
    n = scalar(dtype="int64")
    p = scalar()
    shape = lvector()
    _n = 5
    _p = 0.25
    _shape = np.asarray([3, 5], dtype="int64")
    inputs = [n, p, shape]
    _inputs = [_n, _p, _shape]

    def setup_method(self):
        super().setup_method()
        self.op_class = Binomial

    def test_op(self):
        for sp_format in sparse.sparse_formats:
            for o_type in sparse.float_dtypes:
                f = aesara.function(
                    self.inputs, Binomial(sp_format, o_type)(*self.inputs)
                )

                tested = f(*self._inputs)

                assert tested.shape == tuple(self._shape)
                assert tested.format == sp_format
                assert tested.dtype == o_type
                assert np.allclose(np.floor(tested.todense()), tested.todense())

    def test_infer_shape(self):
        for sp_format in sparse.sparse_formats:
            for o_type in sparse.float_dtypes:
                self._compile_and_check(
                    self.inputs,
                    [Binomial(sp_format, o_type)(*self.inputs)],
                    self._inputs,
                    self.op_class,
                )
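
# Usage sketch mirroring test_op above (illustrative values, and our reading
# of the op's semantics rather than its documented contract): sample a 3x5
# sparse matrix whose entries are integer-valued binomial draws, stored in
# the requested format and dtype.
#     f = aesara.function([n, p, shape], Binomial("csr", "float64")(n, p, shape))
#     sample = f(5, 0.25, np.asarray([3, 5], dtype="int64"))
#     sample.todense()  # integer counts stored as float64, per the floor check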
def test_crossentropy_softmax_1hot_with_bias_dx_scale_cost(self):
    x = matrix("x")
    y = lvector("y")
    a = scalar("a")

    def validate_grad_graph(func):
        # The graph of the gradient should not contain softmax_grad anymore.
        has_cx1hotdx = False
        has_softmax = False
        has_softmaxdx = False
        for node in func.maker.fgraph.toposort():
            if node.op == crossentropy_softmax_1hot_with_bias_dx:
                has_cx1hotdx = True
            if node.op == softmax_legacy:
                has_softmax = True
            if node.op == softmax_grad_legacy:
                has_softmaxdx = True

        assert has_cx1hotdx
        assert has_softmax
        assert not has_softmaxdx

    # Cases to test
    expressions = [
        a * aet_sum(-log(softmax(x)[aet.arange(y.shape[0]), y])),
        -a * aet_sum(log(softmax(x)[aet.arange(y.shape[0]), y])),
        a * (-aet_sum(log(softmax(x)[aet.arange(y.shape[0]), y]))),
        a * aet_sum(log(softmax(x)[aet.arange(y.shape[0]), y])),
        a * aet_sum(-log(softmax(x))[aet.arange(y.shape[0]), y]),
        -a * aet_sum(log(softmax(x))[aet.arange(y.shape[0]), y]),
        a * (-aet_sum(log(softmax(x))[aet.arange(y.shape[0]), y])),
        a * aet_sum(log(softmax(x))[aet.arange(y.shape[0]), y]),
        a * mean(-log(softmax(x)[aet.arange(y.shape[0]), y])),
        -a * mean(log(softmax(x)[aet.arange(y.shape[0]), y])),
        a * (-mean(log(softmax(x)[aet.arange(y.shape[0]), y]))),
        a * mean(log(softmax(x)[aet.arange(y.shape[0]), y])),
        a * mean(-log(softmax(x))[aet.arange(y.shape[0]), y]),
        -a * mean(log(softmax(x))[aet.arange(y.shape[0]), y]),
        a * (-mean(log(softmax(x))[aet.arange(y.shape[0]), y])),
        a * mean(log(softmax(x))[aet.arange(y.shape[0]), y]),
    ]

    for expr in expressions:
        fgraph = FunctionGraph([x, y, a], [expr])
        optdb.query(OPT_FAST_RUN).optimize(fgraph)

        assert 5 <= len(fgraph.toposort()) <= 10

        ops = {node.op for node in fgraph.toposort()}
        assert crossentropy_softmax_argmax_1hot_with_bias in ops
        assert softmax_legacy not in ops

        # Verify the gradient wrt x
        fgraph = FunctionGraph([x, y, a], [grad(expr, x)])
        optdb.query(OPT_FAST_RUN).optimize(fgraph)

        assert 3 <= len(fgraph.toposort()) <= 6

        ops = {node.op for node in fgraph.toposort()}
        assert crossentropy_softmax_1hot_with_bias_dx in ops
        assert softmax_legacy in ops
        assert softmax_grad_legacy not in ops

        # Verify the gradient when providing output gradient
        fgraph = FunctionGraph(
            [x, y, a], [grad(expr, x, known_grads={expr: a * x.sum()})]
        )
        optdb.query(OPT_FAST_RUN).optimize(fgraph)

        assert 6 <= len(fgraph.toposort()) <= 8

        ops = {node.op for node in fgraph.toposort()}
        assert crossentropy_softmax_1hot_with_bias_dx in ops
        assert softmax_legacy in ops
        assert softmax_grad_legacy not in ops
def test_input_validation(self):
    with pytest.raises(TypeError, match="Matrix.*"):
        crossentropy_categorical_1hot(vector(), lvector())
    with pytest.raises(TypeError, match="Integer.*"):
        crossentropy_categorical_1hot(matrix(), vector())
def test_get_rid_of_advanced_indexing_version_of_xent(self):
    x = matrix("x")
    b = vector("b")
    y = lvector("y")

    # Basic case
    expressions = [
        aet_sum(-log(softmax(x)[aet.arange(y.shape[0]), y])),
        -aet_sum(log(softmax(x)[aet.arange(y.shape[0]), y])),
        -aet_sum(log(softmax(x))[aet.arange(y.shape[0]), y]),
        aet_sum(-log(softmax(x))[aet.arange(y.shape[0]), y]),
    ]

    for expr in expressions:
        fgraph = FunctionGraph([x, y], [expr])
        optdb.query(OPT_FAST_RUN).optimize(fgraph)

        ops = [node.op for node in fgraph.toposort()]
        assert len(ops) == 4
        assert crossentropy_softmax_argmax_1hot_with_bias in ops
        assert not [1 for o in ops if isinstance(o, AdvancedSubtensor)]

        # Also verify the gradient wrt x
        fgraph = FunctionGraph([x, y], [grad(expr, x)])
        optdb.query(OPT_FAST_RUN).optimize(fgraph)

        ops = [node.op for node in fgraph.toposort()]
        assert len(ops) == 2
        assert crossentropy_softmax_1hot_with_bias_dx in ops
        assert softmax_legacy in ops
        assert softmax_grad_legacy not in ops

    # Test that a biased softmax is optimized correctly
    bias_expressions = [
        aet_sum(-log(softmax(x + b)[aet.arange(y.shape[0]), y])),
        -aet_sum(log(softmax(b + x)[aet.arange(y.shape[0]), y])),
        -aet_sum(log(softmax(x + b))[aet.arange(y.shape[0]), y]),
        aet_sum(-log(softmax(b + x))[aet.arange(y.shape[0]), y]),
    ]

    for expr in bias_expressions:
        fgraph = FunctionGraph([x, b, y], [expr, x])
        optdb.query(OPT_FAST_RUN).optimize(fgraph)

        ops = [node.op for node in fgraph.toposort()]
        assert len(ops) == 2  # [big_op, sum]
        assert crossentropy_softmax_argmax_1hot_with_bias in ops

        fgraph = FunctionGraph([x, b, y], [grad(expr, x)])
        optdb.query(OPT_FAST_RUN).optimize(fgraph)

        ops = [node.op for node in fgraph.toposort()]
        assert len(ops) == 2
        assert crossentropy_softmax_1hot_with_bias_dx in ops
        assert softmax_with_bias in ops
        assert softmax_grad_legacy not in ops

    # Test that using "mean" instead of "sum" works, too
    mean_expressions = [
        mean(-log(softmax(x)[aet.arange(y.shape[0]), y])),
        -mean(log(softmax(x)[aet.arange(y.shape[0]), y])),
        -mean(log(softmax(x))[aet.arange(y.shape[0]), y]),
        mean(-log(softmax(x))[aet.arange(y.shape[0]), y]),
    ]

    for expr in mean_expressions:
        fgraph = FunctionGraph([x, y], [expr])
        optdb.query(OPT_FAST_RUN).optimize(fgraph)

        ops = [node.op for node in fgraph.toposort()]
        assert len(ops) == 6
        assert crossentropy_softmax_argmax_1hot_with_bias in ops
        assert not [1 for o in ops if isinstance(o, AdvancedSubtensor)]

        fgraph = FunctionGraph([x, y], [grad(expr, x)])
        optdb.query(OPT_FAST_RUN).optimize(fgraph)

        ops = [node.op for node in fgraph.toposort()]
        # there's an extra dimshuffle in there,
        # but I can't think of a good rule to get rid of it
        assert len(ops) == 5
        assert crossentropy_softmax_1hot_with_bias_dx in ops
        assert softmax_legacy in ops
        assert softmax_grad_legacy not in ops

    mean_bias_expressions = [
        mean(-log(softmax(x + b)[aet.arange(y.shape[0]), y])),
        -mean(log(softmax(b + x)[aet.arange(y.shape[0]), y])),
        -mean(log(softmax(x + b))[aet.arange(y.shape[0]), y]),
        mean(-log(softmax(b + x))[aet.arange(y.shape[0]), y]),
    ]

    for expr in mean_bias_expressions:
        fgraph = FunctionGraph([x, b, y], [expr])
        optdb.query(OPT_FAST_RUN).optimize(fgraph)

        ops = [node.op for node in fgraph.toposort()]
        assert len(ops) == 4
        assert crossentropy_softmax_argmax_1hot_with_bias in ops
        assert not [1 for o in ops if isinstance(o, AdvancedSubtensor)]

        fgraph = FunctionGraph([x, b, y], [grad(expr, x)])
        optdb.query(OPT_FAST_RUN).optimize(fgraph)

        ops = [node.op for node in fgraph.toposort()]
        assert len(ops) == 5
        assert crossentropy_softmax_1hot_with_bias_dx in ops
        assert softmax_with_bias in ops
        assert softmax_grad_legacy not in ops
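
# Summary of what the rewrites above do (our paraphrase of the checks): an
# advanced-indexing form such as
#     -aet_sum(log(softmax(x))[aet.arange(y.shape[0]), y])
# is replaced by the fused crossentropy_softmax_argmax_1hot_with_bias op,
# and its gradient by crossentropy_softmax_1hot_with_bias_dx. A quick way to
# confirm the rewrite fired on a compiled function, mirroring the
# FunctionGraph assertions:
#     f = aesara.function([x, y], expressions[0])
#     assert any(node.op == crossentropy_softmax_argmax_1hot_with_bias
#                for node in f.maker.fgraph.toposort())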