Example #1
    def test_param_allow_downcast_vector_floatX(self):
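        # Each In() wrapper sets a per-input downcast policy: a allows
        # float64 -> float32 downcasting, b forbids it, and c uses the
        # default (None) behavior.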
        a = fvector("a")
        b = fvector("b")
        c = fvector("c")

        f = pfunc(
            [
                In(a, allow_downcast=True),
                In(b, allow_downcast=False),
                In(c, allow_downcast=None),
            ],
            (a + b + c),
        )

        # If the values can be accurately represented, everything is OK
        z = [0]
        assert np.all(f(z, z, z) == 0)

        # If allow_downcast is True, a float64 value is downcast and accepted
        assert np.allclose(f([0.1], z, z), 0.1)

        # If allow_downcast is False, the same value is rejected
        with pytest.raises(TypeError):
            f(z, [0.1], z)

        # If allow_downcast is None, it behaves like False
        with pytest.raises(TypeError):
            f(z, z, [0.1])
Example #2
def test_multinomial_dtypes():
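    # With odtype="auto", the output dtype follows the inputs' dtypes.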
    p = dmatrix()
    u = dvector()
    m = MultinomialFromUniform("auto")(p, u)
    assert m.dtype == "float64", m.dtype

    p = fmatrix()
    u = fvector()
    m = MultinomialFromUniform("auto")(p, u)
    assert m.dtype == "float32", m.dtype

    p = fmatrix()
    u = fvector()
    m = MultinomialFromUniform("float64")(p, u)
    assert m.dtype == "float64", m.dtype
Example #3
    def test_profiling(self):
        config1 = config.profile
        config2 = config.profile_memory
        config3 = config.profiling__min_peak_memory
        try:
            config.profile = True
            config.profile_memory = True
            config.profiling__min_peak_memory = True

            x = [fvector("val%i" % i) for i in range(3)]

            z = []
            z += [
                aet.outer(x[i], x[i + 1]).sum(axis=1)
                for i in range(len(x) - 1)
            ]
            z += [x[i] + x[i + 1] for i in range(len(x) - 1)]

            p = ProfileStats(False, gpu_checks=False)

            if config.mode in ["DebugMode", "DEBUG_MODE", "FAST_COMPILE"]:
                m = "FAST_RUN"
            else:
                m = None

            f = function(x, z, profile=p, name="test_profiling", mode=m)

            inp = [np.arange(1024, dtype="float32") + 1 for i in range(len(x))]
            f(*inp)

            buf = StringIO()
            f.profile.summary(buf)

            # Regression test: these numbers will change if the memory
            # profiling algorithm improves.
            the_string = buf.getvalue()
            lines1 = [
                l for l in the_string.split("\n") if "Max if linker" in l
            ]
            lines2 = [l for l in the_string.split("\n") if "Minimum peak" in l]
            if config.device == "cpu":
                assert "CPU: 4112KB (4104KB)" in the_string, (lines1, lines2)
                assert "CPU: 8204KB (8196KB)" in the_string, (lines1, lines2)
                assert "CPU: 8208KB" in the_string, (lines1, lines2)
                assert (
                    "Minimum peak from all valid apply node order is 4104KB"
                    in the_string), (lines1, lines2)
            else:
                assert "CPU: 16KB (16KB)" in the_string, (lines1, lines2)
                assert "GPU: 8204KB (8204KB)" in the_string, (lines1, lines2)
                assert "GPU: 12300KB (12300KB)" in the_string, (lines1, lines2)
                assert "GPU: 8212KB" in the_string, (lines1, lines2)
                assert (
                    "Minimum peak from all valid apply node order is 4116KB"
                    in the_string), (lines1, lines2)

        finally:
            config.profile = config1
            config.profile_memory = config2
            config.profiling__min_peak_memory = config3
Example #4
    def test_Strides1D(self, mode):
        op_class = partial(self.op_class, mode=mode)
        np_func = dict(add=np.cumsum, mul=np.cumprod)[mode]
        x = fvector("x")

        for axis in (0, None, -1):
            a = np.random.random((42, )).astype("float32")
            cumop_function = aesara.function([x],
                                             op_class(axis=axis)(x),
                                             mode=self.mode)

            slicings = [
                slice(None, None, None),  # Normal strides
                slice(None, None, 2),  # Stepped strides
                slice(None, None, -1),  # Negative strides
            ]

            # Cartesian product of all slicings to test.
            for slicing in product(slicings, repeat=x.ndim):
                f = aesara.function([x],
                                    op_class(axis=axis)(x[slicing]),
                                    mode=self.mode)
                assert [
                    n for n in f.maker.fgraph.toposort()
                    if isinstance(n.op, GpuCumOp)
                ]
                utt.assert_allclose(np_func(a[slicing], axis=axis), f(a))
                utt.assert_allclose(np_func(a[slicing], axis=axis),
                                    cumop_function(a[slicing]))
Example #5
    def test_cast_float16(self):
        f16 = vector(dtype="float16")
        f32 = fvector()
        i8 = bvector()
        f = aesara.function(
            [f16, f32, i8],
            [
                f16.astype("float32"),
                f32.astype("float16"),
                f32.astype("float64"),
                f16.astype("int8"),
                f32.astype("int8"),
                i8.astype("float16"),
                i8.astype("float32"),
            ],
            mode=mode_with_gpu,
        )

        d1 = (np.random.rand(4) * 10).astype("float16")
        d2 = (np.random.rand(5) * 10).astype("float32")
        d3 = (np.random.rand(6) * 10).astype("int8")
        res = f(d1, d2, d3)

        for i, out in enumerate(f.outputs):
            dtype = out.variable.dtype
            assert res[i].dtype == dtype
            inp = out.variable.owner.inputs[0]
            if inp.dtype == "float16":
                d = d1
            elif inp.dtype == "float32":
                d = d2
            else:
                d = d3
            assert_allclose(d.astype(dtype), res[i])
Example #6
    def test_composite_elemwise_float16(self):
        w = bvector()
        x = vector(dtype="float16")
        y = fvector()

        cz = tanh(x + aet.cast(y, "float16"))
        o = (
            cz
            - cz ** 2
            + aet.cast(x, "int16")
            + aet.cast(x, "float32")
            + aet.cast(w, "float16")
            - aet.constant(np.float16(1.0))
        )

        aesara.function([w, x, y], o, mode=mode_with_gpu)

        v = vector(dtype="uint8")
        w = vector(dtype="float16")
        x = vector(dtype="float16")
        y = vector(dtype="float16")
        z = vector(dtype="float16")

        o = aet.switch(v, mul(w, x, y), z)
        aesara.function([v, w, x, y, z], o, mode=mode_with_gpu)
Example #7
    def test_output_broadcast_tensor(self):
        v = fvector("v")
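        # VecAsRowAndCol is a test Op whose outputs reshape the vector into
        # broadcastable row and column tensors.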
        c, r = VecAsRowAndCol()(v)
        f = function([v], [c, r])

        v_val = self.rng.standard_normal((5,)).astype("float32")
        f(v_val)
Example #8
    def test_output_broadcast_tensor(self):
        v = fvector("v")
        c, r = VecAsRowAndCol()(v)
        f = aesara.function([v], [c, r])

        v_val = self.rng.randn(5).astype("float32")
        f(v_val)
Example #9
    def test_select_proportional_to_weight(self):
        # Tests that ChoiceFromUniform selects elements, on average,
        # proportional to their probabilities

        p = fmatrix()
        u = fvector()
        n = iscalar()
        m = multinomial.ChoiceFromUniform(odtype="auto")(p, u, n)

        f = function([p, u, n], m, allow_input_downcast=True)

        n_elements = 100
        n_selected = 10
        mean_rtol = 0.0005
        np.random.seed(12345)
        pvals = np.random.randint(1, 100, (1, n_elements)).astype(config.floatX)
        pvals /= pvals.sum(1)
        avg_pvals = np.zeros((n_elements,), dtype=config.floatX)

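        # Draw many times and count how often each element is selected;
        # the empirical frequencies should converge to pvals.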
        for rep in range(10000):
            uni = np.random.rand(n_selected).astype(config.floatX)
            res = f(pvals, uni, n_selected)
            res = np.squeeze(res)
            avg_pvals[res] += 1
        avg_pvals /= avg_pvals.sum()
        avg_diff = np.mean(abs(avg_pvals - pvals))
        assert avg_diff < mean_rtol, avg_diff
Example #10
def test_multinomial_0():
    # This tests the MultinomialFromUniform Op directly, not going through the
    # multinomial() call in GPU random generation.

    p = fmatrix()
    u = fvector()

    m = MultinomialFromUniform("auto")(p, u)

    # the m*2 allows the multinomial to reuse output
    f = function([p, u], m * 2, allow_input_downcast=True)

    # test that both first and second samples can be drawn
    utt.assert_allclose(f([[1, 0], [0, 1]], [0.1, 0.1]), [[2, 0], [0, 2]])

    # test that the second label can be drawn in both rows
    r = f([[0.2, 0.8], [0.3, 0.7]], [0.31, 0.31])
    utt.assert_allclose(r, [[0, 2], [0, 2]])

    # test that the first label can also be drawn
    r = f([[0.2, 0.8], [0.3, 0.7]], [0.21, 0.21])
    utt.assert_allclose(r, [[0, 2], [2, 0]])

    # change the size to make sure output gets reallocated ok
    # and also make sure that the GPU version doesn't screw up the
    # transposed-ness
    r = f([[0.2, 0.8]], [0.25])
    utt.assert_allclose(r, [[0, 2]])
Example #11
    def test_allow_downcast_floatX(self):
        a = fscalar("a")
        b = fvector("b")

        f = pfunc([a, b], (a + b), allow_input_downcast=True)
        g = pfunc([a, b], (a + b), allow_input_downcast=False)
        h = pfunc([a, b], (a + b), allow_input_downcast=None)

        # If the values can be accurately represented, OK
        assert np.all(f(0, [0]) == 0)
        assert np.all(g(0, [0]) == 0)
        assert np.all(h(0, [0]) == 0)

        # For the vector: OK iff allow_input_downcast is True
        assert np.allclose(f(0, [0.1]), 0.1)
        with pytest.raises(TypeError):
            g(0, [0.1])
        with pytest.raises(TypeError):
            h(0, [0.1])

        # For the scalar: OK if allow_input_downcast is True,
        # or None and floatX==float32
        assert np.allclose(f(0.1, [0]), 0.1)
        with pytest.raises(TypeError):
            g(0.1, [0])
        if config.floatX == "float32":
            assert np.allclose(h(0.1, [0]), 0.1)
        else:
            with pytest.raises(TypeError):
                h(0.1, [0])
Example #12
    def make_node(self, activations, labels, input_lengths):
        t_activations = at.as_tensor_variable(activations)
        # Ensure activations array is C-contiguous
        t_activations = cpu_contiguous(t_activations)

        t_labels = at.as_tensor_variable(labels)
        t_input_lengths = at.as_tensor_variable(input_lengths)

        if t_activations.type.dtype != "float32":
            raise TypeError("activations must use the float32 type!")

        if t_activations.ndim != 3:
            raise ValueError("activations must have 3 dimensions.")

        if t_labels.type.dtype != "int32":
            raise TypeError("labels must use the int32 type!")

        if t_labels.ndim != 2:
            raise ValueError("labels must have 2 dimensions.")

        if t_input_lengths.type.dtype != "int32":
            raise TypeError("input_lengths must use the int32 type!")

        if t_input_lengths.ndim != 1:
            raise ValueError("input_lengths must have 1 dimension.")

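        # The Op outputs one cost per sequence, plus the gradient with
        # respect to the activations when compute_grad is enabled.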
        costs = fvector(name="ctc_cost")
        outputs = [costs]
        if self.compute_grad:
            gradients = ftensor3(name="ctc_grad")
            outputs += [gradients]

        return Apply(self,
                     inputs=[t_activations, t_labels, t_input_lengths],
                     outputs=outputs)
Example #13
def test_multinomial_large():
    # DEBUG_MODE will test this on GPU
    p = fmatrix()
    u = fvector()
    m = aesara.sandbox.multinomial.MultinomialFromUniform("auto")(p, u)
    f = function([p, u], m * 2, allow_input_downcast=True, mode=mode_with_gpu)
    assert any(
        [
            type(node.op) is GPUAMultinomialFromUniform
            for node in f.maker.fgraph.toposort()
        ]
    )

    pval = np.arange(10000 * 4, dtype="float32").reshape((10000, 4)) + 0.1
    pval = pval / pval.sum(axis=1)[:, None]
    uval = np.ones_like(pval[:, 0]) * 0.5
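    # With u = 0.5, each row selects the single category whose cumulative
    # probability first exceeds 0.5.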
    mval = f(pval, uval)

    assert mval.shape == pval.shape
    if config.cast_policy == "custom":
        assert mval.dtype == pval.dtype
    elif config.cast_policy == "numpy+floatX":
        assert mval.dtype == config.floatX
    elif config.cast_policy == "numpy":
        assert mval.dtype == "float64"
    else:
        raise NotImplementedError(config.cast_policy)
    utt.assert_allclose(mval.sum(axis=1), 2)
    asdf = np.asarray([0, 0, 2, 0]) + 0 * pval
    utt.assert_allclose(mval, asdf)  # broadcast over all rows
Example #14
    def test_one_sequence_one_output_weights_gpu2(self):
        def f_rnn(u_t, x_tm1, W_in, W):
            return u_t * W_in + x_tm1 * W

        u = fvector("u")
        x0 = fscalar("x0")
        W_in = fscalar("win")
        W = fscalar("w")
        output, updates = scan(
            f_rnn,
            u,
            x0,
            [W_in, W],
            n_steps=None,
            truncate_gradient=-1,
            go_backwards=False,
            mode=mode_with_gpu,
        )

        f2 = aesara.function(
            [u, x0, W_in, W],
            output,
            updates=updates,
            allow_input_downcast=True,
            mode=mode_with_gpu,
        )

        # get random initial values
        rng = np.random.default_rng(utt.fetch_seed())
        v_u = rng.uniform(size=(4, ), low=-5.0, high=5.0)
        v_x0 = rng.uniform()
        W = rng.uniform()
        W_in = rng.uniform()

        # compute the output in numpy
        v_out = np.zeros((4, ))
        v_out[0] = v_u[0] * W_in + v_x0 * W
        for step in range(1, 4):
            v_out[step] = v_u[step] * W_in + v_out[step - 1] * W

        aesara_values = f2(v_u, v_x0, W_in, W)
        utt.assert_allclose(aesara_values, v_out)

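        # Outside the scan, expect one device-to-host transfer for the
        # output and one host-to-device transfer per input.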
        topo = f2.maker.fgraph.toposort()
        assert sum([isinstance(node.op, HostFromGpu) for node in topo]) == 1
        assert sum([isinstance(node.op, GpuFromHost) for node in topo]) == 4

        scan_node = [
            node for node in topo if isinstance(node.op, scan.op.Scan)
        ]
        assert len(scan_node) == 1
        scan_node = scan_node[0]
        scan_node_topo = scan_node.op.fn.maker.fgraph.toposort()

        # Check that the inner graph runs on the GPU with no host<->GPU
        # transfers inside the loop.
        assert any(isinstance(node.op, GpuElemwise) for node in scan_node_topo)
        assert not any(
            isinstance(node.op, HostFromGpu) for node in scan_node_topo)
        assert not any(
            isinstance(node.op, GpuFromHost) for node in scan_node_topo)
Example #15
def test_GpuCrossentropySoftmaxArgmax1HotWithBias():
    # This is a basic test for GpuCrossentropySoftmaxArgmax1HotWithBias.
    # We check that the kernel loops when there are too many threads.

    n_in = 1000
    batch_size = 4097
    n_out = 1250

    if not isinstance(mode_with_gpu, aesara.compile.debugmode.DebugMode):
        n_in = 4098
        n_out = 4099

    y = lvector("y")

    b = fvector("b")

    # We precompute the dot product with a big shape beforehand so that the
    # test of GpuCrossentropySoftmax1HotWithBiasDx does not fail with the
    # error "the launch timed out and was terminated" on GPU cards that are
    # not powerful enough. We need the big shape to check the corner case.
    dot_result = fmatrix("dot_result")

    xx = np.asarray(np.random.rand(batch_size, n_in), dtype=np.float32)
    yy = np.ones((batch_size, ), dtype="int32")
    b_values = np.zeros((n_out, ), dtype="float32")
    W_values = np.asarray(np.random.rand(n_in, n_out), dtype="float32")

    dot_value = np.asarray(np.dot(xx, W_values), dtype="float32")
    del W_values
    p_y_given_x = aesara.tensor.nnet.softmax(dot_result + b)
    y_pred = argmax(p_y_given_x, axis=-1)
    loss = -mean(log(p_y_given_x)[aet.arange(y.shape[0]), y])
    dW = grad(loss, dot_result)
    classify = aesara.function(inputs=[y, b, dot_result],
                               outputs=[loss, y_pred, dW],
                               mode=mode_without_gpu)
    classify_gpu = aesara.function(inputs=[y, b, dot_result],
                                   outputs=[loss, y_pred, dW],
                                   mode=mode_with_gpu)

    assert any([
        isinstance(node.op,
                   aesara.tensor.nnet.CrossentropySoftmaxArgmax1HotWithBias)
        for node in classify.maker.fgraph.toposort()
    ])
    assert any([
        isinstance(node.op, GpuCrossentropySoftmaxArgmax1HotWithBias)
        for node in classify_gpu.maker.fgraph.toposort()
    ])

    out = classify(yy, b_values, dot_value)
    gout = classify_gpu(yy, b_values, dot_value)

    assert len(out) == len(gout) == 3
    utt.assert_allclose(out[0], gout[0])
    utt.assert_allclose(out[2], gout[2], atol=3e-6)
    utt.assert_allclose(out[1], gout[1])
Example #16
def test_subgraph_grad():
    # Tests that the grad method with no known_grads
    # matches what happens if you use successive subgraph_grads

    x = fvector("x")
    t = fvector("t")
    w1 = aesara.shared(np.random.randn(3, 4))
    w2 = aesara.shared(np.random.randn(4, 2))
    a1 = tanh(dot(x, w1))
    a2 = tanh(dot(a1, w2))
    cost2 = sqr(a2 - t).sum()
    cost2 += sqr(w2.sum())
    cost1 = sqr(w1.sum())

    params = [[w2], [w1]]
    costs = [cost2, cost1]
    grad_ends = [[a1], [x]]

    inputs = [t, x]
    rng = np.random.RandomState([2012, 11, 15])
    values = [rng.randn(2), rng.randn(3)]
    values = [np.cast[ipt.dtype](value) for ipt, value in zip(inputs, values)]

    wrt = [w2, w1]
    cost = cost2 + cost1
    true_grads = grad(cost, wrt)
    true_grads = aesara.function(inputs, true_grads)
    true_grads = true_grads(*values)
    next_grad = None
    param_grads = []
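    # Compute the gradients in two stages, feeding each stage's
    # end-gradients into the next subgraph_grad call as its start.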
    for i in range(2):
        param_grad, next_grad = subgraph_grad(wrt=params[i],
                                              end=grad_ends[i],
                                              start=next_grad,
                                              cost=costs[i])
        next_grad = OrderedDict(zip(grad_ends[i], next_grad))
        param_grads.extend(param_grad)

    pgrads = aesara.function(inputs, param_grads)
    pgrads = pgrads(*values)

    for true_grad, pgrad in zip(true_grads, pgrads):
        assert np.sum(np.abs(true_grad - pgrad)) < 0.00001
Example #17
    def test_gpu3_mixture_dtype_outputs(self):
        def f_rnn(u_t, x_tm1, W_in, W):
            return (u_t * W_in + x_tm1 * W, aet.cast(u_t + x_tm1, "int64"))

        u = fvector("u")
        x0 = fscalar("x0")
        W_in = fscalar("win")
        W = fscalar("w")
        output, updates = scan(
            f_rnn,
            u,
            [x0, None],
            [W_in, W],
            n_steps=None,
            truncate_gradient=-1,
            go_backwards=False,
            mode=self.mode_with_gpu,
        )

        f2 = aesara.function(
            [u, x0, W_in, W],
            output,
            updates=updates,
            allow_input_downcast=True,
            mode=self.mode_with_gpu,
        )

        # get random initial values
        rng = np.random.RandomState(utt.fetch_seed())
        v_u = rng.uniform(size=(4, ), low=-5.0, high=5.0)
        v_x0 = rng.uniform()
        W = rng.uniform()
        W_in = rng.uniform()

        # compute the output in numpy
        v_out1 = np.zeros((4, ))
        v_out2 = np.zeros((4, ), dtype="int64")
        v_out1[0] = v_u[0] * W_in + v_x0 * W
        v_out2[0] = v_u[0] + v_x0
        for step in range(1, 4):
            v_out1[step] = v_u[step] * W_in + v_out1[step - 1] * W
            v_out2[step] = np.int64(v_u[step] + v_out1[step - 1])

        aesara_out1, aesara_out2 = f2(v_u, v_x0, W_in, W)
        utt.assert_allclose(aesara_out1, v_out1)
        utt.assert_allclose(aesara_out2, v_out2)

        topo = f2.maker.fgraph.toposort()
        scan_node = [node for node in topo if isinstance(node.op, Scan)]
        assert len(scan_node) == 1
        scan_node = scan_node[0]
        assert self.is_scan_on_gpu(scan_node)
Example #18
def test_n_samples_1():
    p = fmatrix()
    u = fvector()
    n = iscalar()
    m = MultinomialFromUniform("auto")(p, u, n)

    f = function([p, u, n], m, allow_input_downcast=True)

    np.random.seed(12345)
    for i in [1, 5, 10, 100, 1000, 10000]:
        uni = np.random.rand(2 * i).astype(config.floatX)
        res = f([[1.0, 0.0], [0.0, 1.0]], uni, i)
        utt.assert_allclose(res, [[i * 1.0, 0.0], [0.0, i * 1.0]])
Example #19
    def test_cloning_replace_not_strict_not_copy_inputs(self):
        # This has nothing to do with scan itself; it tests the clone
        # function that scan uses internally, that pfunc now uses, and
        # that users might want to use directly.
        x = vector("x")
        y = fvector("y")
        y2 = dvector("y2")
        z = shared(0.25)

        f1 = z * (x + y) ** 2 + 5
        f2 = clone_replace(
            f1, replace=[(y, y2)], rebuild_strict=False, copy_inputs_over=False
        )
        f2_inp = graph_inputs([f2])
        assert z not in f2_inp
        assert x not in f2_inp
        assert y2 not in f2_inp
Example #20
    def test_fail_select_alot(self):
        # Tests that ChoiceFromUniform fails when asked to sample more
        # elements than the actual number of elements

        p = fmatrix()
        u = fvector()
        n = iscalar()
        m = multinomial.ChoiceFromUniform(odtype="auto")(p, u, n)

        f = function([p, u, n], m, allow_input_downcast=True)

        n_elements = 100
        n_selected = 200
        np.random.seed(12345)
        uni = np.random.rand(n_selected).astype(config.floatX)
        pvals = np.random.randint(1, 100, (1, n_elements)).astype(config.floatX)
        pvals /= pvals.sum(1)
        with pytest.raises(ValueError):
            f(pvals, uni, n_selected)
Example #21
def test_gpu_opt_dtypes():
    # Test if the returned samples are of the datatype specified
    for dtype in ["uint32", "float32", "int64", "float64"]:
        p = fmatrix()
        u = fvector()
        m = aesara.sandbox.multinomial.MultinomialFromUniform(dtype)(p, u)

        f = function([p, u], m, allow_input_downcast=True, mode=mode_with_gpu)
        assert any(
            [
                type(node.op) is GPUAMultinomialFromUniform
                for node in f.maker.fgraph.toposort()
            ]
        )
        pval = np.arange(10000 * 4, dtype="float32").reshape((10000, 4)) + 0.1
        pval = pval / pval.sum(axis=1)[:, None]
        uval = np.ones_like(pval[:, 0]) * 0.5
        samples = f(pval, uval)
        assert samples.dtype == dtype, f"{samples.dtype} != {dtype}"
Example #22
    def test_select_distinct(self):
        # Tests that ChoiceFromUniform always selects distinct elements

        p = fmatrix()
        u = fvector()
        n = iscalar()
        m = multinomial.ChoiceFromUniform(odtype="auto")(p, u, n)

        f = function([p, u, n], m, allow_input_downcast=True)

        n_elements = 1000
        all_indices = range(n_elements)
        np.random.seed(12345)
        for i in [5, 10, 50, 100, 500, n_elements]:
            uni = np.random.rand(i).astype(config.floatX)
            pvals = np.random.randint(1, 100, (1, n_elements)).astype(config.floatX)
            pvals /= pvals.sum(1)
            res = f(pvals, uni, i)
            res = np.squeeze(res)
            assert len(res) == i, res
            assert np.all(np.in1d(np.unique(res), all_indices)), res
Example #23
    def test_GpuCumOp1D(self, mode):
        np_func = dict(add=np.cumsum, mul=np.cumprod)[mode]
        op_class = partial(self.op_class, mode=mode)
        block_max_size = self.max_threads_dim0 * 2

        x = fvector("x")
        f = aesara.function([x], op_class(axis=0)(x), mode=self.mode)
        assert [n for n in f.maker.fgraph.toposort() if isinstance(n.op, GpuCumOp)]

        # Extensive testing for the first 1025 sizes
        a = np.random.random(1025).astype("float32")
        for i in range(a.shape[0]):
            utt.assert_allclose(np_func(a[:i]), f(a[:i]))

        # Use multiple GPU threadblocks
        a = np.random.random((block_max_size + 2,)).astype("float32")
        utt.assert_allclose(np_func(a), f(a))

        # Use recursive cumop
        a = np.ones((block_max_size * (block_max_size + 1) + 2,), dtype="float32")
        utt.assert_allclose(np_func(a), f(a))
Example #24
def test_asymptotic_32():
    # This test makes sure that our functions behave sensibly when
    # huge values are present

    # TODO: consider adding the optimization of crossentropy into the current
    # mode for the purpose of running this test

    for dtype in "float32", "float64":
        if dtype == "float32":
            x = fmatrix()
            x2 = fvector()
        else:
            x = dmatrix()
            x2 = dvector()
        y = lvector()

        c = categorical_crossentropy(softmax(x + x2), y)
        f = aesara.function([x, y, x2], [c.sum(), grad(c.sum(), x)],
                            mode="FAST_RUN")

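        # The descent loop below should drive the error to exactly zero;
        # the ascent loop afterwards pushes the logits to huge values.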
        xval = np.zeros((5, 5), dtype=dtype)
        x2val = np.zeros(5, dtype=xval.dtype)
        for i in range(100):
            cval, gxval = f(xval, np.arange(5), x2val)
            xval -= 100.3 * gxval

        assert cval == 0  # no problem going to zero error

        # what about when x gets really big?

        xval = np.zeros((5, 5), dtype=dtype)
        x2val = np.zeros(5, dtype=xval.dtype)
        for i in range(100):
            cval, gxval = f(xval, np.arange(5), x2val)
            xval += 100000.3 * gxval

        assert cval > 61750000
        assert gxval[0, 0] == -1.0
        assert gxval[0, 1] == 0.25
Example #25
def test_gpu_opt_wor():
    # Test that the op is placed on the GPU when its output is moved
    # to the GPU.
    p = fmatrix()
    u = fvector()
    n = iscalar()
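    # Exercise both sampling without replacement (replace=False) and with
    # replacement (replace=True).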
    for replace in [False, True]:
        m = multinomial.ChoiceFromUniform(odtype="auto", replace=replace)(p, u, n)
        assert m.dtype == "int64", m.dtype

        f = function([p, u, n], m, allow_input_downcast=True, mode=mode_with_gpu)
        assert any(
            [
                type(node.op) is GPUAChoiceFromUniform
                for node in f.maker.fgraph.toposort()
            ]
        )
        n_samples = 3
        pval = np.arange(10000 * 4, dtype="float32").reshape((10000, 4)) + 0.1
        pval = pval / pval.sum(axis=1)[:, None]
        uval = np.ones(pval.shape[0] * n_samples) * 0.5
        f(pval, uval, n_samples)

        # Test with a row, it was failing in the past.
        r = frow()
        m = multinomial.ChoiceFromUniform("auto", replace=replace)(r, u, n)
        assert m.dtype == "int64", m.dtype

        f = function([r, u, n], m, allow_input_downcast=True, mode=mode_with_gpu)
        assert any(
            [
                type(node.op) is GPUAChoiceFromUniform
                for node in f.maker.fgraph.toposort()
            ]
        )
        pval = np.arange(1 * 4, dtype="float32").reshape((1, 4)) + 0.1
        pval = pval / pval.sum(axis=1)[:, None]
        uval = np.ones_like(pval[:, 0]) * 0.5
        f(pval, uval, 1)
Example #26
def test_gpu_opt():
    # Does have some overlap with test_multinomial_0

    # Test that the op is placed on the GPU when its output is moved
    # to the GPU.
    p = fmatrix()
    u = fvector()
    m = aesara.sandbox.multinomial.MultinomialFromUniform("auto")(p, u)
    assert m.dtype == "float32", m.dtype

    f = function([p, u], m, allow_input_downcast=True, mode=mode_with_gpu)
    assert any(
        [
            type(node.op) is GPUAMultinomialFromUniform
            for node in f.maker.fgraph.toposort()
        ]
    )
    pval = np.arange(10000 * 4, dtype="float32").reshape((10000, 4)) + 0.1
    pval = pval / pval.sum(axis=1)[:, None]
    uval = np.ones_like(pval[:, 0]) * 0.5
    f(pval, uval)

    # Test with a row, it was failing in the past.
    r = frow()
    m = aesara.sandbox.multinomial.MultinomialFromUniform("auto")(r, u)
    assert m.dtype == "float32", m.dtype

    f = function([r, u], m, allow_input_downcast=True, mode=mode_with_gpu)
    assert any(
        [
            type(node.op) is GPUAMultinomialFromUniform
            for node in f.maker.fgraph.toposort()
        ]
    )
    pval = np.arange(1 * 4, dtype="float32").reshape((1, 4)) + 0.1
    pval = pval / pval.sum(axis=1)[:, None]
    uval = np.ones_like(pval[:, 0]) * 0.5
    f(pval, uval)
Example #27
def test_n_samples_2():
    p = fmatrix()
    u = fvector()
    n = iscalar()
    m = MultinomialFromUniform("auto")(p, u, n)

    f = function([p, u, n], m, allow_input_downcast=True)

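    # The extra input n sets the number of draws, so the returned counts
    # must sum to n.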
    np.random.seed(12345)
    for i in [1, 5, 10, 100, 1000]:
        uni = np.random.rand(i).astype(config.floatX)
        pvals = np.random.randint(1, 1000, (1, 1000)).astype(config.floatX)
        pvals /= pvals.sum(1)
        res = f(pvals, uni, i)
        assert res.sum() == i

    for i in [1, 5, 10, 100, 1000]:
        uni = np.random.rand(i).astype(config.floatX)
        pvals = np.random.randint(1, 1000000,
                                  (1, 1000000)).astype(config.floatX)
        pvals /= pvals.sum(1)
        res = f(pvals, uni, i)
        assert res.sum() == i
Example #28
def test_multinomial_large():
    p = fmatrix()
    u = fvector()
    m = MultinomialFromUniform("auto")(p, u)
    f = function([p, u], m * 2, allow_input_downcast=True)

    pval = np.arange(10000 * 4, dtype="float32").reshape((10000, 4)) + 0.1
    pval = pval / pval.sum(axis=1)[:, None]
    uval = np.ones_like(pval[:, 0]) * 0.5
    mval = f(pval, uval)

    assert mval.shape == pval.shape
    if config.cast_policy == "custom":
        assert mval.dtype == pval.dtype
    elif config.cast_policy == "numpy+floatX":
        assert mval.dtype == config.floatX
    elif config.cast_policy == "numpy":
        assert mval.dtype == "float64"
    else:
        raise NotImplementedError(config.cast_policy)
    utt.assert_allclose(mval.sum(axis=1), 2)
    asdf = np.asarray([0, 0, 2, 0]) + 0 * pval
    utt.assert_allclose(mval, asdf)  # broadcast over all rows
Example #29
def test_multinomial_output_dtype():
    # This tests the MultinomialFromUniform Op directly, not going through the
    # multinomial() call in GPU random generation.

    p = fmatrix()
    u = fvector()

    for dtype in ["int64", "float32", "float16", "float64", "int32", "auto"]:
        m = aesara.sandbox.multinomial.MultinomialFromUniform(dtype)(p, u)

        # the m*2 allows the multinomial to reuse output
        f = function([p, u], m * 2, allow_input_downcast=True, mode=mode_with_gpu)

        assert any(
            [
                type(node.op) is GPUAMultinomialFromUniform
                for node in f.maker.fgraph.toposort()
            ]
        )

        # test that both first and second samples can be drawn
        utt.assert_allclose(f([[1, 0], [0, 1]], [0.1, 0.1]), [[2, 0], [0, 2]])

        # test that the second label can be drawn in both rows
        r = f([[0.2, 0.8], [0.3, 0.7]], [0.31, 0.31])
        utt.assert_allclose(r, [[0, 2], [0, 2]])

        # test that the first label can also be drawn
        r = f([[0.2, 0.8], [0.3, 0.7]], [0.21, 0.21])
        utt.assert_allclose(r, [[0, 2], [2, 0]])

        # change the size to make sure output gets reallocated ok
        # and also make sure that the GPU version doesn't screw up the
        # transposed-ness
        r = f([[0.2, 0.8]], [0.25])
        utt.assert_allclose(r, [[0, 2]])
Example #30
    def test_ravel_multi_index(self):
        def check(shape, index_ndim, mode, order):
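            # "mode" and "order" mirror np.ravel_multi_index: mode controls
            # out-of-bounds handling (raise/wrap/clip) and order selects
            # C- or Fortran-style index ordering.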
            multi_index = np.unravel_index(np.arange(np.prod(shape)),
                                           shape,
                                           order=order)
            # create some invalid indices to test the mode
            if mode in ("wrap", "clip"):
                multi_index = (multi_index[0] - 1, ) + multi_index[1:]
            # test with scalars and higher-dimensional indices
            if index_ndim == 0:
                multi_index = tuple(i[-1] for i in multi_index)
            elif index_ndim == 2:
                multi_index = tuple(i[:, np.newaxis] for i in multi_index)
            multi_index_symb = [aesara.shared(i) for i in multi_index]

            # reference result
            ref = np.ravel_multi_index(multi_index, shape, mode, order)

            def fn(mi, s):
                return function([], ravel_multi_index(mi, s, mode, order))

            # shape given as a tuple
            f_array_tuple = fn(multi_index, shape)
            f_symb_tuple = fn(multi_index_symb, shape)
            np.testing.assert_equal(ref, f_array_tuple())
            np.testing.assert_equal(ref, f_symb_tuple())

            # shape given as an array
            shape_array = np.array(shape)
            f_array_array = fn(multi_index, shape_array)
            np.testing.assert_equal(ref, f_array_array())

            # shape given as an Aesara variable
            shape_symb = aesara.shared(shape_array)
            f_array_symb = fn(multi_index, shape_symb)
            np.testing.assert_equal(ref, f_array_symb())

            # shape testing
            self._compile_and_check(
                [],
                [ravel_multi_index(multi_index, shape_symb, mode, order)],
                [],
                RavelMultiIndex,
            )

        for mode in ("raise", "wrap", "clip"):
            for order in ("C", "F"):
                for index_ndim in (0, 1, 2):
                    check((3, ), index_ndim, mode, order)
                    check((3, 4), index_ndim, mode, order)
                    check((3, 4, 5), index_ndim, mode, order)

        # must provide integers
        with pytest.raises(TypeError):
            ravel_multi_index((fvector(), ivector()), (3, 4))
        with pytest.raises(TypeError):
            ravel_multi_index(((3, 4), ivector()), (3.4, 3.2))

        # dims must be a 1D sequence
        with pytest.raises(TypeError):
            ravel_multi_index(((3, 4), ), ((3, 4), ))