Exemple #1
0
    def test_param_allow_downcast_vector_floatX(self):
        """Check per-input ``allow_downcast`` handling for float32 vectors.

        Three ``fvector`` inputs are wrapped in ``In`` with ``allow_downcast``
        set to True, False and None respectively.  The compiled sum is then
        called with ``[0]`` and with ``[0.1]`` on each input in turn.
        """
        permissive = tensor.fvector("a")
        strict = tensor.fvector("b")
        default = tensor.fvector("c")

        wrapped_inputs = [
            In(permissive, allow_downcast=True),
            In(strict, allow_downcast=False),
            In(default, allow_downcast=None),
        ]
        f = pfunc(wrapped_inputs, (permissive + strict + default))

        zero = [0]
        lossy = [0.1]

        # Values that can be accurately represented are accepted everywhere.
        assert np.all(f(zero, zero, zero) == 0)

        # allow_downcast=True: the value is accepted as well.
        assert np.allclose(f(lossy, zero, zero), 0.1)

        # allow_downcast=False: the call is rejected.
        with pytest.raises(TypeError):
            f(zero, lossy, zero)

        # allow_downcast=None: rejected, like False.
        with pytest.raises(TypeError):
            f(zero, zero, lossy)
def test_multinomial_dtypes():
    """Check the output dtype of ``MultinomialFromUniform``.

    Each case lists the constructors of the two inputs, the ``odtype``
    argument given to the op and the dtype the output must have.
    """
    cases = [
        (tensor.dmatrix, tensor.dvector, "auto", "float64"),
        (tensor.fmatrix, tensor.fvector, "auto", "float32"),
        (tensor.fmatrix, tensor.fvector, "float64", "float64"),
    ]
    for make_pvals, make_unis, odtype, expected_dtype in cases:
        pvals = make_pvals()
        unis = make_unis()
        out = multinomial.MultinomialFromUniform(odtype)(pvals, unis)
        assert out.dtype == expected_dtype, out.dtype
Exemple #3
0
def test_GpuCrossentropySoftmaxArgmax1HotWithBias():
    """Basic test for ``GpuCrossentropySoftmaxArgmax1HotWithBias``.

    The same loss / prediction / gradient graph is compiled once with
    ``mode_without_gpu`` and once with ``mode_with_gpu``.  Both compiled
    graphs must contain the expected op and their outputs must agree.
    The shapes are large so that the kernel has to loop when there are
    too many threads.
    """
    batch_size = 4097
    # Smaller shapes are used when running under DebugMode.
    if isinstance(mode_with_gpu, aesara.compile.DebugMode):
        n_in, n_out = 1000, 1250
    else:
        n_in, n_out = 4098, 4099

    targets = tt.lvector("y")
    bias = tt.fvector("b")

    # The dot product with the big shape is precomputed and passed in as an
    # input, so that the test of GpuCrossentropySoftmax1HotWithBiasDx does
    # not fail with "the launch timed out and was terminated" on GPU cards
    # that are not powerful enough.  The big shape is still needed to check
    # the corner case.
    logits = tt.fmatrix("dot_result")

    # Seed numpy.random with config.unittests.rseed
    utt.seed_rng()

    x_val = np.asarray(np.random.rand(batch_size, n_in), dtype=np.float32)
    y_val = np.ones((batch_size,), dtype="int32")
    b_val = np.zeros((n_out,), dtype="float32")
    w_val = np.asarray(np.random.rand(n_in, n_out), dtype="float32")

    dot_val = np.asarray(np.dot(x_val, w_val), dtype="float32")
    del w_val

    probs = tt.nnet.softmax(logits + bias)
    prediction = tt.argmax(probs, axis=-1)
    row_ids = tt.arange(targets.shape[0])
    loss = -tt.mean(tt.log(probs)[row_ids, targets])
    grad = tt.grad(loss, logits)

    classify = aesara.function(
        inputs=[targets, bias, logits],
        outputs=[loss, prediction, grad],
        mode=mode_without_gpu,
    )
    classify_gpu = aesara.function(
        inputs=[targets, bias, logits],
        outputs=[loss, prediction, grad],
        mode=mode_with_gpu,
    )

    # Each compiled graph must contain the op of its own backend.
    assert any(
        isinstance(node.op, tt.nnet.CrossentropySoftmaxArgmax1HotWithBias)
        for node in classify.maker.fgraph.toposort()
    )
    assert any(
        isinstance(node.op, GpuCrossentropySoftmaxArgmax1HotWithBias)
        for node in classify_gpu.maker.fgraph.toposort()
    )

    cpu_out = classify(y_val, b_val, dot_val)
    gpu_out = classify_gpu(y_val, b_val, dot_val)

    assert len(cpu_out) == len(gpu_out) == 3
    utt.assert_allclose(cpu_out[0], gpu_out[0])
    utt.assert_allclose(cpu_out[2], gpu_out[2], atol=3e-6)
    utt.assert_allclose(cpu_out[1], gpu_out[1])
Exemple #4
0
    def make_node(self, activations, labels, input_lengths):
        """Validate the inputs and build the ``Apply`` node of this op.

        All three arguments are converted to tensor variables; the
        activations are additionally made C-contiguous.

        Raises
        ------
        TypeError
            If activations are not float32, or labels / input_lengths are
            not int32.
        ValueError
            If activations, labels or input_lengths do not have 3, 2 and 1
            dimensions respectively.
        """
        # Ensure activations array is C-contiguous
        t_activations = cpu_contiguous(tt.as_tensor_variable(activations))
        t_labels = tt.as_tensor_variable(labels)
        t_input_lengths = tt.as_tensor_variable(input_lengths)

        # (variable, required dtype, dtype error, required ndim, ndim error),
        # checked in this order.
        requirements = (
            (
                t_activations,
                "float32",
                "activations must use the float32 type!",
                3,
                "activations must have 3 dimensions.",
            ),
            (
                t_labels,
                "int32",
                "labels must use the int32 type!",
                2,
                "labels must have 2 dimensions.",
            ),
            (
                t_input_lengths,
                "int32",
                "input_lengths must use the int32 type!",
                1,
                "input_lengths must have 1 dimension.",
            ),
        )
        for variable, dtype, dtype_error, ndim, ndim_error in requirements:
            if variable.type.dtype != dtype:
                raise TypeError(dtype_error)
            if variable.ndim != ndim:
                raise ValueError(ndim_error)

        # The cost is always an output; the gradient only when requested.
        outputs = [tt.fvector(name="ctc_cost")]
        if self.compute_grad:
            outputs.append(tt.ftensor3(name="ctc_grad"))

        return gof.Apply(
            self, inputs=[t_activations, t_labels, t_input_lengths], outputs=outputs
        )
Exemple #5
0
def test_multinomial_large():
    """Run ``MultinomialFromUniform`` on a 10000 x 4 input with the GPU mode.

    Checks that the GPU op is in the compiled graph, that the output has the
    dtype expected for the active ``config.cast_policy`` and that every row
    selects the third outcome.
    """
    # DEBUG_MODE will test this on GPU
    pvals_var = tensor.fmatrix()
    unis_var = tensor.fvector()
    sample = aesara.sandbox.multinomial.MultinomialFromUniform("auto")(
        pvals_var, unis_var
    )
    f = function(
        [pvals_var, unis_var], sample * 2, allow_input_downcast=True, mode=mode_with_gpu
    )
    assert any(
        type(node.op) is GPUAMultinomialFromUniform
        for node in f.maker.fgraph.toposort()
    )

    pval = np.arange(10000 * 4, dtype="float32").reshape((10000, 4)) + 0.1
    pval = pval / pval.sum(axis=1, keepdims=True)
    uval = np.ones_like(pval[:, 0]) * 0.5
    mval = f(pval, uval)

    assert mval.shape == pval.shape

    # Output dtype expected under each supported cast policy.
    dtype_by_policy = {
        "custom": pval.dtype,
        "numpy+floatX": config.floatX,
        "numpy": "float64",
    }
    if config.cast_policy not in dtype_by_policy:
        raise NotImplementedError(config.cast_policy)
    assert mval.dtype == dtype_by_policy[config.cast_policy]

    utt.assert_allclose(mval.sum(axis=1), 2)
    expected = np.asarray([0, 0, 2, 0]) + 0 * pval
    utt.assert_allclose(mval, expected)  # broadcast over all rows
    def test_select_proportional_to_weight(self):
        """Check that ``ChoiceFromUniform`` selects proportionally to the weights.

        Elements are sampled repeatedly from one fixed distribution.  The
        empirical selection frequencies are then compared with the
        probabilities: their mean absolute difference must stay below
        ``mean_rtol``.
        """
        pvals_var = tensor.fmatrix()
        unis_var = tensor.fvector()
        n_var = tensor.iscalar()
        choice = multinomial.ChoiceFromUniform(odtype="auto")(
            pvals_var, unis_var, n_var
        )
        f = function([pvals_var, unis_var, n_var], choice, allow_input_downcast=True)

        n_elements, n_selected = 100, 10
        n_repeats = 10000
        mean_rtol = 0.0005

        np.random.seed(12345)
        pvals = np.random.randint(1, 100, (1, n_elements)).astype(config.floatX)
        pvals /= pvals.sum(1)

        # counts[k] is the number of draws in which element k was selected.
        counts = np.zeros((n_elements,), dtype=config.floatX)
        for _ in range(n_repeats):
            uni = np.random.rand(n_selected).astype(config.floatX)
            picked = np.squeeze(f(pvals, uni, n_selected))
            counts[picked] += 1

        frequencies = counts / counts.sum()
        avg_diff = np.mean(abs(frequencies - pvals))
        assert avg_diff < mean_rtol, avg_diff
Exemple #7
0
def test_gpu_opt():
    """Check that ``MultinomialFromUniform`` is moved to the GPU.

    Has some overlap with test_multinomial_0.  Covers the case where the op
    is put on the GPU when its output is moved there, first with a matrix of
    probabilities and then with a row (the row case was failing in the past).
    """
    unis_var = tensor.fvector()

    # (probability variable, number of rows of the value fed to it)
    for pvals_var, n_rows in ((tensor.fmatrix(), 10000), (tensor.frow(), 1)):
        sample = aesara.sandbox.multinomial.MultinomialFromUniform("auto")(
            pvals_var, unis_var
        )
        assert sample.dtype == "float32", sample.dtype

        f = function(
            [pvals_var, unis_var],
            sample,
            allow_input_downcast=True,
            mode=mode_with_gpu,
        )
        assert any(
            type(node.op) is GPUAMultinomialFromUniform
            for node in f.maker.fgraph.toposort()
        )

        pval = np.arange(n_rows * 4, dtype="float32").reshape((n_rows, 4)) + 0.1
        pval = pval / pval.sum(axis=1, keepdims=True)
        uval = np.ones_like(pval[:, 0]) * 0.5
        f(pval, uval)
def test_multinomial_0():
    """Test the ``MultinomialFromUniform`` op directly.

    This does not go through the multinomial() call in GPU random
    generation.  The compiled function returns twice the op output, so the
    selected outcome shows up as a 2.
    """
    pvals_var = tensor.fmatrix()
    unis_var = tensor.fvector()
    sample = multinomial.MultinomialFromUniform("auto")(pvals_var, unis_var)

    # the sample*2 allows the multinomial to reuse output
    f = function([pvals_var, unis_var], sample * 2, allow_input_downcast=True)

    # (probabilities, uniform samples, expected output), run in this order.
    cases = [
        # both first and second samples can be drawn
        ([[1, 0], [0, 1]], [0.1, 0.1], [[2, 0], [0, 2]]),
        # both second labels can be drawn
        ([[0.2, 0.8], [0.3, 0.7]], [0.31, 0.31], [[0, 2], [0, 2]]),
        # both first labels can be drawn
        ([[0.2, 0.8], [0.3, 0.7]], [0.21, 0.21], [[0, 2], [2, 0]]),
        # change the size to make sure output gets reallocated ok
        # and also make sure that the GPU version doesn't screw up the
        # transposed-ness
        ([[0.2, 0.8]], [0.25], [[0, 2]]),
    ]
    for pvals, unis, expected in cases:
        utt.assert_allclose(f(pvals, unis), expected)
    def test_Strides1D(self, mode):
        """Check the GPU cumulative op on strided views of a 1D vector.

        For each axis, the op is applied to normally, stepped and negatively
        strided slices, and the result is compared with the NumPy reference
        selected by ``mode`` ("add" -> cumsum, "mul" -> cumprod).
        """
        make_op = partial(self.op_class, mode=mode)
        reference = {"add": np.cumsum, "mul": np.cumprod}[mode]
        x = tt.fvector("x")

        # Normal, stepped and negative strides.
        steps = (None, 2, -1)

        for axis in (0, None, -1):
            data = np.random.random((42,)).astype("float32")
            # Op applied to the input as given; slices are passed at call time.
            plain_fn = aesara.function([x], make_op(axis=axis)(x), mode=self.mode)
            slicings = [slice(None, None, step) for step in steps]

            # Cartesian product of all slicings to test.
            for slicing in product(slicings, repeat=x.ndim):
                # Here the slicing is part of the compiled graph.
                sliced_fn = aesara.function(
                    [x], make_op(axis=axis)(x[slicing]), mode=self.mode
                )
                assert any(
                    isinstance(node.op, GpuCumOp)
                    for node in sliced_fn.maker.fgraph.toposort()
                )
                utt.assert_allclose(
                    reference(data[slicing], axis=axis), sliced_fn(data)
                )
                utt.assert_allclose(
                    reference(data[slicing], axis=axis), plain_fn(data[slicing])
                )
Exemple #10
0
    def test_allow_downcast_floatX(self):
        """Check function-level ``allow_input_downcast`` for float32 inputs.

        The same scalar + vector sum is compiled with
        ``allow_input_downcast`` set to True, False and None, then called
        with ``0`` and with ``0.1`` on each input.
        """
        a = tensor.fscalar("a")
        b = tensor.fvector("b")

        def compile_with(flag):
            return pfunc([a, b], (a + b), allow_input_downcast=flag)

        allowing = compile_with(True)
        forbidding = compile_with(False)
        defaulting = compile_with(None)

        # If the values can be accurately represented, OK
        for fn in (allowing, forbidding, defaulting):
            assert np.all(fn(0, [0]) == 0)

        # For the vector: OK iff allow_input_downcast is True
        assert np.allclose(allowing(0, [0.1]), 0.1)
        for fn in (forbidding, defaulting):
            with pytest.raises(TypeError):
                fn(0, [0.1])

        # For the scalar: OK if allow_input_downcast is True,
        # or None and floatX==float32
        assert np.allclose(allowing(0.1, [0]), 0.1)
        with pytest.raises(TypeError):
            forbidding(0.1, [0])
        if config.floatX != "float32":
            with pytest.raises(TypeError):
                defaulting(0.1, [0])
        else:
            assert np.allclose(defaulting(0.1, [0]), 0.1)
    def test_profiling(self):
        """Regression test on the memory figures printed by the profiler.

        Profiling options are switched on for the duration of the test and
        restored afterwards.  A small graph is compiled and run with a
        ``ProfileStats`` object, and its summary must contain the expected
        memory lines for the configured device.
        """
        cfg = aesara.config
        saved_profile = cfg.profile
        saved_profile_memory = cfg.profile_memory
        saved_min_peak = cfg.profiling.min_peak_memory
        try:
            cfg.profile = True
            cfg.profile_memory = True
            cfg.profiling.min_peak_memory = True

            x = [tt.fvector("val%i" % i) for i in range(3)]

            # Outer-product row sums, then plain sums, of consecutive pairs.
            neighbours = list(zip(x, x[1:]))
            z = [tt.outer(left, right).sum(axis=1) for left, right in neighbours]
            z.extend(left + right for left, right in neighbours)

            p = aesara.ProfileStats(False, gpu_checks=False)

            slow_modes = ("DebugMode", "DEBUG_MODE", "FAST_COMPILE")
            m = "FAST_RUN" if cfg.mode in slow_modes else None

            f = aesara.function(x, z, profile=p, name="test_profiling", mode=m)

            inp = [np.arange(1024, dtype="float32") + 1 for _ in x]
            f(*inp)

            buf = StringIO()
            f.profile.summary(buf)

            # regression testing for future algo speed up
            the_string = buf.getvalue()
            report_lines = the_string.split("\n")
            # Shown as the assertion message to help diagnose a failure.
            details = (
                [line for line in report_lines if "Max if linker" in line],
                [line for line in report_lines if "Minimum peak" in line],
            )

            if cfg.device == "cpu":
                expected_fragments = (
                    "CPU: 4112KB (4104KB)",
                    "CPU: 8204KB (8196KB)",
                    "CPU: 8208KB",
                    "Minimum peak from all valid apply node order is 4104KB",
                )
            else:
                expected_fragments = (
                    "CPU: 16KB (16KB)",
                    "GPU: 8204KB (8204KB)",
                    "GPU: 12300KB (12300KB)",
                    "GPU: 8212KB",
                    "Minimum peak from all valid apply node order is 4116KB",
                )
            for fragment in expected_fragments:
                assert fragment in the_string, details

        finally:
            cfg.profile = saved_profile
            cfg.profile_memory = saved_profile_memory
            cfg.profiling.min_peak_memory = saved_min_peak
def test_n_samples_1():
    """Draw several samples per row from two one-hot distributions.

    With probabilities ``[1, 0]`` and ``[0, 1]``, drawing ``count`` samples
    must put all ``count`` draws on the single possible outcome of each row.
    """
    pvals_var = tensor.fmatrix()
    unis_var = tensor.fvector()
    n_var = tensor.iscalar()
    sample = multinomial.MultinomialFromUniform("auto")(pvals_var, unis_var, n_var)

    f = function([pvals_var, unis_var, n_var], sample, allow_input_downcast=True)

    one_hot_rows = [[1.0, 0.0], [0.0, 1.0]]
    np.random.seed(12345)
    for count in (1, 5, 10, 100, 1000, 10000):
        # One uniform value per row and per sample.
        uni = np.random.rand(2 * count).astype(config.floatX)
        res = f(one_hot_rows, uni, count)
        utt.assert_allclose(res, [[count * 1.0, 0.0], [0.0, count * 1.0]])
Exemple #13
0
def test_gpu_opt_dtypes():
    """Test that the returned samples have the requested dtype on the GPU."""
    for dtype in ("uint32", "float32", "int64", "float64"):
        pvals_var = tensor.fmatrix()
        unis_var = tensor.fvector()
        sample = aesara.sandbox.multinomial.MultinomialFromUniform(dtype)(
            pvals_var, unis_var
        )

        f = function(
            [pvals_var, unis_var],
            sample,
            allow_input_downcast=True,
            mode=mode_with_gpu,
        )
        assert any(
            type(node.op) is GPUAMultinomialFromUniform
            for node in f.maker.fgraph.toposort()
        )

        pval = np.arange(10000 * 4, dtype="float32").reshape((10000, 4)) + 0.1
        pval = pval / pval.sum(axis=1, keepdims=True)
        uval = np.ones_like(pval[:, 0]) * 0.5
        samples = f(pval, uval)
        assert samples.dtype == dtype, f"{samples.dtype} != {dtype}"
Exemple #14
0
def test_gpu_opt_wor():
    """Check that ``ChoiceFromUniform`` is moved to the GPU.

    Covers the case where the op is put on the GPU when its output is moved
    there, with ``replace`` both False and True, for a matrix and then for a
    row of probabilities.
    """
    pvals_var = tensor.fmatrix()
    unis_var = tensor.fvector()
    n_var = tensor.iscalar()

    def on_gpu(compiled):
        # True when the GPU op appears in the compiled graph.
        return any(
            type(node.op) is GPUAChoiceFromUniform
            for node in compiled.maker.fgraph.toposort()
        )

    for replace in (False, True):
        choice = multinomial.ChoiceFromUniform(odtype="auto", replace=replace)(
            pvals_var, unis_var, n_var
        )
        assert choice.dtype == "int64", choice.dtype

        f = function(
            [pvals_var, unis_var, n_var],
            choice,
            allow_input_downcast=True,
            mode=mode_with_gpu,
        )
        assert on_gpu(f)

        n_samples = 3
        pval = np.arange(10000 * 4, dtype="float32").reshape((10000, 4)) + 0.1
        pval = pval / pval.sum(axis=1, keepdims=True)
        uval = np.ones(pval.shape[0] * n_samples) * 0.5
        f(pval, uval, n_samples)

        # Test with a row, it was failing in the past.
        row_var = tensor.frow()
        choice = multinomial.ChoiceFromUniform("auto", replace=replace)(
            row_var, unis_var, n_var
        )
        assert choice.dtype == "int64", choice.dtype

        f = function(
            [row_var, unis_var, n_var],
            choice,
            allow_input_downcast=True,
            mode=mode_with_gpu,
        )
        assert on_gpu(f)

        pval = np.arange(1 * 4, dtype="float32").reshape((1, 4)) + 0.1
        pval = pval / pval.sum(axis=1, keepdims=True)
        uval = np.ones_like(pval[:, 0]) * 0.5
        f(pval, uval, 1)
    def test_fail_select_alot(self):
        """Check that ``ChoiceFromUniform`` rejects oversized requests.

        Asking for more elements (200) than the distribution contains (100)
        must raise ``ValueError``.
        """
        pvals_var = tensor.fmatrix()
        unis_var = tensor.fvector()
        n_var = tensor.iscalar()
        choice = multinomial.ChoiceFromUniform(odtype="auto")(
            pvals_var, unis_var, n_var
        )
        f = function([pvals_var, unis_var, n_var], choice, allow_input_downcast=True)

        n_elements, n_selected = 100, 200

        np.random.seed(12345)
        uni = np.random.rand(n_selected).astype(config.floatX)
        pvals = np.random.randint(1, 100, (1, n_elements)).astype(config.floatX)
        pvals /= pvals.sum(1)

        with pytest.raises(ValueError):
            f(pvals, uni, n_selected)
    def test_GpuCumOp1D(self, mode):
        """Compare the GPU cumulative op with NumPy on 1D inputs.

        ``mode`` selects the reference ("add" -> cumsum, "mul" -> cumprod).
        Input sizes range from empty to large enough for the recursive
        version of the op.
        """
        reference = {"add": np.cumsum, "mul": np.cumprod}[mode]
        make_op = partial(self.op_class, mode=mode)
        block_max_size = self.max_threads_dim0 * 2

        x = tt.fvector("x")
        f = aesara.function([x], make_op(axis=0)(x), mode=self.mode)
        assert any(isinstance(node.op, GpuCumOp) for node in f.maker.fgraph.toposort())

        # Extensive testing for the first 1025 sizes
        data = np.random.random(1025).astype("float32")
        for size in range(len(data)):
            prefix = data[:size]
            utt.assert_allclose(reference(prefix), f(prefix))

        # Use multiple GPU threadblocks
        data = np.random.random((block_max_size + 2,)).astype("float32")
        utt.assert_allclose(reference(data), f(data))

        # Use recursive cumop
        n_recursive = block_max_size * (block_max_size + 1) + 2
        data = np.ones((n_recursive,), dtype="float32")
        utt.assert_allclose(reference(data), f(data))
Exemple #17
0
    def test_select_distinct(self):
        """Check that ``ChoiceFromUniform`` always selects distinct elements.

        For several sample counts, up to the full number of elements, the
        selected indices must be valid and must not repeat.
        """
        p = tensor.fmatrix()
        u = tensor.fvector()
        n = tensor.iscalar()
        m = multinomial.ChoiceFromUniform(odtype="auto")(p, u, n)

        f = function([p, u, n], m, allow_input_downcast=True)

        n_elements = 1000
        all_indices = range(n_elements)
        np.random.seed(12345)
        for i in [5, 10, 50, 100, 500, n_elements]:
            uni = np.random.rand(i).astype(config.floatX)
            pvals = np.random.randint(1, 100, (1, n_elements)).astype(config.floatX)
            pvals /= pvals.sum(1)
            res = np.squeeze(f(pvals, uni, i))
            assert len(res) == i, res

            unique_res = np.unique(res)
            # Every selected index must be a valid element index ...
            assert np.all(np.in1d(unique_res, all_indices)), res
            # ... and no index may be selected twice.  The range check alone
            # does not verify the distinctness this test is about.
            assert len(unique_res) == i, res
def test_multinomial_large():
    """Run ``MultinomialFromUniform`` on a 10000 x 4 input.

    Checks that the output has the dtype expected for the active
    ``config.cast_policy`` and that every row selects the third outcome.
    """
    pvals_var = tensor.fmatrix()
    unis_var = tensor.fvector()
    sample = multinomial.MultinomialFromUniform("auto")(pvals_var, unis_var)
    f = function([pvals_var, unis_var], sample * 2, allow_input_downcast=True)

    pval = np.arange(10000 * 4, dtype="float32").reshape((10000, 4)) + 0.1
    pval = pval / pval.sum(axis=1, keepdims=True)
    uval = np.ones_like(pval[:, 0]) * 0.5
    mval = f(pval, uval)

    assert mval.shape == pval.shape

    # Output dtype expected under each supported cast policy.
    dtype_by_policy = {
        "custom": pval.dtype,
        "numpy+floatX": config.floatX,
        "numpy": "float64",
    }
    if config.cast_policy not in dtype_by_policy:
        raise NotImplementedError(config.cast_policy)
    assert mval.dtype == dtype_by_policy[config.cast_policy]

    utt.assert_allclose(mval.sum(axis=1), 2)
    expected = np.asarray([0, 0, 2, 0]) + 0 * pval
    utt.assert_allclose(mval, expected)  # broadcast over all rows
def test_n_samples_2():
    """Check that the number of draws equals the requested sample count.

    A single random distribution is sampled ``count`` times; the output must
    sum to ``count``.  This is done for distributions with 1000 and then
    1000000 outcomes.
    """
    pvals_var = tensor.fmatrix()
    unis_var = tensor.fvector()
    n_var = tensor.iscalar()
    sample = multinomial.MultinomialFromUniform("auto")(pvals_var, unis_var, n_var)

    f = function([pvals_var, unis_var, n_var], sample, allow_input_downcast=True)

    np.random.seed(12345)
    # The size also serves as the exclusive upper bound of the random weights.
    for n_outcomes in (1000, 1000000):
        for count in (1, 5, 10, 100, 1000):
            uni = np.random.rand(count).astype(config.floatX)
            pvals = np.random.randint(1, n_outcomes, (1, n_outcomes)).astype(
                config.floatX
            )
            pvals /= pvals.sum(1)
            res = f(pvals, uni, count)
            assert res.sum() == count
Exemple #20
0
def test_multinomial_output_dtype():
    """Test the ``MultinomialFromUniform`` op directly for several dtypes.

    This does not go through the multinomial() call in GPU random
    generation.  For every output dtype, the GPU op must be in the compiled
    graph and the doubled samples must match the expected values.
    """
    pvals_var = tensor.fmatrix()
    unis_var = tensor.fvector()

    # (probabilities, uniform samples, expected output), run in this order.
    cases = [
        # both first and second samples can be drawn
        ([[1, 0], [0, 1]], [0.1, 0.1], [[2, 0], [0, 2]]),
        # both second labels can be drawn
        ([[0.2, 0.8], [0.3, 0.7]], [0.31, 0.31], [[0, 2], [0, 2]]),
        # both first labels can be drawn
        ([[0.2, 0.8], [0.3, 0.7]], [0.21, 0.21], [[0, 2], [2, 0]]),
        # change the size to make sure output gets reallocated ok
        # and also make sure that the GPU version doesn't screw up the
        # transposed-ness
        ([[0.2, 0.8]], [0.25], [[0, 2]]),
    ]

    for dtype in ("int64", "float32", "float16", "float64", "int32", "auto"):
        sample = aesara.sandbox.multinomial.MultinomialFromUniform(dtype)(
            pvals_var, unis_var
        )

        # the sample*2 allows the multinomial to reuse output
        f = function(
            [pvals_var, unis_var],
            sample * 2,
            allow_input_downcast=True,
            mode=mode_with_gpu,
        )

        assert any(
            type(node.op) is GPUAMultinomialFromUniform
            for node in f.maker.fgraph.toposort()
        )

        for pvals, unis, expected in cases:
            utt.assert_allclose(f(pvals, unis), expected)
    def test_select_distinct(self):
        """Check that ``ChoiceFromUniform`` always selects distinct elements.

        For several sample counts, up to the full number of elements, the
        selected indices must be valid and must not repeat.  For the counts
        that have recorded reference values (matched by output shape), the
        exact selection obtained with the fixed seed is checked as well.
        """
        p = tensor.fmatrix()
        u = tensor.fvector()
        n = tensor.iscalar()
        m = multinomial.ChoiceFromUniform(odtype="auto")(p, u, n)

        f = function([p, u, n], m, allow_input_downcast=True)

        n_elements = 1000
        all_indices = range(n_elements)
        np.random.seed(12345)
        # Reference selections for the first three sample counts.
        expected = [
            np.asarray([[931, 318, 185, 209, 559]]),
            np.asarray([[477, 887, 2, 717, 333, 665, 159, 559, 348, 136]]),
            np.asarray([[
                546, 28, 79, 665, 295, 779, 433, 531, 411, 716,
                244, 234, 70, 88, 612, 639, 383, 335, 451, 100,
                175, 492, 848, 771, 559, 214, 568, 596, 370, 486,
                855, 925, 138, 300, 528, 507, 730, 199, 882, 357,
                58, 195, 705, 900, 66, 468, 513, 410, 816, 672,
            ]]),
        ]

        for i in [5, 10, 50, 100, 500, n_elements]:
            uni = np.random.rand(i).astype(config.floatX)
            pvals = np.random.randint(1, 100, (1, n_elements)).astype(config.floatX)
            pvals /= pvals.sum(1)
            res = f(pvals, uni, i)

            # Compare with the reference that has the same shape, if any.
            for reference in expected:
                if reference.shape == res.shape:
                    assert (reference == res).all()

            res = np.squeeze(res)
            assert len(res) == i

            unique_res = np.unique(res)
            # Every selected index must be a valid element index ...
            assert np.all(np.in1d(unique_res, all_indices)), res
            # ... and no index may be selected twice.  The range check alone
            # does not verify the distinctness this test is about.
            assert len(unique_res) == i, res