Example #1
def test_graph_opt_caching():
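    # Compile two structurally identical graphs with cache_optimizations
    # enabled and check that both functions return the same result, i.e.
    # reusing the cached optimized graph does not change the computation.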
    opt_db_file = os.path.join(aesara.config.compiledir,
                               "optimized_graphs.pkl")
    if os.path.exists(opt_db_file):
        os.remove(opt_db_file)

    mode = aesara.config.mode
    if mode in ["DEBUG_MODE", "DebugMode"]:
        mode = "FAST_RUN"
    default = aesara.config.cache_optimizations
    try:
        aesara.config.cache_optimizations = True
        a = tt.fmatrix("a")
        b = tt.fmatrix("b")
        c = aesara.shared(np.ones((10, 10), dtype=floatX))
        d = aesara.shared(np.ones((10, 10), dtype=floatX))
        e = tt.sum(tt.sum(tt.sum(a**2 + b) + c) + d)
        f1 = aesara.function([a, b], e, mode=mode)

        m = tt.fmatrix("x1")
        n = tt.fmatrix("x2")
        p = aesara.shared(np.ones((10, 10), dtype=floatX))
        q = aesara.shared(np.ones((10, 10), dtype=floatX))
        j = tt.sum(tt.sum(tt.sum(m**2 + n) + p) + q)
        f2 = aesara.function([m, n], j, mode=mode)

        in1 = np.ones((10, 10), dtype=floatX)
        in2 = np.ones((10, 10), dtype=floatX)
        assert f1(in1, in2) == f2(in1, in2)
    finally:
        aesara.config.cache_optimizations = default
def test_gpujoin_gpualloc():
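    # Build the same zeros_like/ones_like + join graph for CPU and GPU modes
    # and check that Alloc/Join are replaced by GpuAlloc/gpu_join on the GPU,
    # while both versions still compute the same values.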
    a = tt.fmatrix("a")
    a_val = np.asarray(np.random.rand(4, 5), dtype="float32")
    b = tt.fmatrix("b")
    b_val = np.asarray(np.random.rand(3, 5), dtype="float32")

    f = aesara.function(
        [a, b], tt.join(0, tt.zeros_like(a), tt.ones_like(b)) + 4, mode=mode_without_gpu
    )
    f_gpu = aesara.function(
        [a, b], tt.join(0, tt.zeros_like(a), tt.ones_like(b)), mode=mode_with_gpu
    )
    f_gpu2 = aesara.function(
        [a, b], tt.join(0, tt.zeros_like(a), tt.ones_like(b)) + 4, mode=mode_with_gpu
    )
    assert sum([node.op == tt.alloc for node in f.maker.fgraph.toposort()]) == 2
    assert sum([node.op == tt.join_ for node in f.maker.fgraph.toposort()]) == 1
    assert (
        sum([isinstance(node.op, GpuAlloc) for node in f_gpu.maker.fgraph.toposort()])
        == 2
    )
    assert sum([node.op == gpu_join for node in f_gpu.maker.fgraph.toposort()]) == 1
    assert (
        sum([isinstance(node.op, GpuAlloc) for node in f_gpu2.maker.fgraph.toposort()])
        == 2
    )
    assert sum([node.op == gpu_join for node in f_gpu2.maker.fgraph.toposort()]) == 1
    assert np.allclose(f(a_val, b_val), f_gpu2(a_val, b_val))
def test_pickle_unpickle_without_reoptimization():
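    # Pickle a compiled function, then unpickle it with
    # reoptimize_unpickled_function disabled and check that the restored
    # function computes the same result as the original one.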
    mode = aesara.config.mode
    if mode in ["DEBUG_MODE", "DebugMode"]:
        mode = "FAST_RUN"
    x1 = tt.fmatrix("x1")
    x2 = tt.fmatrix("x2")
    x3 = aesara.shared(np.ones((10, 10), dtype=floatX))
    x4 = aesara.shared(np.ones((10, 10), dtype=floatX))
    y = tt.sum(tt.sum(tt.sum(x1**2 + x2) + x3) + x4)

    updates = OrderedDict()
    updates[x3] = x3 + 1
    updates[x4] = x4 + 1
    f = aesara.function([x1, x2], y, updates=updates, mode=mode)

    # now pickle the compiled aesara fn
    string_pkl = pickle.dumps(f, -1)

    # compute f value
    in1 = np.ones((10, 10), dtype=floatX)
    in2 = np.ones((10, 10), dtype=floatX)

    # test unpickle without optimization
    default = aesara.config.reoptimize_unpickled_function
    try:
        # the default is True
        aesara.config.reoptimize_unpickled_function = False
        f_ = pickle.loads(string_pkl)
        assert f(in1, in2) == f_(in1, in2)
    finally:
        aesara.config.reoptimize_unpickled_function = default
def test_multinomial_dtypes():
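    # With odtype "auto" the output dtype follows the input dtype; an explicit
    # odtype such as "float64" overrides it.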
    p = tensor.dmatrix()
    u = tensor.dvector()
    m = multinomial.MultinomialFromUniform("auto")(p, u)
    assert m.dtype == "float64", m.dtype

    p = tensor.fmatrix()
    u = tensor.fvector()
    m = multinomial.MultinomialFromUniform("auto")(p, u)
    assert m.dtype == "float32", m.dtype

    p = tensor.fmatrix()
    u = tensor.fvector()
    m = multinomial.MultinomialFromUniform("float64")(p, u)
    assert m.dtype == "float64", m.dtype
Example #5
    def test_softmax_shape_0(self):
        x = tt.fmatrix("x")
        z = tt.nnet.softmax_op

        f, f_gpu = self._test_softmax(x, x, z, z, self._cmp)
        # Aesara can handle that case, but cudnn can't
        self._cmp(0, 10, f, f_gpu)
Example #6
def test_GpuCrossentropySoftmaxArgmax1HotWithBias():
    # This is a basic test for GpuCrossentropySoftmaxArgmax1HotWithBias.
    # We check that the kernel loops when there are too many threads.

    n_in = 1000
    batch_size = 4097
    n_out = 1250

    if not isinstance(mode_with_gpu, aesara.compile.DebugMode):
        n_in = 4098
        n_out = 4099

    y = tt.lvector("y")

    b = tt.fvector("b")

    # We precompute the dot product with a big shape beforehand so that the
    # GpuCrossentropySoftmax1HotWithBiasDx test does not fail with the error
    # "the launch timed out and was terminated" on GPU cards that are not
    # powerful enough. The big shape is needed to exercise the corner case.
    dot_result = tt.fmatrix("dot_result")

    # Seed numpy.random with config.unittests.rseed
    utt.seed_rng()

    xx = np.asarray(np.random.rand(batch_size, n_in), dtype=np.float32)
    yy = np.ones((batch_size, ), dtype="int32")
    b_values = np.zeros((n_out, ), dtype="float32")
    W_values = np.asarray(np.random.rand(n_in, n_out), dtype="float32")

    dot_value = np.asarray(np.dot(xx, W_values), dtype="float32")
    del W_values
    p_y_given_x = tt.nnet.softmax(dot_result + b)
    y_pred = tt.argmax(p_y_given_x, axis=-1)
    loss = -tt.mean(tt.log(p_y_given_x)[tt.arange(y.shape[0]), y])
    dW = tt.grad(loss, dot_result)
    classify = aesara.function(inputs=[y, b, dot_result],
                               outputs=[loss, y_pred, dW],
                               mode=mode_without_gpu)
    classify_gpu = aesara.function(inputs=[y, b, dot_result],
                                   outputs=[loss, y_pred, dW],
                                   mode=mode_with_gpu)

    assert any([
        isinstance(node.op, tt.nnet.CrossentropySoftmaxArgmax1HotWithBias)
        for node in classify.maker.fgraph.toposort()
    ])
    assert any([
        isinstance(node.op, GpuCrossentropySoftmaxArgmax1HotWithBias)
        for node in classify_gpu.maker.fgraph.toposort()
    ])

    out = classify(yy, b_values, dot_value)
    gout = classify_gpu(yy, b_values, dot_value)

    assert len(out) == len(gout) == 3
    utt.assert_allclose(out[0], gout[0])
    utt.assert_allclose(out[2], gout[2], atol=3e-6)
    utt.assert_allclose(out[1], gout[1])
Example #7
    def test_sparseblockgemvF(self):
        # Test the Fortran order for W (which can happen in the grad for some
        # graphs).

        b = tensor.fmatrix()
        W = tensor.ftensor4()
        h = tensor.ftensor3()
        iIdx = tensor.imatrix()
        oIdx = tensor.imatrix()

        o = self.gemv_op(
            b.take(oIdx, axis=0),
            tensor.DimShuffle((False, False, False, False), (0, 1, 3, 2))(
                tensor.as_tensor_variable(W)
            ),
            h,
            iIdx,
            oIdx,
        )

        f = aesara.function([W, h, iIdx, b, oIdx], o, mode=self.mode)

        W_val, h_val, iIdx_val, b_val, oIdx_val = self.gemv_data()

        th_out = f(np.swapaxes(W_val, 2, 3), h_val, iIdx_val, b_val, oIdx_val)
        ref_out = self.gemv_numpy(
            b_val.take(oIdx_val, axis=0), W_val, h_val, iIdx_val, oIdx_val
        )

        utt.assert_allclose(ref_out, th_out)
Example #8
    def test_softmax(self):
        x = tt.fmatrix("x")
        z = tt.nnet.softmax_op

        f, f_gpu = self._test_softmax(x, x, z, z, self._cmp)

        self._cmp(2 << 15, 5, f, f_gpu)
Example #9
def test_multinomial_large():
    # DEBUG_MODE will test this on GPU
    p = tensor.fmatrix()
    u = tensor.fvector()
    m = aesara.sandbox.multinomial.MultinomialFromUniform("auto")(p, u)
    f = function([p, u], m * 2, allow_input_downcast=True, mode=mode_with_gpu)
    assert any([
        type(node.op) is GPUAMultinomialFromUniform
        for node in f.maker.fgraph.toposort()
    ])

    pval = np.arange(10000 * 4, dtype="float32").reshape((10000, 4)) + 0.1
    pval = pval / pval.sum(axis=1)[:, None]
    uval = np.ones_like(pval[:, 0]) * 0.5
    mval = f(pval, uval)

    assert mval.shape == pval.shape
    if config.cast_policy == "custom":
        assert mval.dtype == pval.dtype
    elif config.cast_policy == "numpy+floatX":
        assert mval.dtype == config.floatX
    elif config.cast_policy == "numpy":
        assert mval.dtype == "float64"
    else:
        raise NotImplementedError(config.cast_policy)
    utt.assert_allclose(mval.sum(axis=1), 2)
    asdf = np.asarray([0, 0, 2, 0]) + 0 * pval
    utt.assert_allclose(mval, asdf)  # broadcast over all rows
    def test_select_proportional_to_weight(self):
        # Tests that ChoiceFromUniform selects elements, on average,
        # proportional to their probabilities.

        p = tensor.fmatrix()
        u = tensor.fvector()
        n = tensor.iscalar()
        m = multinomial.ChoiceFromUniform(odtype="auto")(p, u, n)

        f = function([p, u, n], m, allow_input_downcast=True)

        n_elements = 100
        n_selected = 10
        mean_rtol = 0.0005
        np.random.seed(12345)
        pvals = np.random.randint(1, 100,
                                  (1, n_elements)).astype(config.floatX)
        pvals /= pvals.sum(1)
        avg_pvals = np.zeros((n_elements, ), dtype=config.floatX)

        for rep in range(10000):
            uni = np.random.rand(n_selected).astype(config.floatX)
            res = f(pvals, uni, n_selected)
            res = np.squeeze(res)
            avg_pvals[res] += 1
        avg_pvals /= avg_pvals.sum()
        avg_diff = np.mean(abs(avg_pvals - pvals))
        assert avg_diff < mean_rtol, avg_diff
    def test_select_proportional_to_weight(self):
        # Tests that multinomial_wo_replacement selects elements, on average,
        # proportional to their probabilities.

        th_rng = RandomStreams(12345)

        p = tensor.fmatrix()
        n = tensor.iscalar()
        m = th_rng.multinomial_wo_replacement(pvals=p, n=n)

        f = function([p, n], m, allow_input_downcast=True)

        n_elements = 100
        n_selected = 10
        mean_rtol = 0.0005
        np.random.seed(12345)
        pvals = np.random.randint(1, 100,
                                  (1, n_elements)).astype(config.floatX)
        pvals /= pvals.sum(1)
        avg_pvals = np.zeros((n_elements, ), dtype=config.floatX)

        for rep in range(10000):
            res = f(pvals, n_selected)
            res = np.squeeze(res)
            avg_pvals[res] += 1
        avg_pvals /= avg_pvals.sum()
        avg_diff = np.mean(abs(avg_pvals - pvals))
        assert avg_diff < mean_rtol
    def test_Strides2D(self, mode):
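        # Apply the cumulative op (cumsum or cumprod) to strided 2-D inputs
        # along every axis and check the GPU result against NumPy, making sure
        # a GpuCumOp node is actually used.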
        np_func = dict(add=np.cumsum, mul=np.cumprod)[mode]
        op_class = partial(self.op_class, mode=mode)
        x = tt.fmatrix("x")

        for axis in [0, 1, None, -1, -2]:
            a = np.random.random((42, 30)).astype("float32")
            cumop_function = aesara.function(
                [x], op_class(axis=axis)(x), mode=self.mode
            )

            slicings = [
                slice(None, None, None),  # Normal strides
                slice(None, None, 2),  # Stepped strides
                slice(None, None, -1),  # Negative strides
            ]

            # Cartesian product of all slicings to test.
            for slicing in product(slicings, repeat=x.ndim):
                f = aesara.function(
                    [x], op_class(axis=axis)(x[slicing]), mode=self.mode
                )
                assert [
                    n for n in f.maker.fgraph.toposort() if isinstance(n.op, GpuCumOp)
                ]
                utt.assert_allclose(np_func(a[slicing], axis=axis), f(a))
                utt.assert_allclose(
                    np_func(a[slicing], axis=axis), cumop_function(a[slicing])
                )
def test_multinomial_0():
    # This tests the MultinomialFromUniform Op directly, not going through the
    # multinomial() call in GPU random generation.

    p = tensor.fmatrix()
    u = tensor.fvector()

    m = multinomial.MultinomialFromUniform("auto")(p, u)

    # the m*2 allows the multinomial to reuse output
    f = function([p, u], m * 2, allow_input_downcast=True)

    # test that both first and second samples can be drawn
    utt.assert_allclose(f([[1, 0], [0, 1]], [0.1, 0.1]), [[2, 0], [0, 2]])

    # test that both second labels can be drawn
    r = f([[0.2, 0.8], [0.3, 0.7]], [0.31, 0.31])
    utt.assert_allclose(r, [[0, 2], [0, 2]])

    # test that both first labels can be drawn
    r = f([[0.2, 0.8], [0.3, 0.7]], [0.21, 0.21])
    utt.assert_allclose(r, [[0, 2], [2, 0]])

    # change the size to make sure output gets reallocated ok
    # and also make sure that the GPU version doesn't screw up the
    # transposed-ness
    r = f([[0.2, 0.8]], [0.25])
    utt.assert_allclose(r, [[0, 2]])
def test_hostfromgpu_shape_i():
    # Test that the shape is lifted over hostfromgpu

    m = mode_with_gpu.including(
        "local_dot_to_dot22", "local_dot22_to_dot22scalar", "specialize"
    )
    a = tt.fmatrix("a")
    ca = aesara.gpuarray.type.GpuArrayType("float32", (False, False))()
    av = np.asarray(np.random.rand(5, 4), dtype="float32")
    cv = gpuarray.asarray(
        np.random.rand(5, 4), dtype="float32", context=get_context(test_ctx_name)
    )

    f = aesara.function([a], GpuFromHost(test_ctx_name)(a), mode=m)
    assert any(isinstance(x.op, GpuFromHost) for x in f.maker.fgraph.toposort())
    f = aesara.function([a], GpuFromHost(test_ctx_name)(a).shape, mode=m)
    topo = f.maker.fgraph.toposort()
    assert isinstance(topo[0].op, tt.opt.Shape_i)
    assert isinstance(topo[1].op, tt.opt.Shape_i)
    assert isinstance(topo[2].op, tt.opt.MakeVector)
    assert tuple(f(av)) == (5, 4)

    f = aesara.function([ca], host_from_gpu(ca), mode=m)
    assert host_from_gpu in [x.op for x in f.maker.fgraph.toposort()]
    f = aesara.function([ca], host_from_gpu(ca).shape, mode=m)
    topo = f.maker.fgraph.toposort()
    assert isinstance(topo[0].op, aesara.compile.Shape_i)
    assert isinstance(topo[1].op, aesara.compile.Shape_i)
    assert isinstance(topo[2].op, tt.opt.MakeVector)
    assert tuple(f(cv)) == (5, 4)
Example #15
def test_gpu_opt():
    # Does have some overlap with test_multinomial_0

    # We test the case where we put the op on the gpu when the output
    # is moved to the gpu.
    p = tensor.fmatrix()
    u = tensor.fvector()
    m = aesara.sandbox.multinomial.MultinomialFromUniform("auto")(p, u)
    assert m.dtype == "float32", m.dtype

    f = function([p, u], m, allow_input_downcast=True, mode=mode_with_gpu)
    assert any([
        type(node.op) is GPUAMultinomialFromUniform
        for node in f.maker.fgraph.toposort()
    ])
    pval = np.arange(10000 * 4, dtype="float32").reshape((10000, 4)) + 0.1
    pval = pval / pval.sum(axis=1)[:, None]
    uval = np.ones_like(pval[:, 0]) * 0.5
    f(pval, uval)

    # Test with a row, it was failing in the past.
    r = tensor.frow()
    m = aesara.sandbox.multinomial.MultinomialFromUniform("auto")(r, u)
    assert m.dtype == "float32", m.dtype

    f = function([r, u], m, allow_input_downcast=True, mode=mode_with_gpu)
    assert any([
        type(node.op) is GPUAMultinomialFromUniform
        for node in f.maker.fgraph.toposort()
    ])
    pval = np.arange(1 * 4, dtype="float32").reshape((1, 4)) + 0.1
    pval = pval / pval.sum(axis=1)[:, None]
    uval = np.ones_like(pval[:, 0]) * 0.5
    f(pval, uval)
    def test_incorrect_type(self):
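        # Assigning a test value whose shape or dtype does not match the
        # variable's type should raise a TypeError.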

        x = tt.vector("x")
        with pytest.raises(TypeError):
            # Incorrect shape for test value
            x.tag.test_value = np.empty((2, 2))

        x = tt.fmatrix("x")
        with pytest.raises(TypeError):
            # Incorrect dtype (float64) for test value
            x.tag.test_value = np.random.rand(3, 4)
Example #17
    def test_gemv_infershape(self):
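        # Check that the sparse block gemv op's infer_shape agrees with the
        # shapes of the actual outputs.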
        b = tensor.fmatrix()
        W = tensor.ftensor4()
        h = tensor.ftensor3()
        iIdx = tensor.imatrix()
        oIdx = tensor.imatrix()

        self._compile_and_check(
            [W, h, iIdx, b, oIdx],
            [self.gemv_op(b.take(oIdx, axis=0), W, h, iIdx, oIdx)],
            self.gemv_data(),
            self.gemv_class,
        )
def test_n_samples_1():
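    # With deterministic one-hot probabilities, drawing i samples should put
    # exactly i counts in the corresponding cell of each row.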
    p = tensor.fmatrix()
    u = tensor.fvector()
    n = tensor.iscalar()
    m = multinomial.MultinomialFromUniform("auto")(p, u, n)

    f = function([p, u, n], m, allow_input_downcast=True)

    np.random.seed(12345)
    for i in [1, 5, 10, 100, 1000, 10000]:
        uni = np.random.rand(2 * i).astype(config.floatX)
        res = f([[1.0, 0.0], [0.0, 1.0]], uni, i)
        utt.assert_allclose(res, [[i * 1.0, 0.0], [0.0, i * 1.0]])
def test_cpu_contiguous():
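    # cpu_contiguous should appear in the compiled graph and always return a
    # C-contiguous array, even for strided inputs; its grad is also verified.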
    a = tt.fmatrix("a")
    i = tt.iscalar("i")
    a_val = np.asarray(np.random.rand(4, 5), dtype="float32")
    f = aesara.function([a, i], cpu_contiguous(a.reshape((5, 4))[::i]))
    topo = f.maker.fgraph.toposort()
    assert any([isinstance(node.op, CpuContiguous) for node in topo])
    assert f(a_val, 1).flags["C_CONTIGUOUS"]
    assert f(a_val, 2).flags["C_CONTIGUOUS"]
    assert f(a_val, 3).flags["C_CONTIGUOUS"]
    # Test the grad:

    utt.verify_grad(cpu_contiguous, [np.random.rand(5, 7, 2)])
Example #20
    def test_dot_infershape(self):
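        # Check that sparse_block_dot's infer_shape agrees with the shapes of
        # the actual outputs.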
        b = tensor.fmatrix()
        W = tensor.ftensor4()
        h = tensor.ftensor3()
        iIdx = tensor.imatrix()
        oIdx = tensor.imatrix()

        self._compile_and_check(
            [W, h, iIdx, b, oIdx],
            [sparse_block_dot(W, h, iIdx, b, oIdx)],
            self.gemv_data(),
            self.gemv_class,
        )
def test_transfer_cpu_gpu():
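    # Round-trip a matrix between host and GPU memory and check that the
    # values are preserved in both directions.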
    a = tt.fmatrix("a")
    g = GpuArrayType(dtype="float32", broadcastable=(False, False))("g")

    av = np.asarray(rng.rand(5, 4), dtype="float32")
    gv = gpuarray.array(av, context=get_context(test_ctx_name))

    f = aesara.function([a], GpuFromHost(test_ctx_name)(a))
    fv = f(av)
    assert GpuArrayType.values_eq(fv, gv)

    f = aesara.function([g], host_from_gpu(g))
    fv = f(gv)
    assert np.all(fv == av)
Example #22
    def test_complex(self):
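        # Build a complex vector from the two rows of a float matrix and check
        # that real() and imag() recover them.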
        rng = np.random.RandomState(2333)
        m = fmatrix()
        c = complex(m[0], m[1])
        assert c.type == cvector
        r, i = [real(c), imag(c)]
        assert r.type == fvector
        assert i.type == fvector
        f = aesara.function([m], [r, i])

        mval = np.asarray(rng.randn(2, 5), dtype="float32")
        rval, ival = f(mval)
        assert np.all(rval == mval[0]), (rval, mval[0])
        assert np.all(ival == mval[1]), (ival, mval[1])
def test_gpu_contiguous():
    a = tt.fmatrix("a")
    i = tt.iscalar("i")
    a_val = np.asarray(np.random.rand(4, 5), dtype="float32")
    # The reshape is needed; otherwise the subtensor would be done on the CPU
    # to transfer less data.
    f = aesara.function(
        [a, i], gpu_contiguous(a.reshape((5, 4))[::i]), mode=mode_with_gpu
    )
    topo = f.maker.fgraph.toposort()
    assert any([isinstance(node.op, GpuSubtensor) for node in topo])
    assert any([isinstance(node.op, GpuContiguous) for node in topo])
    assert f(a_val, 1).flags.c_contiguous
    assert f(a_val, 2).flags.c_contiguous
    assert f(a_val, 3).flags.c_contiguous
Example #24
def test_gpu_opt_dtypes():
    # Test if the returned samples are of the datatype specified
    for dtype in ["uint32", "float32", "int64", "float64"]:
        p = tensor.fmatrix()
        u = tensor.fvector()
        m = aesara.sandbox.multinomial.MultinomialFromUniform(dtype)(p, u)

        f = function([p, u], m, allow_input_downcast=True, mode=mode_with_gpu)
        assert any([
            type(node.op) is GPUAMultinomialFromUniform
            for node in f.maker.fgraph.toposort()
        ])
        pval = np.arange(10000 * 4, dtype="float32").reshape((10000, 4)) + 0.1
        pval = pval / pval.sum(axis=1)[:, None]
        uval = np.ones_like(pval[:, 0]) * 0.5
        samples = f(pval, uval)
        assert samples.dtype == dtype, "{} != {}".format(samples.dtype, dtype)
Example #25
    def setup_method(self):
        super().setup_method()

        # Sample computation that involves tensors with different numbers
        # of dimensions
        self.input1 = tt.fmatrix()
        self.input2 = tt.fscalar()
        self.output = tt.dot((self.input1 - self.input2),
                             (self.input1 - self.input2).transpose())

        # Declare the conditional breakpoint
        self.breakpointOp = PdbBreakpoint("Sum of output too high")
        self.condition = tt.gt(self.output.sum(), 1000)
        (
            self.monitored_input1,
            self.monitored_input2,
            self.monitored_output,
        ) = self.breakpointOp(self.condition, self.input1, self.input2,
                              self.output)
Example #26
def test_GpuCrossentropySoftmax1HotWithBiasDx():
    # This is a basic test for GpuCrossentropySoftmax1HotWithBiasDx.
    # We check that the kernel loops when there are too many threads.

    batch_size = 4097
    n_out = 1250

    if not isinstance(mode_with_gpu, aesara.compile.DebugMode):
        n_out = 4099

    # Seed numpy.random with config.unittests.rseed
    utt.seed_rng()

    softmax_output_value = np.random.rand(batch_size, n_out).astype("float32")
    dnll_value = np.asarray(np.random.rand(batch_size), dtype="float32")
    y_idx_value = np.random.randint(low=0, high=5, size=batch_size)

    softmax_output = tt.fmatrix()
    softmax_output /= softmax_output.sum(axis=1).reshape(
        (softmax_output.shape[0], 1))
    op = crossentropy_softmax_1hot_with_bias_dx(dnll_value, softmax_output,
                                                y_idx_value)

    cpu_f = aesara.function([softmax_output], op, mode=mode_without_gpu)
    gpu_f = aesara.function([softmax_output], op, mode=mode_with_gpu)
    # aesara.printing.debugprint(cpu_f)
    # aesara.printing.debugprint(gpu_f)

    assert any([
        isinstance(node.op, tt.nnet.CrossentropySoftmax1HotWithBiasDx)
        for node in cpu_f.maker.fgraph.toposort()
    ])
    assert any([
        isinstance(node.op, GpuCrossentropySoftmax1HotWithBiasDx)
        for node in gpu_f.maker.fgraph.toposort()
    ])

    cpu_out = cpu_f(softmax_output_value)
    gpu_out = gpu_f(softmax_output_value)

    rtol = 1e-5
    atol = 1e-6
    utt.assert_allclose(cpu_out, gpu_out, rtol=rtol, atol=atol)
Example #27
def test_gpu_opt_wor():
    # We test the case where we put the op on the gpu when the output
    # is moved to the gpu.
    p = tensor.fmatrix()
    u = tensor.fvector()
    n = tensor.iscalar()
    for replace in [False, True]:
        m = multinomial.ChoiceFromUniform(odtype="auto", replace=replace)(p, u,
                                                                          n)
        assert m.dtype == "int64", m.dtype

        f = function([p, u, n],
                     m,
                     allow_input_downcast=True,
                     mode=mode_with_gpu)
        assert any([
            type(node.op) is GPUAChoiceFromUniform
            for node in f.maker.fgraph.toposort()
        ])
        n_samples = 3
        pval = np.arange(10000 * 4, dtype="float32").reshape((10000, 4)) + 0.1
        pval = pval / pval.sum(axis=1)[:, None]
        uval = np.ones(pval.shape[0] * n_samples) * 0.5
        f(pval, uval, n_samples)

        # Test with a row, it was failing in the past.
        r = tensor.frow()
        m = multinomial.ChoiceFromUniform("auto", replace=replace)(r, u, n)
        assert m.dtype == "int64", m.dtype

        f = function([r, u, n],
                     m,
                     allow_input_downcast=True,
                     mode=mode_with_gpu)
        assert any([
            type(node.op) is GPUAChoiceFromUniform
            for node in f.maker.fgraph.toposort()
        ])
        pval = np.arange(1 * 4, dtype="float32").reshape((1, 4)) + 0.1
        pval = pval / pval.sum(axis=1)[:, None]
        uval = np.ones_like(pval[:, 0]) * 0.5
        f(pval, uval, 1)
    def test_fail_select_alot(self):
        # Tests that ChoiceFromUniform fails when asked to sample more
        # elements than the actual number of elements

        p = tensor.fmatrix()
        u = tensor.fvector()
        n = tensor.iscalar()
        m = multinomial.ChoiceFromUniform(odtype="auto")(p, u, n)

        f = function([p, u, n], m, allow_input_downcast=True)

        n_elements = 100
        n_selected = 200
        np.random.seed(12345)
        uni = np.random.rand(n_selected).astype(config.floatX)
        pvals = np.random.randint(1, 100,
                                  (1, n_elements)).astype(config.floatX)
        pvals /= pvals.sum(1)
        with pytest.raises(ValueError):
            f(pvals, uni, n_selected)
    def test_fail_select_alot(self):
        # Tests that multinomial_wo_replacement fails when asked to sample more
        # elements than the actual number of elements

        th_rng = RandomStreams(12345)

        p = tensor.fmatrix()
        n = tensor.iscalar()
        m = th_rng.multinomial_wo_replacement(pvals=p, n=n)

        f = function([p, n], m, allow_input_downcast=True)

        n_elements = 100
        n_selected = 200
        np.random.seed(12345)
        pvals = np.random.randint(1, 100,
                                  (1, n_elements)).astype(config.floatX)
        pvals /= pvals.sum(1)
        with pytest.raises(ValueError):
            f(pvals, n_selected)
Example #30
    def test_sparseblockgemv_grad_shape(self):
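        # Compute the gradients of the sparse block gemv output w.r.t. b, W
        # and h and check that each has the same shape as its input.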
        b = tensor.fmatrix()
        W = tensor.ftensor4()
        h = tensor.ftensor3()
        iIdx = tensor.imatrix()
        oIdx = tensor.imatrix()

        o = self.gemv_op(b.take(oIdx, axis=0), W, h, iIdx, oIdx)
        go = aesara.grad(o.sum(), [b, W, h])

        f = aesara.function([W, h, iIdx, b, oIdx], go, mode=self.mode)

        W_val, h_val, iIdx_val, b_val, oIdx_val = self.gemv_data()

        # Just make sure that it runs correctly and all the shapes are ok.
        b_g, W_g, h_g = f(W_val, h_val, iIdx_val, b_val, oIdx_val)

        assert b_g.shape == b_val.shape
        assert h_g.shape == h_val.shape
        assert W_g.shape == W_val.shape