Example #1: per-input allow_downcast control via In() in pfunc
    def test_param_allow_downcast_floatX(self):
        a = fscalar("a")
        b = fscalar("b")
        c = fscalar("c")

        f = pfunc(
            [
                In(a, allow_downcast=True),
                In(b, allow_downcast=False),
                In(c, allow_downcast=None),
            ],
            (a + b + c),
        )

        # If the values can be accurately represented, everything is OK
        assert np.all(f(0, 0, 0) == 0)

        # If allow_downcast is True, idem
        assert np.allclose(f(0.1, 0, 0), 0.1)

        # If allow_downcast is False, nope
        with pytest.raises(TypeError):
            f(0, 0.1, 0)

        # If allow_downcast is None, it should work iff floatX=float32
        if config.floatX == "float32":
            assert np.allclose(f(0, 0, 0.1), 0.1)
        else:
            with pytest.raises(TypeError):
                f(0, 0, 0.1)
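Why 0 passes in every case above while 0.1 does not: 0 is exactly representable in float32, but the Python float 0.1 is not, so accepting it silently would change the value. A minimal NumPy-only check of that fact (illustrative, not part of the test):

import numpy as np

# The float64 literal 0.1 changes value when cast down to float32,
# which is why Aesara only performs that downcast when it is explicitly allowed.
assert np.float32(0.1) != np.float64(0.1)
# 0.0 is exactly representable in both dtypes, so no information is lost.
assert np.float32(0.0) == np.float64(0.0)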
Example #2: scan RNN on the GPU, checking host/device transfers
    def test_one_sequence_one_output_weights_gpu2(self):
        def f_rnn(u_t, x_tm1, W_in, W):
            return u_t * W_in + x_tm1 * W

        u = fvector("u")
        x0 = fscalar("x0")
        W_in = fscalar("win")
        W = fscalar("w")
        output, updates = scan(
            f_rnn,
            u,
            x0,
            [W_in, W],
            n_steps=None,
            truncate_gradient=-1,
            go_backwards=False,
            mode=mode_with_gpu,
        )

        f2 = aesara.function(
            [u, x0, W_in, W],
            output,
            updates=updates,
            allow_input_downcast=True,
            mode=mode_with_gpu,
        )

        # get random initial values
        rng = np.random.default_rng(utt.fetch_seed())
        v_u = rng.uniform(size=(4, ), low=-5.0, high=5.0)
        v_x0 = rng.uniform()
        W = rng.uniform()
        W_in = rng.uniform()

        # compute the output in numpy
        v_out = np.zeros((4, ))
        v_out[0] = v_u[0] * W_in + v_x0 * W
        for step in range(1, 4):
            v_out[step] = v_u[step] * W_in + v_out[step - 1] * W

        aesara_values = f2(v_u, v_x0, W_in, W)
        utt.assert_allclose(aesara_values, v_out)

        topo = f2.maker.fgraph.toposort()
        assert sum([isinstance(node.op, HostFromGpu) for node in topo]) == 1
        assert sum([isinstance(node.op, GpuFromHost) for node in topo]) == 4

        scan_node = [
            node for node in topo if isinstance(node.op, scan.op.Scan)
        ]
        assert len(scan_node) == 1
        scan_node = scan_node[0]
        scan_node_topo = scan_node.op.fn.maker.fgraph.toposort()

        # Check that the inner loop runs on the GPU and contains no GPU transfers.
        assert any(isinstance(node.op, GpuElemwise) for node in scan_node_topo)
        assert not any(
            isinstance(node.op, HostFromGpu) for node in scan_node_topo)
        assert not any(
            isinstance(node.op, GpuFromHost) for node in scan_node_topo)
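The toposort assertions above encode the expected transfer pattern: GpuFromHost appears four times (the four inputs are moved to the GPU), HostFromGpu appears once (only the final result is moved back), and the scan body itself contains a GPU elementwise op but no transfers in either direction.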
Example #3: mean of scalar inputs and its gradient
def test_mean(mode):
    a = iscalar("a")
    b = iscalar("b")
    z = mean(a, b)
    z_fn = aesara.function([a, b], z, mode=mode)
    res = z_fn(1, 1)
    assert np.allclose(res, 1.0)

    a = fscalar("a")
    b = fscalar("b")
    c = fscalar("c")

    z = mean(a, b, c)

    z_fn = aesara.function([a, b, c], aesara.grad(z, [a]), mode=mode)
    res = z_fn(3, 4, 5)
    assert np.allclose(res, 1 / 3)

    z_fn = aesara.function([a, b, c], aesara.grad(z, [b]), mode=mode)
    res = z_fn(3, 4, 5)
    assert np.allclose(res, 1 / 3)

    z = mean()
    z_fn = aesara.function([], z, mode=mode)
    assert z_fn() == 0
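Both gradient assertions check the value 1/3: the mean of three scalars is (a + b + c) / 3, so its derivative with respect to each input is 1/3.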
Example #4: scan on the GPU with mixed-dtype (float32 and int64) outputs
    def test_gpu3_mixture_dtype_outputs(self):
        def f_rnn(u_t, x_tm1, W_in, W):
            return (u_t * W_in + x_tm1 * W, aet.cast(u_t + x_tm1, "int64"))

        u = fvector("u")
        x0 = fscalar("x0")
        W_in = fscalar("win")
        W = fscalar("w")
        output, updates = scan(
            f_rnn,
            u,
            [x0, None],
            [W_in, W],
            n_steps=None,
            truncate_gradient=-1,
            go_backwards=False,
            mode=self.mode_with_gpu,
        )

        f2 = aesara.function(
            [u, x0, W_in, W],
            output,
            updates=updates,
            allow_input_downcast=True,
            mode=self.mode_with_gpu,
        )

        # get random initial values
        rng = np.random.RandomState(utt.fetch_seed())
        v_u = rng.uniform(size=(4, ), low=-5.0, high=5.0)
        v_x0 = rng.uniform()
        W = rng.uniform()
        W_in = rng.uniform()

        # compute the output in numpy
        v_out1 = np.zeros((4, ))
        v_out2 = np.zeros((4, ), dtype="int64")
        v_out1[0] = v_u[0] * W_in + v_x0 * W
        v_out2[0] = v_u[0] + v_x0
        for step in range(1, 4):
            v_out1[step] = v_u[step] * W_in + v_out1[step - 1] * W
            v_out2[step] = np.int64(v_u[step] + v_out1[step - 1])

        aesara_out1, aesara_out2 = f2(v_u, v_x0, W_in, W)
        utt.assert_allclose(aesara_out1, v_out1)
        utt.assert_allclose(aesara_out2, v_out2)

        topo = f2.maker.fgraph.toposort()
        scan_node = [node for node in topo if isinstance(node.op, Scan)]
        assert len(scan_node) == 1
        scan_node = scan_node[0]
        assert self.is_scan_on_gpu(scan_node)
Example #5: function-level allow_input_downcast in pfunc
    def test_allow_downcast_floatX(self):
        a = fscalar("a")
        b = fvector("b")

        f = pfunc([a, b], (a + b), allow_input_downcast=True)
        g = pfunc([a, b], (a + b), allow_input_downcast=False)
        h = pfunc([a, b], (a + b), allow_input_downcast=None)

        # If the values can be accurately represented, OK
        assert np.all(f(0, [0]) == 0)
        assert np.all(g(0, [0]) == 0)
        assert np.all(h(0, [0]) == 0)

        # For the vector: OK iff allow_input_downcast is True
        assert np.allclose(f(0, [0.1]), 0.1)
        with pytest.raises(TypeError):
            g(0, [0.1])
        with pytest.raises(TypeError):
            h(0, [0.1])

        # For the scalar: OK if allow_input_downcast is True,
        # or None and floatX==float32
        assert np.allclose(f(0.1, [0]), 0.1)
        with pytest.raises(TypeError):
            g(0.1, [0])
        if config.floatX == "float32":
            assert np.allclose(h(0.1, [0]), 0.1)
        else:
            with pytest.raises(TypeError):
                h(0.1, [0])
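This example and Example #1 exercise the same machinery at two levels: In(..., allow_downcast=...) sets the policy for a single input, while the allow_input_downcast argument of pfunc applies one policy to every input of the function.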
Example #6: copying a compiled function with delete_updates=True
    def test_copy_delete_updates(self):
        w = iscalar("w")
        x = fscalar("x")
        # Shared variables for the test; one of them gets an update below
        y = shared(value=1, name="y")
        z = shared(value=2, name="z")
        out = x + y + z

        # Test for different linkers
        for mode in ["FAST_RUN", "FAST_COMPILE"]:
            ori = function([x], out, mode=mode, updates={z: z * 2})
            cpy = ori.copy(delete_updates=True)

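            # With the z update removed by delete_updates, each call computes
            # x + y + z == 1 + 1 + 2 == 4 and z is never modified.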
            assert cpy(1)[0] == 4
            assert cpy(1)[0] == 4
            assert cpy(1)[0] == 4

        # Test if unused implicit and explicit inputs from delete_updates
        # are ignored as intended.
        for mode in ["FAST_RUN", "FAST_COMPILE"]:
            ori = function([x], x, mode=mode, updates={z: z * 2})
            cpy = ori.copy(delete_updates=True)

            ori = function([x, w], x, mode=mode, updates={z: z + w})
            cpy = ori.copy(delete_updates=True)
Example #7: copying a compiled function with share_memory=True
    def test_copy_share_memory(self):
        x = fscalar("x")
        # Shared variables for the test; one of them gets an update below
        y = shared(value=1)
        z = shared(value=2)
        out = tanh((x + y + 2) / (x + z - 0.2) ** 2)

        # Test for different linkers
        for mode in ["FAST_RUN", "FAST_COMPILE"]:
            ori = function([x], [out], mode=mode, updates={z: z + 1})
            cpy = ori.copy(share_memory=True)

            # Test whether memory is shared
            storage_map_ori = ori.fn.storage_map
            storage_map_cpy = cpy.fn.storage_map
            fgraph_cpy = cpy.maker.fgraph

            # Assert that intermediate and constant storages are shared
            # (input and output storages are not).
            i_o_variables = fgraph_cpy.inputs + fgraph_cpy.outputs
            ori_storages = storage_map_ori.values()
            shared_storages = [
                val
                for key, val in storage_map_cpy.items()
                if key not in i_o_variables or isinstance(key, Constant)
            ]
            for storage in shared_storages:
                assert any(storage is s for s in ori_storages)

            # Assert that storages of shared variables without updates are shared
            for (input, _1, _2), here, there in zip(
                ori.indices, ori.input_storage, cpy.input_storage
            ):
                assert here.data is there.data
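The identity checks above verify what share_memory=True promises: the copy reuses the original function's storage for intermediates and constants, and inputs backed by shared variables without updates point at the same underlying data.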
Example #8: gradient of relu at zero (the test name mentions abs)
def test_grad_abs():
    a = fscalar("a")
    b = aesara.tensor.nnet.relu(a)
    c = aesara.grad(b, a)
    f = aesara.function([a], c, mode=Mode(optimizer=None))
    # Currently Aesara returns 0.5 here, but that value is not guaranteed
    # and may change in the future.
    ret = f(0.0)
    assert ret == 0.5, ret
Example #9: gradient of the InRange op
def test_grad_inrange():
    for bound_definition in [(True, True), (False, False)]:
        # Instantiate op, and then take the gradient
        op = InRange(*bound_definition)
        x = fscalar("x")
        low = fscalar("low")
        high = fscalar("high")
        out = op(x, low, high)
        gx, glow, ghigh = aesara.gradient.grad(out, [x, low, high])

        # Check that the gradients are zero when x is below the lower
        # bound, equal to the lower bound, between the two bounds, equal
        # to the upper bound, and above the upper bound.
        # Mathematically the gradient would be infinite when x equals
        # either bound, but in that case Aesara defines it to be zero
        # for stability.
        f = aesara.function([x, low, high], [gx, glow, ghigh])
        utt.assert_allclose(f(0, 1, 5), [0, 0, 0])
        utt.assert_allclose(f(1, 1, 5), [0, 0, 0])
        utt.assert_allclose(f(2, 1, 5), [0, 0, 0])
        utt.assert_allclose(f(5, 1, 5), [0, 0, 0])
        utt.assert_allclose(f(7, 1, 5), [0, 0, 0])
Example #10: the gradient wrt a float32 variable stays float32
    def test_downcast_dtype(self):
        # Test that the gradient of a cost wrt a float32 variable does not
        # get upcasted to float64.
        # x has dtype float32, regardless of the value of floatX
        x = fscalar("x")
        y = x * 2
        z = lscalar("z")

        c = y + z
        dc_dx, dc_dy, dc_dz, dc_dc = grad(c, [x, y, z, c])
        # The dtype of dc_dy and dc_dz can be either float32 or float64,
        # that might depend on floatX, but is not specified.
        assert dc_dc.dtype in ("float32", "float64")
        assert dc_dz.dtype in ("float32", "float64")
        assert dc_dy.dtype in ("float32", "float64")

        # When the output gradient of y is passed to op.grad, it should
        # be downcasted to float32, so dc_dx should also be float32
        assert dc_dx.dtype == "float32"
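In this graph y = x * 2 is float32 while z (an lscalar) is int64, so c = y + z is upcast; the point of the test is that the gradient flowing back to x is downcast to float32 again instead of silently promoting dc_dx to float64.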
Example #11: test setup for PdbBreakpoint with fmatrix and fscalar inputs
    def setup_method(self):
        super().setup_method()

        # Sample computation that involves tensors with different numbers
        # of dimensions
        self.input1 = fmatrix()
        self.input2 = fscalar()
        self.output = dot((self.input1 - self.input2),
                          (self.input1 - self.input2).transpose())

        # Declare the conditional breakpoint
        self.breakpointOp = PdbBreakpoint("Sum of output too high")
        self.condition = gt(self.output.sum(), 1000)
        (
            self.monitored_input1,
            self.monitored_input2,
            self.monitored_output,
        ) = self.breakpointOp(self.condition, self.input1, self.input2,
                              self.output)
Example #12: scan RNN on the GPU with an explicit gpu_from_host on the output
    def test_one_sequence_one_output_weights_gpu1(self):
        def f_rnn(u_t, x_tm1, W_in, W):
            return u_t * W_in + x_tm1 * W

        u = fvector("u")
        x0 = fscalar("x0")
        W_in = fscalar("win")
        W = fscalar("w")

        # The following line is needed so that the first case is exercised;
        # otherwise, the second case would be tested instead.
        mode = self.mode_with_gpu.excluding("InputToGpuOptimizer")
        output, updates = scan(
            f_rnn,
            u,
            x0,
            [W_in, W],
            n_steps=None,
            truncate_gradient=-1,
            go_backwards=False,
            mode=mode,
        )

        output = self.gpu_backend.gpu_from_host(output)
        f2 = aesara.function(
            [u, x0, W_in, W],
            output,
            updates=updates,
            allow_input_downcast=True,
            mode=self.mode_with_gpu,
        )

        # get random initial values
        rng = np.random.RandomState(utt.fetch_seed())
        v_u = rng.uniform(size=(4, ), low=-5.0, high=5.0)
        v_x0 = rng.uniform()
        W = rng.uniform()
        W_in = rng.uniform()

        v_u = np.asarray(v_u, dtype="float32")
        v_x0 = np.asarray(v_x0, dtype="float32")
        W = np.asarray(W, dtype="float32")
        W_in = np.asarray(W_in, dtype="float32")

        # compute the output in numpy
        v_out = np.zeros((4, ))
        v_out[0] = v_u[0] * W_in + v_x0 * W
        for step in range(1, 4):
            v_out[step] = v_u[step] * W_in + v_out[step - 1] * W
        aesara_values = f2(v_u, v_x0, W_in, W)
        utt.assert_allclose(aesara_values, v_out)

        topo = f2.maker.fgraph.toposort()
        assert sum(
            isinstance(node.op, self.gpu_backend.HostFromGpu) for node in topo
        ) == 0
        assert sum(
            isinstance(node.op, self.gpu_backend.GpuFromHost) for node in topo
        ) == 4

        scan_node = [node for node in topo if isinstance(node.op, Scan)]
        assert len(scan_node) == 1
        scan_node = scan_node[0]
        scan_node_topo = scan_node.op.fn.maker.fgraph.toposort()

        # Check that the inner loop runs on the GPU and contains no GPU transfers.
        assert any(
            isinstance(node.op, self.gpu_backend.GpuElemwise)
            for node in scan_node_topo
        )
        assert not any(
            isinstance(node.op, self.gpu_backend.HostFromGpu)
            for node in scan_node_topo
        )
        assert not any(
            isinstance(node.op, self.gpu_backend.GpuFromHost)
            for node in scan_node_topo
        )
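Here the scan output is explicitly transferred with gpu_from_host before compilation, so the compiled function is expected to contain no HostFromGpu nodes at all and four GpuFromHost nodes (one per input), again with a transfer-free inner scan.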