Example #1
 def full(self, X, Xs=None):
     X, Xs = self._slice(X, Xs)
     scf_x = self.scaling_func(X, self.args)
     if Xs is None:
         return at.outer(scf_x, scf_x) * self.cov_func(X)
     else:
         scf_xs = self.scaling_func(Xs, self.args)
         return at.outer(scf_x, scf_xs) * self.cov_func(X, Xs)
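
This full method scales a base covariance as K'(x, x') = s(x) K(x, x') s(x'); the outer product supplies the s(x) s(x') factor. A minimal NumPy sketch of the same identity, with hypothetical scaling and base_cov stand-ins for self.scaling_func and self.cov_func:

import numpy as np

# Hypothetical stand-ins for self.scaling_func and self.cov_func.
scaling = lambda X: 1.0 + X.ravel() ** 2                 # s(x) > 0
base_cov = lambda X, Xs: np.exp(-0.5 * (X - Xs.T) ** 2)  # RBF kernel

X = np.linspace(0.0, 1.0, 5)[:, None]
s = scaling(X)
K = np.outer(s, s) * base_cov(X, X)  # K[i, j] = s(x_i) k(x_i, x_j) s(x_j)

assert np.allclose(K, K.T)           # scaling preserves symmetry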
Example #2
 def forward(self):
     z = self.z0  # (s, d): s draws of a d-dimensional latent
     u = self.u_  # (d,)
     w = self.w_  # (d,)
     b = self.b  # scalar bias
     h = self.h  # elementwise nonlinearity
     # h((s, d) dot (d,) + scalar) -> (s,)
     if not self.batched:
         hwz = h(z.dot(w) + b)  # (s,)
         # (s, d) + outer((s,), (d,)) -> (s, d)
         z1 = z + aet.outer(hwz, u)  # (s, d)
         return z1
     else:
         # batched: z is (s, b, d), u and w are (b, d), bias b is (b,)
         z = z.swapaxes(0, 1)  # -> (b, s, d)
         b = b.dimshuffle(0, "x")  # (b, 1)
         hwz = h(aet.batched_dot(z, w) + b)  # (b, s)
         # (b, s, d) + (b, s, 1) * (b, 1, d) -> (b, s, d)
         hwz = hwz.dimshuffle(0, 1, "x")  # (b, s, 1)
         u = u.dimshuffle(0, "x", 1)  # (b, 1, d)
         z1 = z + hwz * u  # (b, s, d)
         return z1.swapaxes(0, 1)  # back to (s, b, d)
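
The non-batched branch is the planar flow of Rezende & Mohamed (2015), f(z) = z + u * h(w . z + b) applied to each sample row; the outer product broadcasts the scalar h(w . z_i + b) across u. A minimal NumPy sketch of that branch, with made-up shapes:

import numpy as np

s, d = 4, 3
rng = np.random.default_rng(0)
z = rng.normal(size=(s, d))  # s samples of dimension d
u = rng.normal(size=d)
w = rng.normal(size=d)
b = 0.5
h = np.tanh                  # elementwise nonlinearity

z1 = z + np.outer(h(z @ w + b), u)  # matches the non-batched branch
assert z1.shape == (s, d)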
    def test_profiling(self):

        config1 = config.profile
        config2 = config.profile_memory
        config3 = config.profiling__min_peak_memory
        try:
            config.profile = True
            config.profile_memory = True
            config.profiling__min_peak_memory = True

            x = [fvector("val%i" % i) for i in range(3)]

            z = []
            z += [
                aet.outer(x[i], x[i + 1]).sum(axis=1)
                for i in range(len(x) - 1)
            ]
            z += [x[i] + x[i + 1] for i in range(len(x) - 1)]

            p = ProfileStats(False, gpu_checks=False)

            if config.mode in ["DebugMode", "DEBUG_MODE", "FAST_COMPILE"]:
                m = "FAST_RUN"
            else:
                m = None

            f = function(x, z, profile=p, name="test_profiling", mode=m)

            inp = [np.arange(1024, dtype="float32") + 1 for i in range(len(x))]
            f(*inp)

            buf = StringIO()
            f.profile.summary(buf)

            # regression testing for future algo speed up
            the_string = buf.getvalue()
            lines1 = [l for l in the_string.split("\n") if "Max if linker" in l]
            lines2 = [l for l in the_string.split("\n") if "Minimum peak" in l]
            if config.device == "cpu":
                assert "CPU: 4112KB (4104KB)" in the_string, (lines1, lines2)
                assert "CPU: 8204KB (8196KB)" in the_string, (lines1, lines2)
                assert "CPU: 8208KB" in the_string, (lines1, lines2)
                assert (
                    "Minimum peak from all valid apply node order is 4104KB"
                    in the_string), (lines1, lines2)
            else:
                assert "CPU: 16KB (16KB)" in the_string, (lines1, lines2)
                assert "GPU: 8204KB (8204KB)" in the_string, (lines1, lines2)
                assert "GPU: 12300KB (12300KB)" in the_string, (lines1, lines2)
                assert "GPU: 8212KB" in the_string, (lines1, lines2)
                assert (
                    "Minimum peak from all valid apply node order is 4116KB"
                    in the_string), (lines1, lines2)

        finally:
            config.profile = config1
            config.profile_memory = config2
            config.profiling__min_peak_memory = config3
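
The test drives the profiler through an explicit ProfileStats instance; in day-to-day use the same machinery is usually reached with profile=True. A minimal sketch, assuming a working Aesara install (the exact summary text is version-dependent):

from io import StringIO

import numpy as np
import aesara
import aesara.tensor as aet

x = aet.fvector("x")
f = aesara.function([x], aet.outer(x, x).sum(), profile=True)
f(np.arange(8, dtype="float32"))

buf = StringIO()
f.profile.summary(buf)  # same call the test makes on its ProfileStats
print(buf.getvalue()[:200])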
Example #4
 def test_A_plus_scaled_outer(self):
     skip_if_blas_ldflags_empty()
     f = self.function(
         [self.A, self.x, self.y], self.A + 0.1 * at.outer(self.x, self.y)
     )
     self.assertFunctionContains(f, CGer(destructive=False))
     self.run_f(f)  # DebugMode tests correctness
 def L_op(self, inputs, outputs, output_gradients):
     # Modified from aesara/tensor/slinalg.py
     A, b = inputs
     c = outputs[0]
     c_bar = output_gradients[0]
     # FIXME: triangular structure would use GpuCublasTriangularSolve?
     # no need to handle A_structure like slinalg.py?
     trans_solve_op = GpuCusolverSolve("general")
     b_bar = trans_solve_op(A.T, c_bar)
     A_bar = -tensor.outer(b_bar, c) if c.ndim == 1 else -b_bar.dot(c.T)
     return [A_bar, b_bar]
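
These are the standard reverse-mode rules for c = solve(A, b): b_bar = A^-T c_bar and, in the vector case, A_bar = -outer(b_bar, c). A NumPy finite-difference check of the outer-product rule (a sketch, not tied to the GPU op):

import numpy as np

rng = np.random.default_rng(1)
A = rng.normal(size=(3, 3)) + 3.0 * np.eye(3)  # well-conditioned
b = rng.normal(size=3)
g = rng.normal(size=3)                         # plays the role of c_bar

c = np.linalg.solve(A, b)
b_bar = np.linalg.solve(A.T, g)                # A^-T c_bar
A_bar = -np.outer(b_bar, c)                    # the rule used in L_op above

# finite-difference check of dL/dA[i, j] for L = g . solve(A, b)
eps, i, j = 1e-6, 1, 2
Ap = A.copy()
Ap[i, j] += eps
num = (g @ np.linalg.solve(Ap, b) - g @ c) / eps
assert np.isclose(num, A_bar[i, j], atol=1e-4)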
Example #6
 def full(self, X, Xs=None):
     X, Xs = self._slice(X, Xs)
     rx = self.lfunc(at.as_tensor_variable(X), self.args)
     if Xs is None:
         rz = self.lfunc(at.as_tensor_variable(X), self.args)
         r2 = self.square_dist(X, X)
     else:
         rz = self.lfunc(at.as_tensor_variable(Xs), self.args)
         r2 = self.square_dist(X, Xs)
     rx2 = at.reshape(at.square(rx), (-1, 1))
     rz2 = at.reshape(at.square(rz), (1, -1))
     return at.sqrt((2.0 * at.outer(rx, rz)) / (rx2 + rz2)) * at.exp(-1.0 * r2 / (rx2 + rz2))
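
This is the nonstationary Gibbs covariance k(x, x') = sqrt(2 l(x) l(x') / (l(x)^2 + l(x')^2)) * exp(-r^2 / (l(x)^2 + l(x')^2)), where lfunc supplies a lengthscale l(x) per input: at.outer(rx, rz) builds the l(x) l(x') matrix and rx2 + rz2 broadcasts to l(x)^2 + l(x')^2. A NumPy sketch with a hypothetical lengthscale function:

import numpy as np

lfunc = lambda x: 0.5 + x ** 2  # hypothetical positive lengthscale l(x)
x = np.linspace(0.0, 1.0, 4)

rx = lfunc(x)[:, None]               # l(x) as a column
rz = lfunc(x)[None, :]               # l(x') as a row
r2 = (x[:, None] - x[None, :]) ** 2  # squared distances

K = np.sqrt(2.0 * rx * rz / (rx**2 + rz**2)) * np.exp(-r2 / (rx**2 + rz**2))
assert np.allclose(K, K.T) and np.allclose(np.diag(K), 1.0)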
Example #7
 def __init__(self, v=None, **kwargs):
     super().__init__(**kwargs)
     v = self.add_param(v, "v")
     self.shared_params = dict(v=v)
     if self.batched:
         vv = v.dimshuffle(0, 1, "x") * v.dimshuffle(0, "x", 1)
         I = aet.eye(self.dim).dimshuffle("x", 0, 1)
         vvn = (1e-10 + (v**2).sum(-1)).dimshuffle(0, "x", "x")
     else:
         vv = aet.outer(v, v)
         I = aet.eye(self.dim)
         vvn = (v**2).sum(-1) + 1e-10
     self.H = I - 2.0 * vv / vvn
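
H = I - 2 v v^T / (v . v) is a Householder reflection: orthogonal, symmetric, with determinant -1, so the flow step preserves volume; the 1e-10 term only guards against division by zero when v is near zero. A quick NumPy check:

import numpy as np

v = np.array([1.0, -2.0, 0.5])
H = np.eye(3) - 2.0 * np.outer(v, v) / (v @ v + 1e-10)

assert np.allclose(H @ H.T, np.eye(3))     # orthogonal
assert np.isclose(np.linalg.det(H), -1.0)  # a reflection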
    def grad(self, inputs, cost_grad):
        """
        In defining the gradient, the Finite Fourier Transform is viewed as
        a complex-differentiable function of a complex variable
        """
        a = inputs[0]
        n = inputs[1]
        axis = inputs[2]
        grad = cost_grad[0]
        if not isinstance(axis, tensor.TensorConstant):
            raise NotImplementedError(
                "%s: gradient is currently implemented"
                " only for axis being a Aesara constant" %
                self.__class__.__name__)
        axis = int(axis.data)
        # note that the number of actual elements in the input being
        # differentiated is independent of possible padding or truncation:
        elem = tensor.arange(0, tensor.shape(a)[axis], 1)
        # accounts for padding:
        freq = tensor.arange(0, n, 1)
        outer = tensor.outer(freq, elem)
        pow_outer = tensor.exp(((-2 * math.pi * 1j) * outer) / (1.0 * n))
        res = tensor.tensordot(grad, pow_outer, (axis, 0))

        # This would be simpler but not implemented by aesara:
        # res = tensor.switch(tensor.lt(n, tensor.shape(a)[axis]),
        # tensor.set_subtensor(res[...,n::], 0, False, False), res)

        # Instead we resort to that to account for truncation:
        flip_shape = list(np.arange(0, a.ndim)[::-1])
        res = res.dimshuffle(flip_shape)
        res = tensor.switch(
            tensor.lt(n,
                      tensor.shape(a)[axis]),
            tensor.set_subtensor(
                res[n::, ],
                0,
                False,
                False,
            ),
            res,
        )
        res = res.dimshuffle(flip_shape)

        # ensures that the gradient shape conforms to the input shape:
        out_shape = (list(np.arange(0, axis)) + [a.ndim - 1] +
                     list(np.arange(axis, a.ndim - 1)))
        res = res.dimshuffle(*out_shape)
        return [res, None, None]
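
The heart of the gradient is the DFT matrix W[f, e] = exp(-2*pi*i*f*e / n), assembled as an elementwise exp of the outer product of the frequency and element index vectors; the upstream gradient is then contracted against it with tensordot. A NumPy sketch of that matrix:

import math
import numpy as np

n = 8
freq = np.arange(n)
elem = np.arange(n)
W = np.exp((-2 * math.pi * 1j) * np.outer(freq, elem) / (1.0 * n))  # DFT matrix

a = np.random.default_rng(2).normal(size=n)
assert np.allclose(W @ a, np.fft.fft(a))  # same transform, hence same Jacobian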
    def L_op(self, inputs, outputs, output_gradients):
        # Modified from aesara/tensor/slinalg.py
        A, b = inputs
        c = outputs[0]
        c_bar = output_gradients[0]

        trans_solve_op = GpuCublasTriangularSolve(not self.lower)
        b_bar = trans_solve_op(A.T, c_bar)

        A_bar = -tensor.outer(b_bar, c) if c.ndim == 1 else -b_bar.dot(c.T)

        if self.lower:
            A_bar = tensor.tril(A_bar)
        else:
            A_bar = tensor.triu(A_bar)
        return [A_bar, b_bar]
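
Relative to the general solve above, the only extra step is masking A_bar with tril or triu: a triangular solve never reads the opposite triangle of A, so the gradient there is identically zero and the mask discards nothing. A small illustration with SciPy's triangular solver:

import numpy as np
from scipy.linalg import solve_triangular

rng = np.random.default_rng(3)
A = np.tril(rng.normal(size=(3, 3))) + 3.0 * np.eye(3)  # lower triangular
b = rng.normal(size=3)

c = solve_triangular(A, b, lower=True)

# the strict upper triangle is never read, so perturbing it leaves the
# solution (and hence the gradient of anything downstream) unchanged
Ap = A.copy()
Ap[0, 2] += 100.0
assert np.allclose(solve_triangular(Ap, b, lower=True), c)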
def test_not_inplace():
    # Test that we can remove optimizers, including inplace optimizers

    nan_detected = [False]

    def detect_nan(fgraph, i, node, fn):
        for output in fn.outputs:
            if np.isnan(output[0]).any():
                print("*** NaN detected ***")
                debugprint(node)
                print("Inputs : %s" % [input[0] for input in fn.inputs])
                print("Outputs: %s" % [output[0] for output in fn.outputs])
                nan_detected[0] = True
                break

    x = vector("x")
    mode = MonitorMode(post_func=detect_nan)
    # mode = mode.excluding('fusion', 'inplace')
    mode = mode.excluding("local_elemwise_fusion",
                          "inplace_elemwise_optimizer")
    o = outer(x, x)
    out = log(o) * o
    f = function([x], [out], mode=mode)

    # Test that the fusion wasn't done
    assert len(f.maker.fgraph.apply_nodes) == 5
    assert not f.maker.fgraph.toposort()[-1].op.destroy_map
    try:
        old_stdout = sys.stdout
        sys.stdout = StringIO()
        f([0, 0])  # log(0) * 0 = -inf * 0 = NaN
    finally:
        sys.stdout = old_stdout

    # Test that we still detect the nan
    assert nan_detected[0]
Example #11
 def test_optimization_pipeline_float(self):
     skip_if_blas_ldflags_empty()
     self.manual_setup_method("float32")
     f = self.function([self.x, self.y], aet.outer(self.x, self.y))
     self.assertFunctionContains(f, CGer(destructive=True))
     f(self.xval, self.yval)  # DebugMode tests correctness
Example #12
 def test_int_fails(self):
     self.manual_setup_method("int32")
     f = self.function([self.x, self.y], aet.outer(self.x, self.y))
     self.assertFunctionContains0(f, CGer(destructive=True))
     self.assertFunctionContains0(f, CGer(destructive=False))
 def test_outer(self):
     f = self.function([self.x, self.y], tensor.outer(self.x, self.y))
     self.assertFunctionContains(f, ScipyGer(destructive=True))
Example #14
def flat_outer(a, b):
    return at.outer(a, b).ravel()
 def test_A_plus_scaled_outer(self):
     f = self.function(
         [self.A, self.x, self.y], self.A + 0.1 * tensor.outer(self.x, self.y)
     )
     self.assertFunctionContains(f, ScipyGer(destructive=False))
     self.run_f(f)  # DebugMode tests correctness
 def test_scaled_A_plus_scaled_outer(self):
     f = self.function(
         [self.A, self.x, self.y], 0.2 * self.A + 0.1 * tensor.outer(self.x, self.y)
     )
     self.assertFunctionContains(f, gemm_no_inplace)
     self.run_f(f)  # DebugMode tests correctness
Example #17
 def test_optimization_pipeline(self):
     skip_if_blas_ldflags_empty()
     f = self.function([self.x, self.y], at.outer(self.x, self.y))
     self.assertFunctionContains(f, CGer(destructive=True))
     f(self.xval, self.yval)  # DebugMode tests correctness