Example #1
    def L_op(self, inputs, outputs, output_gradients):
        r"""Reverse-mode gradient updates for matrix solve operation :math:`c = A^{-1} b`.

        Symbolic expression for updates taken from [#]_.

        References
        ----------
        .. [#] M. B. Giles, "An extended collection of matrix derivative results
          for forward and reverse mode automatic differentiation",
          http://eprints.maths.ox.ac.uk/1079/

        """
        A, b = inputs

        c = outputs[0]
        # `C` is the scalar cost of the entire graph
        # `output_gradients` is (dC/dc,)
        # We need to return (dC/dA, dC/db)
        c_bar = output_gradients[0]

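        # the gradient solves against A.T, which flips the triangle, so `lower` is negated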
        trans_solve_op = type(self)(**{
            k: (not getattr(self, k) if k == "lower" else getattr(self, k))
            for k in self.__props__
        })
        b_bar = trans_solve_op(A.T, c_bar)
        # force outer product if vector second input
        A_bar = -atm.outer(b_bar, c) if c.ndim == 1 else -b_bar.dot(c.T)

        return [A_bar, b_bar]
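
A quick way to sanity-check the updates used above (b_bar = A^{-T} c_bar and A_bar = -b_bar c^T, from Giles) is to compare them against a finite-difference estimate in plain NumPy. The sketch below is only an illustration of the math, not part of the Op; the cost C = c.sum() and the tolerance are arbitrary choices.

import numpy as np

rng = np.random.default_rng(0)
A = rng.normal(size=(4, 4)) + 4.0 * np.eye(4)   # keep A well conditioned
b = rng.normal(size=4)

c = np.linalg.solve(A, b)
c_bar = np.ones_like(c)               # gradient of C = c.sum() w.r.t. c

# Giles' reverse-mode updates, as in L_op above
b_bar = np.linalg.solve(A.T, c_bar)   # b_bar = A^{-T} c_bar
A_bar = -np.outer(b_bar, c)           # A_bar = -b_bar c^T

# finite-difference check of a single entry of dC/dA
eps = 1e-6
A_pert = A.copy()
A_pert[0, 1] += eps
fd = (np.linalg.solve(A_pert, b).sum() - c.sum()) / eps
assert np.isclose(A_bar[0, 1], fd, atol=1e-4)
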
Example #2
    def L_op(self, inputs, outputs, output_gradients):
        r"""
        Reverse-mode gradient updates for the matrix solve operation :math:`c = A^{-1} b`.

        Symbolic expression for updates taken from [#]_.

        References
        ----------
        .. [#] M. B. Giles, "An extended collection of matrix derivative results
          for forward and reverse mode automatic differentiation",
          http://eprints.maths.ox.ac.uk/1079/

        """
        A, b = inputs
        c = outputs[0]
        c_bar = output_gradients[0]
        trans_map = {
            "lower_triangular": "upper_triangular",
            "upper_triangular": "lower_triangular",
        }
        trans_solve_op = Solve(
            # update A_structure and lower to account for a transpose operation
            A_structure=trans_map.get(self.A_structure, self.A_structure),
            lower=not self.lower,
        )
        b_bar = trans_solve_op(A.T, c_bar)
        # force outer product if vector second input
        A_bar = -tm.outer(b_bar, c) if c.ndim == 1 else -b_bar.dot(c.T)
        if self.A_structure == "lower_triangular":
            A_bar = aet.tril(A_bar)
        elif self.A_structure == "upper_triangular":
            A_bar = aet.triu(A_bar)
        return [A_bar, b_bar]
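
The tril/triu masking at the end exists because a triangular solve never reads the entries outside its triangle, so those components of the gradient must be zero even though the dense formula -outer(b_bar, c) is not. A minimal NumPy/SciPy sketch of both points (the flipped triangle for the transposed solve, and the masking), using scipy.linalg.solve_triangular as a stand-in for the Op:

import numpy as np
from scipy.linalg import solve_triangular

rng = np.random.default_rng(1)
L = np.tril(rng.normal(size=(4, 4))) + 4.0 * np.eye(4)   # lower triangular, well conditioned
b = rng.normal(size=4)

c = solve_triangular(L, b, lower=True)
c_bar = np.ones_like(c)               # gradient of C = c.sum() w.r.t. c

# the transpose of a lower-triangular solve is an upper-triangular solve,
# which is why trans_solve_op swaps A_structure / lower above
b_bar = solve_triangular(L.T, c_bar, lower=False)
A_bar = np.tril(-np.outer(b_bar, c))  # entries above the diagonal are never read

# perturbing an above-diagonal entry does not change the output at all,
# so its gradient component really is zero
L_pert = L.copy()
L_pert[0, 3] += 1e-3
assert np.allclose(solve_triangular(L_pert, b, lower=True), c)
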
Example #3
 def L_op(self, inputs, outputs, output_gradients):
     # Modified from aesara/tensor/slinalg.py
     A, b = inputs
     c = outputs[0]
     c_bar = output_gradients[0]
     # FIXME: should a triangular A_structure dispatch to GpuCublasTriangularSolve?
     # Is handling A_structure as slinalg.py does unnecessary here?
     trans_solve_op = GpuCusolverSolve("general")
     b_bar = trans_solve_op(A.T, c_bar)
     A_bar = -tm.outer(b_bar, c) if c.ndim == 1 else -b_bar.dot(c.T)
     return [A_bar, b_bar]
Example #4
    def grad(self, inputs, cost_grad):
        """
        In defining the gradient, the Finite Fourier Transform is viewed as
        a complex-differentiable function of a complex variable.
        """
        a = inputs[0]
        n = inputs[1]
        axis = inputs[2]
        grad = cost_grad[0]
        if not isinstance(axis, TensorConstant):
            raise NotImplementedError(
                f"{self.__class__.__name__}: gradient is currently implemented"
                " only for axis being an Aesara constant")
        axis = int(axis.data)
        # note that the number of actual elements in the input we differentiate
        # with respect to is independent of possible padding or truncation:
        elem = arange(0, shape(a)[axis], 1)
        # accounts for padding:
        freq = arange(0, n, 1)
        outer_res = outer(freq, elem)
        pow_outer = exp(((-2 * math.pi * 1j) * outer_res) / (1.0 * n))
        res = tensordot(grad, pow_outer, (axis, 0))

        # This would be simpler but not implemented by aesara:
        # res = switch(lt(n, shape(a)[axis]),
        # set_subtensor(res[...,n::], 0, False, False), res)

        # Instead we resort to that to account for truncation:
        flip_shape = list(np.arange(0, a.ndim)[::-1])
        res = res.dimshuffle(flip_shape)
        res = switch(
            lt(n, shape(a)[axis]),
            set_subtensor(res[n:], 0, False, False),
            res,
        )
        res = res.dimshuffle(flip_shape)

        # ensures that the gradient shape conforms to the input shape:
        out_shape = (list(np.arange(0, axis)) + [a.ndim - 1] +
                     list(np.arange(axis, a.ndim - 1)))
        res = res.dimshuffle(*out_shape)
        return [res, None, None]
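
The matrix built above as `pow_outer` is just the DFT matrix W[f, e] = exp(-2*pi*i*f*e / n), and the gradient is the output gradient pulled back through that linear map (the tensordot over the transformed axis). A minimal NumPy sketch of the identity the construction relies on, for a 1-D input with no padding or truncation:

import numpy as np

a = np.random.default_rng(2).normal(size=8)
n = a.shape[0]                        # no padding or truncation

freq = np.arange(n)
elem = np.arange(n)
W = np.exp(-2j * np.pi * np.outer(freq, elem) / n)   # same matrix as `pow_outer`

# the forward op is a matrix product with W along the chosen axis, so its
# gradient is the corresponding product with W applied to the output gradient
assert np.allclose(W @ a, np.fft.fft(a))
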
Example #5
    def L_op(self, inputs, outputs, output_gradients):
        # Modified from aesara/tensor/slinalg.py
        A, b = inputs
        c = outputs[0]
        c_bar = output_gradients[0]

        trans_solve_op = GpuCublasTriangularSolve(not self.lower)
        b_bar = trans_solve_op(A.T, c_bar)

        A_bar = -tm.outer(b_bar, c) if c.ndim == 1 else -b_bar.dot(c.T)

        if self.lower:
            A_bar = aet.tril(A_bar)
        else:
            A_bar = aet.triu(A_bar)
        return [A_bar, b_bar]
Example #6
def kron(a, b):
    """Kronecker product.

    Same as scipy.linalg.kron(a, b).

    Parameters
    ----------
    a: array_like
    b: array_like

    Returns
    -------
    array_like with a.ndim + b.ndim - 2 dimensions

    Notes
    -----
    numpy.kron(a, b) != scipy.linalg.kron(a, b)!
    They do not have the same shape and order when
    a.ndim and b.ndim are not both 2.

    """
    a = as_tensor_variable(a)
    b = as_tensor_variable(b)
    if a.ndim + b.ndim <= 2:
        raise TypeError(
            "kron: inputs dimensions must sum to 3 or more. "
            f"You passed {int(a.ndim)} and {int(b.ndim)}."
        )
    o = atm.outer(a, b)
    o = o.reshape(at.concatenate((a.shape, b.shape)), a.ndim + b.ndim)
    shf = o.dimshuffle(0, 2, 1, *list(range(3, o.ndim)))
    if shf.ndim == 3:
        shf = o.dimshuffle(1, 0, 2)
        o = shf.flatten()
    else:
        o = shf.reshape(
            (o.shape[0] * o.shape[2], o.shape[1] * o.shape[3])
            + tuple(o.shape[i] for i in range(4, o.ndim))
        )
    return o
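
The reshape/dimshuffle dance above is easiest to see in plain NumPy for the 2-D case, where the same outer-product / axis-interleave / reshape steps reproduce numpy.kron exactly (this is only an illustration of the layout trick, not the Op's code path):

import numpy as np

a = np.arange(6.0).reshape(2, 3)
b = np.arange(8.0).reshape(4, 2)

o = np.multiply.outer(a, b)           # shape (2, 3, 4, 2): a's axes then b's axes
o = o.transpose(0, 2, 1, 3)           # interleave: (a rows, b rows, a cols, b cols)
k = o.reshape(a.shape[0] * b.shape[0], a.shape[1] * b.shape[1])

assert np.allclose(k, np.kron(a, b))
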
Example #7
 def test_scaled_A_plus_scaled_outer(self):
     f = self.function([self.A, self.x, self.y],
                       0.2 * self.A + 0.1 * outer(self.x, self.y))
     self.assertFunctionContains(f, gemm_no_inplace)
     self.run_f(f)  # DebugMode tests correctness
Example #8
 def test_A_plus_scaled_outer(self):
     f = self.function([self.A, self.x, self.y],
                       self.A + 0.1 * outer(self.x, self.y))
     self.assertFunctionContains(f, ScipyGer(destructive=False))
     self.run_f(f)  # DebugMode tests correctness
Example #9
 def test_outer(self):
     f = self.function([self.x, self.y], outer(self.x, self.y))
     self.assertFunctionContains(f, ScipyGer(destructive=True))
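
All three tests check that an explicit `outer(x, y)` update is compiled down to a single BLAS rank-1 call (GER, or GEMM when A is scaled as well). The numerical identity being relied on can be sketched with SciPy's BLAS wrapper; the call below assumes scipy.linalg.blas.dger's (alpha, x, y, a=...) convention:

import numpy as np
from scipy.linalg.blas import dger

rng = np.random.default_rng(3)
A = rng.normal(size=(3, 4))
x = rng.normal(size=3)
y = rng.normal(size=4)

# GER performs the rank-1 update alpha * outer(x, y) + A in one BLAS call,
# which is what the rewrite substitutes for the explicit graph in the tests above
updated = dger(0.1, x, y, a=A)
assert np.allclose(updated, A + 0.1 * np.outer(x, y))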