Example #1
0
File: adam.py Project: wesselb/varz
    def step(self, x, grad):
        """Perform a gradient step.

        The first and second moment estimates `self.m` and `self.v` are updated
        in-place, bias-corrected using the iteration counter `self.i`, and then
        used to update `x`.

        Args:
            x (tensor): Current input value. This value will be updated in-place.
            grad (tensor): Current gradient.

        Returns:
            tensor: `x` after updating `x` in-place.
        """
        # Lazily create the moment estimates from `x` on the first call.
        if self.m is None or self.v is None:
            self.m = B.zeros(x)
            self.v = B.zeros(x)

        # Update estimates of moments.
        self.m *= self.beta1
        self.m += (1 - self.beta1) * grad
        self.v *= self.beta2
        self.v += (1 - self.beta2) * grad ** 2

        # Correct for bias of initialisation. `self.i` counts completed steps, so
        # the current step is number `self.i + 1`.
        step_number = self.i + 1
        m_corr = self.m / (1 - self.beta1 ** step_number)
        v_corr = self.v / (1 - self.beta2 ** step_number)

        # Perform update.
        if self.local_rates:
            # Local rates: every element is scaled by its own second-moment
            # estimate, as in the standard Adam update.
            denom = B.sqrt(v_corr) + self.epsilon
        else:
            # Global rate: all elements share one scale derived from the mean
            # second-moment estimate.
            denom = B.sqrt(B.mean(v_corr)) + self.epsilon
        x -= self.rate * m_corr / denom

        # Increase iteration number.
        self.i += 1

        return x
Example #2
0
def test_logpdf_missing_data():
    """Check that the OILMM evidence matches the reference model with missing data."""
    # Model configuration.
    n_latent = 3
    obs_noise = 1e-2
    latent_noises = 2e-2 * B.ones(n_latent)
    kernels = [0.5 * EQ().stretch(0.75) for _ in range(n_latent)]
    x = B.linspace(0, 10, 20)

    # Stack two orthogonal matrices on top of each other and rescale, so that
    # the missing data approximation is exact.
    u_top = B.svd(B.randn(n_latent, n_latent))[0]
    u_bottom = B.svd(B.randn(n_latent, n_latent))[0]
    stacked = B.concat(u_top, u_bottom, axis=0)
    u = Dense(stacked / B.sqrt(2))

    s_sqrt = Diagonal(B.rand(n_latent))

    # Reference model, which also generates the test data.
    reference = ILMMPP(kernels, u @ s_sqrt, obs_noise, latent_noises)
    y = reference.sample(x, latent=False)

    # Drop two blocks of observations in a way that retains orthogonality.
    y[5:10, 3:] = np.nan
    y[10:, :3] = np.nan

    # Model under test.
    model = OILMM(kernels, u, s_sqrt, obs_noise, latent_noises)

    # The evidence should still be exact.
    expected = reference.logpdf(x, y)
    actual = model.logpdf(x, y)
    approx(expected, actual, atol=1e-7)
Example #3
0
def sqrt(a: AbstractMatrix):
    """Take the element-wise square root of a matrix via its dense form.

    A `ToDenseWarning` is issued if `a` is structured, because the result is
    always returned as a `Dense` matrix.

    Args:
        a (matrix): Matrix to take the element-wise square root of.

    Returns:
        matrix: Element-wise square root of `a` as a `Dense` matrix.
    """
    if structured(a):
        message = (
            f"Taking an element-wise square root of {a}: converting to dense."
        )
        warn_upmodule(message, category=ToDenseWarning)
    dense_values = B.dense(a)
    return Dense(B.sqrt(dense_values))
Example #4
0
    def predict(self, x, latent=False, return_variances=False):
        """Predict.

        Predictions of the underlying model are pulled through the mixing
        matrix `self.h`.

        Args:
            x (matrix): Input locations to predict at.
            latent (bool, optional): Predict noiseless processes. Defaults
                to `False`.
            return_variances (bool, optional): Return means and variances
                instead. Defaults to `False`.

        Returns:
            tuple[matrix]: Tuple containing means, lower 95% central credible
                bound, and upper 95% central credible bound if
                `return_variances` is `False`, and means and variances
                otherwise.
        """
        latent_mean, latent_var = self.model.predict(
            x, latent=latent, return_variances=True
        )

        # Means mix through `h`; variances mix through the element-wise square.
        mean = B.dense(B.matmul(latent_mean, self.h, tr_b=True))
        var = B.dense(B.matmul(latent_var, self.h ** 2, tr_b=True))

        # Observed predictions additionally carry the observation noise.
        if not latent:
            var = var + self.noise_obs

        if return_variances:
            return mean, var

        half_width = 1.96 * B.sqrt(var)
        return mean, mean - half_width, mean + half_width
Example #5
0
    def predict(self, x, latent=False, return_variances=False):
        """Predict.

        Means and variances are computed separately for every process in
        `self.fs` and stacked along the second axis.

        Args:
            x (matrix): Input locations to predict at.
            latent (bool, optional): Predict noiseless processes. Defaults
                to `False`.
            return_variances (bool, optional): Return means and variances
                instead. Defaults to `False`.

        Returns:
            tuple: Tuple containing means, lower 95% central credible bound,
                and upper 95% central credible bound if `return_variances` is
                `False`, and means and variances otherwise.
        """
        per_process_means = [B.squeeze(B.dense(f.mean(x))) for f in self.fs]
        mean = B.stack(*per_process_means, axis=1)
        per_process_vars = [B.squeeze(f.kernel.elwise(x)) for f in self.fs]
        var = B.stack(*per_process_vars, axis=1)

        # Observed predictions additionally carry the per-process noises.
        if not latent:
            var = var + self.noises[None, :]

        if return_variances:
            return mean, var

        half_width = 1.96 * B.sqrt(var)
        return mean, mean - half_width, mean + half_width
Example #6
0
def test_jit_to_numpy(check_lazy_shapes):
    """Check that a JIT-compiled function using `B.jit_to_numpy` agrees across inputs."""

    def masked_sum(values):
        # Convert the mask with `B.jit_to_numpy` before using it as an index.
        mask = B.jit_to_numpy(~B.isnan(values))
        return B.sum(values[mask])

    masked_sum = B.jit(masked_sum)

    # Presumably the square root of random draws introduces NaNs, which the
    # mask then has to filter out.
    x = B.sqrt(B.randn(100))
    result_plain = masked_sum(x)
    result_jax = masked_sum(jnp.array(x))
    approx(result_plain, result_jax)
Example #7
0
def safe_sqrt(x):
    """Take a square root in a way that is safe to use in AD.

    The input is bounded from below by `1e-30`, cast to the data type of `x`,
    before the square root is taken.

    Args:
        x (tensor): Tensor to take square root of.

    Returns:
        tensor: Square root of `x`.
    """
    lower_bound = B.cast(B.dtype(x), 1e-30)
    bounded = B.maximum(x, lower_bound)
    return B.sqrt(bounded)
Example #8
0
def factor_to_scale(factor):
    """Convert a factor to a length scale.

    Computes `1 / sqrt(4 * factor / pi)`.

    Args:
        factor (tensor): Factor to convert to a length scale.

    Returns:
        tensor: Equivalent length scale.
    """
    inverse_scale = B.sqrt(4 * factor / B.pi)
    return 1 / inverse_scale
Example #9
0
def plot_compare(name1, name2, y_label=True, y_ticks=True, style2=None):
    """Compare prediction for the function for two models.

    Only times from 150 onwards are shown. For both models, the mean and the
    band `mean +- 1.96 * sqrt(var)` are plotted, followed by the training and
    test data.

    Args:
        name1 (str): Key into `preds_f` of the first model.
        name2 (str): Key into `preds_f` of the second model.
        y_label (bool, optional): Set the label of the y-axis. Defaults to `True`.
        y_ticks (bool, optional): Keep the tick labels of the y-axis. Defaults
            to `True`.
        style2 (str, optional): Plotting style for the second model. Defaults
            to `"pred2"`.
    """
    _, mean1, var1 = preds_f[name1]
    _, mean2, var2 = preds_f[name2]

    keep = t_pred >= 150
    t = t_pred[keep]

    if style2 is None:
        style2 = "pred2"

    models = (
        (name1, mean1[keep], var1[keep], "pred"),
        (name2, mean2[keep], var2[keep], style2),
    )
    for name, mean, var, style in models:
        # Compute the bounds of the band once and reuse them for all curves.
        error = 1.96 * B.sqrt(var)
        lower = mean - error
        upper = mean + error

        plt.plot(t, mean, style=style, label=name.upper())
        plt.fill_between(t, lower, upper, style=style)
        plt.plot(t, lower, style=style, lw=0.5)
        plt.plot(t, upper, style=style, lw=0.5)

    # The training data is untransformed by the normaliser before plotting.
    keep_train = t_train >= 150
    plt.scatter(
        t_train[keep_train],
        normaliser.untransform(y_train[keep_train]),
        style="train",
        label="Train",
    )
    keep_test = t_test >= 150
    plt.scatter(t_test[keep_test], y_test[keep_test], style="test", label="Test")

    plt.xlim(t[0], t[-1])
    plt.xlabel(f"Day of {args.year}")
    if y_label:
        plt.ylabel("Crude Oil (USD)")
    if not y_ticks:
        plt.gca().set_yticklabels([])
    tweak(legend_loc="upper right")
Example #10
0
def root(a: B.Numeric):  # pragma: no cover
    """Compute the positive square root of a positive-definite matrix.

    The root is assembled from the SVD of `a` by taking the square roots of
    the singular values.

    Args:
        a (matrix): Matrix to compute square root of.

    Returns:
        matrix: Positive square root of `a`.
    """
    _assert_square_root(a)
    left, singular_values, _ = B.svd(a)
    scaling = B.diag(B.sqrt(singular_values))
    return B.mm(left, scaling, left, tr_c=True)
Example #11
0
    def k_h(self):
        """Get the kernel function of the filter.

        The kernel is an `EQ` kernel stretched according to `self.gamma`,
        multiplied by a window determined by `self.alpha` and, if
        `self.causal` is set, by an indicator of `t >= 0`.

        Returns:
            :class:`mlkernels.Kernel`: Kernel for :math:`h`.
        """
        # Express `self.gamma` as a regular length scale.
        length_scale = B.sqrt(1 / (2 * self.gamma))

        # Kernel of the filter before applying the window.
        kernel = EQ().stretch(length_scale)

        # Apply the window.
        kernel *= lambda t: B.exp(-self.alpha * t**2)

        # Enforce the causality constraint.
        if self.causal:
            kernel *= lambda t: B.cast(self.dtype, t >= 0)

        return kernel
Example #12
0
    def sample(self, x, latent=False):
        """Sample from the model.

        A sample of the underlying model is pulled through the mixing matrix
        `self.h`. Unless `latent` is set, noise scaled by the square root of
        `self.noise_obs` is added.

        Args:
            x (matrix): Locations to sample at.
            latent (bool, optional): Sample noiseless processes. Defaults
                to `False`.

        Returns:
            matrix: Sample.
        """
        latent_sample = self.model.sample(x, latent=latent)
        sample = B.dense(B.matmul(latent_sample, self.h, tr_b=True))
        if latent:
            return sample
        noise = B.sqrt(self.noise_obs) * B.randn(sample)
        return sample + noise
Example #13
0
def sqrt(a: Constant):
    """Take the element-wise square root of a constant matrix.

    Args:
        a (:class:`Constant`): Matrix to take the square root of.

    Returns:
        :class:`Constant`: Matrix of the same size holding the square root of
            the constant of `a`.
    """
    root_const = B.sqrt(a.const)
    return Constant(root_const, a.rows, a.cols)
Example #14
0
def cholesky(a: Constant):
    """Compute a Cholesky-type factor of a constant matrix, caching the result.

    The factor is again a constant matrix, with constant
    `sqrt(a.const) / sqrt(a.cols)`. It is stored in `a.cholesky` and reused on
    later calls.

    Args:
        a (:class:`Constant`): Matrix to factorise.

    Returns:
        :class:`Constant`: Cached factor of `a`.
    """
    _assert_square_cholesky(a)
    if a.cholesky is not None:
        return a.cholesky
    scaled_const = B.divide(B.sqrt(a.const), B.sqrt(a.cols))
    a.cholesky = Constant(scaled_const, a.rows, a.cols)
    return a.cholesky
Example #15
0
def cholesky(a: Diagonal):
    """Compute the Cholesky factor of a diagonal matrix, caching the result.

    The factor is the diagonal matrix of the square roots of the diagonal of
    `a`. It is stored in `a.cholesky` and reused on later calls.

    Args:
        a (:class:`Diagonal`): Matrix to factorise.

    Returns:
        :class:`Diagonal`: Cached factor of `a`.
    """
    _assert_square_cholesky(a)
    if a.cholesky is not None:
        return a.cholesky
    a.cholesky = Diagonal(B.sqrt(a.diag))
    return a.cholesky
Example #16
0
def sqrt(a: UpperTriangular):
    """Take the element-wise square root of an upper-triangular matrix.

    Args:
        a (:class:`UpperTriangular`): Matrix to take the square root of.

    Returns:
        :class:`UpperTriangular`: Element-wise square root of `a`.
    """
    root_mat = B.sqrt(a.mat)
    return UpperTriangular(root_mat)
Example #17
0
def sqrt(a: Diagonal):
    """Take the element-wise square root of a diagonal matrix.

    Args:
        a (:class:`Diagonal`): Matrix to take the square root of.

    Returns:
        :class:`Diagonal`: Diagonal matrix holding the square roots of the
            diagonal of `a`.
    """
    root_diag = B.sqrt(a.diag)
    return Diagonal(root_diag)
Example #18
0
def sqrt(a: Kronecker):
    """Take the element-wise square root of a Kronecker product.

    The square root is applied to both factors separately.

    Args:
        a (:class:`Kronecker`): Matrix to take the square root of.

    Returns:
        :class:`Kronecker`: Kronecker product of the square roots of the
            factors of `a`.
    """
    root_left = B.sqrt(a.left)
    root_right = B.sqrt(a.right)
    return Kronecker(root_left, root_right)
Example #19
0
    def __init__(
        self,
        scheme="structured",
        noise=5e-2,
        fix_noise=False,
        alpha=None,
        alpha_t=None,
        window=None,
        fix_window=False,
        lam=None,
        gamma=None,
        gamma_t=None,
        a=None,
        b=None,
        m_max=None,
        m_max_cap=150,
        n_z=None,
        scale=None,
        fix_scale=False,
        ms=None,
        n_u=None,
        n_u_cap=300,
        t_u=None,
        extend_t_z=None,
        t=None,
    ):
        """Initialise the model parameters, deriving defaults for unset ones.

        Values that are not given are derived from other arguments in a fixed
        order, so which arguments are required depends on which are omitted:
        `window` is needed when `alpha` is not given and when `n_u` must be
        derived, `scale` is needed when `lam` is not given and when `n_u` or
        `m_max` must be derived, and `t` is needed when `a` or `b` is not given.

        Args:
            scheme (str, optional): Passed to `AbstractGPCM.__init__`. Defaults
                to `"structured"`.
            noise (scalar, optional): Stored as `self.noise`. Defaults to `5e-2`.
            fix_noise (bool, optional): Stored as `self.fix_noise`. Defaults to
                `False`.
            alpha (scalar, optional): Stored as `self.alpha`. Defaults to
                `1 / window`.
            alpha_t (scalar, optional): Stored as `self.alpha_t`. Defaults to
                `sqrt(2 * alpha)`.
            window (scalar, optional): Window length used to derive `alpha` and
                `n_u`.
            fix_window (bool, optional): Stored as `self.fix_window`. Defaults
                to `False`.
            lam (scalar, optional): Stored as `self.lam`. Defaults to
                `1 / scale`.
            gamma (scalar, optional): Defaults to one over twice the spacing of
                the first two entries of `t_u`. The stored value `self.gamma`
                is at most `alpha / 1.5`.
            gamma_t (scalar, optional): Stored as `self.gamma_t`. Defaults to
                `sqrt(2 * gamma)`, computed from `gamma` before it is capped.
            a (scalar, optional): Stored as `self.a`. Defaults to
                `min(t) - max(t_u)`.
            b (scalar, optional): Stored as `self.b`. Defaults to `max(t)`.
            m_max (int, optional): Stored as `self.m_max`. Defaults to
                `ceil(n_z / 2)` if `n_z` is given and to `ceil((b - a) / scale)`
                capped at `m_max_cap` otherwise.
            m_max_cap (int, optional): Cap for a derived `m_max`. Defaults to
                `150`.
            n_z (int, optional): Only used to derive `m_max`. `self.n_z` is
                always set to `len(ms)`.
            scale (scalar, optional): Length scale used to derive `lam`, `n_u`,
                and `m_max`.
            fix_scale (bool, optional): Stored as `self.fix_scale`. Defaults to
                `False`.
            ms (vector, optional): Stored as `self.ms`. Defaults to
                `B.range(2 * m_max + 1)`.
            n_u (int, optional): Number of inducing points for the filter.
                Defaults to `ceil(4 * window / scale)` capped at `n_u_cap` if
                `t_u` is not given and to the length of `t_u` otherwise.
            n_u_cap (int, optional): Cap for a derived `n_u`. Defaults to `300`.
            t_u (vector, optional): Stored as `self.t_u`. Defaults to `n_u`
                points spaced linearly between `0` and `pi / alpha`.
            extend_t_z (optional): Not used in this constructor.
                NOTE(review): confirm whether it is consumed elsewhere.
            t (vector, optional): Time points used to derive `a` and `b`.
        """
        AbstractGPCM.__init__(self, scheme)

        # Ensure that `t` is a vector.
        if t is not None:
            t = np.array(t)

        # Store whether to fix the length scale, window length, and noise.
        self.fix_scale = fix_scale
        self.fix_window = fix_window
        self.fix_noise = fix_noise

        # First initialise optimisable model parameters.
        if alpha is None:
            alpha = 1 / window

        if alpha_t is None:
            alpha_t = B.sqrt(2 * alpha)

        if lam is None:
            lam = 1 / scale

        self.noise = noise
        self.alpha = alpha
        self.alpha_t = alpha_t
        self.lam = lam

        # For convenience, also store the extent of the filter.
        self.extent = 4 / self.alpha

        # Then initialise fixed variables.
        if t_u is None:
            # Place inducing points until the filter is `exp(-pi) = 4.32%`.
            t_u_max = B.pi / self.alpha

            # `n_u` is required to initialise `t_u`.
            if n_u is None:
                # Set it to two inducing points per wiggle, multiplied by two to account
                # for the longer range.
                n_u = int(np.ceil(2 * 2 * window / scale))
                if n_u > n_u_cap:
                    warnings.warn(
                        f"Using {n_u} inducing points for the filter, which is too "
                        f"many. It is capped to {n_u_cap}.",
                        category=UserWarning,
                    )
                    n_u = n_u_cap

            t_u = B.linspace(0, t_u_max, n_u)

        # Only reached with `n_u` unset when `t_u` was given explicitly.
        if n_u is None:
            n_u = B.shape(t_u)[0]

        # The interval `[a, b]` defaults to the range of `t`, extended to the left
        # by the largest entry of `t_u`.
        if a is None:
            a = B.min(t) - B.max(t_u)

        if b is None:
            b = B.max(t)

        # First, try to determine `m_max` from `n_z`.
        if m_max is None and n_z is not None:
            m_max = int(np.ceil(n_z / 2))

        # Otherwise, derive it from the length of `[a, b]` relative to `scale`.
        if m_max is None:
            freq = 1 / scale
            m_max = int(np.ceil(freq * (b - a)))
            if m_max > m_max_cap:
                warnings.warn(
                    f"Using {m_max} inducing features, which is too "
                    f"many. It is capped to {m_max_cap}.",
                    category=UserWarning,
                )
                m_max = m_max_cap

        if ms is None:
            ms = B.range(2 * m_max + 1)

        self.a = a
        self.b = b
        self.m_max = m_max
        self.ms = ms
        # Note that this is the length of `ms`, not the argument `n_z`.
        self.n_z = len(ms)
        self.n_u = n_u
        self.t_u = t_u

        # Initialise dependent model parameters.
        if gamma is None:
            # Requires `t_u` to have at least two entries.
            gamma = 1 / (2 * (self.t_u[1] - self.t_u[0]))

        # NOTE(review): `gamma_t` is derived from `gamma` before the cap below is
        # applied, so it can disagree with `self.gamma` -- confirm this is intended.
        if gamma_t is None:
            gamma_t = B.sqrt(2 * gamma)

        # Must ensure that `gamma < alpha`.
        self.gamma = min(gamma, self.alpha / 1.5)
        self.gamma_t = gamma_t
Example #20
0
def sqrt(a: LowerTriangular):
    """Take the element-wise square root of a lower-triangular matrix.

    Args:
        a (:class:`LowerTriangular`): Matrix to take the square root of.

    Returns:
        :class:`LowerTriangular`: Element-wise square root of `a`.
    """
    root_mat = B.sqrt(a.mat)
    return LowerTriangular(root_mat)