예제 #1
0
def test_dtype_wb_promotion():
    """Check the data type of the sum of a low-rank and a diagonal matrix."""
    # Each case: (dtype of the low-rank factor, dtype of the diagonal,
    # expected dtype of the sum).
    cases = [
        (int, int, np.int64),
        (float, int, np.float64),
        (int, float, np.float64),
    ]
    for lr_dtype, diag_dtype, expected in cases:
        wb = LowRank(B.ones(lr_dtype, 5, 5)) + Diagonal(B.ones(diag_dtype, 5))
        assert B.dtype(wb) == expected
예제 #2
0
def test_kronecker_formatting():
    """Check `str` and `repr` of a Kronecker product of two diagonal matrices."""
    kron = Kronecker(Diagonal(B.ones(2)), Diagonal(B.ones(3)))

    expected_str = "<Kronecker product: shape=6x6, dtype=float64>"
    expected_repr = (
        "<Kronecker product: shape=6x6, dtype=float64\n"
        " left=<diagonal matrix: shape=2x2, dtype=float64\n"
        "       diag=[1. 1.]>\n"
        " right=<diagonal matrix: shape=3x3, dtype=float64\n"
        "        diag=[1. 1. 1.]>>"
    )

    assert str(kron) == expected_str
    assert repr(kron) == expected_repr
예제 #3
0
    def _compute(self, measure):
        """Compute and cache the quantities of the inducing point approximation.

        The kernel matrix `K_z`, the matrix `A`, the optimal mean `mu`, and the
        ELBO are written to the corresponding `_*_store` dictionaries under the
        key `id(measure)`.

        Args:
            measure: Object providing `kernels` and `means`, which are indexed by
                the processes of `self.fdd` and `self.u`.

        Raises:
            RuntimeError: If the noise kernel matrix is not a `Diagonal`.
        """
        # Extract processes and inputs.
        p_x, x = self.fdd.p, self.fdd.x
        p_z, z = self.u.p, self.u.x

        # Construct the necessary kernel matrices.
        K_zx = measure.kernels[p_z, p_x](z, x)
        K_z = convert(measure.kernels[p_z](z), AbstractMatrix)
        self._K_z_store[id(measure)] = K_z

        # Evaluating `e.kernel(x)` will yield incorrect results if `x` is a
        # `MultiInput`, because `x` then still designates the particular components
        # of `f`. Fix that by instead designating the elements of `e`.
        if isinstance(x, MultiInput):
            x_n = MultiInput(*(e(fdd.x)
                               for e, fdd in zip(self.e.kernel.ps, x.get())))
        else:
            x_n = x

        # Construct the noise kernel matrix.
        K_n = self.e.kernel(x_n)

        # The approximation can only handle diagonal noise matrices.
        if not isinstance(K_n, Diagonal):
            raise RuntimeError("Kernel matrix of noise must be diagonal.")

        # And construct the components for the inducing point approximation.
        L_z = B.cholesky(K_z)
        A = B.add(B.eye(K_z), B.iqf(K_n, B.transpose(B.solve(L_z, K_zx))))
        self._A_store[id(measure)] = A
        # Observations with the mean of `e` and the mean of the process at `x`
        # subtracted.
        y_bar = uprank(self.y) - self.e.mean(x_n) - measure.means[p_x](x)
        prod_y_bar = B.solve(L_z, B.iqf(K_n, B.transpose(K_zx), y_bar))

        # Compute the optimal mean.
        mu = B.add(
            measure.means[p_z](z),
            B.iqf(A, B.solve(L_z, K_z), prod_y_bar),
        )
        self._mu_store[id(measure)] = mu

        # Compute the ELBO.
        # NOTE: The calculation of `trace_part` asserts that `K_n` is diagonal.
        # The rest, however, is completely generic.
        trace_part = B.ratio(
            Diagonal(measure.kernels[p_x].elwise(x)[:, 0]) -
            Diagonal(B.iqf_diag(K_z, K_zx)),
            K_n,
        )
        det_part = B.logdet(2 * B.pi * K_n) + B.logdet(A)
        # The `[0, 0]` indexing takes a single entry of each quadratic form
        # (assumes the results are 1x1 -- TODO confirm).
        iqf_part = B.iqf(K_n, y_bar)[0, 0] - B.iqf(A, prod_y_bar)[0, 0]
        self._elbo_store[id(measure)] = -0.5 * (trace_part + det_part +
                                                iqf_part)
예제 #4
0
 def __call__(self, x, y):
     """Evaluate for two weighted inputs.

     Args:
         x: First input, exposing weights through `.w` and data through `.get()`.
         y: Second input, with the same interface.

     Returns:
         matrix: `Diagonal(1 / x.w)` if both inputs wrap the identical object,
             and otherwise a `Zero` matrix sized by the numbers of elements of
             the two inputs.
     """
     # Read the weights of both inputs before unwrapping the data.
     weights_x, _ = x.w, y.w
     data_x, data_y = x.get(), y.get()

     if data_x is not data_y:
         return Zero(B.dtype(data_x), num_elements(data_x), num_elements(data_y))
     return Diagonal(1 / weights_x)
예제 #5
0
    def diagonalise(self):
        """Construct a version of this distribution without correlations.

        Returns:
            :class:`.Normal`: Distribution with the same mean and with the
                diagonal matrix built from `var_diag` as its variance.
        """
        mean = self.mean
        diagonal_var = Diagonal(self.var_diag)
        return Normal(mean, diagonal_var)
예제 #6
0
def _noise_as_matrix(noise: B.Numeric, dtype: B.DType, n: int):
    """Convert a numeric noise specification into a matrix.

    Args:
        noise (numeric): Noise, given as a scalar, a rank-1 tensor, or a tensor of
            any other rank.
        dtype (dtype): Data type. Not used in this function.
        n (int): Size passed to `B.fill_diag` when `noise` is a scalar.

    Returns:
        matrix: `B.fill_diag(noise, n)` for a scalar, a `Diagonal` for a rank-1
            tensor, and a `Dense` in all other cases.
    """
    if B.isscalar(noise):
        return B.fill_diag(noise, n)
    if B.rank(noise) == 1:
        return Diagonal(noise)
    return Dense(noise)
예제 #7
0
def test_logpdf_missing_data():
    """Check that the OILMM log-pdf matches the reference model with missing data."""
    # Model configuration.
    m = 3
    noise = 1e-2
    latent_noises = 2e-2 * B.ones(m)
    kernels = [0.5 * EQ().stretch(0.75) for _ in range(m)]
    x = B.linspace(0, 10, 20)

    # Stack two orthogonal matrices, which makes the missing data approximation
    # exact.
    orthogonal_blocks = [B.svd(B.randn(m, m))[0] for _ in range(2)]
    u = Dense(B.concat(*orthogonal_blocks, axis=0) / B.sqrt(2))
    s_sqrt = Diagonal(B.rand(m))

    # Reference model, which is also used to generate the test data.
    reference = ILMMPP(kernels, u @ s_sqrt, noise, latent_noises)
    y = reference.sample(x, latent=False)

    # Discard observations in a way that retains orthogonality.
    y[5:10, 3:] = np.nan
    y[10:, :3] = np.nan

    # The model under test should still give the exact evidence.
    model = OILMM(kernels, u, s_sqrt, noise, latent_noises)
    approx(reference.logpdf(x, y), model.logpdf(x, y), atol=1e-7)
예제 #8
0
 def __call__(self, x, y):
     """Evaluate for two weighted inputs.

     Args:
         x: First input, exposing weights through `.w` and data through `.get()`.
         y: Second input, with the same interface.

     Returns:
         matrix: `Diagonal(1 / x.w)` if both inputs wrap the identical object,
             and otherwise a `Zero` matrix whose sizes are the leading
             dimensions of the upranked data.
     """
     # Read the weights of both inputs before unwrapping the data.
     weights_x, _ = x.w, y.w
     data_x, data_y = x.get(), y.get()

     if data_x is data_y:
         return Diagonal(1 / weights_x)

     up_x = uprank(data_x)
     up_y = uprank(data_y)
     return Zero(B.dtype(up_x), B.shape(up_x)[0], B.shape(up_y)[0])
예제 #9
0
    def _compute(self):
        """Compute and store the quantities of the inducing point approximation.

        Sets `self._K_z`, `self._A`, the optimal mean `self._mu`, and the ELBO
        `self._elbo`, using the kernels and means of `self.graph`.

        Raises:
            RuntimeError: If the noise kernel matrix is not a `Diagonal`.
        """
        # Extract processes.
        p_x, x = type_parameter(self.x), self.x.get()
        p_z, z = type_parameter(self.z), self.z.get()

        # Construct the necessary kernel matrices.
        K_zx = self.graph.kernels[p_z, p_x](z, x)
        self._K_z = convert(self.graph.kernels[p_z](z), AbstractMatrix)

        # Evaluating `e.kernel(x)` will yield incorrect results if `x` is a
        # `MultiInput`, because `x` then still designates the particular
        # components of `f`. Fix that by instead designating the elements of
        # `e`.
        if isinstance(x, MultiInput):
            x_n = MultiInput(*(p(xi.get())
                               for p, xi in zip(self.e.kernel.ps, x.get())))
        else:
            x_n = x

        # Construct the noise kernel matrix.
        K_n = self.e.kernel(x_n)

        # The approximation can only handle diagonal noise matrices.
        if not isinstance(K_n, Diagonal):
            raise RuntimeError('Kernel matrix of noise must be diagonal.')

        # And construct the components for the inducing point approximation.
        L_z = B.cholesky(self._K_z)
        self._A = B.add(B.eye(self._K_z),
                        B.iqf(K_n, B.transpose(B.solve(L_z, K_zx))))
        # Observations with the mean of `e` and the mean of the process at `x`
        # subtracted.
        y_bar = uprank(self.y) - self.e.mean(x_n) - self.graph.means[p_x](x)
        prod_y_bar = B.solve(L_z, B.iqf(K_n, B.transpose(K_zx), y_bar))

        # Compute the optimal mean.
        self._mu = B.add(self.graph.means[p_z](z),
                         B.iqf(self._A, B.solve(L_z, self._K_z), prod_y_bar))

        # Compute the ELBO.
        # NOTE: The calculation of `trace_part` asserts that `K_n` is diagonal.
        #       The rest, however, is completely generic.
        trace_part = B.ratio(Diagonal(self.graph.kernels[p_x].elwise(x)[:, 0]) -
                             Diagonal(B.iqf_diag(self._K_z, K_zx)), K_n)
        det_part = B.logdet(2 * B.pi * K_n) + B.logdet(self._A)
        # The `[0, 0]` indexing takes a single entry of each quadratic form
        # (assumes the results are 1x1 -- TODO confirm).
        iqf_part = B.iqf(K_n, y_bar)[0, 0] - B.iqf(self._A, prod_y_bar)[0, 0]
        self._elbo = -0.5 * (trace_part + det_part + iqf_part)
예제 #10
0
    def _compute(self, measure):
        """Compute and cache the quantities of the inducing point approximation.

        The kernel matrix `K_z`, the matrix `A`, the optimal mean `mu`, and the
        ELBO are written to the corresponding `_*_store` dictionaries under the
        key `id(measure)`.

        Args:
            measure: Object providing `kernels` and `means`, which are indexed by
                the processes of `self.fdd` and `self.u`.

        Raises:
            RuntimeError: If the observation noise `self.fdd.noise` is not a
                `Diagonal`.
        """
        # Extract processes and inputs.
        p_x, x, noise_x = self.fdd.p, self.fdd.x, self.fdd.noise
        p_z, z, noise_z = self.u.p, self.u.x, self.u.noise

        # Construct the necessary kernel matrices.
        K_zx = measure.kernels[p_z, p_x](z, x)
        # The noise of `self.u` is added to the kernel matrix at `z`.
        K_z = B.add(measure.kernels[p_z](z), noise_z)
        self._K_z_store[id(measure)] = K_z

        # Noise kernel matrix:
        K_n = noise_x

        # The approximation can only handle diagonal noise matrices.
        if not isinstance(K_n, Diagonal):
            raise RuntimeError(
                f"Kernel matrix of observation noise must be diagonal, "
                f'not "{type(K_n).__name__}".'
            )

        # And construct the components for the inducing point approximation.
        L_z = B.cholesky(K_z)
        A = B.add(B.eye(K_z), B.iqf(K_n, B.transpose(B.solve(L_z, K_zx))))
        self._A_store[id(measure)] = A
        # Observations with the mean of the process at `x` subtracted.
        y_bar = B.subtract(B.uprank(self.y), measure.means[p_x](x))
        prod_y_bar = B.solve(L_z, B.iqf(K_n, B.transpose(K_zx), y_bar))

        # Compute the optimal mean.
        mu = B.add(
            measure.means[p_z](z),
            B.iqf(A, B.solve(L_z, K_z), prod_y_bar),
        )
        self._mu_store[id(measure)] = mu

        # Compute the ELBO.
        # NOTE: The calculation of `trace_part` asserts that `K_n` is diagonal.
        # The rest, however, is completely generic.
        trace_part = B.ratio(
            Diagonal(measure.kernels[p_x].elwise(x)[:, 0])
            - Diagonal(B.iqf_diag(K_z, K_zx)),
            K_n,
        )
        det_part = B.logdet(2 * B.pi * K_n) + B.logdet(A)
        # The `[0, 0]` indexing takes a single entry of each quadratic form
        # (assumes the results are 1x1 -- TODO confirm).
        iqf_part = B.iqf(K_n, y_bar)[0, 0] - B.iqf(A, prod_y_bar)[0, 0]
        self._elbo_store[id(measure)] = -0.5 * (trace_part + det_part + iqf_part)
예제 #11
0
    def construct_model(vs):
        """Build an OILMM whose parameters are drawn from a variable container.

        Args:
            vs: Variable container providing `pos` and `orth`.

        Returns:
            :class:`OILMM`: Model with `m` scaled and stretched `EQ` kernels.
        """
        # One kernel per latent process. The variance is requested before the
        # length scale so that variables are created in a fixed order.
        kernels = []
        for i in range(m):
            variance = vs.pos(1, name=f"{i}/var")
            scale = vs.pos(0.02, name=f"{i}/scale")
            kernels.append(variance * EQ().stretch(scale))

        noise = vs.pos(1e-2, name="noise")
        latent_noises = vs.pos(1e-2 * B.ones(m), name="latent_noises")

        # Components of the mixing matrix.
        u = Dense(vs.orth(shape=(p, m), name="u"))
        s_sqrt = Diagonal(vs.pos(shape=(m,), name="s_sqrt"))

        return OILMM(kernels, u, s_sqrt, noise, latent_noises)
예제 #12
0
def construct_oilmm():
    """Create a factory for OILMM instances that share kernels and mixing matrix.

    Returns:
        function: Function taking `noise_amplification` (default `1`) and
            returning an `OILMM` whose observation noise equals that factor and
            whose latent noises are `[0.1, 0.2]` multiplied by that factor.
    """
    # Shared model components.
    kernels = [EQ(), 2 * EQ().stretch(1.5)]
    svd_factors = B.svd(B.randn(3, 2))
    u = Dense(svd_factors[0])
    s_sqrt = Diagonal(svd_factors[1])

    def construct_iolmm(noise_amplification=1):
        base_latent_noises = np.array([0.1, 0.2])
        return OILMM(
            kernels,
            u,
            s_sqrt,
            noise_amplification,
            base_latent_noises * noise_amplification,
        )

    return construct_iolmm
예제 #13
0
파일: simulators.py 프로젝트: wesselb/oilmm
    def construct_model(vs):
        """Build an OILMM with a Kronecker-structured mixing matrix.

        The mixing matrix is composed of one component over simulators and one
        component over space.

        Args:
            vs: Variable container providing `bnd` and `orth`.

        Returns:
            :class:`OILMM`: Constructed model.
        """

        def stretched_kernel(name):
            # `Mat52` kernel with a bounded length scale stored under `name`.
            return Mat52().stretch(vs.bnd(6 * 30, lower=60, name=name))

        if args.separable:
            # Repeat the very same kernel object `m` times.
            kernels = [stretched_kernel("k_scale")] * m
        else:
            # A separately parametrised kernel for every latent process.
            kernels = [stretched_kernel(f"{i}/k_scale") for i in range(m)]

        noise = vs.bnd(1e-2, name="noise")
        latent_noises = vs.bnd(1e-2 * B.ones(m), name="latent_noises")

        # Component of the mixing matrix over simulators.
        u_s = Dense(vs.orth(init=u_s_init, shape=(p_s, m_s), name="sims/u"))
        s_sqrt_s = Diagonal(
            vs.bnd(init=s_sqrt_s_init, shape=(m_s,), name="sims/s_sqrt")
        )

        # Component of the mixing matrix over space, obtained from the SVD of a
        # covariance matrix and truncated to the first `m_r` components.
        scales = vs.bnd(init=scales_init, name="space/scales")
        spatial_kernel = Mat52().stretch(scales)
        left_vectors, singular_values, _ = B.svd(B.dense(spatial_kernel(loc)))
        u_r = Dense(left_vectors[:, :m_r])
        s_sqrt_r = Diagonal(B.sqrt(singular_values[:m_r]))

        # Compose the two components.
        s_sqrt = Kronecker(s_sqrt_s, s_sqrt_r)
        u = Kronecker(u_s, u_r)

        return OILMM(kernels, u, s_sqrt, noise, latent_noises)
예제 #14
0
def test_woodbury_formatting():
    """Check `str` and `repr` of a Woodbury matrix."""
    wb = Woodbury(
        Diagonal(B.ones(3)),
        LowRank(B.ones(3, 1), 2 * B.ones(3, 1)),
    )

    expected_str = "<Woodbury matrix: shape=3x3, dtype=float64>"
    expected_repr = (
        "<Woodbury matrix: shape=3x3, dtype=float64\n"
        " diag=<diagonal matrix: shape=3x3, dtype=float64\n"
        "       diag=[1. 1. 1.]>\n"
        " lr=<low-rank matrix: shape=3x3, dtype=float64, rank=1\n"
        "     left=[[1.]\n"
        "           [1.]\n"
        "           [1.]]\n"
        "     right=[[2.]\n"
        "            [2.]\n"
        "            [2.]]>>"
    )

    assert str(wb) == expected_str
    assert repr(wb) == expected_repr
예제 #15
0
파일: timing.py 프로젝트: wesselb/oilmm
    def construct_model(vs, m):
        """Build an OILMM with `m` latent processes and time the mixing matrix.

        Args:
            vs: Variable container providing `pos`, `bnd`, and `orth`.
            m (int): Number of latent processes.

        Returns:
            tuple: The constructed `OILMM` and the time in seconds taken to
                construct the components of the mixing matrix.
        """
        # Every kernel is the sum of a non-periodic and a periodic component.
        kernels = []
        for i in range(m):
            aperiodic = vs.pos(0.5, name=f"{i}/k_var") * Matern52().stretch(
                vs.bnd(2 * 30, name=f"{i}/k_scale")
            )
            periodic = vs.pos(0.5, name=f"{i}/k_per_var") * Matern52().stretch(
                vs.bnd(1.0, name=f"{i}/k_per_scale")
            ).periodic(365)
            kernels.append(aperiodic + periodic)

        noise = vs.pos(1e-2, name="noise")
        latent_noises = vs.pos(1e-2 * B.ones(m), name="latent_noises")

        # Time the construction of the orthogonal matrix and the scales.
        started = time.time()
        u = Dense(vs.orth(shape=(p, m), name="u"))
        s_sqrt = Diagonal(vs.pos(shape=(m,), name="s_sqrt"))
        elapsed = time.time() - started

        model = OILMM(kernels, u, s_sqrt, noise, latent_noises)
        return model, elapsed
예제 #16
0
파일: test_util.py 프로젝트: wesselb/oilmm
def test_normaliser():
    """Check that normalisation shifts the log-pdf by `normalise_logdet`."""
    # Sample test data from a random normal distribution.
    root = B.randn(3, 3)
    dist = Normal(B.randn(3, 1), root @ root.T)
    y = dist.sample(num=10).T

    # Normalise the data.
    norm = Normaliser(y)
    y_norm = norm.normalise(y)

    # Distribution of the normalised data.
    inv_scale = B.inv(Diagonal(norm.scale[0]))
    dist_norm = Normal(
        inv_scale @ (dist.mean - norm.mean.T),
        inv_scale @ dist.var @ inv_scale,
    )

    logpdf_original = B.sum(dist.logpdf(y.T))
    logpdf_normalised = B.sum(dist_norm.logpdf(y_norm.T))
    approx(logpdf_original, logpdf_normalised + norm.normalise_logdet(y))
예제 #17
0
def compute_K_z(model):
    r"""Compute the covariance matrix :math:`K_z` of :math:`z_m` for
    :math:`m=0,\ldots,2M`.

    Args:
        model (:class:`.gprv.GPRV`): Model.

    Returns:
        matrix: :math:`K_z`, as the sum of a diagonal and a low-rank matrix.
    """
    interval = model.b - model.a

    def indicator(condition):
        # Cast a boolean condition to the data type of the model.
        return B.cast(model.dtype, condition)

    # Harmonic frequencies.
    harmonics = model.ms - indicator(model.ms > model.m_max) * model.m_max
    omega = 2 * B.pi * harmonics / interval

    # Parameters of the kernel matrix.
    lam_t = 1
    alpha = 0.5 * interval / psd_matern_12(omega, model.lam, lam_t)
    alpha = alpha + alpha * indicator(model.ms == 0)
    beta = 1 / (lam_t**0.5) * indicator(model.ms <= model.m_max)

    diagonal_part = Diagonal(alpha)
    low_rank_part = LowRank(left=beta[:, None])
    return diagonal_part + low_rank_part
예제 #18
0
    def construct_model(vs):
        """Build an OILMM whose mixing matrix derives from a covariance over outputs.

        Args:
            vs: Variable container providing `pos` and `bnd`.

        Returns:
            :class:`OILMM`: Constructed model.
        """
        # Every kernel is the sum of a non-periodic and a periodic component.
        kernels = []
        for i in range(m):
            aperiodic = vs.pos(0.5, name=f"{i}/k_var") * Matern52().stretch(
                vs.bnd(2 * 30, name=f"{i}/k_scale")
            )
            periodic = vs.pos(0.5, name=f"{i}/k_per_var") * Matern52().stretch(
                vs.bnd(1.0, name=f"{i}/k_per_scale")
            ).periodic(365)
            kernels.append(aperiodic + periodic)

        latent_noises = vs.pos(1e-2 * B.ones(m), name="latent_noises")
        noise = vs.pos(1e-2, name="noise")

        # Take the components of the mixing matrix from the SVD of a covariance
        # matrix over outputs, truncated to the first `m` components.
        variance = vs.pos(1, name="h/variance")
        scales = vs.pos(init=scales_init, name="h/scales")
        output_kernel = variance * Matern52().stretch(scales)
        left_vectors, singular_values, _ = B.svd(B.dense(B.reg(output_kernel(loc))))
        u = Dense(left_vectors[:, :m])
        s_sqrt = Diagonal(B.sqrt(singular_values[:m]))

        return OILMM(kernels, u, s_sqrt, noise, latent_noises)
예제 #19
0
def test_compare_ilmm():
    """Check that `OILMM` agrees with the reference `ILMMPP`.

    Log-pdfs are compared before and after conditioning, and predictions are
    compared after conditioning.
    """
    # Shared model components.
    kernels = [EQ(), 2 * EQ().stretch(1.5)]
    noise_obs = 0.1
    noises_latent = np.array([0.1, 0.2])
    svd_factors = B.svd(B.randn(3, 2))
    u = Dense(svd_factors[0])
    s_sqrt = Diagonal(svd_factors[1])

    ilmm = ILMMPP(kernels, u @ s_sqrt, noise_obs, noises_latent)
    oilmm = OILMM(kernels, u, s_sqrt, noise_obs, noises_latent)

    # Two data sets sampled from the reference model.
    x = B.linspace(0, 3, 5)
    y = ilmm.sample(x, latent=False)
    x2 = B.linspace(4, 7, 5)
    y2 = ilmm.sample(x2, latent=False)
    datasets = [(x, y), (x2, y2)]

    def check_logpdfs(reference, model):
        for inputs, outputs in datasets:
            approx(reference.logpdf(inputs, outputs), model.logpdf(inputs, outputs))

    # Log-pdfs before conditioning.
    check_logpdfs(ilmm, oilmm)

    ilmm = ilmm.condition(x, y)
    oilmm = oilmm.condition(x, y)

    # Log-pdfs after conditioning.
    check_logpdfs(ilmm, oilmm)

    # Predictions after conditioning.
    means_pp, lowers_pp, uppers_pp = ilmm.predict(x2)
    means, lowers, uppers = oilmm.predict(x2)
    for expected, actual in (
        (means_pp, means),
        (lowers_pp, lowers),
        (uppers_pp, uppers),
    ):
        approx(expected, actual)
예제 #20
0
def test_woodbury_attributes():
    """Check that a Woodbury matrix exposes the very components it was given."""
    diag = Diagonal(B.ones(3))
    lr = LowRank(B.ones(3, 1), 2 * B.ones(3, 1))
    wb = Woodbury(diag, lr)

    for attribute, component in (("diag", diag), ("lr", lr)):
        assert getattr(wb, attribute) is component
예제 #21
0
 def construct(diag, left, right, middle):
     """Return the sum of `Diagonal(diag)` and `LowRank(left, right, middle)`."""
     diagonal_part = Diagonal(diag)
     low_rank_part = LowRank(left, right, middle)
     return diagonal_part + low_rank_part
예제 #22
0
    # Test giving a name to the constructor.
    p3 = GP(EQ(), name="yet_another_name", measure=m)
    assert m["yet_another_name"] is p3
    assert p3.name == "yet_another_name"
    assert m[p3] == "yet_another_name"


# Every case maps an input `x` to a tuple of extra positional arguments that is
# unpacked into the process call below (`p1(x1, *n1)`).
@pytest.mark.parametrize(
    "generate_noise_tuple",
    [
        # No extra argument.
        lambda x: (),
        # Result of `B.rand()` (presumably a random scalar -- TODO confirm).
        lambda x: (B.rand(), ),
        # Result of `B.rand(x)` (presumably a random tensor shaped like `x` --
        # TODO confirm).
        lambda x: (B.rand(x), ),
        # `B.diag` applied to the result of `B.rand(x)`.
        lambda x: (B.diag(B.rand(x)), ),
        # Structured `Diagonal` matrix built from the result of `B.rand(x)`.
        lambda x: (Diagonal(B.rand(x)), ),
    ],
)
def test_conditioning(generate_noise_tuple):
    # Two processes on one measure, and their sum.
    m = Measure()
    p1 = GP(EQ(), measure=m)
    p2 = GP(Exp(), measure=m)
    p_sum = p1 + p2

    # Sample some data to condition on.
    x1 = B.linspace(0, 2, 3)
    n1 = generate_noise_tuple(x1)
    y1 = p1(x1, *n1).sample()
    # Pair of the process evaluated at `x1` and the sample drawn from it.
    tup1 = (p1(x1, *n1), y1)
    x_sum = B.linspace(3, 5, 3)
    n_sum = generate_noise_tuple(x_sum)
예제 #23
0
def test_structured():
    """Check that `structured` accepts a `Diagonal` but not dense inputs."""
    assert structured(Diagonal(B.ones(3)))

    # Neither a `Dense` wrapper nor the result of `B.ones` counts as structured.
    for unstructured in (Dense(B.ones(3, 3)), B.ones(3, 3)):
        assert not structured(unstructured)
예제 #24
0
파일: simulators.py 프로젝트: wesselb/oilmm
    def objective(vs):
        """Return the negated log-pdf of the data under the model built from `vs`.

        Args:
            vs: Variable container, which also holds the variable `"x_ind"`.

        Returns:
            Negative of `logpdf(x_data, y_data, x_ind=...)` of the model.
        """
        # Request `x_ind` before the model is constructed.
        x_ind_var = vs.unbounded(x_ind_init, name="x_ind")
        model = construct_model(vs)
        return -model.logpdf(x_data, y_data, x_ind=x_ind_var)

    # Minimise `objective` over the variables in `vs`, passing `args.i` as the
    # number of iterations and enabling tracing.
    minimise_l_bfgs_b(objective, vs, trace=True, iters=args.i)

    # Print variables.
    vs.print()

    def cov_to_corr(k):
        """Divide the rows and then the columns of `k` by the square roots of its
        diagonal.

        Args:
            k (matrix): Covariance matrix.

        Returns:
            matrix: Rescaled matrix.
        """
        root_diag = B.sqrt(B.diag(k))
        row_scaled = k / root_diag[:, None]
        return row_scaled / root_diag[None, :]

    # Compute correlations between simulators from the learned variables
    # `"sims/u"` and `"sims/s_sqrt"`.
    u = Dense(vs["sims/u"])
    s_sqrt = Diagonal(vs["sims/s_sqrt"])
    k = u @ s_sqrt @ s_sqrt @ u.T
    # NOTE(review): `std` is not used in the visible code below; `cov_to_corr`
    # recomputes it internally.
    std = B.sqrt(B.diag(k))
    corr_learned = cov_to_corr(k)

    # Compute empirical correlations.
    # Every simulator contributes one column holding its first `args.n` entries.
    all_obs = np.concatenate(
        [sim.to_numpy()[:args.n].reshape(-1, 1) for sim in sims.values()],
        axis=1)
    corr_empirical = cov_to_corr(np.cov(all_obs.T))

    # Compute predictions for latent processes.
    # The model is rebuilt from the current variables and conditioned on the data.
    model = construct_model(vs)
    model = model.condition(x_data, y_data, x_ind=vs["x_ind"])
    x_proj, y_proj, _, _ = model.project(x_data, y_data)
    means, lowers, uppers = model.model.predict(x_proj)
예제 #25
0
def test_diagonal_formatting():
    """Check `str` and `repr` of a diagonal matrix."""
    expected_str = "<diagonal matrix: shape=3x3, dtype=float64>"
    expected_repr = (
        "<diagonal matrix: shape=3x3, dtype=float64\n"
        " diag=[1. 1. 1.]>"
    )

    assert str(Diagonal(B.ones(3))) == expected_str
    assert repr(Diagonal(B.ones(3))) == expected_repr
예제 #26
0
def test_conversion_to_diagonal(dense1):
    """Check that `Diagonal(dense1)` matches `B.diag(B.diag(dense1))`."""
    expected = B.diag(B.diag(dense1))
    approx(Diagonal(dense1), expected)
예제 #27
0
파일: util.py 프로젝트: wesselb/matrix
def generate(code):
    """Generate a random tensor or matrix described by a generation code.

    A code has the form `"<kind>:<shape>"`, where `<shape>` is a comma-separated
    list of integers, or empty for a scalar.

    Args:
        code (str): Generation code.

    Returns:
        tensor: Random tensor.

    Raises:
        RuntimeError: If the kind in `code` is not recognised.
    """
    kind, shape_spec = code.split(":")

    # An empty shape specification denotes the empty shape.
    shape = tuple(int(d) for d in shape_spec.split(",")) if shape_spec != "" else ()

    if kind == "randn":
        return B.randn(*shape)

    if kind == "randn_pd":
        sample = B.randn(*shape)
        # Scalars and vectors are simply squared pointwise.
        if len(shape) <= 1:
            return sample**2 + 1
        return B.matmul(sample, sample, tr_b=True) + B.eye(shape[0])

    if kind == "zero":
        return Zero(B.default_dtype, *shape)

    if kind == "const":
        return Constant(B.randn(), *shape)
    if kind == "const_pd":
        return Constant(B.randn()**2 + 1, *shape)

    if kind in ("lt", "ut"):
        n = shape[0]
        lower = B.vec_to_tril(B.randn(int(0.5 * n * (n + 1))))
        if kind == "lt":
            return LowerTriangular(lower)
        return UpperTriangular(B.transpose(lower))

    if kind in ("lt_pd", "ut_pd"):
        n = shape[0]
        chol = B.cholesky(B.reg(generate(f"randn_pd:{n},{n}")))
        if kind == "lt_pd":
            return LowerTriangular(chol)
        return UpperTriangular(B.transpose(chol))

    # The dense and diagonal kinds wrap a random tensor of the same shape.
    inner_kind = "randn_pd" if kind.endswith("_pd") else "randn"
    if kind in ("dense", "dense_pd"):
        return Dense(generate(f"{inner_kind}:{shape_spec}"))
    if kind in ("diag", "diag_pd"):
        return Diagonal(generate(f"{inner_kind}:{shape_spec}"))

    raise RuntimeError(f'Cannot parse generation code "{code}".')
예제 #28
0
def test_kronecker_attributes():
    """Check that a Kronecker product exposes the very factors it was given."""
    left = Diagonal(B.ones(2))
    right = Diagonal(B.ones(3))
    kron = Kronecker(left, right)

    for attribute, factor in (("left", left), ("right", right)):
        assert getattr(kron, attribute) is factor
예제 #29
0
def test_diagonal_attributes():
    """Check that `Diagonal.diag` matches the diagonal that was passed in."""
    expected = B.ones(3)
    matrix = Diagonal(B.ones(3))
    approx(matrix.diag, expected)
예제 #30
0
 def __call__(self, x, y):
     """Evaluate for two inputs.

     Args:
         x: First input.
         y: Second input.

     Returns:
         matrix: `Diagonal(self.noises)` if `x` and `y` are the identical object
             and the leading dimension of the upranked `x` equals that of
             `self.noises`, and otherwise a `Zero` matrix sized by the leading
             dimensions of the upranked inputs.
     """
     same_input = x is y
     if same_input and B.shape(uprank(x))[0] == B.shape(self.noises)[0]:
         return Diagonal(self.noises)

     up_x = uprank(x)
     up_y = uprank(y)
     return Zero(B.dtype(up_x), B.shape(up_x)[0], B.shape(up_y)[0])