def step(self, x, grad):
    """Perform a gradient step.

    Args:
        x (tensor): Current input value. This value will be updated in-place.
        grad (tensor): Current gradient.

    Returns:
        tensor: `x` after updating `x` in-place.
    """
    if self.m is None or self.v is None:
        self.m = B.zeros(x)
        self.v = B.zeros(x)

    # Update estimates of moments.
    self.m *= self.beta1
    self.m += (1 - self.beta1) * grad
    self.v *= self.beta2
    self.v += (1 - self.beta2) * grad ** 2

    # Correct for bias of initialisation.
    m_corr = self.m / (1 - self.beta1 ** (self.i + 1))
    v_corr = self.v / (1 - self.beta2 ** (self.i + 1))

    # Perform update.
    if self.local_rates:
        denom = B.sqrt(B.mean(v_corr)) + self.epsilon
    else:
        denom = B.sqrt(v_corr) + self.epsilon
    x -= self.rate * m_corr / denom

    # Increase iteration number.
    self.i += 1

    return x
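# A minimal usage sketch for `step`. The `Adam` wrapper below is hypothetical
# scaffolding inferred from the attributes the method reads (`rate`, `beta1`,
# `beta2`, `epsilon`, `local_rates`, `m`, `v`, `i`); it is not this package's
# actual optimiser class.
import lab as B
import numpy as np


class Adam:
    def __init__(self, rate=1e-2, beta1=0.9, beta2=0.999, epsilon=1e-8,
                 local_rates=False):
        self.rate = rate
        self.beta1 = beta1
        self.beta2 = beta2
        self.epsilon = epsilon
        self.local_rates = local_rates
        self.m = None  # First-moment estimate, lazily initialised.
        self.v = None  # Second-moment estimate, lazily initialised.
        self.i = 0  # Iteration number.

    step = step  # Attach the `step` defined above as a method.


opt = Adam(rate=0.1)
x = np.array([1.0, -2.0])
for _ in range(500):
    x = opt.step(x, 2 * x)  # The gradient of `sum(x ** 2)` is `2 * x`.
# `x` should now be close to the minimiser at zero.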
def sum(a: Zero, axis=None):
    if axis is None:
        return B.cast(a.dtype, 0)
    elif axis == 0:
        return B.zeros(a.dtype, a.cols)
    elif axis == 1:
        return B.zeros(a.dtype, a.rows)
    else:
        _raise(axis)
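# A short illustration of the axis semantics. The `Zero(dtype, rows, cols)`
# constructor is used elsewhere in this document; the top-level `Zero` import
# and registration of this method with the `B.sum` dispatcher are assumptions.
import lab as B
import numpy as np
from matrix import Zero

a = Zero(np.float64, 2, 3)
B.sum(a)          # Scalar zero.
B.sum(a, axis=0)  # Zero vector of length 3: one entry per column.
B.sum(a, axis=1)  # Zero vector of length 2: one entry per row.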
def test_diag_block_diag(diag1, diag2):
    approx(
        B.diag(diag1, diag2),
        B.concat2d(
            # The upper-right zero block takes the shape of `diag1`, mirroring
            # the dense variant of this test.
            [B.dense(diag1), B.zeros(B.dense(diag1))],
            [B.zeros(B.dense(diag2)), B.dense(diag2)],
        ),
    )
    assert isinstance(B.diag(diag1, diag2), Diagonal)
def test_subshape(check_lazy_shapes):
    assert B.shape(B.zeros(2), 0) == 2
    assert B.shape(B.zeros(2, 3, 4), 1) == 3
    assert B.shape(B.zeros(2, 3, 4), 0, 2) == (2, 4)
    assert B.shape(B.zeros(2, 3, 4), 0, 1, 2) == (2, 3, 4)

    # Check for possible infinite recursion.
    with pytest.raises(NotFoundLookupError):
        B.shape(None, 1)
def test_diag_block_dense(dense1, dense2):
    with AssertDenseWarning(concat_warnings):
        res = B.diag(dense1, dense2)
    approx(
        res,
        B.concat2d(
            [B.dense(dense1), B.zeros(B.dense(dense1))],
            [B.zeros(B.dense(dense2)), B.dense(dense2)],
        ),
    )
    assert isinstance(res, Dense)
def compute_I_ux(model, t1=None, t2=None):
    """Compute the :math:`I_{ux}` integral.

    Args:
        model (:class:`.gpcm.GPCM`): Model.
        t1 (tensor, optional): First time input. Defaults to zero.
        t2 (tensor, optional): Second time input. Defaults to zero.

    Returns:
        tensor: Value of :math:`I_{ux}` for all `t1`, `t2`.
    """
    if t1 is None:
        t1 = B.zeros(model.dtype, 1)
        squeeze_t1 = True
    else:
        squeeze_t1 = False
    if t2 is None:
        t2 = B.zeros(model.dtype, 1)
        squeeze_t2 = True
    else:
        squeeze_t2 = False

    t1 = t1[:, None, None, None]
    t2 = t2[None, :, None, None]
    t_u_1 = model.t_u[None, None, :, None]
    t_u_2 = model.t_u[None, None, None, :]

    exppoly = model.k_h(var("t1") - var("tau"), var("t_u_1"))
    exppoly = exppoly * model.k_h(var("t_u_2"), var("t2") - var("tau"))
    if model.causal:
        upper = var("min_t1_t2")
    else:
        upper = np.inf
    result = exppoly.integrate_box(
        ("tau", -np.inf, upper),
        t1=t1,
        t2=t2,
        t_u_1=t_u_1,
        t_u_2=t_u_2,
        min_t1_t2=B.minimum(t1, t2),
    )

    if squeeze_t1 and squeeze_t2:
        return result[0, 0, :, :]
    elif squeeze_t1:
        return result[0, :, :, :]
    elif squeeze_t2:
        return result[:, 0, :, :]
    else:
        return result
def test_normal_mean_is_zero():
    # Check zero case.
    dist = Normal(B.eye(3))
    assert dist.mean_is_zero
    approx(dist.mean, B.zeros(3, 1))

    # Check another zero case.
    dist = Normal(Zero(np.float32, 3, 1), B.eye(3))
    assert dist.mean_is_zero
    approx(dist.mean, B.zeros(3, 1))

    # Check nonzero case.
    assert not Normal(B.randn(3, 1), B.eye(3)).mean_is_zero
def diag(a, b):
    # We could merge this with `block`, but `block` has a lot of overhead. It
    # seems advantageous to optimise this common case.
    warn_upmodule(
        f"Constructing a dense block-diagonal matrix from "
        f"{a} and {b}: converting to dense.",
        category=ToDenseWarning,
    )
    a = B.dense(a)
    b = B.dense(b)
    dtype = B.dtype(a)
    ar, ac = B.shape(a)
    br, bc = B.shape(b)
    return Dense(
        B.concat2d(
            [a, B.zeros(dtype, ar, bc)],
            [B.zeros(dtype, br, ac), b],
        )
    )
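# To make the zero-block shapes above concrete, here is the same layout written
# out with plain NumPy (a standalone sketch, not this package's code):
import numpy as np

a = np.arange(6, dtype=float).reshape(2, 3)  # `ar x ac = 2 x 3`
b = np.arange(2, dtype=float).reshape(1, 2)  # `br x bc = 1 x 2`
ar, ac = a.shape
br, bc = b.shape

# The upper-right zeros are `ar x bc` and the lower-left zeros are `br x ac`,
# so the block-diagonal result is `(ar + br) x (ac + bc)`.
block_diag = np.block([[a, np.zeros((ar, bc))], [np.zeros((br, ac)), b]])
assert block_diag.shape == (ar + br, ac + bc)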
def assert_lower_triangular(x):
    """Assert that a matrix is lower triangular."""
    # Check that matrix is square.
    assert B.shape(x)[0] == B.shape(x)[1]

    # Check that upper part is all zeros.
    upper = x[np.triu_indices(B.shape(x)[0], k=1)]
    approx(upper, B.zeros(upper))
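# Example usage, assuming the surrounding test module's imports (`B` and
# `approx`): a Cholesky factor of a symmetric positive-definite matrix is
# lower triangular, so it should pass this check.
import numpy as np

K = np.array([[2.0, 1.0], [1.0, 2.0]])
assert_lower_triangular(np.linalg.cholesky(K))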
def test_prior_power(Model):
    t_u = B.zeros(1)
    model = Model(window=2, scale=1, n_u=10, t=(0, 10))
    K_u = model.compute_K_u()

    # Estimate power with Monte Carlo.
    powers = []
    for _ in range(2_000):
        u = B.sample(K_u)[:, 0]
        powers.append(model.kernel_approx(t_u, t_u, u)[0, 0])
def compute_I_ux(model, t1=None, t2=None):
    """Compute the :math:`I_{ux}` integral.

    Args:
        model (:class:`.gprv.GPRV`): Model.
        t1 (tensor, optional): First time input. Defaults to zero.
        t2 (tensor, optional): Second time input. Defaults to zero.

    Returns:
        tensor: Value of :math:`I_{ux}` for all `t1`, `t2`.
    """
    if t1 is None:
        t1 = B.zeros(model.dtype, 1)
        squeeze_t1 = True
    else:
        squeeze_t1 = False
    if t2 is None:
        t2 = B.zeros(model.dtype, 1)
        squeeze_t2 = True
    else:
        squeeze_t2 = False

    t1 = t1[:, None, None, None]
    t2 = t2[None, :, None, None]
    t_u_1 = model.t_u[None, None, :, None]
    t_u_2 = model.t_u[None, None, None, :]
    ga = model.gamma - model.alpha
    result = (
        model.alpha_t ** 2
        * model.gamma_t ** 2
        * B.exp(-model.gamma * (t_u_1 + t_u_2) + ga * (t1 + t2))
        * integral_abcd_lu(-t1, t_u_2 - t1, -t2, t_u_1 - t2, ga, model.lam)
    )

    if squeeze_t1 and squeeze_t2:
        return result[0, 0, :, :]
    elif squeeze_t1:
        return result[0, :, :, :]
    elif squeeze_t2:
        return result[:, 0, :, :]
    else:
        return result
def sample(model, t, noise_f):
    """Sample from a model.

    Args:
        model (:class:`gpcm.model.AbstractGPCM`): Model to sample from.
        t (vector): Time points to sample at.
        noise_f (vector): Noise for the sample of the function. Should have the
            same size as `t`.

    Returns:
        tuple[vector, ...]: Tuple containing kernel samples, filter samples,
            and function samples.
    """
    ks, us, fs = [], [], []

    # In the below, we look at the third inducing point, because that is the
    # one determining the value of the filter at zero: the CGPCM adds two extra
    # inducing points to the left.

    # Get a smooth sample.
    u1 = B.ones(model.n_u)
    while B.abs(u1[2]) > 1e-2:
        u1 = B.sample(model.compute_K_u())[:, 0]
    u = GP(model.k_h())
    u = u | (u(model.t_u), u1)
    u1_full = u(t).mean.flatten()

    # Get a rough sample.
    u2 = B.zeros(model.n_u)
    while u2[2] < 0.5:
        u2 = B.sample(model.compute_K_u())[:, 0]
    u = GP(model.k_h())
    u = u | (u(model.t_u), u2)
    u2_full = u(t).mean.flatten()

    with wbml.out.Progress(name="Sampling", total=5) as progress:
        for c in [0, 0.1, 0.23, 0.33, 0.5]:
            # Sample kernel.
            K = model.kernel_approx(t, t, c * u2 + (1 - c) * u1)
            wbml.out.kv("Sampled variance", K[0, 0])
            K = K / K[0, 0]
            ks.append(K[0, :])

            # Store filter.
            us.append(c * u2_full + (1 - c) * u1_full)

            # Sample function.
            f = B.matmul(B.chol(closest_psd(K)), noise_f)
            fs.append(f)

            progress()

    return ks, us, fs
def test_normal_lazy_zero_mean():
    dist = Normal(lambda: B.eye(3))

    assert dist.mean_is_zero
    assert dist._mean is 0
    assert dist._var is None

    approx(dist.mean, B.zeros(3, 1))
    # At this point, the variance should be constructed, because it is used to
    # get the dimensionality and data type for the mean.
    assert dist._var is not None

    approx(dist.var, B.eye(3))
def test_recurrent():
    vs = Vars(np.float32)

    # Test setting the initial hidden state.
    layer = Recurrent(GRU(10), B.zeros(1, 10))
    layer.initialise(5, vs)
    approx(layer.h0, B.zeros(1, 10))

    layer = Recurrent(GRU(10))
    layer.initialise(5, vs)
    assert layer.h0 is not None

    # Check batch consistency.
    check_batch_consistency(layer, B.randn(30, 20, 5))

    # Test preservation of rank upon calls.
    assert B.shape(layer(B.randn(20, 5))) == (20, 10)
    assert B.shape(layer(B.randn(30, 20, 5))) == (30, 20, 10)

    # Check that zero-dimensional calls fail.
    with pytest.raises(ValueError):
        layer(0)
def test_normalise():
    layer = Normalise(epsilon=0)
    x = B.randn(10, 5, 3)

    # Check number of weights and width.
    assert layer.num_weights(10) == 0
    assert layer.width == 10

    # Check initialisation and width.
    layer.initialise(3, None)
    assert layer.width == 3

    # Check correctness.
    out = layer(x)
    approx(B.std(out, axis=2), B.ones(10, 5), rtol=1e-4)
    approx(B.mean(out, axis=2), B.zeros(10, 5), atol=1e-4)
def __call__(self, x):
    # Put the batch dimension second.
    x_rank = B.rank(x)
    if x_rank == 2:
        x = x[:, None, :]
    elif x_rank == 3:
        x = B.transpose(x, perm=(1, 0, 2))
    else:
        raise ValueError(f"Cannot handle inputs of rank {B.rank(x)}.")

    # Recurrently apply the cell.
    n, batch_size, m = B.shape(x)
    y0 = B.zeros(B.dtype(x), batch_size, self.cell.width)
    h0 = B.tile(self.h0, batch_size, 1)
    res = B.scan(self.cell, x, h0, y0)[1]

    # Put the batch dimension first again.
    res = B.transpose(res, perm=(1, 0, 2))

    # Remove the batch dimension, if that didn't exist before.
    if x_rank == 2:
        res = res[0, :, :]

    return res
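# A plain-NumPy illustration of the transpose-scan-transpose pattern above.
# This is a standalone sketch with a toy cell; `B.scan` is assumed to thread
# the hidden state through time steps in the same way the loop below does.
import numpy as np


def toy_cell(h, x_t):
    h = h + x_t  # The "hidden state" here is a running sum of the inputs.
    return h, h  # Return (new hidden state, output).


def apply_recurrent(x):
    # (batch, time, features) -> (time, batch, features).
    x = np.transpose(x, (1, 0, 2))
    h = np.zeros_like(x[0])
    ys = []
    for x_t in x:
        h, y = toy_cell(h, x_t)
        ys.append(y)
    # Put the batch dimension first again.
    return np.transpose(np.stack(ys), (1, 0, 2))


out = apply_recurrent(np.ones((4, 5, 3)))  # Batch 4, 5 time steps, 3 features.
assert out.shape == (4, 5, 3)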
def test_device_and_to_active_device(check_lazy_shapes):
    # Check moving a tensor to the CPU.
    for a in Tensor(2, 2).forms():
        assert "cpu" in str(B.device(a)).lower()
        approx(B.to_active_device(a), a)

    # Check that numbers remain unchanged.
    a = 1
    assert B.to_active_device(a) is a


@pytest.mark.parametrize("t", [tf.float32, torch.float32, jnp.float32])
@pytest.mark.parametrize(
    "f",
    [
        lambda t: B.zeros(t, 2, 2),
        lambda t: B.ones(t, 2, 2),
        lambda t: B.eye(t, 2),
        lambda t: B.linspace(t, 0, 5, 10),
        lambda t: B.range(t, 10),
        lambda t: B.rand(t, 10),
        lambda t: B.randn(t, 10),
    ],
)
def test_on_device(f, t, check_lazy_shapes):
    f_t = f(t)  # Construct on current and existing device.

    # Set the active device to something else.
    B.ActiveDevice.active_name = "previous"

    # Check that explicit allocation on CPU works.
def test_normal_dtype(normal1):
    assert B.dtype(Normal(0, B.eye(3))) == np.float64
    assert B.dtype(Normal(B.ones(3), B.zeros(int, 3))) == np.float64
    assert B.dtype(Normal(B.ones(int, 3), B.zeros(int, 3))) == np.int64
            starting from the origin.
        k (vector): Kernel.
        n_zero (int, optional): Zero padding. Defaults to `2_000`.
        db (bool, optional): Convert to decibel. Defaults to `False`.

    Returns:
        vector: PSD, correctly scaled.
    """
    # Convert to NumPy for compatibility with frameworks.
    t, k = B.to_numpy(t, k)

    if t[0] != 0:
        raise ValueError("Time points must start at zero.")

    # Perform zero padding.
    k = B.concat(k, B.zeros(n_zero))

    # Symmetrise and Fourier transform.
    k_symmetric = B.concat(k, k[1:-1][::-1])
    psd = np.fft.fft(k_symmetric)
    freqs = np.fft.fftfreq(len(psd)) / (t[1] - t[0])

    # Should be real and positive, but the numerics may not be in our favour.
    psd = np.abs(np.real(psd))

    # Now scale appropriately: the total power should equal `k[0]`.
    total_power = np.trapz(y=psd, x=freqs)
    psd /= total_power / k[0]

    # Convert to dB.
    if db:
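# A standalone NumPy sanity check of the symmetrise-and-scale recipe above,
# here for an exponentiated-quadratic kernel. This sketch is independent of
# the module's function; the kernel choice and sorting step are assumptions.
import numpy as np

t = np.linspace(0, 10, 501)  # Linearly spaced, starting at the origin.
k = np.exp(-0.5 * t ** 2)  # Kernel samples; total power should equal `k[0] = 1`.

k = np.concatenate((k, np.zeros(2_000)))  # Zero padding.
k_symmetric = np.concatenate((k, k[1:-1][::-1]))  # Symmetrise.
psd = np.abs(np.real(np.fft.fft(k_symmetric)))
freqs = np.fft.fftfreq(len(psd)) / (t[1] - t[0])

# Sort before integrating, since `fftfreq` does not return monotone values.
order = np.argsort(freqs)
total_power = np.trapz(y=psd[order], x=freqs[order])
psd /= total_power / k[0]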
def test_cholesky_retry_factor(check_lazy_shapes):
    # Try `cholesky_retry_factor = 1`.
    B.cholesky_retry_factor = 1
    B.cholesky(B.zeros(3, 3))
    B.cholesky(B.zeros(3, 3) - 0.5 * B.eye(3) * B.epsilon)
    with pytest.raises(np.linalg.LinAlgError):
        B.cholesky(B.zeros(3, 3) - 0.5 * B.eye(3) * 10 * B.epsilon)
    with pytest.raises(np.linalg.LinAlgError):
        B.cholesky(B.zeros(3, 3) - 0.5 * B.eye(3) * 100 * B.epsilon)

    # Try `cholesky_retry_factor = 10`.
    B.cholesky_retry_factor = 10
    B.cholesky(B.zeros(3, 3))
    B.cholesky(B.zeros(3, 3) - 0.5 * B.eye(3) * B.epsilon)
    B.cholesky(B.zeros(3, 3) - 0.5 * B.eye(3) * 10 * B.epsilon)
    with pytest.raises(np.linalg.LinAlgError):
        B.cholesky(B.zeros(3, 3) - 0.5 * B.eye(3) * 100 * B.epsilon)

    # Try `cholesky_retry_factor = 100`.
    B.cholesky_retry_factor = 100
    B.cholesky(B.zeros(3, 3))
    B.cholesky(B.zeros(3, 3) - 0.5 * B.eye(3) * B.epsilon)
    B.cholesky(B.zeros(3, 3) - 0.5 * B.eye(3) * 10 * B.epsilon)
    B.cholesky(B.zeros(3, 3) - 0.5 * B.eye(3) * 100 * B.epsilon)

    # Reset the factor!
    B.cholesky_retry_factor = 1
def dense(a: Zero):
    if a.dense is None:
        a.dense = B.zeros(a.dtype, a.rows, a.cols)
    return a.dense
def _pad_zero_row(a):
    zeros = B.zeros(B.dtype(a), 1, B.shape(a)[1])
    return B.concat(a, zeros, axis=0)
def diag(a: Zero):
    return B.zeros(B.dtype(a), _diag_len(a))
def _pad_zero_col(a):
    zeros = B.zeros(B.dtype(a), B.shape(a)[0], 1)
    return B.concat(a, zeros, axis=1)
def test_dense_zero(zero1):
    approx(B.dense(zero1), B.zeros(zero1.rows, zero1.cols))
    _check_cache(zero1)