def test_Dense():
    """Tests probflow.modules.Dense

    Covers argument validation, deterministic versus sampled forward passes,
    the parameter list, the KL loss, and Flipout sampling.
    """

    def check_output_shape(tensor):
        # Every forward pass below feeds a [4, 5] input and expects [4, 1]
        assert tensor.ndim == 2
        assert tensor.shape[0] == 4
        assert tensor.shape[1] == 1

    # Dimensions below 1 must be rejected
    with pytest.raises(ValueError):
        Dense(0, 1)
    with pytest.raises(ValueError):
        Dense(5, -1)

    # Build a valid layer and a batch of inputs
    layer = Dense(5, 1)
    inputs = tf.random.normal([4, 5])

    # Outside a Sampling context, repeated calls must agree exactly
    out_a = layer(inputs)
    out_b = layer(inputs)
    assert np.all(out_a.numpy() == out_b.numpy())
    check_output_shape(out_a)

    # Inside a Sampling context, repeated calls must differ everywhere
    with Sampling():
        out_a = layer(inputs)
        out_b = layer(inputs)
    assert np.all(out_a.numpy() != out_b.numpy())
    check_output_shape(out_a)

    # parameters should be a list holding exactly the weights and the bias
    params = layer.parameters
    assert isinstance(params, list)
    assert len(params) == 2
    assert all(isinstance(p, Parameter) for p in params)
    names = [p.name for p in layer.parameters]
    assert "Dense_weights" in names
    assert "Dense_bias" in names
    weights = next(p for p in layer.parameters if p.name == "Dense_weights")
    assert weights.shape == [5, 1]
    bias = next(p for p in layer.parameters if p.name == "Dense_bias")
    assert bias.shape == [1, 1]

    # kl_loss should come back as a scalar tensor
    kl = layer.kl_loss()
    assert isinstance(kl, tf.Tensor)
    assert kl.ndim == 0

    # Flipout sampling should also give differing outputs of the same shape
    with Sampling(flipout=True):
        out_a = layer(inputs)
        out_b = layer(inputs)
    assert np.all(out_a.numpy() != out_b.numpy())
    check_output_shape(out_a)
def epistemic_sample(self, x=None, n=1000, batch_size=None):
    """Draw samples of the model's estimate given x, including only
    epistemic uncertainty (uncertainty due to uncertainty as to the
    model's parameter values)

    The work is delegated to ``self._sample`` inside a
    ``Sampling(n=n, flipout=False)`` context, reducing each predicted
    distribution to its ``mean()``.

    Parameters
    ----------
    x : |ndarray| or |DataFrame| or |Series| or |DataGenerator|
        Independent variable values of the dataset to evaluate (aka the
        "features").
    n : int
        Number of samples to draw from the model per datapoint.
    batch_size : None or int
        Compute using batches of this many datapoints.  Default is `None`
        (i.e., do not use batching).

    Returns
    -------
    |ndarray|
        Samples from the predicted mean distribution.  Size
        (num_samples, x.shape[0], ...)
    """

    def predicted_mean(dist):
        # Keep only the mean of each predicted distribution
        return dist.mean()

    with Sampling(n=n, flipout=False):
        return self._sample(x, predicted_mean, ed=0, batch_size=batch_size)
def train_fn(x_data, y_data):
    """Run one optimizer step on a batch and return its ELBO loss.

    ``self``, ``n``, ``n_mc`` and ``flipout`` are not parameters; they are
    resolved from the enclosing scope.
    """
    self.reset_kl_loss()
    with Sampling(n=n_mc, flipout=flipout):
        # Clear stale gradients before the new backward pass
        self._optimizer.zero_grad()
        loss = self.elbo_loss(x_data, y_data, n, n_mc)
        loss.backward()
        self._optimizer.step()
    return loss
def train_fn(x_data, y_data):
    """Run one gradient step on a batch and return its ELBO loss.

    ``self``, ``n``, ``n_mc`` and ``flipout`` are not parameters; they are
    resolved from the enclosing scope.
    """
    self.reset_kl_loss()
    with Sampling(n=n_mc, flipout=flipout):
        # Only the loss computation is recorded on the tape
        with tf.GradientTape() as tape:
            loss = self.elbo_loss(x_data, y_data, n, n_mc)
        train_vars = self.trainable_variables
        grads = tape.gradient(loss, train_vars)
        self._optimizer.apply_gradients(zip(grads, train_vars))
    return loss
def elbo_loss(self, *args):
    """Compute the wrapped probflow model's ELBO loss.

    With a single positional argument it is passed as the second argument
    of the model's ``elbo_loss`` and ``None`` is passed as the first;
    otherwise the first two positional arguments are forwarded in order.
    ``n`` is not a parameter; it is resolved from the enclosing scope.
    """
    model = self._probflow_model
    model.reset_kl_loss()
    with Sampling(n=1, flipout=False):
        if len(args) == 1:
            features, targets = None, args[0]
        else:
            features, targets = args[0], args[1]
        return model.elbo_loss(features, targets, n)
def test_BatchNormalization():
    """Tests probflow.modules.BatchNormalization

    Covers deterministic outputs with and without Sampling, the parameter
    list, the KL loss, and use inside a Sequential with Dense layers.
    """

    def as_array(tensor):
        return tensor.detach().numpy()

    def check_output_shape(tensor):
        # A [4, 5] input is expected to keep its shape
        assert tensor.ndim == 2
        assert tensor.shape[0] == 4
        assert tensor.shape[1] == 5

    # Build the module and a batch of inputs
    layer = BatchNormalization(5)
    inputs = torch.randn([4, 5])

    # Outside a Sampling context, repeated calls must agree exactly
    out_a = layer(inputs)
    out_b = layer(inputs)
    assert np.all(as_array(out_a) == as_array(out_b))
    check_output_shape(out_a)

    # Still identical under Sampling, b/c the posterior is deterministic
    with Sampling():
        out_a = layer(inputs)
        out_b = layer(inputs)
    assert np.all(as_array(out_a) == as_array(out_b))
    check_output_shape(out_a)

    # parameters should be a list holding the weight and the bias
    params = layer.parameters
    assert isinstance(params, list)
    assert len(params) == 2
    assert all(isinstance(p, Parameter) for p in params)
    names = [p.name for p in layer.parameters]
    assert "BatchNormalization_weight" in names
    assert "BatchNormalization_bias" in names
    shapes = [p.shape for p in layer.parameters]
    assert [1, 5] in shapes

    # kl_loss should come back as a scalar tensor
    kl = layer.kl_loss()
    assert isinstance(kl, torch.Tensor)
    assert kl.ndim == 0

    # Should compose with Dense layers inside a Sequential
    steps = [
        Dense(5, 10),
        BatchNormalization(10),
        torch.nn.ReLU(),
        Dense(10, 3),
        BatchNormalization(3),
        torch.nn.ReLU(),
        Dense(3, 1),
    ]
    network = Sequential(steps)
    assert len(network.parameters) == 10
    result = network(torch.randn([6, 5]))
    assert result.ndim == 2
    assert result.shape[0] == 6
    assert result.shape[1] == 1
def test_Sequential():
    """Tests probflow.modules.Sequential

    Covers the ``steps`` list, deterministic versus sampled forward passes,
    the collected parameter list, and the KL loss.
    """

    def check_output_shape(tensor):
        # A [4, 5] input is expected to come back as a [4, 1] output
        assert tensor.ndim == 2
        assert tensor.shape[0] == 4
        assert tensor.shape[1] == 1

    # Three Dense layers separated by two relu steps
    layers = [Dense(5, 10), tf.nn.relu, Dense(10, 3), tf.nn.relu, Dense(3, 1)]
    network = Sequential(layers)

    # steps should be a list with one entry per step
    assert isinstance(network.steps, list)
    assert len(network.steps) == 5

    # Outside a Sampling context, repeated calls must agree exactly
    inputs = tf.random.normal([4, 5])
    out_a = network(inputs)
    out_b = network(inputs)
    assert np.all(out_a.numpy() == out_b.numpy())
    check_output_shape(out_a)

    # Inside a Sampling context, repeated calls must differ everywhere
    with Sampling():
        out_a = network(inputs)
        out_b = network(inputs)
    assert np.all(out_a.numpy() != out_b.numpy())
    check_output_shape(out_a)

    # parameters should list the weights and bias of every Dense step
    params = network.parameters
    assert isinstance(params, list)
    assert len(params) == 6
    assert all(isinstance(p, Parameter) for p in params)
    names = [p.name for p in network.parameters]
    assert "Dense_weights" in names
    assert "Dense_bias" in names
    shapes = [p.shape for p in network.parameters]
    for expected in ([5, 10], [1, 10], [10, 3], [1, 3], [3, 1], [1, 1]):
        assert expected in shapes

    # kl_loss should come back as a scalar tensor
    kl = network.kl_loss()
    assert isinstance(kl, tf.Tensor)
    assert kl.ndim == 0
def posterior_sample(self, n: int = 1):
    """Sample from the posterior distribution.

    Parameters
    ----------
    n : int > 0
        Number of samples to draw from the posterior distribution.
        Default = 1

    Returns
    -------
    The result of calling this object inside a ``Sampling(n=n)`` context,
    converted with ``to_numpy``.

    Raises
    ------
    ValueError
        If ``n`` is less than 1.
    """
    if n < 1:
        raise ValueError("n must be positive")
    with Sampling(n=n):
        draws = self()
        return to_numpy(draws)
def posterior_sample(self, n: int = 1):
    """Sample from the posterior distribution.

    Parameters
    ----------
    n : int > 0
        Number of samples to draw from the posterior distribution.
        Default = 1

    Returns
    -------
    |ndarray|
        Samples from the parameter's posterior distribution.  If ``n>1``
        of size ``(n, self.prior.shape)``.  If ``n==1``, of size
        ``(self.prior.shape)``.

    Raises
    ------
    ValueError
        If ``n`` is less than 1.
    """
    if n < 1:
        raise ValueError("n must be positive")
    with Sampling(n=n):
        draws = self()
        return to_numpy(draws)
def predictive_sample(self, x=None, n=1000):
    """Draw samples from the posterior predictive distribution given x

    The work is delegated to ``self._sample`` inside a
    ``Sampling(n=n, flipout=False)`` context, drawing from each predicted
    distribution with ``sample()``.

    Parameters
    ----------
    x : |ndarray| or |DataFrame| or |Series| or |DataGenerator|
        Independent variable values of the dataset to evaluate (aka the
        "features").
    n : int
        Number of samples to draw from the model per datapoint.

    Returns
    -------
    |ndarray|
        Samples from the predictive distribution.  Size
        (num_samples, x.shape[0], ...)
    """

    def draw(dist):
        # Sample from each predicted distribution
        return dist.sample()

    with Sampling(n=n, flipout=False):
        return self._sample(x, draw, ed=0)
def test_Module():
    """Tests the Module abstract base class

    Uses a small two-parameter module to check parameter and variable
    bookkeeping, the KL loss, deterministic versus sampled calls, and
    ``bayesian_update``.
    """

    class TestModule(Module):
        def __init__(self):
            self.p1 = Parameter(name="TestParam1")
            self.p2 = Parameter(name="TestParam2", shape=[5, 4])

        def __call__(self, x):
            # p2 is evaluated before p1, matching the draw order under Sampling
            offset = O.sum(self.p2(), axis=None)
            return offset + x * self.p1()

    module = TestModule()

    # parameters should be a list holding both parameters
    params = module.parameters
    assert isinstance(params, list)
    assert len(params) == 2
    assert all(isinstance(p, Parameter) for p in params)
    names = [p.name for p in params]
    assert "TestParam1" in names
    assert "TestParam2" in names

    # n_parameters property
    n_params = module.n_parameters
    assert isinstance(n_params, int)
    assert n_params == 21

    # n_variables property
    n_vars = module.n_variables
    assert isinstance(n_vars, int)
    assert n_vars == 42

    # trainable_variables should list every variable in the module
    variables = module.trainable_variables
    assert isinstance(variables, list)
    assert len(variables) == 4
    assert all(isinstance(v, tf.Variable) for v in variables)

    # kl_loss should come back as a scalar tensor
    kl = module.kl_loss()
    assert isinstance(kl, tf.Tensor)
    assert kl.ndim == 0

    # Calling the module should return a tensor shaped like the input
    inputs = tf.random.normal([5])
    out_a = module(inputs)
    assert isinstance(out_a, tf.Tensor)
    assert out_a.ndim == 1
    assert out_a.shape[0] == 5

    # Outside a Sampling context, a second call must agree exactly
    out_b = module(inputs)
    assert np.all(out_a.numpy() == out_b.numpy())

    # Inside a Sampling context, repeated calls must differ everywhere
    with Sampling(n=1):
        out_a = module(inputs)
        out_b = module(inputs)
    assert np.all(out_a.numpy() != out_b.numpy())

    # Before bayesian_update the priors and posteriors must differ
    assert tf.reduce_all(
        module.p1.prior.loc != module.p1.posterior.loc
    ).numpy()
    assert tf.reduce_all(
        module.p2.prior.scale != module.p2.posterior.scale
    ).numpy()

    # bayesian_update should update all params in the module
    module.bayesian_update()
    assert tf.reduce_all(
        module.p1.prior.loc == module.p1.posterior.loc
    ).numpy()
    assert tf.reduce_all(
        module.p2.prior.scale == module.p2.posterior.scale
    ).numpy()
def test_Embedding():
    """Tests probflow.modules.Embedding

    Covers argument validation, single- and multi-column embeddings,
    deterministic outputs by default, the KL loss, and the
    ``probabilistic=True`` variant.
    """

    def check_output_shape(tensor, width):
        # Every input below has 20 rows
        assert tensor.ndim == 2
        assert tensor.shape[0] == 20
        assert tensor.shape[1] == width

    def random_indices(maxval):
        return tf.random.uniform(
            [20, 1], minval=0, maxval=maxval, dtype=tf.dtypes.int32
        )

    # Sizes below 1 must be rejected
    with pytest.raises(ValueError):
        Embedding(0, 1)
    with pytest.raises(ValueError):
        Embedding(5, -1)

    # k and d of different lengths must be rejected
    with pytest.raises(ValueError):
        Embedding([2, 3], [2, 3, 4])

    # Build a single-column embedding
    layer = Embedding(10, 5)

    # It should expose one parameter of shape [10, 5]
    assert len(layer.parameters) == 1
    assert layer.parameters[0].name == "Embedding_0"
    assert layer.parameters[0].shape == [10, 5]

    # Embeddings should be DeterministicParameters by default
    assert all(
        isinstance(e, DeterministicParameter) for e in layer.embeddings
    )

    # Outside a Sampling context, repeated calls must agree exactly
    inputs = random_indices(9)
    out_a = layer(inputs)
    out_b = layer(inputs)
    assert np.all(out_a.numpy() == out_b.numpy())
    check_output_shape(out_a, 5)

    # Still identical under Sampling, b/c the posterior is deterministic
    with Sampling(n=1):
        out_a = layer(inputs)
        out_b = layer(inputs)
    assert np.all(out_a.numpy() == out_b.numpy())
    check_output_shape(out_a, 5)

    # kl_loss should come back as a scalar tensor
    kl = layer.kl_loss()
    assert isinstance(kl, tf.Tensor)
    assert kl.ndim == 0

    # Passing lists for k and d should embed multiple columns
    layer = Embedding([10, 20], [5, 4])

    # One parameter per embedded column
    assert len(layer.parameters) == 2
    assert layer.parameters[0].name == "Embedding_0"
    assert layer.parameters[0].shape == [10, 5]
    assert layer.parameters[1].name == "Embedding_1"
    assert layer.parameters[1].shape == [20, 4]

    # Outputs should be identical and 5 + 4 = 9 columns wide
    first_column = random_indices(9)
    second_column = random_indices(19)
    inputs = tf.concat([first_column, second_column], axis=1)
    out_a = layer(inputs)
    out_b = layer(inputs)
    assert np.all(out_a.numpy() == out_b.numpy())
    check_output_shape(out_a, 9)

    # With probabilistic = True, samples should be different
    layer = Embedding(10, 5, probabilistic=True)
    inputs = random_indices(9)
    with Sampling(n=1):
        out_a = layer(inputs)
        out_b = layer(inputs)
    assert np.all(out_a.numpy() != out_b.numpy())
    check_output_shape(out_a, 5)
    assert all(
        not isinstance(e, DeterministicParameter) for e in layer.embeddings
    )
def test_Module():
    """Tests the Module abstract base class

    Three throwaway modules are used: one with plain parameters, one that
    nests another module, and one that keeps parameters in a list and a
    dict.  The end of the test exercises the batch KL loss accumulator.
    """

    def check_parameters(module, expected_names):
        # parameters should be a list holding exactly the expected parameters
        params = module.parameters
        assert isinstance(params, list)
        assert len(params) == len(expected_names)
        assert all(isinstance(p, Parameter) for p in params)
        names = [p.name for p in params]
        for name in expected_names:
            assert name in names

    def check_n_parameters(module, expected):
        count = module.n_parameters
        assert isinstance(count, int)
        assert count == expected

    def check_trainable_variables(module, expected_len):
        variables = module.trainable_variables
        assert isinstance(variables, list)
        assert len(variables) == expected_len
        assert all(isinstance(v, tf.Variable) for v in variables)

    def check_kl_loss(module):
        # kl_loss should come back as a scalar tensor
        kl = module.kl_loss()
        assert isinstance(kl, tf.Tensor)
        assert kl.ndim == 0

    class TestModule(Module):
        def __init__(self):
            self.p1 = Parameter(name="TestParam1")
            self.p2 = Parameter(name="TestParam2", shape=[5, 4])

        def __call__(self, x):
            # p2 is evaluated before p1, matching the draw order under Sampling
            offset = O.sum(self.p2(), axis=None)
            return offset + x * self.p1()

    module = TestModule()

    check_parameters(module, ["TestParam1", "TestParam2"])
    check_n_parameters(module, 21)

    # n_variables property
    n_vars = module.n_variables
    assert isinstance(n_vars, int)
    assert n_vars == 42

    check_trainable_variables(module, 4)
    check_kl_loss(module)

    # Calling the module should return a tensor shaped like the input
    inputs = tf.random.normal([5])
    out_a = module(inputs)
    assert isinstance(out_a, tf.Tensor)
    assert out_a.ndim == 1
    assert out_a.shape[0] == 5

    # Outside a Sampling context, a second call must agree exactly
    out_b = module(inputs)
    assert np.all(out_a.numpy() == out_b.numpy())

    # Inside a Sampling context, repeated calls must differ everywhere
    with Sampling():
        out_a = module(inputs)
        out_b = module(inputs)
    assert np.all(out_a.numpy() != out_b.numpy())

    # A second test module which contains a sub-module
    class TestModule2(Module):
        def __init__(self, shape):
            self.mod = TestModule()
            self.p3 = Parameter(name="TestParam3", shape=shape)

        def __call__(self, x):
            inner = self.mod(x)
            return inner + O.sum(self.p3(), axis=None)

    module = TestModule2([3, 2])

    # The sub-module's parameters must be picked up too
    check_parameters(module, ["TestParam1", "TestParam2", "TestParam3"])
    check_n_parameters(module, 27)
    check_trainable_variables(module, 6)
    check_kl_loss(module)

    # The parent module's loss should be greater than the child module's
    assert module.kl_loss().numpy() > module.mod.kl_loss().numpy()

    # Calling the module should return a tensor shaped like the input
    inputs = tf.random.normal([5])
    out_a = module(inputs)
    assert isinstance(out_a, tf.Tensor)
    assert out_a.ndim == 1
    assert out_a.shape[0] == 5

    # ... for 2D inputs as well
    inputs = tf.random.normal([5, 4])
    out_a = module(inputs)
    assert isinstance(out_a, tf.Tensor)
    assert out_a.ndim == 2
    assert out_a.shape[0] == 5
    assert out_a.shape[1] == 4

    # Another test module which keeps parameters in a list and a dict
    class TestModule3(Module):
        def __init__(self):
            self.a_list = [
                Parameter(name="TestParam4"),
                Parameter(name="TestParam5"),
            ]
            self.a_dict = {
                "a": Parameter(name="TestParam6"),
                "b": Parameter(name="TestParam7"),
            }

        def __call__(self, x):
            total = tf.ones([x.shape[0], 1])
            members = (
                self.a_list[0],
                self.a_list[1],
                self.a_dict["a"],
                self.a_dict["b"],
            )
            # Accumulate left to right so the evaluation order is unchanged
            for member in members:
                total = total + member()
            return total

    module = TestModule3()

    # Parameters inside lists and dicts must be picked up too
    check_parameters(
        module, ["TestParam4", "TestParam5", "TestParam6", "TestParam7"]
    )
    check_n_parameters(module, 4)

    # Should be able to reset and then add to the batch KL loss
    module.reset_kl_loss()
    assert module.kl_loss_batch() == 0
    module.add_kl_loss(3.145)
    assert is_close(module.kl_loss_batch().numpy(), 3.145)

    # add_kl_loss should also accept two distributions
    module.reset_kl_loss()
    dist_a = tfd.Normal(0.0, 1.0)
    dist_b = tfd.Normal(1.0, 1.0)
    assert module.kl_loss_batch() == 0
    module.add_kl_loss(dist_a, dist_b)
    assert module.kl_loss_batch().numpy() > 0.0
def test_DenseNetwork():
    """Tests probflow.modules.DenseNetwork

    Covers argument validation, deterministic versus sampled forward
    passes, per-layer parameter names and shapes, the KL loss, Flipout, the
    non-probabilistic variant, and both batch-norm placements.
    """

    def as_array(tensor):
        return tensor.detach().numpy()

    def check_output_shape(tensor):
        # A [7, 5] input is expected to come back as a [7, 2] output
        assert tensor.ndim == 2
        assert tensor.shape[0] == 7
        assert tensor.shape[1] == 2

    # Layer sizes below 1 must be rejected
    with pytest.raises(ValueError):
        DenseNetwork([0, 1, 5])
    with pytest.raises(ValueError):
        DenseNetwork([5, -1, 4])

    # Build a valid network and a batch of inputs
    network = DenseNetwork([5, 4, 3, 2])
    inputs = torch.randn([7, 5])

    # Outside a Sampling context, repeated calls must agree exactly
    out_a = network(inputs)
    out_b = network(inputs)
    assert np.all(as_array(out_a) == as_array(out_b))
    check_output_shape(out_a)

    # Inside a Sampling context, repeated calls must differ everywhere
    with Sampling(n=1):
        out_a = network(inputs)
        out_b = network(inputs)
    assert np.all(as_array(out_a) != as_array(out_b))
    check_output_shape(out_a)

    # parameters should hold [weights, bias] for each of the three layers
    params = network.parameters
    assert isinstance(params, list)
    assert len(params) == 6
    assert all(isinstance(p, Parameter) for p in params)
    expected_shapes = {
        "DenseNetwork_Dense0_weights": [5, 4],
        "DenseNetwork_Dense0_bias": [1, 4],
        "DenseNetwork_Dense1_weights": [4, 3],
        "DenseNetwork_Dense1_bias": [1, 3],
        "DenseNetwork_Dense2_weights": [3, 2],
        "DenseNetwork_Dense2_bias": [1, 2],
    }
    names = [p.name for p in network.parameters]
    for name in expected_shapes:
        assert name in names
    for name, shape in expected_shapes.items():
        match = next(p for p in network.parameters if p.name == name)
        assert match.shape == shape

    # kl_loss should come back as a scalar tensor
    kl = network.kl_loss()
    assert isinstance(kl, torch.Tensor)
    assert kl.ndim == 0

    # Flipout sampling should also give differing outputs
    with Sampling(n=1, flipout=True):
        out_a = network(inputs)
        out_b = network(inputs)
    assert np.all(as_array(out_a) != as_array(out_b))
    check_output_shape(out_a)

    # With probabilistic = False, outputs are identical even under Sampling
    network = DenseNetwork([5, 4, 3, 2], probabilistic=False)
    with Sampling(n=1):
        out_a = network(inputs)
        out_b = network(inputs)
    assert np.all(as_array(out_a) == as_array(out_b))
    check_output_shape(out_a)

    # With batch norm enabled, first placed "after" and then "before"
    for placement in ("after", "before"):
        network = DenseNetwork(
            [5, 4, 3, 2], batch_norm=True, batch_norm_loc=placement
        )
        with Sampling(n=1):
            out_a = network(inputs)
        check_output_shape(out_a)
def test_Parameter_scalar():
    """Tests the generic scalar Parameter

    Covers default attributes, shape validation, variables and posterior
    types, MAP versus sampled calls, static sampling, the KL loss, prior
    sampling, and ``bayesian_update``.
    """

    def check_single(*samples):
        # Each sample should have shape [1]
        for sample in samples:
            assert sample.ndim == 1
            assert sample.shape[0] == 1

    def check_stacked(rows, *samples):
        # Each sample should have shape [rows, 1]
        for sample in samples:
            assert sample.ndim == 2
            assert sample.shape[0] == rows
            assert sample.shape[1] == 1

    # Create scalar parameter
    param = Parameter()

    # Check defaults
    assert isinstance(param.shape, list)
    assert param.shape[0] == 1
    assert isinstance(param.untransformed_variables, dict)
    assert all(isinstance(k, str) for k in param.untransformed_variables)
    assert all(
        isinstance(v, tf.Variable)
        for v in param.untransformed_variables.values()
    )

    # Shape should be >0
    with pytest.raises(ValueError):
        Parameter(shape=-1)
    with pytest.raises(ValueError):
        Parameter(shape=[20, 0, 1])

    # trainable_variables should be a property returning list of vars
    assert all(isinstance(v, tf.Variable) for v in param.trainable_variables)

    # variables should be a property returning dict of transformed vars
    assert isinstance(param.variables, dict)
    assert all(isinstance(k, str) for k in param.variables)

    # loc should be a variable, while scale was transformed into a tensor
    assert isinstance(param.variables["loc"], tf.Variable)
    assert isinstance(param.variables["scale"], tf.Tensor)

    # posterior should be a distribution object
    assert isinstance(param.posterior, BaseDistribution)
    assert isinstance(param.posterior(), tfd.Normal)

    # __call__ should return the same estimate each time by default
    s1 = param()
    s2 = param()
    check_single(s1, s2)
    assert s1.numpy() == s2.numpy()

    # Within a Sampling statement, calls should draw random samples
    with Sampling(n=1):
        s1 = param()
        s2 = param()
    check_single(s1, s2)
    assert s1.numpy() != s2.numpy()

    # Sampling(n=10) should produce 10 samples per call
    with Sampling(n=10):
        s1 = param()
        s2 = param()
    check_stacked(10, s1, s2)
    assert np.all(s1.numpy() != s2.numpy())

    # Static sampling: draws agree within one static block and differ
    # between blocks
    s1 = param()
    with Sampling(static=True), Sampling(n=1):
        s2 = param()
        s3 = param()
    with Sampling(static=True), Sampling(n=1):
        s4 = param()
        s5 = param()
    check_single(s1, s2, s3, s4, s5)
    assert s1.numpy() != s2.numpy()
    assert s1.numpy() != s3.numpy()
    assert s2.numpy() == s3.numpy()
    assert s1.numpy() != s4.numpy()
    assert s1.numpy() != s5.numpy()
    assert s4.numpy() == s5.numpy()
    assert s2.numpy() != s4.numpy()

    # Static sampling should also work w/ n>1
    with Sampling(static=True), Sampling(n=5):
        s1 = param()
        s2 = param()
    with Sampling(static=True), Sampling(n=5):
        s3 = param()
        s4 = param()
    check_stacked(5, s1, s2, s3, s4)
    assert np.all(s1.numpy() == s2.numpy())
    assert np.all(s1.numpy() != s3.numpy())
    assert np.all(s1.numpy() != s4.numpy())
    assert np.all(s2.numpy() != s3.numpy())
    assert np.all(s2.numpy() != s4.numpy())
    assert np.all(s3.numpy() == s4.numpy())

    # kl_loss should come back as a scalar tensor
    kl = param.kl_loss()
    assert isinstance(kl, tf.Tensor)
    assert kl.ndim == 0

    # prior_sample is 0-d by default and 1-d with n entries when n is given
    prior_draw = param.prior_sample()
    assert prior_draw.ndim == 0
    prior_draw = param.prior_sample(n=7)
    assert prior_draw.ndim == 1
    assert prior_draw.shape[0] == 7

    # prior and posterior shouldn't be the same (post was randomly
    # initialized)
    assert tf.reduce_all(param.prior.loc != param.posterior.loc).numpy()
    assert tf.reduce_all(param.prior.scale != param.posterior.scale).numpy()

    # but they should be the same after running bayesian_update
    param.bayesian_update()
    assert tf.reduce_all(param.prior.loc == param.posterior.loc).numpy()
    assert tf.reduce_all(param.prior.scale == param.posterior.scale).numpy()
def log_prob(
    self,
    x,
    y=None,
    individually=True,
    distribution=False,
    n=1000,
    batch_size=None,
):
    """Compute the log probability of `y` given the model

    Parameters
    ----------
    x : |ndarray| or |DataFrame| or |Series| or Tensor
        Independent variable values of the dataset to evaluate (aka the
        "features").
    y : |ndarray| or |DataFrame| or |Series| or Tensor
        Dependent variable values of the dataset to evaluate (aka the
        "target").
    individually : bool
        If ``individually`` is True, returns log probability for each
        sample individually, so return shape is ``(x.shape[0], ?)``.
        If ``individually`` is False, returns sum of all log probabilities,
        so return shape is ``(1, ?)``.
    distribution : bool
        If ``distribution`` is True, returns log probability posterior
        distribution (``n`` samples from the model),
        so return shape is ``(?, n)``.
        If ``distribution`` is False, returns log posterior probabilities
        using the maximum a posteriori estimate for each parameter,
        so the return shape is ``(?, 1)``.
    n : int
        Number of samples to draw for each distribution if
        ``distribution=True``.
    batch_size : None or int
        Compute using batches of this many datapoints.  Default is `None`
        (i.e., do not use batching).

    Returns
    -------
    log_probs : |ndarray|
        Log probabilities. Shape is determined by ``individually``,
        ``distribution``, and ``n`` kwargs.
    """

    def batched_log_probs():
        # One full pass over the data, concatenated along the first axis
        per_batch = []
        for x_data, y_data in make_generator(x, y, batch_size=batch_size):
            predicted = self() if x_data is None else self(x_data)
            per_batch.append(predicted.log_prob(y_data))
        return np.concatenate(to_numpy(per_batch), axis=0)

    if distribution:
        # One pass per posterior draw, stacked along a new trailing axis
        with Sampling(n=1, flipout=False):
            probs = [batched_log_probs() for _ in range(n)]
        probs = np.stack(to_numpy(probs), axis=probs[0].ndim)
    else:
        # A single pass outside any Sampling context
        probs = batched_log_probs()

    # Return log prob of each sample or sum of log probs
    if individually:
        return probs
    return np.sum(probs, axis=0)
def test_Parameter_scalar():
    """Tests the generic scalar Parameter

    Covers default attributes, shape validation, variables and posterior
    types, MAP versus sampled calls, the KL loss, and prior sampling.
    """

    def check_single(*samples):
        # Each sample should have shape [1]
        for sample in samples:
            assert sample.ndim == 1
            assert sample.shape[0] == 1

    def check_stacked(rows, *samples):
        # Each sample should have shape [rows, 1]
        for sample in samples:
            assert sample.ndim == 2
            assert sample.shape[0] == rows
            assert sample.shape[1] == 1

    # Create scalar parameter
    param = Parameter()

    # Check defaults
    assert isinstance(param.shape, list)
    assert param.shape[0] == 1
    assert isinstance(param.untransformed_variables, dict)
    assert all(isinstance(k, str) for k in param.untransformed_variables)
    assert all(
        isinstance(v, tf.Variable)
        for v in param.untransformed_variables.values()
    )

    # Shape should be >0
    with pytest.raises(ValueError):
        Parameter(shape=-1)
    with pytest.raises(ValueError):
        Parameter(shape=[20, 0, 1])

    # trainable_variables should be a property returning list of vars
    assert all(isinstance(v, tf.Variable) for v in param.trainable_variables)

    # variables should be a property returning dict of transformed vars
    assert isinstance(param.variables, dict)
    assert all(isinstance(k, str) for k in param.variables)

    # loc should be a variable, while scale was transformed into a tensor
    assert isinstance(param.variables["loc"], tf.Variable)
    assert isinstance(param.variables["scale"], tf.Tensor)

    # posterior should be a distribution object
    assert isinstance(param.posterior, BaseDistribution)
    assert isinstance(param.posterior(), tfd.Normal)

    # __call__ should return the same estimate each time by default
    s1 = param()
    s2 = param()
    check_single(s1, s2)
    assert s1.numpy() == s2.numpy()

    # Within a Sampling statement, calls should draw random samples
    with Sampling():
        s1 = param()
        s2 = param()
    check_single(s1, s2)
    assert s1.numpy() != s2.numpy()

    # Sampling(n=10) should produce 10 samples per call
    with Sampling(n=10):
        s1 = param()
        s2 = param()
    check_stacked(10, s1, s2)
    assert np.all(s1.numpy() != s2.numpy())

    # kl_loss should come back as a scalar tensor
    kl = param.kl_loss()
    assert isinstance(kl, tf.Tensor)
    assert kl.ndim == 0

    # prior_sample is 0-d by default and 1-d with n entries when n is given
    prior_draw = param.prior_sample()
    assert prior_draw.ndim == 0
    prior_draw = param.prior_sample(n=7)
    assert prior_draw.ndim == 1
    assert prior_draw.shape[0] == 7