def test_updates(self):
    """Smoke-test update() on every node of a fully configured model."""
    model = gp.GammaModel(self.N, self.K)
    model.initialize_baseline(**self.baseline_dict)
    model.initialize_fr_latents(**self.fr_latent_dict)
    model.initialize_latents(**self.latent_dict)
    model.initialize_fr_regressors(**self.fr_regressors_dict)
    model.finalize()
    model.maxiter = 2

    assert_true(model.baseline)
    assert_true(model.latents)
    assert_true(model.regressors)

    # the baseline posterior shape accumulates per-unit spike counts
    baseline = model.nodes['baseline']
    baseline.update()
    units = model.Nframe['unit']
    counts = model.Nframe['count']
    expected = baseline.prior_shape + counts.groupby(units).sum()
    npt.assert_allclose(baseline.post_shape, expected)

    model.nodes['fr_latents'].update(1)
    model.nodes['fr_regressors'].update()

    # add overdispersion and check that its node updates as well
    model.initialize_overdispersion(**self.overdisp_dict)
    model.finalize()
    model.nodes['overdispersion'].update()
def test_calc_log_evidence(self):
    """calc_log_evidence(unit) returns an array of shape (T, 2)."""
    model = gp.GammaModel(self.N, self.K)
    model.initialize_baseline(**self.baseline_dict)
    model.initialize_fr_latents(**self.fr_latent_dict)
    model.initialize_latents(**self.latent_dict)
    model.initialize_fr_regressors(**self.fr_regressors_dict)
    model.finalize()

    log_psi = model.calc_log_evidence(2)
    assert_equals(log_psi.shape, (self.T, 2))
def test_expected_log_evidence(self):
    """expected_log_evidence() reduces to a scalar float."""
    model = gp.GammaModel(self.N, self.K)
    model.initialize_baseline(**self.baseline_dict)
    model.initialize_fr_latents(**self.fr_latent_dict)
    model.initialize_latents(**self.latent_dict)
    model.initialize_fr_regressors(**self.fr_regressors_dict)
    model.finalize()

    expected_logp = model.expected_log_evidence()
    assert_is_instance(expected_logp, np.float64)
def test_iterate(self):
    """A full iterate() pass must not decrease the objective L."""
    model = gp.GammaModel(self.N, self.K)
    model.initialize_baseline(**self.baseline_hier_dict)
    model.initialize_fr_latents(**self.fr_latent_dict)
    model.initialize_latents(**self.latent_dict)
    model.initialize_fr_regressors(**self.fr_regressors_dict)
    model.finalize()

    objective_before = model.L(keeplog=True)
    model.iterate(keeplog=True, verbosity=2)
    assert_true(model.L() > objective_before)
def test_inference(self):
    """A short inference run should leave the objective finite (not NaN)."""
    model = gp.GammaModel(self.N, self.K)
    model.initialize_baseline(**self.baseline_hier_dict)
    model.initialize_fr_latents(**self.fr_latent_dict)
    model.initialize_latents(**self.latent_dict)
    model.initialize_fr_regressors(**self.fr_regressors_dict)
    model.finalize()

    model.maxiter = 2
    model.iterate()
    assert_true(~np.isnan(model.L()))
def test_L(self):
    """L() returns a float; keeplog=True appends one entry to the log."""
    model = gp.GammaModel(self.N, self.K)
    model.initialize_baseline(**self.baseline_dict)
    model.initialize_fr_latents(**self.fr_latent_dict)
    model.initialize_latents(**self.latent_dict)
    model.initialize_fr_regressors(**self.fr_regressors_dict)
    model.finalize()

    assert_is_instance(model.L(), np.float64)
    log_len_before = len(model.log['L'])
    model.L(keeplog=True)
    assert_equals(len(model.log['L']), log_len_before + 1)
def test_can_initialize_baseline(self):
    """initialize_baseline() registers a GammaNode with the given params."""
    model = gp.GammaModel(self.N, self.K)
    model.initialize_baseline(**self.baseline_dict)

    assert_in('baseline', model.nodes)
    assert_is_instance(model.nodes['baseline'], nd.GammaNode)

    # every supplied parameter lands unchanged on the node
    node = model.nodes['baseline']
    for par_name in ('prior_shape', 'prior_rate', 'post_shape', 'post_rate'):
        npt.assert_array_equal(self.baseline_dict[par_name],
                               getattr(node, par_name))
def test_can_instantiate_model_object(self):
    """Constructor wires up dimensions, the design frame, and the node dict."""
    model = gp.GammaModel(self.N, self.K)
    assert_is_instance(model, gp.GammaModel)

    assert_equals(model.U, self.U)
    assert_equals(model.T, self.T)
    assert_equals(model.K, self.K)
    assert_equals(model.Xframe.shape,
                  (self.T * self.U, self.X.shape[1] - 1))

    # regressors for the first unit at each time should match X itself
    # (minus its first column)
    first_per_time = model.Xframe.groupby(self.N['time']).first()
    npt.assert_array_equal(first_per_time, self.X.iloc[:, 1:].values)

    assert_is_instance(model.nodes, dict)
def test_finalize(self):
    """finalize() sets the latents/regressors/overdispersion flags."""
    model = gp.GammaModel(self.N, self.K)
    model.initialize_fr_latents(**self.fr_latent_dict)
    model.initialize_latents(**self.latent_dict)
    model.finalize()

    # latents only: the other two flags stay off
    assert_true(model.latents)
    assert_true(not model.regressors)
    assert_true(not model.overdispersion)
    model.F_prod()

    # adding regressors and re-finalizing turns the regressors flag on
    model.initialize_fr_regressors(**self.fr_regressors_dict)
    model.finalize()
    assert_true(model.latents)
    assert_true(model.regressors)
    assert_true(not model.overdispersion)
    model.G_prod()
def test_G_prod(self):
    """G_prod() returns (M,) products and caches them in _Gk/_G."""
    model = gp.GammaModel(self.N, self.K)
    model.initialize_fr_regressors(**self.fr_regressors_dict)
    model.finalize()

    # populate the cache
    model.G_prod(update=True)

    # shapes: both the single-regressor and full products are length M
    assert_equals(model.G_prod(1).shape, (self.M,))
    assert_equals(model.G_prod().shape, (self.M,))

    # cached arrays agree with the on-demand results
    npt.assert_allclose(model.G_prod(1), model._Gk[..., 1])
    npt.assert_allclose(model.G_prod(), model._G)
def test_hier_updates(self):
    """Smoke-test node updates when hierarchical priors are used."""
    model = gp.GammaModel(self.N, self.K)
    model.initialize_baseline(**self.baseline_hier_dict)
    model.initialize_fr_latents(**self.fr_latent_hier_dict)
    model.initialize_latents(**self.latent_dict)
    model.initialize_fr_regressors(**self.fr_regressors_hier_dict)
    model.finalize()

    assert_true(model.baseline)
    assert_true(model.latents)
    assert_true(model.regressors)

    model.nodes['baseline'].update()
    model.nodes['fr_latents'].update(1)
    model.nodes['fr_regressors'].update()
def test_F_prod(self):
    """F_prod() returns (M,) products and caches them in _Fk/_F."""
    model = gp.GammaModel(self.N, self.K)
    model.initialize_fr_latents(**self.fr_latent_dict)
    model.initialize_latents(**self.latent_dict)
    model.finalize()

    # populate the cache
    model.F_prod(update=True)

    # shapes: both the single-latent and full products are length M
    assert_equals(model.F_prod(1).shape, (self.M,))
    assert_equals(model.F_prod().shape, (self.M,))

    # cached arrays agree with the on-demand results
    npt.assert_allclose(model.F_prod(1), model._Fk[..., 1])
    npt.assert_allclose(model.F_prod(), model._F)
def test_can_initialize_baseline_hierarchy(self):
    """A hierarchical baseline also creates a 'baseline_shape' node."""
    model = gp.GammaModel(self.N, self.K)
    model.initialize_baseline(**self.baseline_hier_dict)
    assert_in('baseline', model.nodes)
    assert_is_instance(model.nodes['baseline_shape'], nd.GammaNode)
def test_can_initialize_fr_latents(self):
    """initialize_fr_latents() registers an 'fr_latents' GammaNode."""
    model = gp.GammaModel(self.N, self.K)
    model.initialize_fr_latents(**self.fr_latent_dict)
    assert_in('fr_latents', model.nodes)
    assert_is_instance(model.nodes['fr_latents'], nd.GammaNode)
def test_duration_dist_optimization(self):
    """Check the duration-distribution node's conjugate update.

    With zero expected counts (C = 0) the posterior should revert to the
    prior; with very large, sharply peaked counts the posterior should
    concentrate on the maximum-likelihood log-normal fit of the
    normalized count profile.
    """
    D = 50  # maximum duration

    # add duration-distribution prior hyperparameters to the latent spec
    d_hypers = (2.5, 4., 2., 40.)
    self.latent_dict.update(
        {'d_prior_mean': d_hypers[0] * np.ones((2, self.K)),
         'd_prior_scaling': d_hypers[1] * np.ones((2, self.K)),
         'd_prior_shape': d_hypers[2] * np.ones((2, self.K)),
         'd_prior_rate': d_hypers[3] * np.ones((2, self.K))})

    # ... and distinct posterior initializations
    d_inits = (3., 1.1, 1.7, 2.)
    self.latent_dict.update(
        {'d_post_mean': d_inits[0] * np.ones((2, self.K)),
         'd_post_scaling': d_inits[1] * np.ones((2, self.K)),
         'd_post_shape': d_inits[2] * np.ones((2, self.K)),
         'd_post_rate': d_inits[3] * np.ones((2, self.K))})

    # instantiate model
    gpm = gp.GammaModel(self.N, self.K, D)
    gpm.initialize_baseline(**self.baseline_dict)
    gpm.initialize_fr_latents(**self.fr_latent_dict)
    gpm.initialize_latents(**self.latent_dict)
    gpm.initialize_fr_regressors(**self.fr_regressors_dict)
    gpm.finalize()

    dnode = gpm.nodes['HMM'].nodes['d']
    par = dnode.parent
    k = 1  # chain under test

    # before any update, posteriors hold the inits and priors the hypers
    assert_equals(par.post_mean[0, k], d_inits[0])
    assert_equals(par.post_scaling[0, k], d_inits[1])
    assert_equals(par.post_shape[0, k], d_inits[2])
    assert_equals(par.post_rate[0, k], d_inits[3])
    assert_equals(par.prior_mean[0, k], d_hypers[0])
    assert_equals(par.prior_scaling[0, k], d_hypers[1])
    assert_equals(par.prior_shape[0, k], d_hypers[2])
    assert_equals(par.prior_rate[0, k], d_hypers[3])

    # after an update with C = 0, the posterior matches the prior
    dnode.update(k, 0.0)
    npt.assert_allclose(par.post_mean[..., k], par.prior_mean[..., k],
                        rtol=1e-3)
    npt.assert_allclose(par.post_scaling[..., k], par.prior_scaling[..., k],
                        rtol=1e-3)
    npt.assert_allclose(par.post_shape[..., k], par.prior_shape[..., k],
                        rtol=1e-3)
    npt.assert_allclose(par.post_rate[..., k], par.prior_rate[..., k],
                        rtol=1e-3)

    # build a large, sharply peaked count profile from two log-normals;
    # after updating, the posterior should approach their ML fit
    lpd = np.empty((2, D))
    mm = np.array([10, 15])     # target medians (scale parameters)
    ss = np.array([0.5, 0.25])  # target log-standard deviations
    for m in range(2):  # range, not Python-2-only xrange
        lpd[m] = stats.lognorm.logpdf(range(1, D + 1), scale=mm[m], s=ss[m])
    lpd -= np.logaddexp.reduce(lpd, 1, keepdims=True)  # normalize
    scale_up = 1e5
    bigC = scale_up * np.exp(lpd)
    dnode.update(k, bigC)

    # compare log-moments of samples from the fitted posterior predictive
    # against the targets. NOTE(review): the matplotlib/seaborn plotting
    # that used to live here was removed — it rendered figures as a test
    # side effect (requiring a display backend) and asserted nothing.
    from helpers import lognormal_from_hypers
    logm = np.empty(2)
    logstd = np.empty(2)
    for m in range(2):
        mu = par.post_mean[m, k]
        lam = par.post_scaling[m, k]
        alpha = par.post_shape[m, k]
        beta = par.post_rate[m, k]
        # sample count must be an int for numpy's samplers
        samples = lognormal_from_hypers(mu, lam, alpha, beta, N=int(1e6))
        # restrict samples to the duration support [1, D]
        samples = samples[(samples >= 1) & (samples <= D)]
        logm[m] = np.mean(np.log(samples))
        logstd[m] = np.std(np.log(samples))

    npt.assert_allclose(np.log(mm), logm, rtol=1e-2)
    npt.assert_allclose(ss, logstd, rtol=1e-2)
def test_can_initialize_fr_regressors_hierarchy(self):
    """Hierarchical regressors create both the node and its shape node."""
    model = gp.GammaModel(self.N, self.K)
    model.initialize_fr_regressors(**self.fr_regressors_hier_dict)
    assert_in('fr_regressors', model.nodes)
    assert_is_instance(model.nodes['fr_regressors'], nd.GammaNode)
    assert_is_instance(model.nodes['fr_regressors_shape'], nd.GammaNode)
def test_can_initialize_overdispersion_hierarchy(self):
    """Hierarchical overdispersion creates both the node and its shape node."""
    model = gp.GammaModel(self.N, self.K)
    model.initialize_overdispersion(**self.overdisp_hier_dict)
    assert_in('overdispersion', model.nodes)
    assert_is_instance(model.nodes['overdispersion'], nd.GammaNode)
    assert_is_instance(model.nodes['overdispersion_shape'], nd.GammaNode)
def test_can_initialize_latents(self):
    """initialize_latents() registers the HMM node."""
    model = gp.GammaModel(self.N, self.K)
    model.initialize_latents(**self.latent_dict)
    assert_in('HMM', model.nodes)