def test_sample_vp(self):
    """Check the variable names exposed by ``sample_vp``.

    Fits a Beta-Binomial model with ADVI, then draws from the fitted
    approximation with ``hide_transformed`` switched on and off and
    compares the resulting ``trace.varnames``.
    """
    n_obs = 100
    flips = np.random.binomial(n=1, p=0.2, size=n_obs)

    with pm.Model():
        success_prob = pm.Beta('p', alpha=1, beta=1)
        pm.Binomial('xs', n=1, p=success_prob, observed=flips)
        fitted = advi(n=1000)

        # With transformed variables hidden only 'p' is expected.
        hidden = sample_vp(fitted, draws=1, hide_transformed=True)
        self.assertListEqual(hidden.varnames, ['p'])

        # Otherwise the log-odds variable is expected alongside 'p'.
        exposed = sample_vp(fitted, draws=1, hide_transformed=False)
        self.assertListEqual(sorted(exposed.varnames), ['p', 'p_logodds_'])
def test_advi_minibatch_shared(self):
    """Run minibatch ADVI with a shared variable as the observed tensor.

    The fitted mean and the sampled draws of ``mu`` are compared with the
    closed-form posterior mean and standard deviation computed below.
    """
    n = 1000
    prior_sd, prior_mu = 2., 4.
    noise_sd, true_mu = 3., -5.

    data = noise_sd * np.random.randn(n) + true_mu

    # Posterior precision and mean of `mu` given all n observations.
    post_precision = n / noise_sd**2 + 1 / prior_sd**2
    post_mean = (n * np.mean(data) / noise_sd**2
                 + prior_mu / prior_sd**2) / post_precision

    observed_t = shared(np.zeros(1, ))

    def rolling_batches(pool):
        # Endless stream: rotate by 100, then emit the first 100 values.
        while True:
            pool = np.roll(pool, 100, axis=0)
            yield (pool[:100], )

    with Model():
        mu_rv = Normal('mu', mu=prior_mu, sd=prior_sd, testval=0)
        x_rv = Normal('x', mu=mu_rv, sd=noise_sd, observed=observed_t)
        fitted = advi_minibatch(
            n=1000,
            minibatch_tensors=[observed_t],
            minibatch_RVs=[x_rv],
            minibatches=rolling_batches(data),
            total_size=n,
            learning_rate=1e-1,
        )
        np.testing.assert_allclose(fitted.means['mu'], post_mean, rtol=0.1)
        trace = sample_vp(fitted, 10000)

    mu_draws = trace['mu']
    np.testing.assert_allclose(np.mean(mu_draws), post_mean, rtol=0.4)
    np.testing.assert_allclose(
        np.std(mu_draws), np.sqrt(1. / post_precision), rtol=0.4)
def test_advi_minibatch_shared(self):
    """Minibatch ADVI on a Normal model whose data arrive via a shared variable.

    Asserts that the variational mean, and the mean and spread of samples
    drawn from the fit, are close to the closed-form posterior values.
    """
    n = 1000
    sd0, mu0 = 2., 4.
    sd, mu = 3., -5.

    samples = sd * np.random.randn(n) + mu

    precision = n / sd**2 + 1 / sd0**2
    expected_mean = (n * np.mean(samples) / sd**2 + mu0 / sd0**2) / precision
    expected_std = np.sqrt(1. / precision)

    data_t = shared(np.zeros(1,))

    def batch_stream(values):
        # Infinite generator of 100-element minibatches over rotated data.
        while True:
            values = np.roll(values, 100, axis=0)
            yield (values[:100],)

    with Model():
        mean_rv = Normal('mu', mu=mu0, sd=sd0, testval=0)
        likelihood = Normal('x', mu=mean_rv, sd=sd, observed=data_t)
        result = advi_minibatch(
            n=1000, minibatch_tensors=[data_t],
            minibatch_RVs=[likelihood],
            minibatches=batch_stream(samples),
            total_size=n, learning_rate=1e-1)

        np.testing.assert_allclose(result.means['mu'], expected_mean, rtol=0.1)
        trace = sample_vp(result, 10000)

    np.testing.assert_allclose(np.mean(trace['mu']), expected_mean, rtol=0.4)
    np.testing.assert_allclose(np.std(trace['mu']), expected_std, rtol=0.4)
def test_advi_optimizer():
    """Fit a Normal model with ADVI using an explicit adagrad optimizer.

    Data come from a ``RandomState(0)`` generator and ADVI is called with
    ``random_seed=1``; results are compared with the closed-form posterior
    mean and standard deviation of ``mu``.
    """
    n = 1000
    prior_sd, prior_mu = 2., 4.
    noise_sd, true_mu = 3., -5.

    data = noise_sd * np.random.RandomState(0).randn(n) + true_mu

    post_precision = n / noise_sd**2 + 1 / prior_sd**2
    post_mean = (n * np.mean(data) / noise_sd**2
                 + prior_mu / prior_sd**2) / post_precision

    with Model() as model:
        mu_rv = Normal('mu', mu=prior_mu, sd=prior_sd, testval=0)
        Normal('x', mu=mu_rv, sd=noise_sd, observed=data)

        fitted = advi(
            model=model,
            n=1000,
            optimizer=adagrad_optimizer(learning_rate=0.1, epsilon=0.1),
            random_seed=1,
        )

    np.testing.assert_allclose(fitted.means['mu'], post_mean, rtol=0.1)

    # The model is passed explicitly, so no enclosing context is needed.
    trace = sample_vp(fitted, 10000, model)
    mu_draws = trace['mu']

    np.testing.assert_allclose(np.mean(mu_draws), post_mean, rtol=0.4)
    np.testing.assert_allclose(
        np.std(mu_draws), np.sqrt(1. / post_precision), rtol=0.4)
def fit(self, x, y):
    """Build the model for ``(x, y)``, fit it with minibatch ADVI and sample.

    The fitted variational parameters are stored on ``self.v_params`` and
    the sampled trace on ``self.trace``. The tail of the ELBO history is
    plotted and saved to ``./elbo.png``.

    Parameters
    ----------
    x, y
        Training inputs and targets; forwarded to ``self.make_model`` and
        ``self.minibatch_tensors``. ``x.shape[0]`` is used as the total
        data size.

    Returns
    -------
    self
    """
    if self.verbose:
        logging.info('Building pymc3 model')
    self.make_model(x, y)

    if self.verbose:
        logging.info('Sampling...')

    with self.model:
        input_tensors = [
            self.product_id_var,
            self.route_id_var,
            self.client_id_var,
            self.adjusted_demand_var,
        ]
        observed_rvs = [self.adjusted_demand]
        n_rows = x.shape[0]

        self.v_params = variational.advi_minibatch(
            n=int(1e6),
            minibatch_tensors=input_tensors,
            minibatch_RVs=observed_rvs,
            total_size=n_rows,
            minibatches=self.minibatch_tensors(x, y),
        )

        # Plot only the last 1e5 ELBO values.
        elbo_tail = self.v_params.elbo_vals[-int(1e5):]
        plt.plot(elbo_tail)
        plt.savefig('./elbo.png')

        self.trace = variational.sample_vp(self.v_params)

        if self.verbose:
            summary_vars = ['route_demand', 'client_demand']
            print(pm.summary(self.trace[100:], varnames=summary_vars))

    return self
def test_advi(self):
    """Fit a Normal model with ADVI and compare against the exact posterior.

    Also runs ADVI for only five iterations to exercise the ``n < 10``
    code path.
    """
    n = 1000
    prior_sd, prior_mu = 2., 4.
    noise_sd, true_mu = 3., -5.

    data = noise_sd * np.random.randn(n) + true_mu

    post_precision = n / noise_sd**2 + 1 / prior_sd**2
    post_mean = (n * np.mean(data) / noise_sd**2
                 + prior_mu / prior_sd**2) / post_precision

    with Model():
        mu_rv = Normal('mu', mu=prior_mu, sd=prior_sd, testval=0)
        Normal('x', mu=mu_rv, sd=noise_sd, observed=data)

        fitted = advi(n=1000, accurate_elbo=False, learning_rate=1e-1)
        np.testing.assert_allclose(fitted.means['mu'], post_mean, rtol=0.1)

        trace = sample_vp(fitted, 10000)

    mu_draws = trace['mu']
    np.testing.assert_allclose(np.mean(mu_draws), post_mean, rtol=0.4)
    np.testing.assert_allclose(
        np.std(mu_draws), np.sqrt(1. / post_precision), rtol=0.4)

    # Test for n < 10: only checks that a very short run completes.
    with Model():
        mu_rv = Normal('mu', mu=prior_mu, sd=prior_sd, testval=0)
        Normal('x', mu=mu_rv, sd=noise_sd, observed=data)
        fitted = advi(n=5, accurate_elbo=False, learning_rate=1e-1)
def test_advi():
    """Seeded ADVI fit of a Normal model checked against the exact posterior.

    The data generator and ADVI are both seeded; the model is handed to
    ``advi`` and ``sample_vp`` explicitly.
    """
    n = 1000
    prior_sd, prior_mu = 2., 4.
    noise_sd, true_mu = 3., -5.

    rng = np.random.RandomState(0)
    data = noise_sd * rng.randn(n) + true_mu

    post_precision = n / noise_sd**2 + 1 / prior_sd**2
    post_mean = (n * np.mean(data) / noise_sd**2
                 + prior_mu / prior_sd**2) / post_precision

    with Model() as model:
        mu_rv = Normal('mu', mu=prior_mu, sd=prior_sd, testval=0)
        Normal('x', mu=mu_rv, sd=noise_sd, observed=data)

        fitted = advi(
            model=model,
            n=1000,
            accurate_elbo=False,
            learning_rate=1e-1,
            random_seed=1,
        )

    np.testing.assert_allclose(fitted.means['mu'], post_mean, rtol=0.1)

    trace = sample_vp(fitted, 10000, model)
    mu_draws = trace['mu']

    np.testing.assert_allclose(np.mean(mu_draws), post_mean, rtol=0.4)
    np.testing.assert_allclose(
        np.std(mu_draws), np.sqrt(1. / post_precision), rtol=0.4)
def test_sample_vp():
    """Check which variable names ``sample_vp`` returns.

    A Beta-Binomial model is fitted with ADVI; samples are then drawn with
    ``hide_transformed`` set to True and False and the resulting
    ``trace.varnames`` are compared with the expected name sets.
    """
    n_samples = 100

    rng = np.random.RandomState(0)
    xs = rng.binomial(n=1, p=0.2, size=n_samples)

    with pm.Model() as model:
        p = pm.Beta('p', alpha=1, beta=1)
        pm.Binomial('xs', n=1, p=p, observed=xs)
        v_params = advi(n=1000)

    with model:
        trace = sample_vp(v_params, hide_transformed=True)
    # Use a set literal: set('p') would split a longer name into characters
    # and only matched here because the name is a single character.
    assert set(trace.varnames) == {'p'}

    with model:
        trace = sample_vp(v_params, hide_transformed=False)
    assert set(trace.varnames) == {'p', 'p_logodds_'}
def test_sample_vp():
    """Verify the variable names exposed by ``sample_vp``.

    Fits a Beta-Binomial model with ADVI, then samples from the fit with
    transformed variables hidden and shown, asserting on the set of
    ``trace.varnames`` in each case.
    """
    n_samples = 100

    rng = np.random.RandomState(0)
    xs = rng.binomial(n=1, p=0.2, size=n_samples)

    with pm.Model() as model:
        p = pm.Beta('p', alpha=1, beta=1)
        pm.Binomial('xs', n=1, p=p, observed=xs)
        v_params = advi(n=1000)

    with model:
        trace = sample_vp(v_params, hide_transformed=True)
    # set('p') builds a set of characters; it equals {'p'} only by accident
    # of the one-letter name, so spell the expected set out explicitly.
    assert set(trace.varnames) == {'p'}

    with model:
        trace = sample_vp(v_params, hide_transformed=False)
    assert set(trace.varnames) == {'p', 'p_logodds_'}
def test_advi_minibatch(self):
    """Exercise ``advi_minibatch`` with a symbolic vector as observed data.

    Three runs share one minibatch generator: a regular fit checked against
    the closed-form posterior, a five-iteration run (``n < 10``), and a run
    with a huge learning rate that must raise ``FloatingPointError``.
    """
    n = 1000
    prior_sd, prior_mu = 2., 4.
    noise_sd, true_mu = 3., -5.

    data = floatX(noise_sd * np.random.randn(n) + true_mu)

    post_precision = n / noise_sd**2 + 1 / prior_sd**2
    post_mean = (n * np.mean(data) / noise_sd**2
                 + prior_mu / prior_sd**2) / post_precision

    observed_t = tt.vector()
    observed_t.tag.test_value = floatX(np.zeros(1,))

    def rolling_batches(pool):
        # Endless stream: rotate by 100, then emit the first 100 values.
        while True:
            pool = np.roll(pool, 100, axis=0)
            yield (pool[:100],)

    batches = rolling_batches(data)

    with Model():
        mu_rv = Normal('mu', mu=prior_mu, sd=prior_sd, testval=0)
        x_rv = Normal('x', mu=mu_rv, sd=noise_sd, observed=observed_t)
        fitted = advi_minibatch(
            n=1000,
            minibatch_tensors=[observed_t],
            minibatch_RVs=[x_rv],
            minibatches=batches,
            total_size=n,
            learning_rate=1e-1,
        )
        np.testing.assert_allclose(fitted.means['mu'], post_mean, rtol=0.1)
        trace = sample_vp(fitted, 10000)

    mu_draws = trace['mu']
    np.testing.assert_allclose(np.mean(mu_draws), post_mean, rtol=0.4)
    np.testing.assert_allclose(
        np.std(mu_draws), np.sqrt(1. / post_precision), rtol=0.4)

    # Test for n < 10
    with Model():
        mu_rv = Normal('mu', mu=prior_mu, sd=prior_sd, testval=0)
        x_rv = Normal('x', mu=mu_rv, sd=noise_sd, observed=observed_t)
        fitted = advi_minibatch(
            n=5,
            minibatch_tensors=[observed_t],
            minibatch_RVs=[x_rv],
            minibatches=batches,
            total_size=n,
            learning_rate=1e-1,
        )

    # A very large learning rate is expected to produce NaN and raise.
    with pytest.raises(FloatingPointError):
        with Model():
            mu_rv = Normal('mu', mu=prior_mu, sd=prior_sd, testval=0)
            x_rv = Normal('x', mu=mu_rv, sd=noise_sd, observed=observed_t)
            fitted = advi_minibatch(
                n=1000,
                minibatch_tensors=[observed_t],
                minibatch_RVs=[x_rv],
                minibatches=batches,
                total_size=n,
                learning_rate=1e10,
            )
def fit(self, n_steps=30000):
    """
    Creates a Bayesian Estimation model for replicate measurements of
    treatment(s) vs. control, fits it with ADVI and samples from the fit.

    The model is stored on ``self.model`` and the 2000-draw trace sampled
    from the variational approximation on ``self.trace``.

    Parameters
    ----------
    n_steps : int
        The number of steps to run ADVI.

    Raises
    ------
    KeyError
        If ``self.baseline_name`` is not among the values of the sample
        column (raised by ``set.remove``).
    """
    # One parameter per non-baseline sample.
    sample_names = set(self.data[self.sample_col].values)
    sample_names.remove(self.baseline_name)
    n_groups = len(sample_names)

    with Model() as model:
        # Hyperpriors
        upper = Exponential('upper', lam=0.05)
        nu = Exponential('nu_minus_one', 1 / 29.) + 1

        # "fold", which is the estimated fold change.
        fold = Uniform('fold', lower=1E-10, upper=upper, shape=n_groups)

        # Assume that data have heteroskedastic (i.e. variable) error but
        # are drawn from the same HalfCauchy distribution.
        sigma = HalfCauchy('sigma', beta=1, shape=n_groups)

        # Model prediction
        # NOTE(review): assumes self.data['indices'] maps each row to its
        # position in `fold`/`sigma` - confirm against the data preparation.
        mu = fold[self.data['indices']]
        sig = sigma[self.data['indices']]

        # Data likelihood. `sd` expects a standard deviation; the previous
        # `sd=sig**-2` passed a precision expression (appropriate for `lam`)
        # as the scale. `sd=sig` is equivalent to `lam=sig**-2`.
        StudentT('like', nu=nu, mu=mu, sd=sig,
                 observed=self.data[self.output_col])

    self.model = model

    with model:
        params = advi(n=n_steps)
        trace = sample_vp(params, draws=2000)

    self.trace = trace
def test_advi_minibatch():
    """Seeded minibatch ADVI on a Normal model with a symbolic observed vector.

    The fitted mean and the sampled draws of ``mu`` are compared with the
    closed-form posterior mean and standard deviation.
    """
    n = 1000
    prior_sd, prior_mu = 2., 4.
    noise_sd, true_mu = 3., -5.

    data = noise_sd * np.random.RandomState(0).randn(n) + true_mu

    post_precision = n / noise_sd**2 + 1 / prior_sd**2
    post_mean = (n * np.mean(data) / noise_sd**2
                 + prior_mu / prior_sd**2) / post_precision

    observed_t = tt.vector()
    observed_t.tag.test_value = np.zeros(1, )

    with Model() as model:
        mu_rv = Normal('mu', mu=prior_mu, sd=prior_sd, testval=0)
        x_rv = Normal('x', mu=mu_rv, sd=noise_sd, observed=observed_t)

    def rolling_batches(pool):
        # Endless stream: rotate by 100, then emit the first 100 values.
        while True:
            pool = np.roll(pool, 100, axis=0)
            yield (pool[:100], )

    with model:
        fitted = advi_minibatch(
            n=1000,
            minibatch_tensors=[observed_t],
            minibatch_RVs=[x_rv],
            minibatches=rolling_batches(data),
            total_size=n,
            learning_rate=1e-1,
            random_seed=1,
        )
        np.testing.assert_allclose(fitted.means['mu'], post_mean, rtol=0.1)
        trace = sample_vp(fitted, 10000)

    mu_draws = trace['mu']
    np.testing.assert_allclose(np.mean(mu_draws), post_mean, rtol=0.4)
    np.testing.assert_allclose(
        np.std(mu_draws), np.sqrt(1. / post_precision), rtol=0.4)
def test_advi_minibatch():
    """Minibatch ADVI with fixed seeds, checked against the exact posterior.

    Builds a Normal-mean model over a symbolic vector, feeds it rotating
    100-element minibatches and asserts on the fitted and sampled ``mu``.
    """
    n = 1000
    sd0, mu0 = 2., 4.
    sd, mu = 3., -5.

    samples = sd * np.random.RandomState(0).randn(n) + mu

    precision = n / sd**2 + 1 / sd0**2
    expected_mean = (n * np.mean(samples) / sd**2 + mu0 / sd0**2) / precision
    expected_std = np.sqrt(1. / precision)

    data_t = tt.vector()
    data_t.tag.test_value = np.zeros(1,)

    with Model() as model:
        mean_rv = Normal('mu', mu=mu0, sd=sd0, testval=0)
        likelihood = Normal('x', mu=mean_rv, sd=sd, observed=data_t)

    rvs = [likelihood]
    tensors = [data_t]

    def batch_stream(values):
        # Infinite generator of 100-element minibatches over rotated data.
        while True:
            values = np.roll(values, 100, axis=0)
            yield (values[:100],)

    stream = batch_stream(samples)

    with model:
        result = advi_minibatch(
            n=1000, minibatch_tensors=tensors, minibatch_RVs=rvs,
            minibatches=stream, total_size=n, learning_rate=1e-1,
            random_seed=1)

        np.testing.assert_allclose(result.means['mu'], expected_mean, rtol=0.1)
        trace = sample_vp(result, 10000)

    np.testing.assert_allclose(np.mean(trace['mu']), expected_mean, rtol=0.4)
    np.testing.assert_allclose(np.std(trace['mu']), expected_std, rtol=0.4)
def test_advi(self):
    """Fit a Normal model with ADVI and check results, logging and failure.

    Covers a regular fit compared with the closed-form posterior, a check
    that the test's log handler saw a "converged" message, a five-iteration
    run (``n < 10``), and a run with a huge learning rate that must raise
    ``FloatingPointError``.
    """
    n = 1000
    prior_sd, prior_mu = 2., 4.
    noise_sd, true_mu = 3., -5.

    data = noise_sd * np.random.randn(n) + true_mu

    post_precision = n / noise_sd**2 + 1 / prior_sd**2
    post_mean = (n * np.mean(data) / noise_sd**2
                 + prior_mu / prior_sd**2) / post_precision

    with Model():
        mu_rv = Normal('mu', mu=prior_mu, sd=prior_sd, testval=0)
        Normal('x', mu=mu_rv, sd=noise_sd, observed=data)

        fitted = advi(n=1000, accurate_elbo=False, learning_rate=1e-1)
        np.testing.assert_allclose(fitted.means['mu'], post_mean, rtol=0.1)

        trace = sample_vp(fitted, 10000)

    mu_draws = trace['mu']
    np.testing.assert_allclose(np.mean(mu_draws), post_mean, rtol=0.4)
    np.testing.assert_allclose(
        np.std(mu_draws), np.sqrt(1. / post_precision), rtol=0.4)

    self.assertTrue(self.handler.matches(msg="converged"))

    # Test for n < 10
    with Model():
        mu_rv = Normal('mu', mu=prior_mu, sd=prior_sd, testval=0)
        Normal('x', mu=mu_rv, sd=noise_sd, observed=data)
        fitted = advi(n=5, accurate_elbo=False, learning_rate=1e-1)

    # A very large learning rate is expected to produce NaN and raise.
    with self.assertRaises(FloatingPointError):
        with Model():
            mu_rv = Normal('mu', mu=prior_mu, sd=prior_sd, testval=0)
            Normal('x', mu=mu_rv, sd=noise_sd, observed=data)
            fitted = advi(n=1000, accurate_elbo=False, learning_rate=1e10)
def test_advi_optimizer(self):
    """Fit a Normal model with ADVI using an explicit adagrad optimizer.

    The fitted mean and sampled draws of ``mu`` are compared with the
    closed-form posterior mean and standard deviation.
    """
    n = 1000
    prior_sd, prior_mu = 2., 4.
    noise_sd, true_mu = 3., -5.

    data = noise_sd * np.random.randn(n) + true_mu

    post_precision = n / noise_sd**2 + 1 / prior_sd**2
    post_mean = (n * np.mean(data) / noise_sd**2
                 + prior_mu / prior_sd**2) / post_precision

    with Model():
        mu_rv = Normal('mu', mu=prior_mu, sd=prior_sd, testval=0)
        Normal('x', mu=mu_rv, sd=noise_sd, observed=data)

        fitted = advi(
            n=1000,
            optimizer=adagrad_optimizer(learning_rate=0.1, epsilon=0.1),
        )
        np.testing.assert_allclose(fitted.means['mu'], post_mean, rtol=0.1)

        trace = sample_vp(fitted, 10000)

    mu_draws = trace['mu']
    np.testing.assert_allclose(np.mean(mu_draws), post_mean, rtol=0.4)
    np.testing.assert_allclose(
        np.std(mu_draws), np.sqrt(1. / post_precision), rtol=0.4)
def test_advi(self):
    """ADVI on a Normal model: accuracy, convergence log, short run, NaN.

    Compares the fit with the closed-form posterior, asserts that the
    test's log handler recorded a "converged" message, runs five iterations
    (``n < 10``), and expects ``FloatingPointError`` for a huge learning rate.
    """
    n = 1000
    sd0, mu0 = 2., 4.
    sd, mu = 3., -5.

    samples = sd * np.random.randn(n) + mu

    precision = n / sd**2 + 1 / sd0**2
    expected_mean = (n * np.mean(samples) / sd**2 + mu0 / sd0**2) / precision
    expected_std = np.sqrt(1. / precision)

    with Model():
        mean_rv = Normal('mu', mu=mu0, sd=sd0, testval=0)
        Normal('x', mu=mean_rv, sd=sd, observed=samples)
        result = advi(n=1000, accurate_elbo=False, learning_rate=1e-1)

        np.testing.assert_allclose(result.means['mu'], expected_mean, rtol=0.1)
        trace = sample_vp(result, 10000)

    np.testing.assert_allclose(np.mean(trace['mu']), expected_mean, rtol=0.4)
    np.testing.assert_allclose(np.std(trace['mu']), expected_std, rtol=0.4)

    assert self.handler.matches(msg="converged")

    # Test for n < 10
    with Model():
        mean_rv = Normal('mu', mu=mu0, sd=sd0, testval=0)
        Normal('x', mu=mean_rv, sd=sd, observed=samples)
        result = advi(n=5, accurate_elbo=False, learning_rate=1e-1)

    # A very large learning rate is expected to produce NaN and raise.
    with pytest.raises(FloatingPointError):
        with Model():
            mean_rv = Normal('mu', mu=mu0, sd=sd0, testval=0)
            Normal('x', mu=mean_rv, sd=sd, observed=samples)
            result = advi(n=1000, accurate_elbo=False, learning_rate=1e10)
def test_advi_minibatch(self):
    """Exercise ``advi_minibatch`` with a symbolic vector as observed data.

    Three runs share one minibatch generator: a regular fit checked against
    the closed-form posterior, a five-iteration run (``n < 10``), and a run
    with a huge learning rate that must raise ``FloatingPointError``.
    """
    n = 1000
    prior_sd, prior_mu = 2., 4.
    noise_sd, true_mu = 3., -5.

    data = noise_sd * np.random.randn(n) + true_mu

    post_precision = n / noise_sd**2 + 1 / prior_sd**2
    post_mean = (n * np.mean(data) / noise_sd**2
                 + prior_mu / prior_sd**2) / post_precision

    observed_t = tt.vector()
    observed_t.tag.test_value = np.zeros(1, )

    def rolling_batches(pool):
        # Endless stream: rotate by 100, then emit the first 100 values.
        while True:
            pool = np.roll(pool, 100, axis=0)
            yield (pool[:100], )

    batches = rolling_batches(data)

    with Model():
        mu_rv = Normal('mu', mu=prior_mu, sd=prior_sd, testval=0)
        x_rv = Normal('x', mu=mu_rv, sd=noise_sd, observed=observed_t)
        fitted = advi_minibatch(
            n=1000,
            minibatch_tensors=[observed_t],
            minibatch_RVs=[x_rv],
            minibatches=batches,
            total_size=n,
            learning_rate=1e-1,
        )
        np.testing.assert_allclose(fitted.means['mu'], post_mean, rtol=0.1)
        trace = sample_vp(fitted, 10000)

    mu_draws = trace['mu']
    np.testing.assert_allclose(np.mean(mu_draws), post_mean, rtol=0.4)
    np.testing.assert_allclose(
        np.std(mu_draws), np.sqrt(1. / post_precision), rtol=0.4)

    # Test for n < 10
    with Model():
        mu_rv = Normal('mu', mu=prior_mu, sd=prior_sd, testval=0)
        x_rv = Normal('x', mu=mu_rv, sd=noise_sd, observed=observed_t)
        fitted = advi_minibatch(
            n=5,
            minibatch_tensors=[observed_t],
            minibatch_RVs=[x_rv],
            minibatches=batches,
            total_size=n,
            learning_rate=1e-1,
        )

    # A very large learning rate is expected to produce NaN and raise.
    with self.assertRaises(FloatingPointError):
        with Model():
            mu_rv = Normal('mu', mu=prior_mu, sd=prior_sd, testval=0)
            x_rv = Normal('x', mu=mu_rv, sd=noise_sd, observed=observed_t)
            fitted = advi_minibatch(
                n=1000,
                minibatch_tensors=[observed_t],
                minibatch_RVs=[x_rv],
                minibatches=batches,
                total_size=n,
                learning_rate=1e10,
            )