def test_mixture_list_of_poissons(self):
    """Sample a two-component Poisson mixture and check the posterior means.

    The mixture is built from an explicit list of two ``Poisson.dist``
    components.  Posterior means of ``w`` and ``mu`` are compared, after
    sorting, against ``self.pois_w`` and ``self.pois_mu``; sorting makes the
    comparison independent of the order in which the components come out.
    """
    with Model():
        weights = Dirichlet(
            "w",
            floatX(np.ones_like(self.pois_w)),
            shape=self.pois_w.shape,
        )
        rates = Gamma("mu", 1.0, 1.0, shape=self.pois_w.size)
        components = [Poisson.dist(rates[0]), Poisson.dist(rates[1])]
        Mixture("x_obs", weights, components, observed=self.pois_x)

        metropolis = Metropolis()
        trace = sample(
            5000,
            metropolis,
            random_seed=self.random_seed,
            progressbar=False,
            chains=1,
        )

    # Same tolerance for both parameters; compare sorted values.
    for var_name, reference in (("w", self.pois_w), ("mu", self.pois_mu)):
        posterior_mean = trace[var_name].mean(axis=0)
        assert_allclose(
            np.sort(posterior_mean),
            np.sort(reference),
            rtol=0.1,
            atol=0.1,
        )
def test_mixture_list_of_poissons(self):
    """Check posterior means of a Poisson mixture given as a list of components.

    Draws 5000 Metropolis samples and asserts that the sorted posterior means
    of ``w`` and ``mu`` are within ``rtol=0.1`` / ``atol=0.1`` of the sorted
    reference values ``self.pois_w`` and ``self.pois_mu``.
    """
    prior_concentration = np.ones_like(self.pois_w)

    with Model():
        weights = Dirichlet('w', prior_concentration)
        rates = Gamma('mu', 1., 1., shape=self.pois_w.size)
        Mixture(
            'x_obs',
            weights,
            [Poisson.dist(rates[0]), Poisson.dist(rates[1])],
            observed=self.pois_x,
        )
        trace = sample(
            5000,
            Metropolis(),
            random_seed=self.random_seed,
            progressbar=False,
        )

    estimated_w = np.sort(trace['w'].mean(axis=0))
    estimated_mu = np.sort(trace['mu'].mean(axis=0))

    assert_allclose(estimated_w, np.sort(self.pois_w), rtol=0.1, atol=0.1)
    assert_allclose(estimated_mu, np.sort(self.pois_mu), rtol=0.1, atol=0.1)
def test_check_discrete_minibatch():
    """Assert that ``advi_minibatch`` raises ``ValueError`` on the disaster model.

    The model contains a ``DiscreteUniform`` switchpoint; the test only checks
    that calling ``advi_minibatch`` on it raises ``ValueError``.
    """
    # Symbolic placeholder for the observations, with a test value of the
    # same length as the module-level ``disaster_data``.
    disaster_data_t = tt.vector()
    disaster_data_t.tag.test_value = np.zeros(len(disaster_data))

    with Model() as disaster_model:
        switchpoint = DiscreteUniform(
            'switchpoint', lower=year.min(), upper=year.max(), testval=1900)

        # Priors for the pre- and post-switch disaster rates.
        early_rate = Exponential('early_rate', 1)
        late_rate = Exponential('late_rate', 1)

        # Assign the appropriate Poisson rate to the years before and after
        # the switchpoint.
        rate = switch(switchpoint >= year, early_rate, late_rate)

        disasters = Poisson('disasters', rate, observed=disaster_data_t)

    def create_minibatch():
        # ``yield`` rather than ``return``: with ``return`` the ``while True``
        # loop never repeated and the caller received a plain tuple instead
        # of an endless generator of minibatches.
        while True:
            yield (disaster_data, )

    # This should raise ValueError
    assert_raises(
        ValueError,
        advi_minibatch,
        model=disaster_model,
        n=10,
        minibatch_RVs=[disasters],
        minibatch_tensors=[disaster_data_t],
        minibatches=create_minibatch(),
        verbose=False,
    )
def test_mixture_list_of_poissons(self):
    """Recover mixture weights and rates from a list-of-Poissons mixture.

    The model is sampled with Metropolis (5000 draws).  The sorted posterior
    means of ``w`` and ``mu`` must match the sorted ``self.pois_w`` and
    ``self.pois_mu`` within ``rtol=0.1`` and ``atol=0.1``.
    """
    def sorted_posterior_mean(trace, var_name):
        # Mean over the sample axis, sorted so component order is irrelevant.
        return np.sort(trace[var_name].mean(axis=0))

    with Model():
        weights = Dirichlet('w', np.ones_like(self.pois_w))
        rates = Gamma('mu', 1., 1., shape=self.pois_w.size)
        poisson_components = [Poisson.dist(rates[0]), Poisson.dist(rates[1])]
        Mixture('x_obs', weights, poisson_components, observed=self.pois_x)

        sampler_step = Metropolis()
        trace = sample(
            5000,
            sampler_step,
            random_seed=self.random_seed,
            progressbar=False,
        )

    tolerances = dict(rtol=0.1, atol=0.1)
    assert_allclose(
        sorted_posterior_mean(trace, 'w'), np.sort(self.pois_w), **tolerances)
    assert_allclose(
        sorted_posterior_mean(trace, 'mu'), np.sort(self.pois_mu), **tolerances)
def test_check_discrete_minibatch(self):
    """Assert that ``advi_minibatch`` raises ``ValueError`` on the disaster model.

    The model contains a ``DiscreteUniform`` switchpoint; the test only checks
    that calling ``advi_minibatch`` inside that model context raises
    ``ValueError``.
    """
    # Symbolic placeholder for the observations, with a test value of the
    # same length as ``self.disaster_data``.
    disaster_data_t = tt.vector()
    disaster_data_t.tag.test_value = np.zeros(len(self.disaster_data))

    def create_minibatches():
        # ``yield`` rather than ``return``: with ``return`` the ``while True``
        # loop never repeated and the caller received a plain tuple instead
        # of an endless generator of minibatches.
        while True:
            yield (self.disaster_data, )

    with Model():
        switchpoint = DiscreteUniform(
            'switchpoint',
            lower=self.year.min(),
            upper=self.year.max(),
            testval=1900,
        )

        # Priors for the pre- and post-switch disaster rates.
        early_rate = Exponential('early_rate', 1)
        late_rate = Exponential('late_rate', 1)

        # Assign the appropriate Poisson rate to the years before and after
        # the switchpoint.
        rate = tt.switch(switchpoint >= self.year, early_rate, late_rate)

        disasters = Poisson('disasters', rate, observed=disaster_data_t)

        # No ``model=`` argument is passed, so the call stays inside the
        # ``with Model()`` block.
        with self.assertRaises(ValueError):
            advi_minibatch(
                n=10,
                minibatch_RVs=[disasters],
                minibatch_tensors=[disaster_data_t],
                minibatches=create_minibatches(),
            )
def test_check_discrete(self):
    """Assert that ``advi`` raises ``ValueError`` on the disaster model.

    The model built here contains a ``DiscreteUniform`` switchpoint and a
    Poisson likelihood observed on ``self.disaster_data``.
    """
    first_year = self.year.min()
    last_year = self.year.max()

    with Model():
        change_year = DiscreteUniform(
            'switchpoint', lower=first_year, upper=last_year, testval=1900)

        # Priors on the disaster rate before and after the switchpoint.
        rate_before = Exponential('early_rate', 1)
        rate_after = Exponential('late_rate', 1)

        # Per-year Poisson rate: the early rate up to and including the
        # switchpoint year, the late rate afterwards.
        yearly_rate = tt.switch(change_year >= self.year, rate_before, rate_after)

        Poisson('disasters', yearly_rate, observed=self.disaster_data)

        # The call is expected to raise ValueError; it relies on the
        # enclosing model context because no model is passed explicitly.
        with self.assertRaises(ValueError):
            advi(n=10)
def test_check_discrete():
    """Assert that ``advi`` raises ``ValueError`` on the disaster model.

    The model is built from the module-level ``year`` and ``disaster_data``
    and contains a ``DiscreteUniform`` switchpoint.
    """
    with Model() as disaster_model:
        change_year = DiscreteUniform(
            'switchpoint',
            lower=year.min(),
            upper=year.max(),
            testval=1900,
        )

        # Priors on the disaster rate before and after the switchpoint.
        rate_before = Exponential('early_rate', 1)
        rate_after = Exponential('late_rate', 1)

        # Per-year Poisson rate: the early rate up to and including the
        # switchpoint year, the late rate afterwards.
        yearly_rate = switch(change_year >= year, rate_before, rate_after)

        Poisson('disasters', yearly_rate, observed=disaster_data)

    # Running ADVI on this model is expected to raise ValueError.
    assert_raises(ValueError, advi, model=disaster_model, n=10)
# Label and display the plot of the raw data.
plt.ylabel("Disaster count")
plt.xlabel("Year")
plt.show()

from pymc3 import (
    DiscreteUniform,
    Exponential,
    Metropolis,
    Model,
    NUTS,
    Poisson,
    sample,
    switch,
    traceplot,
)

# Switchpoint model: one Poisson rate up to the switchpoint year and another
# rate afterwards.
with Model() as disaster_model:

    switchpoint = DiscreteUniform(
        'switchpoint',
        lower=year.min(),
        upper=year.max(),
        testval=1900,
    )

    # Priors for the pre- and post-switch disaster rates.
    early_rate = Exponential('early_rate', 1)
    late_rate = Exponential('late_rate', 1)

    # Assign the appropriate Poisson rate to the years before and after the
    # switchpoint.
    rate = switch(switchpoint >= year, early_rate, late_rate)

    disasters = Poisson('disasters', rate, observed=disaster_data)

    # NUTS handles the two continuous rate parameters.
    step1 = NUTS([early_rate, late_rate])

    # Metropolis handles the switchpoint and the missing observations, since
    # it accommodates discrete variables.
    step2 = Metropolis([switchpoint, disasters.missing_values[0]])

    trace = sample(10000, step=[step1, step2])

traceplot(trace)
def mcmc_changepoint(dates, ratings, mcmc_iter=1000, discrete=0, plot_result=1):
    """Estimate a single switch point in a sequence of Yelp review ratings.

    Reviews are modelled as coming from two distributions separated by a
    switch point: reviews whose index is at or before the switch point are
    drawn from the "before" distribution, later reviews from the "after"
    distribution.  Normal distributions are used when the ratings have been
    normalized to the user's average rating (``discrete=0``); Poisson
    distributions are used when analysing raw 1-5 star ratings
    (``discrete=1``).

    The posterior is sampled with ``sample(mcmc_iter)``; no step method is
    passed, so the samplers used are whatever ``sample`` assigns by default.

    Args:
        dates: Array of dates when the reviews were posted.
        ratings: Array of the ratings given by each review.
        mcmc_iter: How many MCMC iterations to run.
        discrete: 0 to model the ratings with Normal distributions
            (user-averaged ratings), 1 to model them with Poisson
            distributions (1-5 stars).
        plot_result: If equal to 1, show the trace plot and a plot of the
            ratings with the posterior expected rating overlaid.

    Returns:
        A tuple ``(differential, modal_switch, expected_stars, sigma_est,
        switch_posterior)``:

        * ``differential`` -- mode of ``before_intensity`` minus mode of
          ``after_intensity``.
        * ``modal_switch`` -- mode of the sampled switch points.
        * ``expected_stars`` -- posterior mean rating for each review index.
        * ``sigma_est`` -- mode of the sampled ``sigma``; ``None`` when
          ``discrete=1`` because the Poisson model has no ``sigma``.
        * ``switch_posterior`` -- the sampled switch points.

    Raises:
        ValueError: If ``discrete`` is neither 0 nor 1.
    """
    if discrete not in (0, 1):
        # Previously this fell through both branches and failed later with a
        # NameError on the undefined model.
        raise ValueError("discrete must be 0 or 1, got %r" % (discrete,))

    number_of_ratings = np.arange(0, len(ratings))

    with Model() as switch_model:
        switchpoint = DiscreteUniform('switchpoint', lower=0, upper=len(dates))
        if discrete == 0:
            before_intensity = Normal('before_intensity', mu=0, sd=1)
            after_intensity = Normal('after_intensity', mu=0, sd=1)
            intensity = switch(switchpoint >= number_of_ratings,
                               before_intensity, after_intensity)
            sigma = HalfNormal('sigma', sd=1)
            Normal('rating', mu=intensity, sd=sigma, observed=ratings)
        else:
            before_intensity = Exponential('before_intensity', 1)
            after_intensity = Exponential('after_intensity', 1)
            intensity = switch(switchpoint >= number_of_ratings,
                               before_intensity, after_intensity)
            Poisson('rating', intensity, observed=ratings)

    with switch_model:
        trace = sample(mcmc_iter)

    if plot_result == 1:
        traceplot(trace)
        plt.show()

    switch_posterior = trace['switchpoint']
    n_samples = switch_posterior.shape[0]
    before_intensity_posterior = trace['before_intensity']
    after_intensity_posterior = trace['after_intensity']

    expected_stars = np.zeros(len(ratings))
    for rating_index in number_of_ratings:
        # Use the same rule as the model (`switchpoint >= index` selects the
        # "before" intensity) so the review at the switch index itself is
        # averaged with the distribution that generated it.
        uses_before = rating_index <= switch_posterior
        expected_stars[rating_index] = (
            before_intensity_posterior[uses_before].sum()
            + after_intensity_posterior[~uses_before].sum()
        ) / n_samples

    if plot_result == 1:
        plt.plot(dates, ratings, 'o')
        plt.plot(dates, expected_stars, 'b-')
        plt.show()

    # Summarise each posterior by its mode (the counts are not used).
    before_mode, _ = scipy.stats.mode(before_intensity_posterior)
    after_mode, _ = scipy.stats.mode(after_intensity_posterior)
    modal_switch, _ = scipy.stats.mode(switch_posterior)

    if discrete == 0:
        sigma_est, _ = scipy.stats.mode(trace['sigma'])
    else:
        # The Poisson model defines no 'sigma' variable, so there is nothing
        # to look up in the trace.
        sigma_est = None

    differential = before_mode - after_mode
    return differential, modal_switch, expected_stars, sigma_est, switch_posterior
basic_model = Model()

# Specify model components
with basic_model:

    # Priors for unknown model parameters (stochastic random variables):
    # one scaling factor per component.
    s = Uniform('s', lower=0, upper=200000, shape=5)

    # Expected value of the outcome (deterministic): each component pdf
    # multiplied by its own scaling factor.
    sim_2v_y = s[0] * pdf_2v_y  # scaled bg component
    sim_Th_y = s[1] * pdf_Th_y
    sim_U_y = s[2] * pdf_U_y
    sim_K_y = s[3] * pdf_K_y
    sim_Co_y = s[4] * pdf_Co_y

    model_y = sim_2v_y + sim_Th_y + sim_U_y + sim_K_y + sim_Co_y

    # Likelihood (sampling distribution) of the observations, restricted to
    # the fit window [fit_index_lo, fit_index_hi).
    L = Poisson(
        'L',
        mu=model_y[fit_index_lo:fit_index_hi],
        observed=data_y[fit_index_lo:fit_index_hi],
    )

###############
# Fit model and get posterior estimates for parameters
###############

# Import a sampler
from pymc3 import Metropolis, HamiltonianMC, sample

# Set up the sampler within the context of the model
with basic_model:

    # Starting value guesses: all five scaling factors at zero.
    start = {'s': [0., 0., 0., 0., 0.]}

    # Instantiate the sampler
    step = HamiltonianMC([s])