def main(data_path=None):
    """Run the blog-visit exercises 4.1.4 and 4.1.5.

    Loads the time-between-visits data, prints the maximum-likelihood
    estimate returned by ``compute_MLE_mean_time_between_visits`` and hands
    the log-likelihood curve plus the MLE point to ``plotter``.

    Args:
        data_path: Optional location of ``likelihood_blogVisits.csv``.
            When omitted, the original hard-coded location is used, so a
            bare ``main()`` call behaves exactly as before.
    """
    if data_path is None:
        # Machine-specific default, kept only for backward compatibility.
        data_path = (
            'C:\\Users\\Alexa\\Desktop\\KTH\\EGET\\Bayesian_Ben_Lambert\\GITHUB\\Solutions-to-Problems-in-Bayesian-Statistics\\All_data\\likelihood_blogVisits.csv'
        )
    time_between_visits = loadData(data_path)

    # --- Question 4.1.4 ---
    MLE_first_time_visit_rate = compute_MLE_mean_time_between_visits(
        time_between_visits)
    # NOTE: the message hard-codes "50"; the actual sample size is not
    # checked anywhere in this function.
    print(
        "The MLE of the first time visit rate for a blog, using 50 data points."
    )
    print("MLE estimate for lambda: ", MLE_first_time_visit_rate)

    # --- Question 4.1.5 ---
    log_likelihoods, mean_visit_rates = (
        log_likelihood_as_function_of_mean_visit_rate(time_between_visits))
    # Log-likelihood evaluated at the MLE, passed to the plot alongside the
    # full curve.
    log_likelihood_MLE = compute_log_likelihoods(time_between_visits,
                                                 MLE_first_time_visit_rate)
    plotter(
        log_likelihoods,
        mean_visit_rates,
        MLE_first_time_visit_rate,
        log_likelihood_MLE,
        "The log likelihood as a function of the rate between first time visits",
        "Avg no. visits per min",
        "log likelihood",
    )
def main():
    """Run exercise Q 10.1.7-8 for the disease-outbreak model.

    Builds a ``Disease_outbreaks_model`` with prior parameters (3, 0.5) and
    the data extended by the new observation 20, plots the density returned
    by the model, draws 10000 posterior-predictive samples and prints the
    fraction of samples that are >= 20.

    The solutions to the earlier questions (Q 10.1.1 - 10.1.6) are kept
    below as commented-out code, as in the original script.
    """
    # --- Earlier experiments, intentionally disabled -----------------------
    # data_path = ('C:\\Users\\Alexa\\Desktop\\KTH\\EGET\\Bayesian_Ben_Lambert\\GITHUB\\Solutions-to-Problems-in-Bayesian-Statistics\\All_data\\conjugate_epil.csv')
    # epilepsy_data = loadData(data_path)
    #
    # epilepsy_counts = get_column(epilepsy_data["x"])
    # a = 3
    # b = 0.5
    # data = [3, 7, 4, 10, 11]
    #
    # disease_outbreaks_model = Disease_outbreaks_model(a, b, data)
    #
    # Q 10.1.1 The Gamma Prior
    # gamma_prior, lambda_range = disease_outbreaks_model.gamma_prior()
    # plotter(lambda_range, gamma_prior, 'The prior '+ r'$\Gamma$' +'(3, 0.5)', r'$\lambda$', "pdf")
    #
    # Q 10.1.2 The Gamma conjugate Prior of the Poisson Likelihood
    # gamma_posterior, lambda_range = disease_outbreaks_model.gamma_prior()
    # plotter(lambda_range, gamma_posterior, 'The posterior '+ r'$\Gamma$' +'(3 + 35, 0.5 + 5)', r'$\lambda$', "pdf")
    #
    # Q 10.1.3-4 Posterior predictive
    # no_iterations = 10000
    # posterior_predictive_disease_outbreaks = disease_outbreaks_model.sample_posterior_predictive_distribution(no_iterations)
    #
    # plotter_histogram(posterior_predictive_disease_outbreaks, 'No disease outbreaks', 'Frequency', 'Posterior Predictive distribution over disease outbreaks')
    #
    # print("Data maximum: ", np.max(data))
    # print("Data minimum: ", np.min(data))
    #
    # print('Pr(T(fake) >= T(actual)_max | data) ', np.mean(np.array(posterior_predictive_disease_outbreaks) >= np.max(data)))
    # print('Pr(T(fake) <= T(actual)_min | data) ', np.mean(np.array(posterior_predictive_disease_outbreaks) <= np.min(data)))
    #
    # Q 10.1.6 Posterior predictive
    # print('Pr(T(fake) >= 20 | data) ', np.mean(np.array(posterior_predictive_disease_outbreaks) >= 20 ))
    # -----------------------------------------------------------------------

    # --- Q 10.1.7-8 Posterior predictive ---
    prior_a = 3
    prior_b = 0.5
    observations = [3, 7, 4, 10, 11, 20]
    model = Disease_outbreaks_model(prior_a, prior_b, observations)

    # New posterior.
    # NOTE(review): the density plotted as "posterior" is obtained from
    # ``gamma_prior()``; presumably that method already folds in the data
    # given to the model -- confirm against the class definition.
    density, lambda_values = model.gamma_prior()
    plot_title = r'The posterior $\Gamma$(3 + 35 + 20, 0.5 + 5 + 1)'
    plotter(lambda_values, density, plot_title, r'$\lambda$', "pdf")

    sample_count = 10000
    predictive_draws = model.sample_posterior_predictive_distribution(
        sample_count)
    # Mean of the boolean mask == fraction of draws that reach 20 or more.
    reaches_twenty = np.array(predictive_draws) >= 20
    print('Pr(T(fake) >= 20 | data) ', np.mean(reaches_twenty))
def main(data_path=None):
    """Plot the New York crime data and print the MLE for problem 4.2.3.

    Reads the "Population" and "Violent_crime_count" columns, converts them
    with ``reshape_data`` / ``convert_str_to_int``, passes both series to
    ``plotter`` and prints the result of ``MLE_estimator``.

    Args:
        data_path: Optional location of
            ``likelihood_NewYorkCrimeUnemployment.csv``. When omitted, the
            original hard-coded location is used, so a bare ``main()`` call
            behaves exactly as before.
    """
    if data_path is None:
        # Machine-specific default, kept only for backward compatibility.
        data_path = (
            'C:\\Users\\Alexa\\Desktop\\KTH\\EGET\\Bayesian_Ben_Lambert\\GITHUB\\Solutions-to-Problems-in-Bayesian-Statistics\\All_data\\likelihood_NewYorkCrimeUnemployment.csv'
        )
    crime_dataframe = loadData(data_path)

    # Both columns go through the same reshape -> int conversion pipeline.
    population = convert_str_to_int(
        reshape_data(crime_dataframe["Population"]))
    violent_crime_count = convert_str_to_int(
        reshape_data(crime_dataframe["Violent_crime_count"]))

    plotter(population, violent_crime_count, "population",
            "violent crime count")

    # --- Problem 4.2.3 ---
    # Note the argument order: counts first, population second.
    MLE_estimate = MLE_estimator(violent_crime_count, population)
    print("The MLE is:", MLE_estimate)
def main(data_path=None):
    """Compare generated blog-visit data with the observed data.

    Prints the MLE from ``compute_MLE_mean_time_between_visits`` (question
    4.1.4), generates as many samples as there are observations using the
    reciprocal of that estimate, and passes generated and observed data to
    ``plotter``.

    Args:
        data_path: Optional location of ``likelihood_blogVisits.csv``.
            When omitted, the original hard-coded location is used, so a
            bare ``main()`` call behaves exactly as before.
    """
    if data_path is None:
        # Machine-specific default, kept only for backward compatibility.
        data_path = (
            'C:\\Users\\Alexa\\Desktop\\KTH\\EGET\\Bayesian_Ben_Lambert\\GITHUB\\Solutions-to-Problems-in-Bayesian-Statistics\\All_data\\likelihood_blogVisits.csv'
        )
    time_between_visits = loadData(data_path)

    # --- Question 4.1.4 ---
    MLE_first_time_visit_rate = compute_MLE_mean_time_between_visits(
        time_between_visits)
    # NOTE: the message hard-codes "50"; the actual sample size is not
    # checked anywhere in this function.
    print(
        "The MLE of the first time visit rate for a blog, using 50 data points."
    )
    print("MLE estimate for lambda: ", MLE_first_time_visit_rate)

    # Generate data using the MLE parameter to evaluate the model. The
    # generator is given 1 / rate (presumably a scale/mean parameter --
    # confirm against generate_data_samples) and the observed sample size.
    generated_visits = generate_data_samples(
        1 / MLE_first_time_visit_rate, len(time_between_visits))

    # Plot the generated data against the actual data.
    plotter(generated_visits, time_between_visits)
def main(data_path=None):
    """Plot the negative-binomial likelihood surface over theta_1 and theta_2.

    Loads the "No failuers before 5 successes" column, evaluates
    ``compute_likelihood`` over two identical parameter ranges
    (0.001 up to, but excluding, 0.999 in steps of 0.01) and passes the
    result to ``plotter``.

    Args:
        data_path: Optional location of ``denominator_NBCoins.csv``. When
            omitted, the original hard-coded location is used, so a bare
            ``main()`` call behaves exactly as before.
    """
    if data_path is None:
        # Machine-specific default, kept only for backward compatibility.
        data_path = (
            'C:\\Users\\Alexa\\Desktop\\KTH\\EGET\\Bayesian_Ben_Lambert\\GITHUB\\Solutions-to-Problems-in-Bayesian-Statistics\\All_data\\denominator_NBCoins.csv'
        )
    coin_dataframe = loadData(data_path)

    # The column name (including its spelling) must match the CSV header.
    failuers_before_five_successes = convert_str_to_int(
        reshape_data(coin_dataframe["No failuers before 5 successes"]))

    # Two separate arrays so neither axis aliases the other.
    theta1_range = np.arange(0.001, 0.999, 0.01)
    theta2_range = np.arange(0.001, 0.999, 0.01)

    # Compute the likelihood over the theta_1 and theta_2 ranges.
    likelihoods = compute_likelihood(failuers_before_five_successes,
                                     theta1_range, theta2_range)

    # Plot over the two theta values.
    # NOTE(review): the title says "posterior" while the plotted values come
    # from compute_likelihood -- confirm this wording is intended.
    plotter(
        theta1_range,
        theta2_range,
        likelihoods,
        r'The posterior shape over $\theta_1$ and $\theta_2$ for the Negative Binomial likelihood',
        r"$\theta_1$",
        r"$\theta_2$",
    )
def main(data_path=None):
    """Fit and plot a model of infant mortality against GDP on log scales.

    Loads the "infant.mortality" and "gdp" columns, drops incomplete rows
    via ``remove_missing_data``, fits with ``fit_normal_dist_model``, plots
    the logged data together with the fitted values, and finally calls
    ``calculate_standard_error`` and ``calculate_RMSE``.

    Args:
        data_path: Optional location of ``posterior_gdpInfantMortality.csv``.
            When omitted, the original hard-coded location is used, so a
            bare ``main()`` call behaves exactly as before.
    """
    if data_path is None:
        # Machine-specific default, kept only for backward compatibility.
        data_path = ('C:\\Users\\Alexa\\Desktop\\KTH\\EGET\\Bayesian_Ben_Lambert\\GITHUB\\Solutions-to-Problems-in-Bayesian-Statistics\\All_data\\posterior_gdpInfantMortality.csv')
    GDP_data = loadData(data_path)

    mortality_data = get_column(GDP_data["infant.mortality"])
    gdp_data = get_column(GDP_data["gdp"])

    # Remove data points with missing data, i.e. NaN values.
    gdps, mortalities = remove_missing_data(gdp_data, mortality_data)

    # Fit the data to a normal distribution model, i.e. least squares.
    gdp_log_range, mortality_log_values, intercept, slope = (
        fit_normal_dist_model(gdps, mortalities))

    # Plot the log(data) and the model. The fitted values are flattened to
    # one dimension before being handed to the plotter.
    title = ("A Normal Distribution fit to the logged data - alpha "
             + str(round(intercept, 3)) + " beta " + str(round(slope, 3)))
    plotter(
        np.log(mortalities),
        np.log(gdps),
        "log(mortalities)",
        'log(GDP)',
        gdp_log_range,
        np.array(mortality_log_values).reshape(-1),
        title,
    )

    # Return values are not used here; presumably these helpers report
    # their results themselves -- confirm against their definitions.
    calculate_standard_error(np.log(gdps), np.log(mortalities))
    calculate_RMSE(gdps, mortalities, intercept, slope)
def main(data_path=None):
    """Run exercise Q 9.2.5 for the epilepsy model.

    Loads column "x" of the epilepsy data, builds ``Epilepsy_model(4, 0.25)``
    and plots the values returned by ``posterior_predictive_gamma``. The
    solution to Q 9.2.3 is kept below as commented-out code.

    Args:
        data_path: Optional location of ``conjugate_epil.csv``. When
            omitted, the original hard-coded location is used, so a bare
            ``main()`` call behaves exactly as before.
    """
    if data_path is None:
        # Machine-specific default, kept only for backward compatibility.
        data_path = (
            'C:\\Users\\Alexa\\Desktop\\KTH\\EGET\\Bayesian_Ben_Lambert\\GITHUB\\Solutions-to-Problems-in-Bayesian-Statistics\\All_data\\conjugate_epil.csv'
        )
    epilepsy_data = loadData(data_path)
    epilepsy_counts = get_column(epilepsy_data["x"])

    # Prior parameters handed to the model.
    a = 4
    b = 0.25
    epilepsy_model = Epilepsy_model(a, b)

    # --- Q 9.2.3 The Gamma Posterior (disabled) ---
    # posterior, theta_range = epilepsy_model.posterior_gamma(epilepsy_counts)
    #
    # plotter(theta_range, posterior, "Gamma(a + " +r'$\sum^n_{i=1}(x_i)$'+", b + n) posterior over " + r'$\theta$', r'$\theta$', "pdf")

    # --- Q 9.2.5 The Gamma Posterior Predictive distribution ---
    posterior_NB, theta_range = epilepsy_model.posterior_predictive_gamma(
        epilepsy_counts)
    # NOTE(review): the title below is the same text as the Q 9.2.3 plot
    # ("... posterior over theta") although the values plotted come from
    # posterior_predictive_gamma -- confirm the label is intended.
    plotter(
        theta_range,
        posterior_NB,
        r"Gamma(a + $\sum^n_{i=1}(x_i)$, b + n) posterior over $\theta$",
        r'$\theta$',
        "pdf",
    )
def main():
    """Plot likelihood, prior and posterior surfaces for the election model.

    Steps, in order:
      1. Multinomial likelihood for counts (6, 3, 1) out of 10.
      2. Dirichlet(1, 1, 1) prior, the matching posterior, and the printed
         posterior mean.
      3. Dirichlet(10, 10, 10) prior and the matching posterior.
      4. Multinomial likelihood for counts (60, 30, 10) out of 100 and the
         posterior under the Dirichlet(10, 10, 10) prior.

    Every surface is handed to ``plotter`` over the p_A / p_B grids returned
    by the model; the prior plots additionally pass ``plt.cm.YlGnBu``.
    """
    p_a_label = r'$p_A$'
    p_b_label = r'$p_B$'
    # Shared tail of most plot titles: "$p_A$ and $p_B$".
    pa_and_pb = p_a_label + " and " + p_b_label

    # --- The Multinomial Likelihood for n_A = 6, n_B = 3 and n_C = 1 ---
    Election_Likelihood_Model = Election_model(6, 3, 1, 10)
    # The model returns the p_B grid before the p_A grid.
    likelihoods, pB_range, pA_range = (
        Election_Likelihood_Model.compute_multinomial_likelihood())
    plotter(pA_range, pB_range, likelihoods,
            "Multinomial Likelihood over " + pa_and_pb,
            p_a_label, p_b_label)

    # --- The Conjugate Dirichlet(1, 1, 1) prior ---
    priors, pB_range, pA_range = Election_Likelihood_Model.dirichlet_prior(
        1, 1, 1)
    plotter(pA_range, pB_range, priors,
            "Dirichlet prior using "
            + r'$\alpha_A, \alpha_B, \alpha_C = 1 $'
            + " for " + pa_and_pb,
            p_a_label, p_b_label, plt.cm.YlGnBu)

    # --- The Dirichlet(1, 1, 1) posterior ---
    posterior, pB_range, pA_range = (
        Election_Likelihood_Model.dirichlet_posterior(1, 1, 1))
    plotter(pA_range, pB_range, posterior,
            "Posterior with Multinomial Likelihood and Dirichlet(1, 1, 1) prior over "
            + pa_and_pb,
            p_a_label, p_b_label)

    # --- Posterior mean using the uniform Dirichlet prior ---
    posterior_mean_dirichlet_uniform_prior = (
        Election_Likelihood_Model.posterior_mean(1, 1, 1))
    print("Posterior mean parameters: p_A = ",
          posterior_mean_dirichlet_uniform_prior[0],
          " p_B = ", posterior_mean_dirichlet_uniform_prior[1],
          " p_C = ", posterior_mean_dirichlet_uniform_prior[2])

    # --- The Conjugate Dirichlet(10, 10, 10) prior ---
    priors, pB_range, pA_range = Election_Likelihood_Model.dirichlet_prior(
        10, 10, 10)
    # Fix: the title previously claimed alpha = 1 (copied from the uniform
    # prior plot) although this prior uses alpha = 10.
    plotter(pA_range, pB_range, priors,
            "Dirichlet(10, 10, 10) prior using "
            + r'$\alpha_A, \alpha_B, \alpha_C = 10 $'
            + " for " + pa_and_pb,
            p_a_label, p_b_label, plt.cm.YlGnBu)

    # --- The Dirichlet(10+6, 10+3, 10+1) posterior ---
    posterior, pB_range, pA_range = (
        Election_Likelihood_Model.dirichlet_posterior(10, 10, 10))
    plotter(pA_range, pB_range, posterior,
            "Posterior with Multinomial Likelihood and Dirichlet(10, 10, 10) prior over "
            + pa_and_pb,
            p_a_label, p_b_label)

    # --- The Multinomial Likelihood for n_A = 60, n_B = 30 and n_C = 10 ---
    # The model variable is rebound to the larger sample from here on.
    Election_Likelihood_Model = Election_model(60, 30, 10, 100)
    likelihoods, pB_range, pA_range = (
        Election_Likelihood_Model.compute_multinomial_likelihood())
    plotter(pA_range, pB_range, likelihoods,
            "Multinomial Likelihood over " + pa_and_pb,
            p_a_label, p_b_label)

    # --- The Dirichlet(10 + 60, 10 + 30, 10 + 10) posterior ---
    posterior, pB_range, pA_range = (
        Election_Likelihood_Model.dirichlet_posterior(10, 10, 10))
    plotter(pA_range, pB_range, posterior,
            "Posterior Dirichlet(10 + 60, 10 + 30, 10 + 10) with Multinomial Likelihood and Dirichlet(10, 10, 10) prior over "
            + pa_and_pb,
            p_a_label, p_b_label)