def main(data_path=None):
    """Run the blog-visit likelihood exercises (questions 4.1.4 and 4.1.5).

    Loads the data set, prints the MLE returned by
    ``compute_MLE_mean_time_between_visits`` and hands the log-likelihood
    curve plus the MLE point to ``plotter``.

    Args:
        data_path: Path to ``likelihood_blogVisits.csv``. When omitted, the
            original author's local path is used, so a plain ``main()``
            call behaves exactly as before.
    """
    if data_path is None:
        data_path = (
            'C:\\Users\\Alexa\\Desktop\\KTH\\EGET\\Bayesian_Ben_Lambert\\GITHUB\\Solutions-to-Problems-in-Bayesian-Statistics\\All_data\\likelihood_blogVisits.csv'
        )
    time_between_visits = loadData(data_path)

    # Solution to question 4.1.4
    mle_visit_rate = compute_MLE_mean_time_between_visits(time_between_visits)
    # NOTE(review): the "50 data points" in the message is hard-coded and is
    # not derived from the loaded data - confirm it matches the data set.
    print(
        "The MLE of the first time visit rate for a blog, using 50 data points."
    )
    print("MLE estimate for lambda: ", mle_visit_rate)

    # Solution to question 4.1.5
    log_likelihoods, mean_visit_rates = (
        log_likelihood_as_function_of_mean_visit_rate(time_between_visits))
    log_likelihood_at_mle = compute_log_likelihoods(time_between_visits,
                                                    mle_visit_rate)

    plotter(
        log_likelihoods,
        mean_visit_rates,
        mle_visit_rate,
        log_likelihood_at_mle,
        "The log likelihood as a function of the rate between first time visits",
        "Avg no. visits per min",
        "log likelihood",
    )
def main():
    """Run the disease-outbreak exercises; only Q 10.1.7-8 is active.

    The earlier parts (Q 10.1.1 - 10.1.6) are kept below as disabled code
    for reference. The active part builds a ``Disease_outbreaks_model``
    from the prior parameters and the extended data set, plots the
    distribution returned by the model, and prints the fraction of
    posterior-predictive samples that are at least 20.
    """
    # ---- Disabled: loading the epilepsy data ----
    # data_path = ('C:\\Users\\Alexa\\Desktop\\KTH\\EGET\\Bayesian_Ben_Lambert\\GITHUB\\Solutions-to-Problems-in-Bayesian-Statistics\\All_data\\conjugate_epil.csv')
    # epilepsy_data = loadData(data_path)
    #
    # epilepsy_counts = get_column(epilepsy_data["x"])

    # ---- Disabled: model for the original five observations ----
    # a = 3
    # b = 0.5
    # data = [3, 7, 4, 10, 11]
    #
    # disease_outbreaks_model = Disease_outbreaks_model(a, b, data)

    # ---- Disabled: Q 10.1.1 The Gamma Prior ----
    # gamma_prior, lambda_range = disease_outbreaks_model.gamma_prior()
    # plotter(lambda_range, gamma_prior, 'The prior '+ r'$\Gamma$' +'(3, 0.5)', r'$\lambda$', "pdf")

    # ---- Disabled: Q 10.1.2 The Gamma conjugate Prior of the Poisson Likelihood ----
    # gamma_posterior, lambda_range = disease_outbreaks_model.gamma_prior()
    # plotter(lambda_range, gamma_posterior, 'The posterior '+ r'$\Gamma$' +'(3 + 35, 0.5 + 5)', r'$\lambda$', "pdf")

    # ---- Disabled: Q 10.1.3-4 Posterior predictive ----
    # no_iterations = 10000
    # posterior_predictive_disease_outbreaks = disease_outbreaks_model.sample_posterior_predictive_distribution(no_iterations)
    #
    # # plotter_histogram(posterior_predictive_disease_outbreaks, 'No disease outbreaks', 'Frequency', 'Posterior Predictive distribution over disease outbreaks')
    #
    # print("Data maximum: ", np.max(data))
    # print("Data minimum: ", np.min(data))
    #
    # print('Pr(T(fake) >= T(actual)_max | data) ', np.mean(np.array(posterior_predictive_disease_outbreaks) >= np.max(data)))
    # print('Pr(T(fake) <= T(actual)_min | data) ', np.mean(np.array(posterior_predictive_disease_outbreaks) <= np.min(data)))

    # ---- Disabled: Q 10.1.6 Posterior predictive ----
    # print('Pr(T(fake) >= 20 | data) ', np.mean(np.array(posterior_predictive_disease_outbreaks) >= 20 ))

    # ---- Active: Q 10.1.7-8 Posterior predictive ----
    # Same prior parameters as above; the data gains a sixth observation (20).
    observations = [3, 7, 4, 10, 11, 20]
    prior_a = 3
    prior_b = 0.5
    model = Disease_outbreaks_model(prior_a, prior_b, observations)

    # New posterior.
    # NOTE(review): the curve labelled "posterior" is obtained from
    # gamma_prior() - confirm that this method already accounts for the data.
    posterior_pdf, lambda_values = model.gamma_prior()
    posterior_title = ('The posterior ' + r'$\Gamma$' +
                       '(3 + 35 + 20, 0.5 + 5 + 1)')
    plotter(lambda_values, posterior_pdf, posterior_title, r'$\lambda$', "pdf")

    sample_count = 10000
    predictive_samples = model.sample_posterior_predictive_distribution(
        sample_count)
    at_least_twenty = np.array(predictive_samples) >= 20
    print('Pr(T(fake) >= 20 | data) ', np.mean(at_least_twenty))
# Example #3
# 0
def main(data_path=None):
    """Plot violent crime count against population and print the MLE (4.2.3).

    Args:
        data_path: Path to ``likelihood_NewYorkCrimeUnemployment.csv``. When
            omitted, the original author's local path is used, so a plain
            ``main()`` call behaves exactly as before.
    """
    if data_path is None:
        data_path = (
            'C:\\Users\\Alexa\\Desktop\\KTH\\EGET\\Bayesian_Ben_Lambert\\GITHUB\\Solutions-to-Problems-in-Bayesian-Statistics\\All_data\\likelihood_NewYorkCrimeUnemployment.csv'
        )
    crime_dataframe = loadData(data_path)

    # Both columns go through reshape_data and then convert_str_to_int
    # before they are used.
    population = convert_str_to_int(
        reshape_data(crime_dataframe["Population"]))
    violent_crime_count = convert_str_to_int(
        reshape_data(crime_dataframe["Violent_crime_count"]))

    plotter(population, violent_crime_count, "population",
            "violent crime count")

    # Problem 4.2.3
    mle_estimate = MLE_estimator(violent_crime_count, population)
    print("The MLE is:", mle_estimate)
# Example #4
# 0
def main(data_path=None):
    """Compare data generated from the MLE against the observed blog visits.

    Prints the MLE (question 4.1.4), generates as many samples as there are
    observations using ``1 / MLE`` as the parameter, and passes generated
    and observed data to ``plotter``.

    Args:
        data_path: Path to ``likelihood_blogVisits.csv``. When omitted, the
            original author's local path is used, so a plain ``main()``
            call behaves exactly as before.
    """
    if data_path is None:
        data_path = (
            'C:\\Users\\Alexa\\Desktop\\KTH\\EGET\\Bayesian_Ben_Lambert\\GITHUB\\Solutions-to-Problems-in-Bayesian-Statistics\\All_data\\likelihood_blogVisits.csv'
        )
    time_between_visits = loadData(data_path)

    # Solution to question 4.1.4
    mle_visit_rate = compute_MLE_mean_time_between_visits(time_between_visits)
    print(
        "The MLE of the first time visit rate for a blog, using 50 data points."
    )
    print("MLE estimate for lambda: ", mle_visit_rate)

    # Generate data using the MLE parameter for the visit rate to evaluate
    # the model. The generator receives the reciprocal of the rate.
    generated_visits = generate_data_samples(1 / mle_visit_rate,
                                             len(time_between_visits))

    # Plot the generated data against the actual data.
    plotter(generated_visits, time_between_visits)
# Example #5
# 0
def main(data_path=None):
    """Plot the likelihood surface over theta_1 and theta_2 for the coin data.

    Args:
        data_path: Path to ``denominator_NBCoins.csv``. When omitted, the
            original author's local path is used, so a plain ``main()``
            call behaves exactly as before.
    """
    if data_path is None:
        data_path = (
            'C:\\Users\\Alexa\\Desktop\\KTH\\EGET\\Bayesian_Ben_Lambert\\GITHUB\\Solutions-to-Problems-in-Bayesian-Statistics\\All_data\\denominator_NBCoins.csv'
        )
    coin_dataframe = loadData(data_path)

    # The column name (including its spelling) must match the CSV header.
    failures_before_five_successes = convert_str_to_int(
        reshape_data(coin_dataframe["No failuers before 5 successes"]))

    # Identical grids for both parameters: 0.001 up to (excluding) 0.999 in
    # steps of 0.01. Two separate arrays are kept on purpose.
    theta1_range = np.arange(0.001, 0.999, 0.01)
    theta2_range = np.arange(0.001, 0.999, 0.01)

    # Compute the likelihood over the theta_1 and theta_2 ranges.
    likelihoods = compute_likelihood(failures_before_five_successes,
                                     theta1_range, theta2_range)

    # Create a contour plot over the two theta values.
    plotter(
        theta1_range,
        theta2_range,
        likelihoods,
        r'The posterior shape over $\theta_1$ and $\theta_2$ for the Negative Binomial likelihood',
        r"$\theta_1$",
        r"$\theta_2$",
    )
# Example #6
# 0
def main(data_path=None):
    """Fit and plot a model of infant mortality against GDP on the log scale.

    Loads the data, drops incomplete observations, fits the model via
    ``fit_normal_dist_model``, plots the logged data with the fit, and then
    calls ``calculate_standard_error`` and ``calculate_RMSE`` (their return
    values are not used here).

    Args:
        data_path: Path to ``posterior_gdpInfantMortality.csv``. When
            omitted, the original author's local path is used, so a plain
            ``main()`` call behaves exactly as before.
    """
    if data_path is None:
        data_path = ('C:\\Users\\Alexa\\Desktop\\KTH\\EGET\\Bayesian_Ben_Lambert\\GITHUB\\Solutions-to-Problems-in-Bayesian-Statistics\\All_data\\posterior_gdpInfantMortality.csv')
    GDP_data = loadData(data_path)

    mortality_data = get_column(GDP_data["infant.mortality"])
    gdp_data = get_column(GDP_data["gdp"])

    # Remove data points with missing data, i.e. NaN values.
    gdps, mortalities = remove_missing_data(gdp_data, mortality_data)

    # Fit the data to a normal distribution model, i.e. least squares.
    gdp_log_range, mortality_log_values, intercept, slope = (
        fit_normal_dist_model(gdps, mortalities))

    # Plot the log(data) and the model; the fitted values are flattened to
    # one dimension before plotting.
    fit_title = ("A Normal Distribution fit to the logged data - alpha " +
                 str(round(intercept, 3)) + " beta " + str(round(slope, 3)))
    plotter(
        np.log(mortalities),
        np.log(gdps),
        "log(mortalities)",
        'log(GDP)',
        gdp_log_range,
        np.array(mortality_log_values).reshape(-1),
        fit_title,
    )

    # NOTE(review): the standard error is given logged data while the RMSE
    # is given the raw data - confirm both helpers expect these scales.
    calculate_standard_error(np.log(gdps), np.log(mortalities))

    calculate_RMSE(gdps, mortalities, intercept, slope)
# Example #7
# 0
def main(data_path=None):
    """Plot the posterior predictive of the epilepsy model (Q 9.2.5).

    Builds an ``Epilepsy_model`` with ``a = 4`` and ``b = 0.25``, evaluates
    ``posterior_predictive_gamma`` on the ``"x"`` column of the data and
    passes the result to ``plotter``. The Q 9.2.3 posterior plot is kept as
    disabled code.

    Args:
        data_path: Path to ``conjugate_epil.csv``. When omitted, the
            original author's local path is used, so a plain ``main()``
            call behaves exactly as before.
    """
    if data_path is None:
        data_path = (
            'C:\\Users\\Alexa\\Desktop\\KTH\\EGET\\Bayesian_Ben_Lambert\\GITHUB\\Solutions-to-Problems-in-Bayesian-Statistics\\All_data\\conjugate_epil.csv'
        )
    epilepsy_data = loadData(data_path)

    epilepsy_counts = get_column(epilepsy_data["x"])

    a = 4
    b = 0.25
    epilepsy_model = Epilepsy_model(a, b)

    # Q 9.2.3 The Gamma Posterior (disabled)
    # posterior, theta_range = epilepsy_model.posterior_gamma(epilepsy_counts)
    #
    # plotter(theta_range, posterior, "Gamma(a + " +r'$\sum^n_{i=1}(x_i)$'+", b + n) posterior over " + r'$\theta$', r'$\theta$', "pdf")

    # Q 9.2.5 The Gamma Posterior Predictive distribution
    posterior_NB, theta_range = epilepsy_model.posterior_predictive_gamma(
        epilepsy_counts)

    # NOTE(review): title and axis label are the same as in the disabled
    # Q 9.2.3 posterior plot - confirm they suit the posterior predictive.
    plot_title = ("Gamma(a + " + r'$\sum^n_{i=1}(x_i)$' +
                  ", b + n) posterior over " + r'$\theta$')
    plotter(theta_range, posterior_NB, plot_title, r'$\theta$', "pdf")
# Example #8
# 0
def main():
    """Plot likelihoods, Dirichlet priors and posteriors for the election model.

    Runs three scenarios in order:

    1. Counts (6, 3, 1) out of 10 with a Dirichlet(1, 1, 1) prior:
       likelihood, prior, posterior, and the printed posterior mean.
    2. The same counts with a Dirichlet(10, 10, 10) prior: prior and
       posterior.
    3. Counts (60, 30, 10) out of 100 with a Dirichlet(10, 10, 10) prior:
       likelihood and posterior.

    Every model method used here returns ``(values, pB_range, pA_range)``,
    which are passed to ``plotter`` as ``(pA_range, pB_range, values, ...)``.
    """
    # Label fragments shared by all the plots.
    label_pA = r'$p_A$'
    label_pB = r'$p_B$'
    pA_and_pB = label_pA + " and " + label_pB

    # --- The Multinomial Likelihood for n_A = 6, n_B = 3 and n_C = 1 ---
    small_poll = Election_model(6, 3, 1, 10)

    likelihoods, pB_range, pA_range = (
        small_poll.compute_multinomial_likelihood())
    plotter(pA_range, pB_range, likelihoods,
            "Multinomial Likelihood over " + pA_and_pB, label_pA, label_pB)

    # --- The conjugate Dirichlet(1, 1, 1) prior ---
    priors, pB_range, pA_range = small_poll.dirichlet_prior(1, 1, 1)
    plotter(
        pA_range, pB_range, priors,
        "Dirichlet prior using " + r'$\alpha_A, \alpha_B, \alpha_C = 1 $' +
        " for  " + pA_and_pB, label_pA, label_pB, plt.cm.YlGnBu)

    # --- The Dirichlet(1, 1, 1) posterior ---
    posterior, pB_range, pA_range = small_poll.dirichlet_posterior(1, 1, 1)
    plotter(
        pA_range, pB_range, posterior,
        "Posterior with Multinomial Likelihood and Dirichlet(1, 1, 1) prior over  "
        + pA_and_pB, label_pA, label_pB)

    # --- Posterior mean using the uniform Dirichlet prior ---
    posterior_mean = small_poll.posterior_mean(1, 1, 1)
    print("Posterior mean parameters: p_A = ", posterior_mean[0], " p_B = ",
          posterior_mean[1], " p_C = ", posterior_mean[2])

    # --- The conjugate Dirichlet(10, 10, 10) prior ---
    priors, pB_range, pA_range = small_poll.dirichlet_prior(10, 10, 10)
    # The title previously claimed alpha = 1 (copied from the uniform-prior
    # plot above) although this prior uses alpha = 10.
    plotter(
        pA_range, pB_range, priors,
        "Dirichlet(10, 10, 10) prior using " +
        r'$\alpha_A, \alpha_B, \alpha_C = 10 $' + " for  " + pA_and_pB,
        label_pA, label_pB, plt.cm.YlGnBu)

    # --- The Dirichlet(10 + 6, 10 + 3, 10 + 1) posterior ---
    posterior, pB_range, pA_range = small_poll.dirichlet_posterior(10, 10, 10)
    plotter(
        pA_range, pB_range, posterior,
        "Posterior with Multinomial Likelihood and Dirichlet(10, 10, 10) prior over  "
        + pA_and_pB, label_pA, label_pB)

    # --- The Multinomial Likelihood for n_A = 60, n_B = 30 and n_C = 10 ---
    large_poll = Election_model(60, 30, 10, 100)

    likelihoods, pB_range, pA_range = (
        large_poll.compute_multinomial_likelihood())
    plotter(pA_range, pB_range, likelihoods,
            "Multinomial Likelihood over " + pA_and_pB, label_pA, label_pB)

    # --- The Dirichlet(10 + 60, 10 + 30, 10 + 10) posterior ---
    posterior, pB_range, pA_range = large_poll.dirichlet_posterior(10, 10, 10)
    plotter(
        pA_range, pB_range, posterior,
        "Posterior Dirichlet(10 + 60, 10 + 30, 10 + 10) with Multinomial Likelihood and Dirichlet(10, 10, 10) prior over  "
        + pA_and_pB, label_pA, label_pB)