def run(data, iter):
    """Plot how the bootstrap estimates change with the iteration count.

    Calls ``boostrap`` for iteration counts 100, 1100, ..., 99100, collects
    the first three entries of each result (labelled "mean", "lower" and
    "upper"), and draws them as a scatter plot coloured by label.  The plot
    is written to ``bootstrap_confidence<iter>.png`` and ``.pdf`` in the
    current working directory.  Finally the mean and variance of ``data``
    are printed.

    Args:
        data: Sample to resample; must expose ``shape[0]`` (its length is
            passed to ``boostrap`` as the resample size).
        iter: Suffix inserted into the output file names.  NOTE: the name
            shadows the ``iter`` builtin; it is kept so that existing
            keyword callers continue to work.
    """
    boots = []
    for n_iterations in range(100, 100000, 1000):
        boot = boostrap(data, data.shape[0], n_iterations, 95, 5)
        # One row per statistic so seaborn can colour the points by label.
        boots.extend([
            [n_iterations, boot[0], "mean"],
            [n_iterations, boot[1], "lower"],
            [n_iterations, boot[2], "upper"],
        ])

    df_boot = pd.DataFrame(boots,
                           columns=['Boostrap Iterations', 'Mean', "Value"])
    # x and y are passed by keyword: recent seaborn releases accept only
    # ``data`` positionally, while older releases accept these keywords too.
    sns_plot = sns.lmplot(x=df_boot.columns[0],
                          y=df_boot.columns[1],
                          data=df_boot,
                          fit_reg=False,
                          hue="Value")

    # Anchor the y axis at zero and leave its upper limit automatic.
    sns_plot.axes[0, 0].set_ylim(0, )
    sns_plot.axes[0, 0].set_xlim(0, 100000)

    sns_plot.savefig("bootstrap_confidence{}.png".format(iter),
                     bbox_inches='tight')
    sns_plot.savefig("bootstrap_confidence{}.pdf".format(iter),
                     bbox_inches='tight')

    print("Mean: {}".format(np.mean(data)))
    print("Var: {}".format(np.var(data)))
Exemple #2
0
    data = df.values.T[1][:79]
    sns_plot2 = sns.distplot(data, bins=20, kde=False, rug=True).get_figure()
    axes = plt.gca()
    axes.set_xlabel('MPG of new vehicles')
    axes.set_ylabel('Vehicle count')
    sns_plot2.savefig("histogramNew.png", bbox_inches='tight')
    sns_plot2.savefig("histogramNew.pdf", bbox_inches='tight')

    plt.clf()
    data_old = df.values.T[0]
    sns_plot3 = sns.distplot(data_old, bins=20, kde=False,
                             rug=True).get_figure()
    axes = plt.gca()
    axes.set_xlabel('MPG of old vehicles')
    axes.set_ylabel('Vehicle count')
    sns_plot2.savefig("histogramOld.png", bbox_inches='tight')
    sns_plot2.savefig("histogramOld.pdf", bbox_inches='tight')

    old = np.array(data_old)
    new = np.array(data)
    print("Mean of old cars: %f" % np.mean(old))
    print("Mean of new cars: %f" % np.mean(new))

    old_bs_mean, old_bs_lower, old_bs_upper = bs.boostrap(
        old, old.shape[0], 100000, 95)
    new_bs_mean, new_bs_lower, new_bs_upper = bs.boostrap(
        new, new.shape[0], 100000, 95)
    print("Old lower: %f, Old mean: %f, Old upper: %f" %
          (old_bs_lower, old_bs_mean, old_bs_upper))
    print("New lower: %f, New mean: %f, New upper: %f" %
          (new_bs_lower, new_bs_mean, new_bs_upper))
Exemple #3
0

# ***** CODE *****

# Load the vehicle data; column 0 is reported as the current fleet and
# column 1 (after dropping rows with missing values) as the proposed fleet.
df = pd.read_csv('vehicles.csv')

# Create the scatterplot and histogram of the fleets.
create_diagrams()

# ***** Standard deviation comparison via the bootstrap *****
# The result is indexed as [0] -> "std-mean", [1] -> "lower", [2] -> "upper",
# matching the labels printed below.
print("## Current fleet", "", "# Standard deviation", sep="\n")
boot = bootstrap.boostrap(np.std, 10000, df.values.T[0])
print("- upper =", boot[2])
print("- std-mean =", boot[0])
print("- lower =", boot[1])
print()
print("Mean:")
print("- mean =", np.mean(df.values.T[0]))
print()

print("## Proposed fleet", "", "# Standard deviation:", sep="\n")
boot = bootstrap.boostrap(np.std, 10000, df.dropna().values.T[1])
print("- upper =", boot[2])
print("- std-mean =", boot[0])
print("- lower =", boot[1])
Exemple #4
0
    axes.set_ylabel('current fleet count')
    sns_plot_cur.savefig("histogram_cur.png", bbox_inches='tight')

    plt.clf()
    sns_plot_new = sns.distplot(new_flt, bins=20, kde=False,
                                rug=True).get_figure()
    axes = plt.gca()
    axes.set_xlabel('new fleet')
    axes.set_ylabel('new fleet count')
    sns_plot_new.savefig("histogram_new.png", bbox_inches='tight')

    #Exercise: The bootstrap(2)
    mean_cur = np.mean(cur_flt)
    mean_new = np.mean(new_flt)

    boots_cur = boostrap(cur_flt, cur_flt.shape[0], 1000, 0.95)
    boots_new = boostrap(new_flt, new_flt.shape[0], 1000, 0.95)

    upper_cur = boots_cur[2]
    lower_cur = boots_cur[1]

    upper_new = boots_new[2]
    lower_new = boots_new[1]

    print("current fleet:")
    print("mean: ", mean_cur)
    print("upper: ", upper_cur)
    print("lower: ", lower_cur)

    print("-------------------------")
    print("new fleet: ")
Exemple #5
0
    print((("Mean: %f") % (np.mean(data_new_fleet))))
    print((("Median: %f") % (np.median(data_new_fleet))))
    print((("Var: %f") % (np.var(data_new_fleet))))
    print((("std: %f") % (np.std(data_new_fleet))))
    print((("MAD: %f") % (mad(data_new_fleet))))

    plt.clf()
    vehicle_histogram = sns.distplot(data_new_fleet,
                                     bins=20,
                                     kde=False,
                                     rug=True).get_figure()

    axes = plt.gca()
    axes.set_xlabel('New Fleet Vehicle MPG (Miles Per Gallon)')
    axes.set_ylabel('Frequency')

    vehicle_histogram.savefig("vehiclehistogram.png", bbox_inches='tight')
    vehicle_histogram.savefig("vehiclehistogram.pdf", bbox_inches='tight')

    mean_current_fleet = np.mean(data_current_fleet)
    mean_new_fleet = np.mean(data_new_fleet)
    boot_current = bootstrap.boostrap(data_current_fleet,
                                      data_current_fleet.shape[0], 1000)
    boot_new = bootstrap.boostrap(data_new_fleet, data_current_fleet.shape[0],
                                  1000)

    print("Current Fleet: Mean:{} Lower: {} Upper: {}".format(
        mean_current_fleet, boot_current[1], boot_current[2]))
    print("New Fleet: Mean:{} Lower: {} Upper: {}".format(
        mean_new_fleet, boot_new[1], boot_new[2]))