def run(data, iter):
    """Plot bootstrap results against the number of bootstrap iterations.

    For every iteration count in ``range(100, 100000, 1000)`` the external
    ``boostrap`` helper is called on ``data`` and the first three entries of
    its result are recorded, labelled "mean", "lower" and "upper"
    respectively. The collected points are drawn as a scatter plot (one hue
    per label), saved as PNG and PDF, and the mean and variance of ``data``
    are printed.

    Args:
        data: Sample to analyse; must expose ``.shape`` (presumably a 1-D
            NumPy array -- confirm against the caller).
        iter: Value formatted into the output file names
            (``bootstrap_confidence{iter}.png`` / ``.pdf``). The name shadows
            the builtin ``iter`` but is kept so existing callers keep working.
    """
    boots = []
    for i in range(100, 100000, 1000):
        boot = boostrap(data, data.shape[0], i, 95, 5)
        boots.extend([
            [i, boot[0], "mean"],
            [i, boot[1], "lower"],
            [i, boot[2], "upper"],
        ])

    df_boot = pd.DataFrame(
        boots, columns=['Boostrap Iterations', 'Mean', "Value"])

    # Pass x/y by keyword: recent seaborn releases accept only ``data``
    # positionally, and the keyword form also works on older versions.
    sns_plot = sns.lmplot(
        x=df_boot.columns[0],
        y=df_boot.columns[1],
        data=df_boot,
        fit_reg=False,
        hue="Value",
    )

    # Anchor both axes at zero; the x axis spans the full iteration range.
    sns_plot.axes[0, 0].set_ylim(0, )
    sns_plot.axes[0, 0].set_xlim(0, 100000)

    sns_plot.savefig(
        "bootstrap_confidence{}.png".format(iter), bbox_inches='tight')
    sns_plot.savefig(
        "bootstrap_confidence{}.pdf".format(iter), bbox_inches='tight')

    print("Mean: {}".format(np.mean(data)))
    print("Var: {}".format(np.var(data)))
# --- Histogram of the new vehicles -----------------------------------------
# Second column of the frame, first 79 entries only -- presumably the rows
# that actually hold values for the new vehicles; TODO confirm.
data = df.values.T[1][:79]
sns_plot2 = sns.distplot(data, bins=20, kde=False, rug=True).get_figure()

axes = plt.gca()
axes.set_xlabel('MPG of new vehicles')
axes.set_ylabel('Vehicle count')

sns_plot2.savefig("histogramNew.png", bbox_inches='tight')
sns_plot2.savefig("histogramNew.pdf", bbox_inches='tight')

# Clear the current figure so the next histogram is not drawn on top.
plt.clf()

# --- Histogram of the old vehicles -----------------------------------------
data_old = df.values.T[0]
sns_plot3 = sns.distplot(data_old, bins=20, kde=False, rug=True).get_figure()

axes = plt.gca()
axes.set_xlabel('MPG of old vehicles')
axes.set_ylabel('Vehicle count')

# Save through the figure handle that belongs to this histogram rather than
# the one created for the new vehicles.
sns_plot3.savefig("histogramOld.png", bbox_inches='tight')
sns_plot3.savefig("histogramOld.pdf", bbox_inches='tight')

# --- Means and bootstrap results -------------------------------------------
old = np.array(data_old)
new = np.array(data)

print("Mean of old cars: %f" % np.mean(old))
print("Mean of new cars: %f" % np.mean(new))

# Each sample is passed together with its own size; the result is unpacked
# as (mean, lower, upper).
old_bs_mean, old_bs_lower, old_bs_upper = bs.boostrap(
    old, old.shape[0], 100000, 95)
new_bs_mean, new_bs_lower, new_bs_upper = bs.boostrap(
    new, new.shape[0], 100000, 95)

print("Old lower: %f, Old mean: %f, Old upper: %f" %
      (old_bs_lower, old_bs_mean, old_bs_upper))
print("New lower: %f, New mean: %f, New upper: %f" %
      (new_bs_lower, new_bs_mean, new_bs_upper))
# ***** CODE *****
df = pd.read_csv('vehicles.csv')

# Draw the scatterplot and histograms of the fleets.
create_diagrams()

# ***** Standard deviation comparison via the bootstrap *****
# Earlier direct (non-bootstrap) computation, kept for reference:
#std_current_fleet = np.std(df.values.T[0])
#std_proposed_fleet = np.std(df.dropna().values.T[0])

# --- Current fleet: first column of the frame ---
current_fleet = df.values.T[0]

print("## Current fleet")
print("")
print("# Standard deviation")
boot = bootstrap.boostrap(np.std, 10000, current_fleet)
# Result entries are reported as: [0] std-mean, [1] lower, [2] upper.
current_std_mean, current_lower, current_upper = boot[0], boot[1], boot[2]
print("- upper = " + str(current_upper))
print("- std-mean = " + str(current_std_mean))
print("- lower = " + str(current_lower))
print("")
print("Mean:")
print("- mean = " + str(np.mean(current_fleet)))
print("")

# --- Proposed fleet: second column, rows containing NaN dropped first ---
print("## Proposed fleet")
print("")
print("# Standard deviation:")
proposed_fleet = df.dropna().values.T[1]
boot = bootstrap.boostrap(np.std, 10000, proposed_fleet)
proposed_std_mean, proposed_lower, proposed_upper = boot[0], boot[1], boot[2]
print("- upper = " + str(proposed_upper))
print("- std-mean = " + str(proposed_std_mean))
print("- lower = " + str(proposed_lower))
# Finish and save the current-fleet histogram (set up earlier in the script).
axes.set_ylabel('current fleet count')
sns_plot_cur.savefig("histogram_cur.png", bbox_inches='tight')

# Clear the figure, then draw and save the new-fleet histogram.
plt.clf()
new_fleet_hist = sns.distplot(new_flt, bins=20, kde=False, rug=True)
sns_plot_new = new_fleet_hist.get_figure()
axes = plt.gca()
axes.set_xlabel('new fleet')
axes.set_ylabel('new fleet count')
sns_plot_new.savefig("histogram_new.png", bbox_inches='tight')

#Exercise: The bootstrap(2)
mean_cur = np.mean(cur_flt)
mean_new = np.mean(new_flt)

# Bootstrap each fleet with its own sample size and 1000 iterations.
# NOTE(review): the last argument is 0.95 here -- confirm whether `boostrap`
# expects a fraction or a percentage (e.g. 95) for this parameter.
boots_cur = boostrap(cur_flt, cur_flt.shape[0], 1000, 0.95)
boots_new = boostrap(new_flt, new_flt.shape[0], 1000, 0.95)

# Index 1 of the result is used as the lower bound, index 2 as the upper.
lower_cur, upper_cur = boots_cur[1], boots_cur[2]
lower_new, upper_new = boots_new[1], boots_new[2]

print("current fleet:")
print("mean: ", mean_cur)
print("upper: ", upper_cur)
print("lower: ", lower_cur)
print("-------------------------")
print("new fleet: ")
# Summary statistics of the new fleet.
print("Mean: %f" % np.mean(data_new_fleet))
print("Median: %f" % np.median(data_new_fleet))
print("Var: %f" % np.var(data_new_fleet))
print("std: %f" % np.std(data_new_fleet))
print("MAD: %f" % mad(data_new_fleet))

# Start from an empty figure, then draw and save the new-fleet histogram.
plt.clf()
vehicle_histogram = sns.distplot(
    data_new_fleet, bins=20, kde=False, rug=True).get_figure()

axes = plt.gca()
axes.set_xlabel('New Fleet Vehicle MPG (Miles Per Gallon)')
axes.set_ylabel('Frequency')

vehicle_histogram.savefig("vehiclehistogram.png", bbox_inches='tight')
vehicle_histogram.savefig("vehiclehistogram.pdf", bbox_inches='tight')

mean_current_fleet = np.mean(data_current_fleet)
mean_new_fleet = np.mean(data_new_fleet)

# Bootstrap each fleet using that fleet's own sample size. The new fleet
# must not borrow the current fleet's size, or its resamples would have the
# wrong length and the reported bounds would not describe the new sample.
boot_current = bootstrap.boostrap(
    data_current_fleet, data_current_fleet.shape[0], 1000)
boot_new = bootstrap.boostrap(
    data_new_fleet, data_new_fleet.shape[0], 1000)

# Index 1 of each result is reported as the lower bound, index 2 as the upper.
print("Current Fleet: Mean:{} Lower: {} Upper: {}".format(
    mean_current_fleet, boot_current[1], boot_current[2]))
print("New Fleet: Mean:{} Lower: {} Upper: {}".format(
    mean_new_fleet, boot_new[1], boot_new[2]))