def scatter(x, y, filename=""):
    """Draw a ggplot scatter plot of ``y`` against ``x``.

    :param x: values for the horizontal axis (wrapped in a ``pd.Series``)
    :param y: values for the vertical axis (wrapped in a ``pd.Series``)
    :param filename: base name of the output image; when it is the empty
        string the plot is printed instead of being saved. Otherwise the
        plot is written to ``graphs/scatter/<filename>.png``.
    """
    df = pd.DataFrame({'x': pd.Series(x), 'y': pd.Series(y)})
    p = gg.ggplot(gg.aes(x='x', y='y'), data=df) + gg.geom_point()
    if filename == "":
        # Function-call form of print works on Python 2 and Python 3 alike.
        print(p)
    else:
        gg.ggsave(filename="graphs/scatter/" + filename + ".png", plot=p)
def save_plot(plot, extension='svg', size=(10, 5), path='reports/',
              switch_to_next_figure=True, hide_title=False):
    """Save a plot to ``path + <plot title> + '.' + extension``.

    Supported inputs are a ggplot object, a seaborn ``JointGrid`` or
    ``FacetGrid``, a matplotlib subplot, or the ``plt`` module itself (in
    which case the current figure is saved).

    :param plot: the object to save (see above)
    :param extension: file extension appended after the title
    :param size: ``(width, height)`` pair; also written to
        ``rcParams['figure.figsize']``
    :param path: prefix concatenated directly in front of the title
    :param switch_to_next_figure: when true, ``new_figure()`` is called
        after saving
    :param hide_title: only honoured when ``plot`` is ``plt``: the title is
        blanked on the figure after it has been read for the file name
    :raises Exception: if ``plot`` is none of the supported kinds
    """
    # Imported under a distinct name so the axes object fetched below does
    # not shadow the module.
    from matplotlib import axes as mpl_axes
    import matplotlib as mpl

    # Accept any two-item sequence, not only a tuple, for the % formatting.
    size = tuple(size)
    mpl.rcParams['figure.figsize'] = '%s, %s' % size
    seaborn_grids = (sns.axisgrid.JointGrid, sns.axisgrid.FacetGrid)

    # isinstance (rather than an exact type comparison) also accepts
    # subclasses of the supported plot types.
    if isinstance(plot, ggplot):
        ggsave(filename=path + plot.title + '.' + extension, plot=plot,
               width=size[0], height=size[1])
    elif isinstance(plot, seaborn_grids):
        # NOTE(review): this branch reads plot.ax; JointGrid may expose
        # ax_joint instead of ax -- confirm against the seaborn version used.
        plot.fig.set_size_inches(*size)
        plot.fig.savefig(path + plot.ax.title.get_text() + '.' + extension)
    elif isinstance(plot, mpl_axes.Subplot):
        plot.figure.set_size_inches(*size)
        plot.figure.savefig(path + plot.title.get_text() + '.' + extension)
    elif plot is plt:
        figure = plt.gcf()
        current_axes = plt.gca()
        # Read the title before it is (optionally) blanked: it names the file.
        title = current_axes.title.get_text()
        if hide_title:
            plt.title('')
        figure.set_size_inches(*size)
        figure.savefig(path + title + '.' + extension)
    else:
        raise Exception('Unrecognized plot type: %s' % type(plot))

    if switch_to_next_figure:
        new_figure()
def plotSetOfArrays(arrays, names, fileName):
    """Plot several score arrays against a shared 0..1 axis.

    Two outputs are produced: a ggplot line+point chart saved to
    ``./IMG/<fileName>`` and a bokeh line chart written to
    ``iou_scores.html`` and shown.

    :param arrays: sequence of 2-D arrays with the same number of rows; they
        are concatenated column-wise for the ggplot chart, and the first
        column of each is drawn in the bokeh chart
    :param names: column labels for the concatenated arrays; also used as
        the bokeh legend entries
    :param fileName: file name (with extension) of the ggplot image
    """
    n_rows = arrays[0].shape[0]
    IDS = np.linspace(0, 1, n_rows)
    A = np.concatenate([IDS.reshape(n_rows, 1)] + list(arrays), axis=1)
    Data = pd.DataFrame(A, columns=['noise'] + list(names))
    Melted = pd.melt(Data, id_vars=['noise'])
    pv = ggplot.ggplot(ggplot.aes(x='noise', y='value', colour='variable'),
                       data=Melted) + ggplot.geom_line() + ggplot.geom_point()
    ggplot.ggsave(pv, './IMG/' + fileName)

    output_file("iou_scores.html", title="correlation.py example")
    figure(tools="pan,wheel_zoom,box_zoom,reset,previewsave")
    hold()
    # One colour per series. zip() stops at the shortest input, so fewer
    # than five arrays no longer raise IndexError, and (as before) at most
    # five series are drawn.
    palette = ('#A6CEE3', '#1F78B4', '#B2DF8A', '#33A02C', '#fb9a99')
    for series, label, colour in zip(arrays, names, palette):
        line(IDS, series[:, 0], color=colour, legend=label)
    curplot().title = "Minimum IOU"
    grid().grid_line_alpha = 0.3
    show()
def save_figs(self, base_dir, fname, save_types, is_ggplot=False):
    """Save the current figure (or a ggplot object) once per file type.

    :param base_dir: directory part of the output path
    :param fname: file name without extension
    :param save_types: iterable of extensions; one file
        ``<base_dir>/<fname>.<ext>`` is written per entry
    :param is_ggplot: ``False`` to save the current pyplot figure, or a
        ``gp.ggplot`` instance to save that plot via ``gp.ggsave``
    :returns: ``None``. Nothing is saved when ``'none'`` is in
        ``self.formats``.
    """
    # collections.Iterable was removed in Python 3.10; fall back for Python 2.
    try:
        from collections.abc import Iterable
    except ImportError:
        from collections import Iterable

    assert isinstance(base_dir, str)
    assert isinstance(fname, str)
    assert isinstance(save_types, Iterable)
    assert (is_ggplot is False) or isinstance(is_ggplot, gp.ggplot)

    if 'none' in self.formats:
        echo("\nld_figures: WARNING: 'none' in --formats, figures are NOT being saved.")
        return None

    # "{{ext}}" survives the first format() as "{ext}" and is filled per type.
    path = "{pth}.{{ext}}".format(pth=os.path.join(base_dir, fname))

    pyplot_saved = False
    for t in save_types:
        try:
            if is_ggplot:
                gp.ggsave(path.format(ext=t), plot=is_ggplot)
            else:
                plt.savefig(path.format(ext=t), bbox_inches='tight')
                pyplot_saved = True
        except IndexError:
            # Best effort: a plot that cannot be rendered (the original
            # comment says "lack of data") is skipped rather than aborting
            # the remaining formats.
            pass

    # Close only after every format has been written. Closing inside the
    # loop made each format after the first save a fresh, empty figure.
    if pyplot_saved:
        plt.close()
def scatter(x, y, filename=""):
    """Draw a ggplot scatter plot of ``y`` against ``x``.

    :param x: values for the horizontal axis (wrapped in a ``pd.Series``)
    :param y: values for the vertical axis (wrapped in a ``pd.Series``)
    :param filename: base name of the output image; when it is the empty
        string the plot is printed instead of being saved. Otherwise the
        plot is written to ``graphs/scatter/<filename>.png``.
    """
    df = pd.DataFrame({
        'x': pd.Series(x),
        'y': pd.Series(y),
    })
    p = gg.ggplot(gg.aes(x='x', y='y'), data=df) + gg.geom_point()
    if filename == "":
        # Function-call form of print works on Python 2 and Python 3 alike.
        print(p)
    else:
        gg.ggsave(filename="graphs/scatter/" + filename + ".png", plot=p)
def plot_roc(self, experiment_type, to_plot):
    """Draw TPR against FPR, one coloured line per parameter, and save a PNG.

    The ``"parameter"`` column of ``to_plot`` is overwritten in place with
    its string form so that the colour scheme is categorical. The image is
    written to ``self.results_path + experiment_type + "_" + self.mode +
    ".png"``.

    :param experiment_type: used as the plot title and in the file name
    :param to_plot: data frame with "FPR", "TPR" and "parameter" columns
    """
    # Strings give a categorical colour scheme.
    to_plot.loc[:, "parameter"] = list(map(str, to_plot.loc[:, "parameter"]))

    canvas = gg.ggplot(
        data=to_plot,
        aesthetics=gg.aes(x="FPR", y="TPR", colour="parameter"))
    curve = gg.geom_line(gg.aes(x="FPR", y="TPR", colour="parameter"))
    p = canvas + curve + gg.ggtitle(experiment_type) + gg.xlab("FPR") + gg.ylab("TPR")

    target = self.results_path + experiment_type + "_" + self.mode + ".png"
    gg.ggsave(filename=target, plot=p)
def _ggplot(df, out_file):
    """Render a faceted bar chart of ``df`` with ggplot and save it.

    The "variant.type", "category" and "caller" columns of ``df`` are
    replaced in place by their display labels before plotting.

    XXX Not yet functional
    """
    import ggplot as gg

    type_names = []
    for raw_type in df["variant.type"]:
        type_names.append(vtype_labels[raw_type])
    df["variant.type"] = type_names

    category_names = []
    for raw_category in df["category"]:
        category_names.append(cat_labels[raw_category])
    df["category"] = category_names

    # Callers without a known label become None.
    caller_names = []
    for raw_caller in df["caller"]:
        caller_names.append(caller_labels.get(raw_caller, None))
    df["caller"] = caller_names

    chart = gg.ggplot(df, gg.aes(x="caller", y="value.floor"))
    chart = chart + gg.geom_bar()
    chart = chart + gg.facet_wrap("variant.type", "category")
    chart = chart + gg.theme_seaborn()
    gg.ggsave(chart, out_file)
def plotHistogramMeans(hist, fileName):
    """Plot every row of ``hist`` as its own line facet and save the image.

    :param hist: 2-D array; each row is prefixed with its row index ("ID")
        and drawn as a separate facet
    :param fileName: file name (with extension) written under ``./IMG/``
    """
    num_clust = hist.shape[0]
    # Column vector of row ids (plain ndarray instead of np.mat).
    IDS = np.arange(num_clust).reshape(num_clust, 1)
    histD = np.concatenate((IDS, hist), axis=1)
    # list(range(...)) so the concatenation also works on Python 3, where
    # range() is not a list.
    Data = pd.DataFrame(histD, columns=['ID'] + list(range(hist.shape[1])))
    Melted = pd.melt(Data, id_vars=['ID'])
    pv = ggplot.ggplot(ggplot.aes(x='variable', y='value'), data=Melted) \
        + ggplot.geom_line() + ggplot.facet_wrap("ID")
    print("Saving mean histograms")
    ggplot.ggsave(pv, './IMG/' + fileName)
def prob231cd_recover(initialization):
    """Reload pickled results, re-save their scatter plot, summarise objectives.

    Reads ``results/prob231cd<initialization>.pkl``, which must hold a
    sequence whose first three items are the plot data frame, the list of
    k-means call objects and the number of trials. The plot is saved next to
    the pickle as a ``.png``.

    :param initialization: suffix of the result file; also used in the title
    :returns: dict with the "mean", "sd" and "min" of the ``obj`` attribute
        over the first ``num_trials`` call objects
    """
    filename = "results/prob231cd" + initialization
    # NOTE(review): pickle.load can execute code from the file; only use it
    # on result files this project wrote itself.
    with open(filename + ".pkl", "rb") as handle:  # closed even on error
        tuple_in = pkl.load(handle)
    prob231c_plot_df = tuple_in[0]
    kmcalls = tuple_in[1]
    num_trials = tuple_in[2]

    p = gg.ggplot(prob231c_plot_df, gg.aes(x="x1", y="x2", colour="data")) + \
        gg.geom_point() + gg.ggtitle(initialization + " initialization")
    gg.ggsave(filename + ".png", plot=p)

    obj = [kmcalls[i].obj for i in range(num_trials)]
    obj_stats = {"mean": np.mean(obj), "sd": np.std(obj), "min": np.min(obj)}
    return obj_stats
def prob231cd_recover(initialization):
    """Reload pickled results, re-save their scatter plot, summarise objectives.

    Reads ``results/prob231cd<initialization>.pkl``, which must hold a
    sequence whose first three items are the plot data frame, the list of
    k-means call objects and the number of trials. The plot is saved next to
    the pickle as a ``.png``.

    :param initialization: suffix of the result file; also used in the title
    :returns: dict with the "mean", "sd" and "min" of the ``obj`` attribute
        over the first ``num_trials`` call objects
    """
    filename = "results/prob231cd" + initialization
    # NOTE(review): pickle.load can execute code from the file; only use it
    # on result files this project wrote itself.
    with open(filename + ".pkl", "rb") as handle:  # closed even on error
        tuple_in = pkl.load(handle)
    prob231c_plot_df = tuple_in[0]
    kmcalls = tuple_in[1]
    num_trials = tuple_in[2]

    p = gg.ggplot(prob231c_plot_df, gg.aes(x="x1", y="x2", colour="data")) + \
        gg.geom_point() + gg.ggtitle(initialization + " initialization")
    gg.ggsave(filename + ".png", plot=p)

    obj = [kmcalls[i].obj for i in range(num_trials)]
    obj_stats = {"mean": np.mean(obj), "sd": np.std(obj), "min": np.min(obj)}
    return obj_stats
def prob231b(initialization="regular"):
    """Run k-means for several cluster counts and save one scatter plot each.

    For every k in [2, 3, 5, 10, 15, 20] a ``KmeansCall`` is built on
    ``features_only`` and run, and its labelled data is plotted to
    ``results/k=<k>_<datestring>.png``.

    :param initialization: passed through to ``KmeansCall``
    """
    cluster_counts = [2, 3, 5, 10, 15, 20]
    kmcalls = []
    for num_clusters in cluster_counts:
        call = KmeansCall(features_only, num_clusters, initialization)
        kmcalls.append(call)
        call.run_kmeans(verbose=False)

        # Plot a copy so the label column is not added to the call's data.
        df_to_plot = call.data.copy()
        df_to_plot["class_label"] = list(call.class_label)

        points = gg.ggplot(
            df_to_plot, gg.aes(x="x1", y="x2", colour="class_label"))
        points = points + gg.geom_point()
        p = points + gg.ggtitle("Synth. data, k=" + str(num_clusters))

        metadata = "k=" + str(num_clusters) + "_" + datestring
        gg.ggsave(filename="results/" + metadata + ".png", plot=p)
def prob231b(initialization="regular"):
    """Run k-means for several cluster counts and save one scatter plot each.

    For every k in [2, 3, 5, 10, 15, 20] a ``KmeansCall`` is built on
    ``features_only`` and run, and its labelled data is plotted to
    ``results/k=<k>_<datestring>.png``.

    :param initialization: passed through to ``KmeansCall``
    """
    cluster_counts = [2, 3, 5, 10, 15, 20]
    kmcalls = []
    for num_clusters in cluster_counts:
        call = KmeansCall(features_only, num_clusters, initialization)
        kmcalls.append(call)
        call.run_kmeans(verbose=False)

        # Plot a copy so the label column is not added to the call's data.
        df_to_plot = call.data.copy()
        df_to_plot["class_label"] = list(call.class_label)

        points = gg.ggplot(
            df_to_plot, gg.aes(x="x1", y="x2", colour="class_label"))
        points = points + gg.geom_point()
        p = points + gg.ggtitle("Synth. data, k=" + str(num_clusters))

        metadata = "k=" + str(num_clusters) + "_" + datestring
        gg.ggsave(filename="results/" + metadata + ".png", plot=p)
def plotHistogramMeans(hist, fileName):
    """Plot every row of ``hist`` as its own line facet and save the image.

    :param hist: 2-D array; each row is prefixed with its row index ("ID")
        and drawn as a separate facet
    :param fileName: file name (with extension) written under ``./IMG/``
    """
    num_clust = hist.shape[0]
    # Column vector of row ids (plain ndarray instead of np.mat).
    IDS = np.arange(num_clust).reshape(num_clust, 1)
    histD = np.concatenate((IDS, hist), axis=1)
    # list(range(...)) so the concatenation also works on Python 3, where
    # range() is not a list.
    Data = pd.DataFrame(histD, columns=['ID'] + list(range(hist.shape[1])))
    Melted = pd.melt(Data, id_vars=['ID'])
    pv = ggplot.ggplot(ggplot.aes(x='variable', y='value'), data=Melted) \
        + ggplot.geom_line() + ggplot.facet_wrap("ID")
    print("Saving mean histograms")
    ggplot.ggsave(pv, './IMG/' + fileName)
def googletrend_command(delta_t, threshold=0.0, inverse=False):
    """Run the google trend classifier over a parameter grid and plot its ROC.

    For every threshold in ``0, 0.1, ...`` produced by
    ``np.arange(0, threshold + 0.1, 0.1)`` and every integer delta_t in
    ``1 .. int(delta_t) - 1`` the classifier is run and evaluated; the
    resulting (FPR, TPR) points are plotted together with a y=x baseline and
    saved to ``./plots/googletrend[_inverse].jpg``.

    :param delta_t: the upper bound (exclusive) for the original delta_t
        parameter
    :param threshold: upper bound for the threshold of differentiating two
        classes
    :param inverse: whether to inverse the classifier
    """
    # File path and title depend on the `inverse` flag.
    filename = "googletrend"
    titlename = "ROC of google trend classifier"
    if inverse:
        filename += "_inverse"
        titlename += " (inverse version)"
    filepath = "./plots/%s.jpg" % filename

    # Generate data first.
    data = googletrend.preprocess()

    # Classifier evaluation metrics, one entry per (threshold, delta_t) run.
    output = {'tpr': [], 'fpr': [], 'plot': []}
    for thre in np.arange(0, threshold + 0.1, 0.1):
        # print() and range() keep this runnable on Python 2 and Python 3.
        print("==> threshold: %f, inverse: %s" % (thre, inverse))
        for i in range(1, int(delta_t)):
            googletrend.algorithm(data, i, thre, inverse)
            tp_rate, fp_rate = googletrend.evaluate(data)
            output['tpr'].append(tp_rate)
            output['fpr'].append(fp_rate)
            output['plot'].append('thre_' + str(thre))

    # Add a y=x baseline for comparison.
    output['tpr'].extend([0.0, 1.0])
    output['fpr'].extend([0.0, 1.0])
    output['plot'].extend(['baseline', 'baseline'])

    # Plot the ROC graph.
    df = pd.DataFrame(output)
    graph = gg.ggplot(df, gg.aes('fpr', 'tpr', color='plot')) + \
        gg.theme_seaborn() + \
        gg.ggtitle(titlename) + \
        gg.xlab("FPR") + \
        gg.ylab("TPR") + \
        gg.xlim(0.0, 1.0) + \
        gg.ylim(0.0, 1.0) + \
        gg.geom_point() + \
        gg.geom_line()
    gg.ggsave(plot=graph, filename=filepath, width=6, height=6, dpi=100)
def prob231g():
    """Run EM with three clusters, then save its diagnostics to ``results/``.

    Writes three files with the prefix ``results/prob231g``:
    ``_loglike.png`` (log-likelihood record per iteration),
    ``_clusters.png`` (scatter plot coloured by assigned class) and
    ``_a.pkl`` (the pickled ``EMCall`` object).
    """
    filename = "results/prob231g"
    num_clusters_231g = 3
    emcall = EMCall(features_only, labels_only, num_clusters_231g)
    emcall.run_em()

    plt.plot(emcall.log_likelihood_record)
    plt.title("Likelihood over EM iterations")
    plt.savefig(filename + "_loglike.png")

    prob231g_plot_df = emcall.data.copy()
    prob231g_plot_df["class_label"] = [label for label in emcall.class_label]
    p = gg.ggplot(prob231g_plot_df, gg.aes(x="x1", y="x2", colour="class_label")) + \
        gg.geom_point() + gg.ggtitle("EM cluster assignments")
    gg.ggsave(filename + "_clusters.png", plot=p)

    # The context manager guarantees the pickle is flushed and closed.
    with open(filename + "_a.pkl", "wb") as handle:
        pkl.dump(emcall, handle)
    print("Done with 231g.")
def prob231g():
    """Run EM with three clusters, then save its diagnostics to ``results/``.

    Writes three files with the prefix ``results/prob231g``:
    ``_loglike.png`` (log-likelihood record per iteration),
    ``_clusters.png`` (scatter plot coloured by assigned class) and
    ``_a.pkl`` (the pickled ``EMCall`` object).
    """
    filename = "results/prob231g"
    num_clusters_231g = 3
    emcall = EMCall(features_only, labels_only, num_clusters_231g)
    emcall.run_em()

    plt.plot(emcall.log_likelihood_record)
    plt.title("Likelihood over EM iterations")
    plt.savefig(filename + "_loglike.png")

    prob231g_plot_df = emcall.data.copy()
    prob231g_plot_df["class_label"] = [label for label in emcall.class_label]
    p = gg.ggplot(prob231g_plot_df, gg.aes(x="x1", y="x2", colour="class_label")) + \
        gg.geom_point() + gg.ggtitle("EM cluster assignments")
    gg.ggsave(filename + "_clusters.png", plot=p)

    # The context manager guarantees the pickle is flushed and closed.
    with open(filename + "_a.pkl", "wb") as handle:
        pkl.dump(emcall, handle)
    print("Done with 231g.")
def plotSetOfArrays(arrays, names, fileName):
    """Plot several score arrays against a shared 0..1 axis.

    Two outputs are produced: a ggplot line+point chart saved to
    ``./IMG/<fileName>`` and a bokeh line chart written to
    ``iou_scores.html`` and shown.

    :param arrays: sequence of 2-D arrays with the same number of rows; they
        are concatenated column-wise for the ggplot chart, and the first
        column of each is drawn in the bokeh chart
    :param names: column labels for the concatenated arrays; also used as
        the bokeh legend entries
    :param fileName: file name (with extension) of the ggplot image
    """
    n_rows = arrays[0].shape[0]
    IDS = np.linspace(0, 1, n_rows)
    A = np.concatenate([IDS.reshape(n_rows, 1)] + list(arrays), axis=1)
    Data = pd.DataFrame(A, columns=['noise'] + list(names))
    Melted = pd.melt(Data, id_vars=['noise'])
    pv = ggplot.ggplot(ggplot.aes(x='noise', y='value', colour='variable'),
                       data=Melted) + ggplot.geom_line() + ggplot.geom_point()
    ggplot.ggsave(pv, './IMG/' + fileName)

    output_file("iou_scores.html", title="correlation.py example")
    figure(tools="pan,wheel_zoom,box_zoom,reset,previewsave")
    hold()
    # One colour per series. zip() stops at the shortest input, so fewer
    # than five arrays no longer raise IndexError, and (as before) at most
    # five series are drawn.
    palette = ('#A6CEE3', '#1F78B4', '#B2DF8A', '#33A02C', '#fb9a99')
    for series, label, colour in zip(arrays, names, palette):
        line(IDS, series[:, 0], color=colour, legend=label)
    curplot().title = "Minimum IOU"
    grid().grid_line_alpha = 0.3
    show()
try: session.run(run_script) truematch_mod.append(session.getvalue("mod_delta_m"+str(mc))) runtime_mod.append(gmodel_relaxed.Runtime) except RuntimeError: print ("unable to evaluate true matchin perf of gurobi model in "+fname) except (OSError, NameError, ValueError,RuntimeError): print "unable to process gurobi model in "+fname np.savetxt("runtime.csv", runtime_mod, delimiter=",") np.savetxt("tm.csv", runtime_mod, delimiter=",") tm_df = pd_df({ "tm": truematch_orig + truematch_mod, "runtime": runtime_orig + runtime_mod, "orig_or_mod": ["mod"]*len(runtime_mod) # ["orig"]*nmc }) nmc = len(runtime_mod) #print gg.ggplot(tm_df, aes('orig_or_mod', 'runtime')) + \ # gg.geom_line(colour='steelblue') comp_plot = gg.ggplot(data=tm_df, aesthetics=gg.aes(x='runtime', y='tm')) + gg.geom_point() + gg.scale_x_log10() gg.ggsave("graphmatch_IP_runtime_vs_tm.pdf",plot = comp_plot) print "mean of tm:"+ str(np.mean( truematch_mod))
import math
import pickle as pkl

import ggplot as gg
import pandas as pd

# Imported so that pickle can resolve the TestResult class while loading.
from analysis.TestResult import TestResult

# Re-plot a previously pickled LSH-vs-KD-tree run on a log2 time axis.
therunthatworked = "../results/LSH_vs_KDT_????? "
with open(therunthatworked + ".pkl", 'rb') as f:  # closed after loading
    results = pkl.load(f)

times = [math.log(r.avg_time, 2) for r in results]
distances = [r.avg_distance for r in results]
# First three characters of the method name serve as the text marker.
methods = [r.method[0:3] for r in results]
alpha = [r.alpha for r in results]
m = [r.m for r in results]
results_df = pd.DataFrame(
    data={
        "times": times,
        "distances": distances,
        "methods": methods,
        "m": m,
        "alpha": alpha,
    })
# Function-call form of print works on Python 2 and Python 3 alike.
print(results_df)

p = gg.ggplot(data=results_df,
              aesthetics=gg.aes(x="times", y="distances", label="methods")) + \
    gg.geom_text() + \
    gg.ggtitle("LSH and KD trees: tradeoffs") + \
    gg.xlab("log2 average query time ") + \
    gg.ylab("Average L2 distance from query point)")
gg.ggsave(filename=therunthatworked + "log2.png", plot=p)
"orig_or_mod": ["orig"]*nmc + ["mod"]*nmc }) p_val_truematch_diff = st.ttest_rel(truematch_orig, truematch_mod) p_val_timediff = st.ttest_rel(runtime_orig, runtime_mod) p_val_truematch_diff = st.wilcoxon(truematch_orig, truematch_mod) p_val_timediff = st.wilcoxon(runtime_orig, runtime_mod) #print gg.ggplot(tm_df, aes('orig_or_mod', 'tm')) + \ # gg.geom_line(colour='steelblue') #print gg.ggplot(tm_df, aes('orig_or_mod', 'runtime')) + \ # gg.geom_line(colour='steelblue') comp_plot = gg.ggplot(data=tm_df, aesthetics=gg.aes(x='runtime', y='tm', colour='orig_or_mod')) + gg.geom_point() + gg.scale_x_log10() gg.ggsave("orig_IP_vs_modified_IP_3.pdf",plot = comp_plot)
multiplier_index_list = \ df[(df.weekday == day) & (df.rain == rain_status)].index df.loc[multiplier_index_list, u'ENTRIESn_hourly'] = \ multiplier * entries_sum ##now we have a dataframe wich is ready to be utilized for making our ##plot using the data contained within. p = ggplot.ggplot(ggplot.aes(x = u'factor(weekday)', \ weight = u'ENTRIESn_hourly', \ fill = u'weekday'),\ data = df) +\ ggplot.geom_bar() +\ ggplot.facet_grid(x = u'rain', y = u'weekday') +\ ggplot.ggtitle('Average Ridership on Sunny & Rainy ISO Weekdays') print p return p if __name__ == '__main__': input_filename = 'turnstile_data_master_with_weather_test.csv' output_filename = 'plot.png' with open(output_filename, 'wb') as f: turnstile_weather = pandas.read_csv(input_filename) plot = plot_weather_data(turnstile_weather) ggplot.ggsave(output_filename, plot) else: pass
def _default_color_list(variable_count):
    """Return a palette centred on black with one colour per variable."""
    colors_lower = ["#ff0000", "#cc0000", "#993300", "#666600"]
    colors_upper = ["#006666", "#003399", "#0000cc", "#0000ff"]
    colors_mid = ["#000000"]
    color_count = variable_count - 1
    # Floor division keeps the slice bounds integral on Python 3.
    lower_count = max(color_count // 2, 0)
    upper_count = max(color_count - lower_count, 0)
    # Slice from an explicit start index: a "-0" slice would return the whole
    # lower palette instead of nothing when no lower colours are needed.
    lower_start = max(len(colors_lower) - lower_count, 0)
    return colors_lower[lower_start:] + colors_mid + colors_upper[0:upper_count]


def main():
    """Simulate the selected ruleset's rolls and save one PNG per graph.

    Parses the command line, runs ``rulemod.simulate_rolls`` over
    ``rulemod.variables`` in a process pool, collects the records in a data
    frame and, when ``args.graph`` is set, builds and saves a ggplot chart
    for every key of ``rulemod.graphs``.

    :returns: 0 on completion; exits with status 130 on KeyboardInterrupt
        during the simulation
    """
    global args, ruleset

    # Arguments Parser
    argparser, subparser = parser_setup()
    register_rules(subparser)
    args = argparser.parse_args()
    rulemod = sys.modules["rpgdice.rulesets.%s" % args.ruleset]
    rulemod.prepare(args, srand)

    if args.debug:
        print("DEBUG: args %s" % (args,))
        print("")

    results = list()
    pool = multiprocessing.Pool()
    try:
        for result in pool.map(rulemod.simulate_rolls, rulemod.variables):
            results.extend(result)
        pool.close()
        pool.join()
    except KeyboardInterrupt:
        sys.exit(130)

    if args.debug:
        print("DEBUG: results:")
        pprint(results)
        print("")

    # Axis/legend labels; a ruleset module may override any of them.
    conf = {"vlab": "Variables", "xlab": "Outcome", "ylab": "Probability %"}
    for item in conf:
        conf[item] = getattr(rulemod, item, conf[item])

    columns = ("Graph", conf["vlab"], conf["xlab"], "Count", conf["ylab"])
    data = pandas.DataFrame.from_records(results, columns=columns)

    graph_items = (
        "color_list",
        "file_prefix",
        "file_suffix",
        "graph_type",
        "limits",
        "x_breaks",
        "x_labels",
        "title",
        "vlab",
        "xlab",
        "ylab",
    )

    # Create and save graphs
    for gkey in rulemod.graphs:
        # Graph defaults
        graph_conf = conf.copy()
        graph_conf["file_prefix"] = "%s%02d" % (args.ruleset, gkey)
        graph_conf["file_suffix"] = str()
        graph_conf["color_list"] = _default_color_list(len(rulemod.variables))

        # Per-graph settings win over module-level settings, which win over
        # the defaults above; anything still unset becomes None.
        for item in graph_items:
            try:
                graph_conf[item] = rulemod.graphs[gkey][item]
            except (KeyError, IndexError, TypeError):
                if hasattr(rulemod, item):
                    graph_conf[item] = getattr(rulemod, item)
                else:
                    graph_conf.setdefault(item, None)

        if args.debug:
            print("DEBUG: graph_conf:")
            pprint(graph_conf)
            print("")

        # plot_data: rows of this graph, with columns renamed to its labels
        plot_data = data.copy()
        plot_data = plot_data[plot_data["Graph"] == gkey]
        plot_data.rename(
            columns={
                conf["vlab"]: graph_conf["vlab"],
                conf["xlab"]: graph_conf["xlab"],
                conf["ylab"]: graph_conf["ylab"],
            },
            inplace=True,
        )
        plot_data.index = range(1, len(plot_data) + 1)

        if args.debug:
            print("DEBUG: plot_data:")
            pprint(plot_data)
            print("")

        if not args.graph:
            continue

        # Create plot
        plot = (
            ggplot.ggplot(
                ggplot.aes(x=graph_conf["xlab"], y=graph_conf["ylab"],
                           color=graph_conf["vlab"]),
                data=plot_data,
            )
            + ggplot.ggtitle(graph_conf["title"])
            + ggplot.theme_gray()
            + ggplot.scale_colour_manual(values=graph_conf["color_list"])
        )
        plot.rcParams["font.family"] = "monospace"
        if graph_conf["x_breaks"] and graph_conf["x_labels"]:
            plot += ggplot.scale_x_discrete(breaks=graph_conf["x_breaks"],
                                            labels=graph_conf["x_labels"])
        if graph_conf["limits"]:
            plot += ggplot.ylim(graph_conf["limits"][0], graph_conf["limits"][1])

        if graph_conf["graph_type"] == "bars":
            # Very thick lines stand in for bars; label each non-empty one.
            plot += ggplot.geom_line(size=20)
            text_data = plot_data[plot_data["Count"] > 0]
            text_data.index = range(0, len(text_data))
            outcomes = dict(text_data[graph_conf["xlab"]])
            percents = dict(text_data[graph_conf["ylab"]])
            for k in outcomes:
                percent = "%4.1f%%" % percents[k]
                x = outcomes[k]
                y = percents[k] + 4
                color = graph_conf["color_list"][k]
                plot += ggplot.geom_text(label=[percent], x=[x, x + 1],
                                         y=[y, y - 1], color=color)
        else:
            plot += ggplot.geom_line()
            plot += ggplot.geom_point(alpha=0.3, size=50)

        if hasattr(rulemod, "update_plot"):
            plot = rulemod.update_plot(gkey, graph_conf, plot, plot_data)

        if args.dumpsave:
            filename = "/dev/null"
        else:
            filename = "%s%s.png" % (graph_conf["file_prefix"],
                                     graph_conf["file_suffix"])
        ggplot.ggsave(filename, plot, format="png", dpi=300)

    return 0
m=1, alpha=alpha)) #save results to results folder, with plot and printing to screen. metadata = datetime.datetime.now().strftime( "%Y-%m-%d %H:%M:%S") + "test_mode==" + str(test_mode) f = open("results/LSH_vs_KDT_%s.pkl" % metadata, mode='w') pkl.dump(obj=results, file=f) logtimes = [math.log(r.avg_time, 2) for r in results] distances = [r.avg_distance for r in results] methods = [r.method[0:3] for r in results] alpha = [r.alpha for r in results] m = [r.m for r in results] results_df = pd.DataFrame( data={ "logtimes": logtimes, "distances": distances, "methods": methods, "m": m, "alpha": alpha }) print results_df p = gg.ggplot(data = results_df, aesthetics = gg.aes(x = "logtimes", y = "distances", label = "methods")) + \ gg.geom_text() + \ gg.ggtitle("LSH and KD trees: tradeoffs") + \ gg.xlab("Log2 average query time ") + gg.ylab("Average L2 distance from query point)") gg.ggsave(filename="results/LSH_vs_KDT_%s.png" % metadata, plot=p)
import math
import pickle as pkl

import ggplot as gg
import pandas as pd

# Imported so that pickle can resolve the TestResult class while loading.
from analysis.TestResult import TestResult

# Re-plot a previously pickled LSH-vs-KD-tree run on a log2 time axis.
therunthatworked = "../results/LSH_vs_KDT_????? "
with open(therunthatworked + ".pkl", 'rb') as f:  # closed after loading
    results = pkl.load(f)

times = [math.log(r.avg_time, 2) for r in results]
distances = [r.avg_distance for r in results]
# First three characters of the method name serve as the text marker.
methods = [r.method[0:3] for r in results]
alpha = [r.alpha for r in results]
m = [r.m for r in results]
results_df = pd.DataFrame(data={"times": times,
                                "distances": distances,
                                "methods": methods,
                                "m": m,
                                "alpha": alpha})
# Function-call form of print works on Python 2 and Python 3 alike.
print(results_df)

p = gg.ggplot(data=results_df,
              aesthetics=gg.aes(x="times", y="distances", label="methods")) + \
    gg.geom_text() + \
    gg.ggtitle("LSH and KD trees: tradeoffs") + \
    gg.xlab("log2 average query time ") + \
    gg.ylab("Average L2 distance from query point)")
gg.ggsave(filename=therunthatworked + "log2.png", plot=p)
def firms_dynamics_plot(decision):
    """Plot the firm-population time series of one simulation run.

    Reads ``temp_general_firms_pop_<pop_redutor>_decision_<decision>_time_
    <final_Time>.txt`` from ``parameters.OUTPUT_PATH`` and writes eight PNG
    files to the same directory: six single-series ggplot line charts and
    two two-series pandas charts. An existing file with the same name is
    removed before each save.

    :param decision: decision identifier used in the input and output names
    """
    data = pd.read_csv(
        os.path.join(
            parameters.OUTPUT_PATH,
            "temp_general_firms_pop_%s_decision_%s_time_%s.txt"
            % (parameters.pop_redutor, decision, parameters.final_Time)),
        sep=",", header=None, decimal=",").astype(float)

    # Renaming the column names
    data.columns = [
        'time', 'total_firms', 'average_output', 'average_age',
        'average_size', 'new_firms', 'exit_firms', 'max_size',
        'total_effort', 'average_effort'
    ]

    # Optionally drop the initial (burn-in) periods from every plot.
    if parameters.time_to_cut_plots > 0:
        data = data.loc[
            (data['time']).astype(int) >= parameters.time_to_cut_plots, :]

    # Variable to add in the output file names
    title_pop_val = float(parameters.pop_redutor) * 100

    # Tick positions every 12 time steps, labelled with the year number.
    list_of_years_division = list(
        range(int(data['time'].min()), int(data['time'].max()), 12)) \
        + [data['time'].max() + 1]
    list_of_years = [int(i / 12) for i in list_of_years_division]

    # Graph parameter variables
    dpi_var_plot = 700
    width_var_plot = 15
    height_var_plot = 10

    def fresh_output_path(name_template):
        """Return the PNG path for this run, deleting any existing file."""
        target = os.path.join(
            parameters.OUTPUT_PATH,
            name_template % (decision, title_pop_val, parameters.final_Time))
        if os.path.isfile(target):
            os.remove(target)
        return target

    def save_line_plot(column, title, ylab, name_template):
        """Save a ggplot line chart of one column against time."""
        plot_data = gg.ggplot(data, gg.aes('time', column)) + gg.geom_line() \
            + gg.scale_y_continuous(breaks=11) \
            + gg.scale_x_discrete(breaks=list_of_years_division,
                                  labels=list_of_years) \
            + gg.ggtitle(title) + gg.xlab('Years') + gg.ylab(ylab) \
            + gg.theme_bw()
        gg.ggsave(plot_data, fresh_output_path(name_template),
                  width=width_var_plot, height=height_var_plot, units="in")

    def save_pair_plot(columns, title, ylab, name_template):
        """Save a pandas line chart of two columns on shared axes."""
        plot_data = data[list(columns)].plot(title=title)
        plot_data.set_xlabel('Years')
        plot_data.set_ylabel(ylab)
        plot_data.legend(loc='center left', bbox_to_anchor=(1, 0.5))
        plot_data.set_xticks(list_of_years_division)
        plot_data.set_xticklabels(list_of_years)
        # set_axis_bgcolor no longer exists in current matplotlib; use its
        # replacement when available and fall back on old installations.
        set_background = getattr(plot_data, 'set_facecolor', None)
        if set_background is None:
            set_background = plot_data.set_axis_bgcolor
        set_background('w')
        fig = plot_data.get_figure()
        fig.set_size_inches(width_var_plot, height_var_plot)
        fig.savefig(fresh_output_path(name_template), dpi=dpi_var_plot)

    # Single-series charts: (column, title, y label, output name template)
    line_plots = (
        ('total_firms', 'Total firms', 'Total of Firms',
         'temp_general_total_firms_%s_%s_%s.png'),
        ('average_output', 'Average of output', 'Units',
         'temp_general_average_output_%s_%s_%s.png'),
        ('average_age', 'Average of age of firms', 'Age of Firms',
         'temp_general_average_age_%s_%s_%s.png'),
        ('average_size', 'Average of size of firms', 'Units',
         'temp_general_average_size_%s_%s_%s.png'),
        ('new_firms', 'Number of new firms', 'Units',
         'temp_general_number_of_new_firms_%s_%s_%s.png'),
        ('exit_firms', 'Number of firms out', 'Units',
         'temp_general_number_of_firms_out_%s_%s_%s.png'),
    )
    for column, title, ylab, name_template in line_plots:
        save_line_plot(column, title, ylab, name_template)

    # Two-series charts
    save_pair_plot(
        ('average_effort', 'total_effort'),
        'Average and maximum effort of employees',
        'Values units of effort',
        'temp_average_and_maximum_effort_of_firms_out_%s_%s_%s.png')
    save_pair_plot(
        ('average_size', 'max_size'),
        'Average and maximum size firms',
        'Number of employees',
        'temp_average_size_and_maximum_of_firms_out_%s_%s_%s.png')
# Testing: one run per hashing table count, then one per KD-tree alpha.
results = []
for m in mvals:
    results.append(test_approx_nn(method="hashing", traindata=docdata,
                                  testdata=testdata, m=m, alpha=1))
for alpha in avals:
    results.append(test_approx_nn(method="kdtree", traindata=docdata,
                                  testdata=testdata, m=1, alpha=alpha))

# Save results to results folder, with plot and printing to screen.
metadata = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") + \
    "test_mode==" + str(test_mode)
# Pickle data is binary: open in 'wb' and let the context manager flush and
# close the file.
with open("results/LSH_vs_KDT_%s.pkl" % metadata, mode='wb') as f:
    pkl.dump(results, f)

logtimes = [math.log(r.avg_time, 2) for r in results]
distances = [r.avg_distance for r in results]
# First three characters of the method name serve as the text marker.
methods = [r.method[0:3] for r in results]
alpha = [r.alpha for r in results]
m = [r.m for r in results]
results_df = pd.DataFrame(data={"logtimes": logtimes,
                                "distances": distances,
                                "methods": methods,
                                "m": m,
                                "alpha": alpha})
# Function-call form of print works on Python 2 and Python 3 alike.
print(results_df)

p = gg.ggplot(data=results_df,
              aesthetics=gg.aes(x="logtimes", y="distances", label="methods")) + \
    gg.geom_text() + \
    gg.ggtitle("LSH and KD trees: tradeoffs") + \
    gg.xlab("Log2 average query time ") + \
    gg.ylab("Average L2 distance from query point)")
gg.ggsave(filename="results/LSH_vs_KDT_%s.png" % metadata, plot=p)