Example #1
0
def scatter(x, y, filename=""):
    """Draw a ggplot scatter plot of ``y`` against ``x``.

    With an empty ``filename`` the plot object is printed; otherwise it is
    saved to ``graphs/scatter/<filename>.png``.
    """
    columns = {'x': pd.Series(x), 'y': pd.Series(y)}
    frame = pd.DataFrame(columns)
    chart = gg.ggplot(gg.aes(x='x', y='y'), data=frame) + gg.geom_point()
    if filename != "":
        target = "graphs/scatter/" + filename + ".png"
        gg.ggsave(filename=target, plot=chart)
        return
    print(chart)
Example #2
0
def save_plot(plot,
              extension='svg',
              size=(10, 5),
              path='reports/',
              switch_to_next_figure=True,
              hide_title=False):
    """Save a plot to ``path + <plot title> + '.' + extension``.

    :param plot: a ``ggplot`` object, a seaborn ``JointGrid``/``FacetGrid``,
        a matplotlib subplot axes, or the ``plt`` module itself (meaning the
        current pyplot figure).
    :param extension: file extension appended after the title.
    :param size: ``(width, height)`` applied to the figure before saving.
    :param path: prefix prepended verbatim to the title (no separator added).
    :param switch_to_next_figure: call ``new_figure()`` after saving.
    :param hide_title: only honoured when ``plot is plt``; the title is still
        used for the file name but blanked on the figure before saving.
    :raises TypeError: if ``plot`` is none of the supported kinds.
    """
    import matplotlib as mpl
    # Imported under an alias so the local ``current_axes`` below cannot
    # shadow the module.
    from matplotlib import axes as mpl_axes
    mpl.rcParams['figure.figsize'] = '%s, %s' % size

    seaborns = (sns.axisgrid.JointGrid, sns.axisgrid.FacetGrid)

    # isinstance (rather than exact type identity) also accepts subclasses.
    if isinstance(plot, ggplot):
        ggsave(filename=path + plot.title + '.' + extension,
               plot=plot,
               width=size[0],
               height=size[1])
    elif isinstance(plot, seaborns):
        plot.fig.set_size_inches(*size)
        plot.fig.savefig(path + plot.ax.title.get_text() + '.' + extension)
    elif isinstance(plot, mpl_axes.Subplot):
        plot.figure.set_size_inches(*size)
        plot.figure.savefig(path + plot.title.get_text() + '.' + extension)
    elif plot is plt:
        figure = plt.gcf()
        current_axes = plt.gca()
        # Read the title before (optionally) blanking it: it names the file.
        title = current_axes.title.get_text()
        if hide_title:
            plt.title('')
        figure.set_size_inches(*size)
        figure.savefig(path + title + '.' + extension)
    else:
        # TypeError is still an Exception, so existing handlers keep working.
        raise TypeError('Unrecognized plot type: %s' % type(plot))
    if switch_to_next_figure:
        new_figure()
Example #3
0
def plotSetOfArrays(arrays, names, fileName):
    """Plot several column arrays against a shared 0..1 axis.

    A 'noise' column of evenly spaced values in [0, 1] (one per row of
    ``arrays[0]``) is prepended to the arrays, the result is melted to long
    form and drawn with ggplot (lines + points), then saved to
    ``./IMG/<fileName>``. Afterwards the first column of up to five arrays is
    drawn through the ``output_file``/``figure``/``line``/``show`` plotting
    helpers (presumably an old bokeh API -- defined outside this block).

    :param arrays: sequence of 2-D arrays with the same number of rows; the
        total number of columns must equal ``len(names)``.
    :param names: column labels, also used as legend entries.
    :param fileName: file name of the ggplot image inside ``./IMG/``.
    """
    IDS = np.linspace(0, 1, arrays[0].shape[0])
    # A single concatenate call instead of re-copying the matrix per array.
    A = np.concatenate([IDS.reshape(-1, 1)] + list(arrays), axis=1)
    Data = pd.DataFrame(A, columns=['noise'] + list(names))
    Melted = pd.melt(Data, id_vars=['noise'])

    pv = ggplot.ggplot(ggplot.aes(x='noise', y='value', colour='variable'),
                       data=Melted) + ggplot.geom_line() + ggplot.geom_point()
    ggplot.ggsave(pv, './IMG/' + fileName)

    output_file("iou_scores.html", title="correlation.py example")

    figure(tools="pan,wheel_zoom,box_zoom,reset,previewsave")
    hold()
    # zip() stops at the shortest input, so fewer than five arrays no longer
    # raises IndexError; at most five series are drawn, one per colour.
    palette = ['#A6CEE3', '#1F78B4', '#B2DF8A', '#33A02C', '#fb9a99']
    for series, label, colour in zip(arrays, names, palette):
        line(IDS, series[:, 0], color=colour, legend=label)

    curplot().title = "Minimum IOU"
    grid().grid_line_alpha = 0.3
    show()
    def save_figs(self, base_dir, fname, save_types, is_ggplot=False):
        """Save the current pyplot figure, or a ggplot object, once per format.

        :param base_dir: directory part of the output path (``str``).
        :param fname: file name without extension (``str``).
        :param save_types: iterable of extensions; one file is written per entry.
        :param is_ggplot: ``False`` to save the current pyplot figure, or the
            ggplot object to save through ``gp.ggsave``.
        :returns: ``None``. Nothing is saved when ``'none'`` is in
            ``self.formats``.
        """
        # ``collections.Iterable`` was removed in Python 3.10; use the
        # ``collections.abc`` location and fall back for Python 2.
        try:
            from collections.abc import Iterable
        except ImportError:
            from collections import Iterable

        assert isinstance(base_dir, str)
        assert isinstance(fname, str)
        assert isinstance(save_types, Iterable)
        assert (is_ggplot is False) or isinstance(is_ggplot, gp.ggplot)

        if 'none' in self.formats:
            echo("\nld_figures: WARNING: 'none' in --formats, figures are NOT being saved.")
            return None

        path = "{pth}.{{ext}}".format(pth=os.path.join(base_dir, fname))

        used_pyplot = False
        for t in save_types:
            try:
                if is_ggplot:
                    gp.ggsave(path.format(ext=t), plot=is_ggplot)
                else:
                    used_pyplot = True
                    plt.savefig(path.format(ext=t), bbox_inches='tight')
            except IndexError:
                # Treated as "not enough data to draw": skip this format.
                # The handler no longer indexes ``exc.args``, which could
                # itself raise for an argument-less IndexError.
                pass

        # Close only after every format has been written. Closing inside the
        # loop left later formats saving a fresh, empty figure.
        if used_pyplot:
            plt.close()
Example #5
0
def scatter(x, y, filename=""):
    """Scatter-plot ``y`` versus ``x`` with ggplot.

    The plot is printed when ``filename`` is the empty string and written to
    ``graphs/scatter/<filename>.png`` otherwise.
    """
    data = pd.DataFrame({'x': pd.Series(x), 'y': pd.Series(y)})
    points = gg.ggplot(gg.aes(x='x', y='y'), data=data)
    points = points + gg.geom_point()
    if filename == "":
        print(points)
    else:
        out_path = "graphs/scatter/" + filename + ".png"
        gg.ggsave(filename=out_path, plot=points)
 def plot_roc(self, experiment_type, to_plot):
     """Draw TPR against FPR, one coloured line per parameter, and save a PNG.

     The "parameter" column of ``to_plot`` is converted to strings in place.
     The image goes to
     ``self.results_path + experiment_type + "_" + self.mode + ".png"``.
     """
     # Strings make ggplot pick a categorical colour scheme.
     to_plot.loc[:, "parameter"] = [str(value) for value in to_plot.loc[:, "parameter"]]

     chart = gg.ggplot(data=to_plot,
                       aesthetics=gg.aes(x="FPR", y="TPR", colour="parameter"))
     chart = chart + gg.geom_line(gg.aes(x="FPR", y="TPR", colour="parameter"))
     chart = chart + gg.ggtitle(experiment_type)
     chart = chart + gg.xlab("FPR")
     chart = chart + gg.ylab("TPR")

     target = self.results_path + experiment_type + "_" + self.mode + ".png"
     gg.ggsave(filename=target, plot=chart)
     return
Example #7
0
def _ggplot(df, out_file):
    """Plot faceted items with ggplot wrapper on top of matplotlib.

    Relabels the "variant.type", "category" and "caller" columns of ``df``
    in place, then saves a faceted bar chart to ``out_file``.

    XXX Not yet functional
    """
    import ggplot as gg
    df["variant.type"] = [vtype_labels[key] for key in df["variant.type"]]
    df["category"] = [cat_labels[key] for key in df["category"]]
    # Unknown callers become None instead of raising.
    df["caller"] = [caller_labels.get(key, None) for key in df["caller"]]

    base = gg.ggplot(df, gg.aes(x="caller", y="value.floor"))
    with_bars = base + gg.geom_bar()
    faceted = with_bars + gg.facet_wrap("variant.type", "category")
    themed = faceted + gg.theme_seaborn()
    gg.ggsave(themed, out_file)
Example #8
0
def plotHistogramMeans(hist, fileName):
    """Plot each row of ``hist`` as its own line facet and save the image.

    :param hist: 2-D array; a row-index column named 'ID' is prepended and
        the remaining columns are labelled 0..n-1 before melting.
    :param fileName: file name of the image written inside ``./IMG/``.
    """
    num_clust = hist.shape[0]
    # Row identifiers as a column vector (plain ndarray rather than the
    # legacy np.mat matrix type).
    IDS = np.arange(num_clust).reshape(num_clust, 1)

    histD = np.concatenate((IDS, hist), axis=1)

    # list(...) keeps the concatenation valid on Python 3, where range() is
    # not a list.
    Data = pd.DataFrame(histD, columns=['ID'] + list(range(hist.shape[1])))
    Melted = pd.melt(Data, id_vars=['ID'])
    pv = (ggplot.ggplot(ggplot.aes(x='variable', y='value'), data=Melted)
          + ggplot.geom_line()
          + ggplot.facet_wrap("ID"))
    print("Saving mean histograms")
    ggplot.ggsave(pv, './IMG/' + fileName)
def prob231cd_recover(initialization):
    """Reload pickled prob231cd results, re-plot them and summarise objectives.

    Reads ``results/prob231cd<initialization>.pkl``, which is indexed as
    ``(plot dataframe, k-means call objects, number of trials)``, saves a
    scatter plot next to it as ``.png`` and returns summary statistics.

    :param initialization: suffix of the result file, also used in the title.
    :returns: dict with the "mean", "sd" and "min" of the ``obj`` attribute
        of the first ``num_trials`` k-means calls.
    """
    filename = "results/prob231cd" + initialization
    # NOTE(review): pickle.load can execute arbitrary code; only load result
    # files produced by a trusted run.
    # The context manager closes the handle, which was previously leaked.
    with open(filename + ".pkl", "rb") as pickle_file:
        tuple_in = pkl.load(pickle_file)
    prob231c_plot_df = tuple_in[0]
    kmcalls = tuple_in[1]
    num_trials = tuple_in[2]
    p = gg.ggplot(prob231c_plot_df, gg.aes(x="x1", y="x2", colour="data")) + \
        gg.geom_point() + gg.ggtitle(initialization + " initialization")
    gg.ggsave(filename + ".png", plot=p)
    obj = [kmcalls[i].obj for i in range(num_trials)]
    obj_stats = {"mean": np.mean(obj), "sd": np.std(obj), "min": np.min(obj)}
    return obj_stats
Example #10
0
def _ggplot(df, out_file):
    """Plot faceted items with ggplot wrapper on top of matplotlib.

    The "variant.type", "category" and "caller" columns of ``df`` are
    replaced by their display labels (in place) before a faceted bar chart
    is written to ``out_file``.

    XXX Not yet functional
    """
    import ggplot as gg
    df["variant.type"] = [vtype_labels[raw] for raw in df["variant.type"]]
    df["category"] = [cat_labels[raw] for raw in df["category"]]
    # .get() maps callers without a label to None.
    df["caller"] = [caller_labels.get(raw, None) for raw in df["caller"]]

    chart = gg.ggplot(df, gg.aes(x="caller", y="value.floor"))
    chart = chart + gg.geom_bar()
    chart = chart + gg.facet_wrap("variant.type", "category")
    chart = chart + gg.theme_seaborn()
    gg.ggsave(chart, out_file)
def prob231cd_recover(initialization):
    """Reload pickled prob231cd results, re-plot them and summarise objectives.

    Reads ``results/prob231cd<initialization>.pkl``, which is indexed as
    ``(plot dataframe, k-means call objects, number of trials)``, saves a
    scatter plot next to it as ``.png`` and returns summary statistics.

    :param initialization: suffix of the result file, also used in the title.
    :returns: dict with the "mean", "sd" and "min" of the ``obj`` attribute
        of the first ``num_trials`` k-means calls.
    """
    filename = "results/prob231cd" + initialization
    # NOTE(review): pickle.load can execute arbitrary code; only load result
    # files produced by a trusted run.
    # The context manager closes the handle, which was previously leaked.
    with open(filename + ".pkl", "rb") as pickle_file:
        tuple_in = pkl.load(pickle_file)
    prob231c_plot_df = tuple_in[0]
    kmcalls = tuple_in[1]
    num_trials = tuple_in[2]
    p = gg.ggplot(prob231c_plot_df, gg.aes(x="x1", y="x2", colour="data")) + \
        gg.geom_point() + gg.ggtitle(initialization + " initialization")
    gg.ggsave(filename + ".png", plot=p)
    obj = [kmcalls[i].obj for i in range(num_trials)]
    obj_stats = {"mean": np.mean(obj), "sd": np.std(obj), "min": np.min(obj)}
    return obj_stats
def prob231b(initialization="regular"):
    """Run k-means for several cluster counts and save one plot per run.

    For each k in 2, 3, 5, 10, 15, 20 a ``KmeansCall`` on ``features_only``
    is run, its data is coloured by the resulting class labels, and the
    scatter plot is saved as ``results/k=<k>_<datestring>.png``.
    """
    cluster_counts = [2, 3, 5, 10, 15, 20]
    kmcalls = [0] * len(cluster_counts)
    for slot, k in enumerate(cluster_counts):
        call = KmeansCall(features_only, k, initialization)
        kmcalls[slot] = call
        call.run_kmeans(verbose=False)

        df_to_plot = call.data.copy()
        df_to_plot["class_label"] = list(call.class_label)
        chart = gg.ggplot(df_to_plot, gg.aes(x="x1", y="x2", colour="class_label"))
        chart = chart + gg.geom_point()
        chart = chart + gg.ggtitle("Synth. data, k=" + str(k))
        metadata = "k=" + str(k) + "_" + datestring
        gg.ggsave(filename="results/" + metadata + ".png", plot=chart)
def prob231b(initialization="regular"):
    """Cluster ``features_only`` with k-means for a fixed list of k values.

    Each run (k = 2, 3, 5, 10, 15, 20) produces a scatter plot of x1/x2
    coloured by class label, saved to ``results/k=<k>_<datestring>.png``.
    """
    cluster_counts = [2, 3, 5, 10, 15, 20]
    kmcalls = [0 for _ in cluster_counts]
    for position, num_clusters in enumerate(cluster_counts):
        current = KmeansCall(features_only, num_clusters, initialization)
        kmcalls[position] = current
        current.run_kmeans(verbose=False)

        labelled = current.data.copy()
        labelled["class_label"] = list(current.class_label)
        title = "Synth. data, k=" + str(num_clusters)
        plot = gg.ggplot(labelled, gg.aes(x="x1", y="x2", colour="class_label")) + \
            gg.geom_point() + gg.ggtitle(title)
        out_name = "results/" + "k=" + str(num_clusters) + "_" + datestring + ".png"
        gg.ggsave(filename=out_name, plot=plot)
Example #14
0
def plotHistogramMeans(hist, fileName):
    """Plot each row of ``hist`` as its own line facet and save the image.

    :param hist: 2-D array; a row-index column named 'ID' is prepended and
        the remaining columns are labelled 0..n-1 before melting.
    :param fileName: file name of the image written inside ``./IMG/``.
    """
    num_clust = hist.shape[0]
    # Row identifiers as a column vector (plain ndarray rather than the
    # legacy np.mat matrix type).
    IDS = np.arange(num_clust).reshape(num_clust, 1)

    histD = np.concatenate((IDS, hist), axis=1)

    # list(...) keeps the concatenation valid on Python 3, where range() is
    # not a list.
    Data = pd.DataFrame(histD, columns=['ID'] + list(range(hist.shape[1])))
    Melted = pd.melt(Data, id_vars=['ID'])
    pv = ggplot.ggplot(
        ggplot.aes(x='variable', y='value'),
        data=Melted) + ggplot.geom_line() + ggplot.facet_wrap("ID")
    print("Saving mean histograms")
    ggplot.ggsave(pv, './IMG/' + fileName)
Example #15
0
def googletrend_command(delta_t, threshold=0.0, inverse=False):
    """Run the google trend algorithm over a parameter grid and plot its ROC.

    For every threshold from 0 to ``threshold`` (step 0.1) and every delta_t
    from 1 up to (but excluding) ``int(delta_t)`` the classifier is run and
    evaluated; the collected (FPR, TPR) points plus a y=x baseline are saved
    to ``./plots/googletrend[_inverse].jpg``.

    :param delta_t:   the upper bound for original delta_t parameter
    :param threshold: upper bound for the threshold of differentiating two classes
    :param inverse:   whether to inverse the classifier
    """
    # File name and title depend on whether the classifier is inverted.
    if inverse:
        filename = "googletrend" + "_inverse"
        titlename = "ROC of google trend classifier" + " (inverse version)"
    else:
        filename = "googletrend"
        titlename = "ROC of google trend classifier"
    filepath = "./plots/%s.jpg" % filename

    data = googletrend.preprocess()

    # Evaluation metrics, one entry per (threshold, delta_t) combination.
    tpr_values, fpr_values, series_labels = [], [], []
    for thre in np.arange(0, threshold + 0.1, 0.1):
        print("==> threshold: %f, inverse: %s" % (thre, inverse))
        for i in xrange(1, int(delta_t)):
            googletrend.algorithm(data, i, thre, inverse)
            tp_rate, fp_rate = googletrend.evaluate(data)
            tpr_values.append(tp_rate)
            fpr_values.append(fp_rate)
            series_labels.append('thre_' + str(thre))

    # y = x baseline for comparison.
    tpr_values.extend([0.0, 1.0])
    fpr_values.extend([0.0, 1.0])
    series_labels.extend(['baseline', 'baseline'])

    output = {'tpr': tpr_values, 'fpr': fpr_values, 'plot': series_labels}
    df = pd.DataFrame(output)

    graph = gg.ggplot(df, gg.aes('fpr', 'tpr', color='plot'))
    graph = graph + gg.theme_seaborn()
    graph = graph + gg.ggtitle(titlename)
    graph = graph + gg.xlab("FPR")
    graph = graph + gg.ylab("TPR")
    graph = graph + gg.xlim(0.0, 1.0)
    graph = graph + gg.ylim(0.0, 1.0)
    graph = graph + gg.geom_point()
    graph = graph + gg.geom_line()
    gg.ggsave(plot=graph, filename=filepath, width=6, height=6, dpi=100)
def prob231g():
    """Run EM with three clusters and save its diagnostics.

    Writes the log-likelihood trace (``results/prob231g_loglike.png``), the
    cluster-assignment scatter plot (``results/prob231g_clusters.png``) and
    the pickled ``EMCall`` object (``results/prob231g_a.pkl``).
    """
    filename = "results/prob231g"

    num_clusters_231g = 3
    emcall = EMCall(features_only, labels_only, num_clusters_231g)
    emcall.run_em()

    plt.plot(emcall.log_likelihood_record)
    plt.title("Likelihood over EM iterations")
    plt.savefig(filename + "_loglike.png")

    prob231g_plot_df = emcall.data.copy()
    prob231g_plot_df["class_label"] = list(emcall.class_label)
    p = gg.ggplot(prob231g_plot_df, gg.aes(x="x1", y="x2", colour="class_label")) + \
        gg.geom_point() + gg.ggtitle("EM cluster assignments")
    gg.ggsave(filename + "_clusters.png", plot=p)

    # Close (and therefore flush) the pickle file deterministically instead
    # of leaving the handle to the garbage collector.
    with open(filename + "_a.pkl", "wb") as pickle_file:
        pkl.dump(emcall, pickle_file)
    print("Done with 231g.")
    return
def prob231g():
    """Run EM with three clusters and save its diagnostics.

    Writes the log-likelihood trace (``results/prob231g_loglike.png``), the
    cluster-assignment scatter plot (``results/prob231g_clusters.png``) and
    the pickled ``EMCall`` object (``results/prob231g_a.pkl``).
    """
    filename = "results/prob231g"

    num_clusters_231g = 3
    emcall = EMCall(features_only, labels_only, num_clusters_231g)
    emcall.run_em()

    plt.plot(emcall.log_likelihood_record)
    plt.title("Likelihood over EM iterations")
    plt.savefig(filename + "_loglike.png")

    prob231g_plot_df = emcall.data.copy()
    prob231g_plot_df["class_label"] = list(emcall.class_label)
    p = gg.ggplot(prob231g_plot_df, gg.aes(x="x1", y="x2", colour="class_label")) + \
        gg.geom_point() + gg.ggtitle("EM cluster assignments")
    gg.ggsave(filename + "_clusters.png", plot=p)

    # Close (and therefore flush) the pickle file deterministically instead
    # of leaving the handle to the garbage collector.
    with open(filename + "_a.pkl", "wb") as pickle_file:
        pkl.dump(emcall, pickle_file)
    print("Done with 231g.")
    return
Example #18
0
def plotSetOfArrays(arrays, names, fileName):
    """Plot several column arrays against a shared 0..1 axis.

    A 'noise' column of evenly spaced values in [0, 1] (one per row of
    ``arrays[0]``) is prepended to the arrays, the result is melted to long
    form and drawn with ggplot (lines + points), then saved to
    ``./IMG/<fileName>``. Afterwards the first column of up to five arrays is
    drawn through the ``output_file``/``figure``/``line``/``show`` plotting
    helpers (presumably an old bokeh API -- defined outside this block).

    :param arrays: sequence of 2-D arrays with the same number of rows; the
        total number of columns must equal ``len(names)``.
    :param names: column labels, also used as legend entries.
    :param fileName: file name of the ggplot image inside ``./IMG/``.
    """
    IDS = np.linspace(0, 1, arrays[0].shape[0])
    # A single concatenate call instead of re-copying the matrix per array.
    A = np.concatenate([IDS.reshape(-1, 1)] + list(arrays), axis=1)
    Data = pd.DataFrame(A, columns=['noise'] + list(names))
    Melted = pd.melt(Data, id_vars=['noise'])

    pv = (ggplot.ggplot(ggplot.aes(x='noise', y='value', colour='variable'), data=Melted)
          + ggplot.geom_line()
          + ggplot.geom_point())
    ggplot.ggsave(pv, './IMG/' + fileName)

    output_file("iou_scores.html", title="correlation.py example")

    figure(tools="pan,wheel_zoom,box_zoom,reset,previewsave")
    hold()
    # zip() stops at the shortest input, so fewer than five arrays no longer
    # raises IndexError; at most five series are drawn, one per colour.
    palette = ['#A6CEE3', '#1F78B4', '#B2DF8A', '#33A02C', '#fb9a99']
    for series, label, colour in zip(arrays, names, palette):
        line(IDS, series[:, 0], color=colour, legend=label)

    curplot().title = "Minimum IOU"
    grid().grid_line_alpha = 0.3
    show()
            try:
                session.run(run_script)
                truematch_mod.append(session.getvalue("mod_delta_m"+str(mc)))
                runtime_mod.append(gmodel_relaxed.Runtime)
            except RuntimeError:
                print ("unable to evaluate true matchin perf of gurobi model in "+fname)
        except (OSError, NameError, ValueError,RuntimeError):
            print "unable to process gurobi model in "+fname



# Persist the raw measurements: runtimes in runtime.csv and true-match
# scores in tm.csv (tm.csv must receive truematch_mod, not the runtimes).
np.savetxt("runtime.csv", runtime_mod, delimiter=",")
np.savetxt("tm.csv", truematch_mod, delimiter=",")

# NOTE(review): "tm" and "runtime" concatenate the orig and mod lists while
# the label column only covers the mod runs; the columns have equal length
# only while the *_orig lists are empty -- confirm against the code above.
tm_df = pd_df({
    "tm": truematch_orig + truematch_mod,
    "runtime": runtime_orig + runtime_mod,
    "orig_or_mod": ["mod"]*len(runtime_mod) # ["orig"]*nmc
})
nmc = len(runtime_mod)




#print gg.ggplot(tm_df, aes('orig_or_mod', 'runtime')) + \
#  gg.geom_line(colour='steelblue')

# Runtime (log-scaled x axis) against true-match score.
comp_plot = gg.ggplot(data=tm_df, aesthetics=gg.aes(x='runtime', y='tm')) + gg.geom_point() + gg.scale_x_log10()
gg.ggsave("graphmatch_IP_runtime_vs_tm.pdf", plot=comp_plot)
print("mean of tm:" + str(np.mean(truematch_mod)))
Example #20
0
import pandas as pd
import ggplot as gg
import pickle as pkl
import math
from analysis.TestResult import TestResult
# Path stem of the pickled run to re-plot (placeholder left to be filled in).
therunthatworked = "../results/LSH_vs_KDT_????? "
# NOTE(review): pickle.load can execute arbitrary code; only load result
# files produced by a trusted run.
# The context manager closes the input file, which was previously left open.
with open(therunthatworked + ".pkl", 'rb') as f:
    results = pkl.load(f)

# One column per attribute of the loaded result objects; query times are
# plotted on a log2 scale and method names are cut to three characters.
times = [math.log(r.avg_time, 2) for r in results]
distances = [r.avg_distance for r in results]
methods = [r.method[0:3] for r in results]
alpha = [r.alpha for r in results]
m = [r.m for r in results]
results_df = pd.DataFrame(
    data={
        "times": times,
        "distances": distances,
        "methods": methods,
        "m": m,
        "alpha": alpha
    })
print(results_df)
p = gg.ggplot(data = results_df, aesthetics = gg.aes(x = "times",
                                                     y = "distances",
                                                     label = "methods")) + \
    gg.geom_text() + \
    gg.ggtitle("LSH and KD trees: tradeoffs") + \
    gg.xlab("log2 average query time  ") + gg.ylab("Average L2 distance from query point)")
gg.ggsave(filename=therunthatworked + "log2.png", plot=p)
    "orig_or_mod": ["orig"]*nmc + ["mod"]*nmc
})

# Paired t-tests of the original against the modified runs.
# NOTE(review): both results are overwritten by the Wilcoxon tests below, so
# only the Wilcoxon outcomes remain bound afterwards.
p_val_truematch_diff = st.ttest_rel(truematch_orig, truematch_mod)
p_val_timediff = st.ttest_rel(runtime_orig, runtime_mod)

# Wilcoxon signed-rank tests on the same pairs.
p_val_truematch_diff = st.wilcoxon(truematch_orig, truematch_mod)
p_val_timediff = st.wilcoxon(runtime_orig, runtime_mod)

#print gg.ggplot(tm_df, aes('orig_or_mod', 'tm')) + \
#  gg.geom_line(colour='steelblue')


#print gg.ggplot(tm_df, aes('orig_or_mod', 'runtime')) + \
#  gg.geom_line(colour='steelblue')

# Runtime (log-scaled x axis) against true-match score, coloured by variant.
comp_plot = gg.ggplot(
    data=tm_df,
    aesthetics=gg.aes(x='runtime', y='tm', colour='orig_or_mod'))
comp_plot = comp_plot + gg.geom_point()
comp_plot = comp_plot + gg.scale_x_log10()
gg.ggsave("orig_IP_vs_modified_IP_3.pdf", plot=comp_plot)












Example #22
0
            multiplier_index_list = \
                df[(df.weekday == day) & (df.rain == rain_status)].index

            df.loc[multiplier_index_list, u'ENTRIESn_hourly'] = \
                multiplier * entries_sum

    ##now we have a dataframe wich is ready to be utilized for making our
    ##plot using the data contained within.

    p = ggplot.ggplot(ggplot.aes(x = u'factor(weekday)', \
                                 weight = u'ENTRIESn_hourly', \
                                 fill = u'weekday'),\
                      data = df) +\
        ggplot.geom_bar() +\
        ggplot.facet_grid(x = u'rain', y = u'weekday') +\
        ggplot.ggtitle('Average Ridership on Sunny & Rainy ISO Weekdays')
    print p
    return p


if __name__ == '__main__':
    # Script entry point: read the turnstile CSV, build the plot and save it.
    input_filename = 'turnstile_data_master_with_weather_test.csv'
    output_filename = 'plot.png'
    # ggsave writes the image itself, so the output path is not opened here:
    # a separate open(..., 'wb') truncated the file and held a second handle
    # on it while ggsave was writing.
    turnstile_weather = pandas.read_csv(input_filename)
    plot = plot_weather_data(turnstile_weather)
    ggplot.ggsave(output_filename, plot)
Example #23
0
            multiplier_index_list = \
                df[(df.weekday == day) & (df.rain == rain_status)].index

            df.loc[multiplier_index_list, u'ENTRIESn_hourly'] = \
                multiplier * entries_sum

    ##now we have a dataframe wich is ready to be utilized for making our
    ##plot using the data contained within.

    p = ggplot.ggplot(ggplot.aes(x = u'factor(weekday)', \
                                 weight = u'ENTRIESn_hourly', \
                                 fill = u'weekday'),\
                      data = df) +\
        ggplot.geom_bar() +\
        ggplot.facet_grid(x = u'rain', y = u'weekday') +\
        ggplot.ggtitle('Average Ridership on Sunny & Rainy ISO Weekdays')
    print p
    return p


if __name__ == '__main__':
    # Script entry point: read the turnstile CSV, build the plot and save it.
    input_filename = 'turnstile_data_master_with_weather_test.csv'
    output_filename = 'plot.png'
    # ggsave writes the image itself, so the output path is not opened here:
    # a separate open(..., 'wb') truncated the file and held a second handle
    # on it while ggsave was writing.
    turnstile_weather = pandas.read_csv(input_filename)
    plot = plot_weather_data(turnstile_weather)
    ggplot.ggsave(output_filename, plot)
Example #24
0
def _build_color_list(variable_count):
    """Return ``variable_count`` colours centred on black.

    Colours are taken from the end of the warm list and the start of the cool
    list so the palette stays symmetric around black. Counts are passed to
    the slices as lengths, so "zero lower colours" yields an empty list (a
    ``[-0:]`` slice would return the whole list).
    """
    colors_lower = ["#ff0000", "#cc0000", "#993300", "#666600"]
    colors_upper = ["#006666", "#003399", "#0000cc", "#0000ff"]
    colors_mid = ["#000000"]
    color_count = variable_count - 1
    # Floor division keeps the slice bounds integral on Python 2 and 3.
    lower_n = color_count // 2
    upper_n = (color_count + 1) // 2
    lower_start = max(0, len(colors_lower) - lower_n)
    return colors_lower[lower_start:] + colors_mid + colors_upper[0:max(0, upper_n)]


def main():
    """Simulate the selected ruleset's rolls and save one graph per entry.

    Parses the command line, runs ``rulemod.simulate_rolls`` over
    ``rulemod.variables`` in a process pool, collects the records into a
    DataFrame and, when ``args.graph`` is set, renders and saves a PNG for
    every key of ``rulemod.graphs``.

    Returns 0. Exits with status 130 if interrupted during the simulation.
    """
    global args, ruleset
    # Arguments Parser
    argparser, subparser = parser_setup()
    register_rules(subparser)
    args = argparser.parse_args()
    rulemod = sys.modules["rpgdice.rulesets.%s" % args.ruleset]
    rulemod.prepare(args, srand)

    if args.debug:
        print("DEBUG: args %s" % (args,))
        print("")

    results = list()
    pool = multiprocessing.Pool()
    try:
        for result in pool.map(rulemod.simulate_rolls, rulemod.variables):
            results.extend(result)
        pool.close()
        pool.join()
    except KeyboardInterrupt:
        sys.exit(130)
    if args.debug:
        print("DEBUG: results:")
        pprint(results)
        print("")

    # Default axis/legend labels; a ruleset module may override any of them
    # with a module-level attribute of the same name.
    conf = {"vlab": "Variables", "xlab": "Outcome", "ylab": "Probability %"}
    for item in conf:
        conf[item] = getattr(rulemod, item, conf[item])

    columns = ("Graph", conf["vlab"], conf["xlab"], "Count", conf["ylab"])
    data = pandas.DataFrame.from_records(results, columns=columns)

    # Create and save graphs
    for gkey in rulemod.graphs:
        # Graph Defaults
        graph_conf = conf.copy()
        graph_conf["file_prefix"] = "%s%02d" % (args.ruleset, gkey)
        graph_conf["file_suffix"] = str()
        graph_conf["color_list"] = _build_color_list(len(rulemod.variables))

        # graph_conf from graph: per-graph value first, then a module-level
        # attribute, then whatever default is already present (else None).
        graph_items = (
            "color_list",
            "file_prefix",
            "file_suffix",
            "graph_type",
            "limits",
            "x_breaks",
            "x_labels",
            "title",
            "vlab",
            "xlab",
            "ylab",
        )
        for item in graph_items:
            try:
                graph_conf[item] = rulemod.graphs[gkey][item]
            except (KeyError, IndexError, TypeError):
                graph_conf[item] = getattr(rulemod, item, graph_conf.get(item))
        if args.debug:
            print("DEBUG: graph_conf:")
            pprint(graph_conf)
            print("")

        # plot_data: rows of this graph only, with the per-graph labels.
        plot_data = data[data["Graph"] == gkey].copy()
        plot_data.rename(
            columns={
                conf["vlab"]: graph_conf["vlab"],
                conf["xlab"]: graph_conf["xlab"],
                conf["ylab"]: graph_conf["ylab"],
            },
            inplace=True,
        )
        plot_data.index = range(1, len(plot_data) + 1)
        if args.debug:
            print("DEBUG: plot_data:")
            pprint(plot_data)
            print("")

        # Create plot
        if args.graph:
            plot = (
                ggplot.ggplot(
                    ggplot.aes(x=graph_conf["xlab"], y=graph_conf["ylab"], color=graph_conf["vlab"]), data=plot_data
                )
                + ggplot.ggtitle(graph_conf["title"])
                + ggplot.theme_gray()
                + ggplot.scale_colour_manual(values=graph_conf["color_list"])
            )
            plot.rcParams["font.family"] = "monospace"
            if graph_conf["x_breaks"] and graph_conf["x_labels"]:
                plot += ggplot.scale_x_discrete(breaks=graph_conf["x_breaks"], labels=graph_conf["x_labels"])
            if graph_conf["limits"]:
                plot += ggplot.ylim(graph_conf["limits"][0], graph_conf["limits"][1])
            if graph_conf["graph_type"] == "bars":
                # Thick lines stand in for bars; each non-empty outcome gets
                # its percentage printed above it.
                plot += ggplot.geom_line(size=20)
                text_data = plot_data[plot_data["Count"] > 0]
                text_data.index = range(0, len(text_data))
                outcomes = dict(text_data[graph_conf["xlab"]])
                percents = dict(text_data[graph_conf["ylab"]])
                for k in outcomes:
                    percent = "%4.1f%%" % percents[k]
                    x = outcomes[k]
                    y = percents[k] + 4
                    color = graph_conf["color_list"][k]
                    plot += ggplot.geom_text(label=[percent], x=[x, x + 1], y=[y, y - 1], color=color)
            else:
                plot += ggplot.geom_line()
                plot += ggplot.geom_point(alpha=0.3, size=50)
            if hasattr(rulemod, "update_plot"):
                plot = rulemod.update_plot(gkey, graph_conf, plot, plot_data)
            if args.dumpsave:
                # Render the plot but discard the output.
                filename = "/dev/null"
            else:
                filename = "%s%s.png" % (graph_conf["file_prefix"], graph_conf["file_suffix"])
            ggplot.ggsave(filename, plot, format="png", dpi=300)

    return 0
Example #25
0
                           m=1,
                           alpha=alpha))

    #save results to results folder, with plot and printing to screen.
    metadata = datetime.datetime.now().strftime(
        "%Y-%m-%d %H:%M:%S") + "test_mode==" + str(test_mode)
    f = open("results/LSH_vs_KDT_%s.pkl" % metadata, mode='w')
    pkl.dump(obj=results, file=f)

    logtimes = [math.log(r.avg_time, 2) for r in results]
    distances = [r.avg_distance for r in results]
    methods = [r.method[0:3] for r in results]
    alpha = [r.alpha for r in results]
    m = [r.m for r in results]
    results_df = pd.DataFrame(
        data={
            "logtimes": logtimes,
            "distances": distances,
            "methods": methods,
            "m": m,
            "alpha": alpha
        })
    print results_df
    p = gg.ggplot(data = results_df, aesthetics = gg.aes(x = "logtimes",
                                                         y = "distances",
                                                         label = "methods")) + \
        gg.geom_text() + \
        gg.ggtitle("LSH and KD trees: tradeoffs") + \
        gg.xlab("Log2 average query time  ") + gg.ylab("Average L2 distance from query point)")
    gg.ggsave(filename="results/LSH_vs_KDT_%s.png" % metadata, plot=p)
import pandas as pd
import ggplot as gg
import pickle as pkl
import math
from analysis.TestResult import TestResult
# Path stem of the pickled run to re-plot (placeholder left to be filled in).
therunthatworked = "../results/LSH_vs_KDT_????? "
# NOTE(review): pickle.load can execute arbitrary code; only load result
# files produced by a trusted run.
# The context manager closes the input file, which was previously left open.
with open(therunthatworked + ".pkl", 'rb') as f:
    results = pkl.load(f)

# One column per attribute of the loaded result objects; query times are
# plotted on a log2 scale and method names are cut to three characters.
times = [math.log(r.avg_time, 2) for r in results]
distances = [r.avg_distance for r in results]
methods = [r.method[0:3] for r in results]
alpha = [r.alpha for r in results]
m = [r.m for r in results]
results_df = pd.DataFrame(data = {"times" : times,
                                  "distances" : distances,
                                  "methods" : methods,
                                  "m":m,
                                  "alpha": alpha})
print(results_df)
p = gg.ggplot(data = results_df, aesthetics = gg.aes(x = "times",
                                                     y = "distances",
                                                     label = "methods")) + \
    gg.geom_text() + \
    gg.ggtitle("LSH and KD trees: tradeoffs") + \
    gg.xlab("log2 average query time  ") + gg.ylab("Average L2 distance from query point)")
gg.ggsave(filename=therunthatworked + "log2.png", plot = p)
def _firms_plot_path(name_template, decision, title_pop_val):
    """Return the full path of one plot image inside parameters.OUTPUT_PATH.

    *name_template* must contain three ``%s`` placeholders, filled in order
    with *decision*, *title_pop_val* and ``parameters.final_Time``.
    """
    return os.path.join(
        parameters.OUTPUT_PATH,
        name_template % (decision, title_pop_val, parameters.final_Time))


def _firms_remove_stale_plot(path):
    """Delete *path* if it is an existing regular file, so a new plot replaces it."""
    if os.path.isfile(path):
        os.remove(path)


def _firms_set_white_background(axes):
    """Give *axes* a white background on both old and new Matplotlib releases."""
    # set_axis_bgcolor was deprecated in Matplotlib 2.0 and later removed;
    # set_facecolor is its replacement. Prefer the new name when available.
    if hasattr(axes, 'set_facecolor'):
        axes.set_facecolor('w')
    else:
        axes.set_axis_bgcolor('w')


def firms_dynamics_plot(decision):
    """Plot the firm-population time series of one simulation run to PNG files.

    Reads ``temp_general_firms_pop_<pop_redutor>_decision_<decision>_time_<final_Time>.txt``
    from ``parameters.OUTPUT_PATH`` (comma separated, no header, comma as
    decimal mark), optionally drops the rows whose ``time`` is below
    ``parameters.time_to_cut_plots``, and writes eight images into the same
    directory:

    * six ggplot line charts, one per column: total_firms, average_output,
      average_age, average_size, new_firms, exit_firms;
    * two pandas/Matplotlib charts pairing average_effort with total_effort
      and average_size with max_size.

    An existing image with the same name is deleted before the new one is
    saved.

    Args:
        decision: value interpolated (with ``%s``) into the input and output
            file names.

    Returns:
        None. The function is called for its file-system side effects.
    """
    data = pd.read_csv(
        os.path.join(
            parameters.OUTPUT_PATH,
            "temp_general_firms_pop_%s_decision_%s_time_%s.txt" %
            (parameters.pop_redutor, decision, parameters.final_Time)),
        sep=",",
        header=None,
        decimal=",").astype(float)
    # The file has no header row, so name the columns here.
    data.columns = [
        'time', 'total_firms', 'average_output', 'average_age', 'average_size',
        'new_firms', 'exit_firms', 'max_size', 'total_effort', 'average_effort'
    ]

    # Optionally discard the initial (burn-in) part of the series.
    if parameters.time_to_cut_plots > 0:
        keep = data['time'].astype(int) >= parameters.time_to_cut_plots
        data = data.loc[keep, :]

    # Part of every output file name.
    title_pop_val = float(parameters.pop_redutor) * 100

    # Tick positions every 12 time steps plus one final tick just past the
    # last sample; tick labels are those positions divided by 12.
    # NOTE(review): this presumably means `time` is in months - confirm.
    first_time = int(data['time'].min())
    last_time = data['time'].max()
    list_of_years_division = list(range(first_time, int(last_time),
                                        12)) + [last_time + 1]
    list_of_years = [int(tick / 12) for tick in list_of_years_division]

    # Figure size (inches) for all plots; dpi only for the Matplotlib ones.
    dpi_var_plot = 700
    width_var_plot = 15
    height_var_plot = 10

    # (column, plot title, y-axis label, output file name template)
    line_plots = (
        ('total_firms', 'Total firms', 'Total of Firms',
         'temp_general_total_firms_%s_%s_%s.png'),
        ('average_output', 'Average of output', 'Units',
         'temp_general_average_output_%s_%s_%s.png'),
        ('average_age', 'Average of age of firms', 'Age of Firms',
         'temp_general_average_age_%s_%s_%s.png'),
        ('average_size', 'Average of size of firms', 'Units',
         'temp_general_average_size_%s_%s_%s.png'),
        ('new_firms', 'Number of new firms', 'Units',
         'temp_general_number_of_new_firms_%s_%s_%s.png'),
        ('exit_firms', 'Number of firms out', 'Units',
         'temp_general_number_of_firms_out_%s_%s_%s.png'),
    )
    for column, title, y_label, name_template in line_plots:
        plot_data = (
            gg.ggplot(data, gg.aes('time', column)) + gg.geom_line() +
            gg.scale_y_continuous(breaks=11) +
            gg.scale_x_discrete(breaks=list_of_years_division,
                                labels=list_of_years) +
            gg.ggtitle(title) + gg.xlab('Years') + gg.ylab(y_label) +
            gg.theme_bw())

        target = _firms_plot_path(name_template, decision, title_pop_val)
        _firms_remove_stale_plot(target)
        # The plot is deliberately passed first and the path second, exactly
        # as this code has always called ggsave.
        gg.ggsave(plot_data,
                  target,
                  width=width_var_plot,
                  height=height_var_plot,
                  units="in")

    # (columns drawn together, plot title, y-axis label, file name template)
    # NOTE(review): the first title says "maximum effort" but the column
    # plotted is total_effort - confirm which one is intended.
    paired_plots = (
        (('average_effort', 'total_effort'),
         'Average and maximum effort of employees', 'Values units of effort',
         'temp_average_and_maximum_effort_of_firms_out_%s_%s_%s.png'),
        (('average_size', 'max_size'),
         'Average and maximum size firms', 'Number of employees',
         'temp_average_size_and_maximum_of_firms_out_%s_%s_%s.png'),
    )
    for columns, title, y_label, name_template in paired_plots:
        axes = data[list(columns)].plot(title=title)
        axes.set_xlabel('Years')
        axes.set_ylabel(y_label)
        # Place the legend outside the plotting area, to the right.
        axes.legend(loc='center left', bbox_to_anchor=(1, 0.5))
        axes.set_xticks(list_of_years_division)
        axes.set_xticklabels(list_of_years)
        _firms_set_white_background(axes)
        fig = axes.get_figure()
        fig.set_size_inches(width_var_plot, height_var_plot)

        target = _firms_plot_path(name_template, decision, title_pop_val)
        _firms_remove_stale_plot(target)
        fig.savefig(target, dpi=dpi_var_plot)
Example #28
0
    # Testing: one "hashing" run per value in mvals (alpha fixed at 1), then
    # one "kdtree" run per value in avals (m fixed at 1).
    results = []
    for m in mvals:
        results.append(test_approx_nn(method="hashing", traindata=docdata, testdata=testdata, m=m, alpha=1))
    for alpha in avals:
        results.append(test_approx_nn(method="kdtree", traindata=docdata, testdata=testdata, m=1, alpha=alpha))

    # Save results to the results folder, with a plot and a printout.
    # The timestamp plus test mode makes the file names unique per run.
    metadata = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") + "test_mode==" + str(test_mode)
    # Pickle output is binary data, so open in 'wb'; the `with` block flushes
    # and closes the file even if dumping fails.
    with open("results/LSH_vs_KDT_%s.pkl" % metadata, mode='wb') as f:
        pkl.dump(obj=results, file=f)

    # One entry per result object, in the order the runs were made.
    logtimes = [math.log(r.avg_time, 2) for r in results]
    distances = [r.avg_distance for r in results]
    # Only the first three characters of the method name are used as the label.
    methods = [r.method[0:3] for r in results]
    alpha = [r.alpha for r in results]
    m = [r.m for r in results]
    results_df = pd.DataFrame(data={"logtimes": logtimes,
                                    "distances": distances,
                                    "methods": methods,
                                    "m": m,
                                    "alpha": alpha})
    # Parenthesised single-argument form prints identically on Python 2 and 3.
    print(results_df)
    # Each point is drawn as its method label (geom_text) rather than a marker.
    p = (gg.ggplot(data=results_df,
                   aesthetics=gg.aes(x="logtimes",
                                     y="distances",
                                     label="methods")) +
         gg.geom_text() +
         gg.ggtitle("LSH and KD trees: tradeoffs") +
         gg.xlab("Log2 average query time  ") +
         gg.ylab("Average L2 distance from query point)"))
    gg.ggsave(filename="results/LSH_vs_KDT_%s.png" % metadata, plot=p)