def update_plot(gkey, graph_conf, plot, plot_data):
    """Add padded x axis breaks and the rolled dice symbols to ``plot``.

    The x axis breaks are the sorted distinct values of the
    ``graph_conf["xlab"]`` column of ``plot_data``, with one extra blank-labelled
    break half a unit before the first and after the last value. Labels show
    the absolute value of each break.

    One text layer is then added per letter of ``graphs[gkey]["dice_key"]``
    (``graphs`` is a module-level name). Letters "C", "D" and "B" go on an
    upper row, all other letters on a row 1.2 units lower; each row advances
    to the right independently.

    Returns the plot with the added scale and text layers.
    """
    # Determine x axis breaks
    ticks = sorted(plot_data[graph_conf["xlab"]].drop_duplicates().tolist())
    tick_labels = [" "]
    tick_labels.extend(abs(tick) for tick in ticks)
    tick_labels.append(" ")
    padded_ticks = [ticks[0] - 0.5] + ticks + [ticks[-1] + 0.5]
    plot += ggplot.scale_x_discrete(breaks=padded_ticks, labels=tick_labels)

    # Add rolled dice symbols
    upper_y = graph_conf["limits"][1] - 1
    # Keyed by "is this letter on the upper row?"
    row_y = {True: upper_y, False: upper_y - 1.2}
    # Both rows start at the first real (unpadded) break.
    row_x = {True: padded_ticks[1], False: padded_ticks[1]}
    palette = {
        "b": "#000000",
        "B": "#87CEFA",
        "c": "#ff0000",
        "d": "#4B0082",
        "C": "#ffff00",
        "D": "#008000",
    }
    step = len(padded_ticks) * 0.0235
    for letter in graphs[gkey]["dice_key"]:
        on_upper_row = letter in "CDB"
        symbol_color = palette[letter]
        x_pos = row_x[on_upper_row]
        y_pos = row_y[on_upper_row]
        plot += ggplot.geom_text(
            label=[letter.lower()],
            x=[x_pos, x_pos + 1],
            y=[y_pos, y_pos - 1],
            color=symbol_color,
            family="EotE Symbol",
        )
        row_x[on_upper_row] = x_pos + step
    return plot
def plot_outcomes(self, chart_title=None, use_ggplot=False):
    """ Draw one marker per observed patient at the dose level they received.

    Patients with a toxicity are marked with a cross ('X' in the ggplot
    version), the others with a circle ('O'). Nothing is drawn when no
    patients have been observed.

    :param chart_title: optional chart title. A verbose default is used when this is empty or None
    :type chart_title: str
    :param use_ggplot: True to use ggplot, else matplotlib
    :type use_ggplot: bool
    :return: the ggplot object when use_ggplot is True and there is at least one patient,
             otherwise None (the matplotlib version draws on the current figure)

    """
    if not chart_title:
        chart_title = "Each point represents a patient\nA circle indicates no toxicity, a cross toxicity" + "\n"

    if not use_ggplot:
        if not self.size() > 0:
            return None

        import matplotlib.pyplot as plt
        import numpy as np

        patient_number = np.arange(1, self.size()+1)
        doses_given = np.array(self.doses())
        tox_loc = np.array(self.toxicities()).astype('bool')

        # Crosses for patients with toxicity, circles for the rest; a group
        # is only drawn when it has at least one member.
        for mask, marker in ((tox_loc, 'x'), (~tox_loc, 'o')):
            if sum(mask):
                plt.scatter(patient_number[mask], doses_given[mask], marker=marker, s=300,
                            facecolors='none', edgecolors='k')

        plt.title(chart_title)
        plt.ylabel('Dose level')
        plt.xlabel('Patient number')
        plt.yticks(self.dose_levels())

        # Size the current figure to a golden-ratio aspect, 12 inches wide.
        figure = plt.gcf()
        golden_ratio = (np.sqrt(5)+1)/2.
        figure.set_size_inches(12, 12/golden_ratio)
        return None

    if not self.size() > 0:
        return None

    from ggplot import (ggplot, ggtitle, geom_text, aes, ylim)
    import numpy as np
    import pandas as pd

    patient_number = range(1, self.size()+1)
    symbol = np.where(self.toxicities(), 'X', 'O')
    data = pd.DataFrame({'Patient number': patient_number,
                         'Dose level': self.doses(),
                         'DLT': self.toxicities(),
                         'Symbol': symbol})

    base = ggplot(data, aes(x='Patient number', y='Dose level', label='Symbol'))
    titled = base + ggtitle(chart_title)
    labelled = titled + geom_text(aes(size=20, vjust=-0.07))
    return labelled + ylim(1, 5)
def plot(self, inputs):
    """Build a scatter plot of two columns of ``self.dat`` for a single year.

    ``inputs`` supplies ``year``, ``xvar``, ``yvar`` and the ``shownames`` and
    ``linear`` switches. Returns None when the year does not occur in the
    ``Year`` column or when either requested column is missing; otherwise
    returns the ggplot object. With ``shownames`` each point is labelled from
    the ``self.ID_col`` column, and with ``linear`` a red "lm" smoother is
    added.
    """
    frame = self.dat

    year_is_known = inputs.year in frame.Year.values
    if not year_is_known:
        return None
    if not (inputs.xvar in frame and inputs.yvar in frame):
        return None

    rows_for_year = frame[frame.Year == inputs.year]
    chart = ggplot(rows_for_year, aes(x=inputs.xvar, y=inputs.yvar))
    chart = chart + geom_point()

    if inputs.shownames:
        name_labels = geom_text(aes(label=self.ID_col), vjust=1, hjust=1)
        chart = chart + name_labels
    if inputs.linear:
        trend_line = stat_smooth(color="red", method="lm")
        chart = chart + trend_line

    return chart
m=1, alpha=alpha)) #save results to results folder, with plot and printing to screen. metadata = datetime.datetime.now().strftime( "%Y-%m-%d %H:%M:%S") + "test_mode==" + str(test_mode) f = open("results/LSH_vs_KDT_%s.pkl" % metadata, mode='w') pkl.dump(obj=results, file=f) logtimes = [math.log(r.avg_time, 2) for r in results] distances = [r.avg_distance for r in results] methods = [r.method[0:3] for r in results] alpha = [r.alpha for r in results] m = [r.m for r in results] results_df = pd.DataFrame( data={ "logtimes": logtimes, "distances": distances, "methods": methods, "m": m, "alpha": alpha }) print results_df p = gg.ggplot(data = results_df, aesthetics = gg.aes(x = "logtimes", y = "distances", label = "methods")) + \ gg.geom_text() + \ gg.ggtitle("LSH and KD trees: tradeoffs") + \ gg.xlab("Log2 average query time ") + gg.ylab("Average L2 distance from query point)") gg.ggsave(filename="results/LSH_vs_KDT_%s.png" % metadata, plot=p)
# Testing: run the "hashing" method once per value in mvals (alpha fixed at 1)
# and the "kdtree" method once per value in avals (m fixed at 1), collecting
# every result object in a single list.
results = []
for m in mvals:
    results.append(test_approx_nn(method="hashing", traindata=docdata,
                                  testdata=testdata, m=m, alpha=1))
for alpha in avals:
    results.append(test_approx_nn(method="kdtree", traindata=docdata,
                                  testdata=testdata, m=1, alpha=alpha))

# Save results to results folder, with plot and printing to screen.
# The timestamp plus test-mode flag is used as the suffix of both output files.
metadata = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") + "test_mode==" + str(test_mode)

# Pickle data is binary: open in "wb" and let the context manager close the
# handle so the file is flushed even if dumping raises.
with open("results/LSH_vs_KDT_%s.pkl" % metadata, mode="wb") as f:
    pkl.dump(obj=results, file=f)

# One column per measured quantity; note that m and alpha are rebound here
# from loop variables to per-result lists.
logtimes = [math.log(r.avg_time, 2) for r in results]
distances = [r.avg_distance for r in results]
methods = [r.method[0:3] for r in results]  # first three letters label the points
alpha = [r.alpha for r in results]
m = [r.m for r in results]
results_df = pd.DataFrame(data={"logtimes": logtimes,
                                "distances": distances,
                                "methods": methods,
                                "m": m,
                                "alpha": alpha})
# Parenthesised single-argument form prints identically on Python 2 and 3.
print(results_df)

# Scatter of text labels: log2 query time against average distance.
# NOTE(review): the y axis label below ends with an unmatched ")".
p = gg.ggplot(data=results_df,
              aesthetics=gg.aes(x="logtimes", y="distances", label="methods")) + \
    gg.geom_text() + \
    gg.ggtitle("LSH and KD trees: tradeoffs") + \
    gg.xlab("Log2 average query time  ") + gg.ylab("Average L2 distance from query point)")
gg.ggsave(filename="results/LSH_vs_KDT_%s.png" % metadata, plot=p)
def _default_color_list(variable_count):
    """Return the default colour list for ``variable_count`` plotted variables.

    The list is centred on a single middle colour: the trailing entries of the
    "lower" list come first, then the middle colour, then the leading entries
    of the "upper" list. When the number of non-middle colours is odd, the
    extra one is taken from the "upper" list.
    """
    colors_lower = ["#ff0000", "#cc0000", "#993300", "#666600"]
    colors_upper = ["#006666", "#003399", "#0000cc", "#0000ff"]
    colors_mid = ["#000000"]
    color_count = max(variable_count - 1, 0)
    # Floor division keeps these usable as slice bounds under true division.
    lower_count = color_count // 2
    upper_count = color_count - lower_count
    # A "[-0:]" slice would return the whole list, so zero is handled
    # explicitly instead of slicing with a negated count.
    lower_part = colors_lower[-lower_count:] if lower_count else []
    return lower_part + colors_mid + colors_upper[0:upper_count]


def main():
    """Simulate rolls for the selected ruleset and save one graph per entry.

    Parses the command line, looks up the ruleset module in ``sys.modules``,
    runs ``rulemod.simulate_rolls`` over ``rulemod.variables`` in a process
    pool, and collects the rows into a DataFrame. For every key in
    ``rulemod.graphs`` a per-graph configuration is assembled (graph-specific
    value first, then a module-level attribute, then the default) and, when
    ``args.graph`` is set, a plot is built and saved as a PNG.

    Exits with status 130 on KeyboardInterrupt during the simulation.
    Returns 0.
    """
    global args, ruleset
    # Arguments Parser
    argparser, subparser = parser_setup()
    register_rules(subparser)
    args = argparser.parse_args()
    rulemod = sys.modules["rpgdice.rulesets.%s" % args.ruleset]
    rulemod.prepare(args, srand)

    if args.debug:
        print("DEBUG: args %s" % (args,))
        print("")

    results = list()
    pool = multiprocessing.Pool()
    try:
        for result in pool.map(rulemod.simulate_rolls, rulemod.variables):
            results.extend(result)
        pool.close()
        pool.join()
    except KeyboardInterrupt:
        sys.exit(130)
    if args.debug:
        print("DEBUG: results:")
        pprint(results)
        print("")

    # Default axis/legend labels; a ruleset module may override any of them
    # by defining an attribute of the same name.
    conf = {"vlab": "Variables", "xlab": "Outcome", "ylab": "Probability %"}
    for item in conf:
        conf[item] = getattr(rulemod, item, conf[item])

    columns = ("Graph", conf["vlab"], conf["xlab"], "Count", conf["ylab"])
    data = pandas.DataFrame.from_records(results, columns=columns)

    # Create and save graphs
    for gkey in rulemod.graphs:
        # Graph Defaults
        graph_conf = conf.copy()
        graph_conf["file_prefix"] = "%s%02d" % (args.ruleset, gkey)
        graph_conf["file_suffix"] = str()
        # colors
        graph_conf["color_list"] = _default_color_list(len(rulemod.variables))

        # graph_conf from graph
        graph_items = (
            "color_list",
            "file_prefix",
            "file_suffix",
            "graph_type",
            "limits",
            "x_breaks",
            "x_labels",
            "title",
            "vlab",
            "xlab",
            "ylab",
        )
        for item in graph_items:
            try:
                graph_conf[item] = rulemod.graphs[gkey][item]
            except (LookupError, TypeError):
                # Not set for this graph: fall back to a module-level
                # attribute, then to the existing default (None if absent).
                graph_conf[item] = getattr(rulemod, item, graph_conf.get(item))
        if args.debug:
            print("DEBUG: graph_conf:")
            pprint(graph_conf)
            print("")

        # plot_data: rows of this graph, with columns renamed to the
        # graph-specific labels and a 1-based index.
        plot_data = data[data["Graph"] == gkey].rename(
            columns={
                conf["vlab"]: graph_conf["vlab"],
                conf["xlab"]: graph_conf["xlab"],
                conf["ylab"]: graph_conf["ylab"],
            }
        )
        plot_data.index = range(1, len(plot_data) + 1)
        if args.debug:
            print("DEBUG: plot_data:")
            pprint(plot_data)
            print("")

        # Create plot
        if args.graph:
            plot = (
                ggplot.ggplot(
                    ggplot.aes(x=graph_conf["xlab"], y=graph_conf["ylab"], color=graph_conf["vlab"]),
                    data=plot_data,
                )
                + ggplot.ggtitle(graph_conf["title"])
                + ggplot.theme_gray()
                + ggplot.scale_colour_manual(values=graph_conf["color_list"])
            )
            plot.rcParams["font.family"] = "monospace"
            if graph_conf["x_breaks"] and graph_conf["x_labels"]:
                plot += ggplot.scale_x_discrete(breaks=graph_conf["x_breaks"], labels=graph_conf["x_labels"])
            if graph_conf["limits"]:
                plot += ggplot.ylim(graph_conf["limits"][0], graph_conf["limits"][1])

            if graph_conf["graph_type"] == "bars":
                # Thick line segments stand in for bars; every row with a
                # non-zero count gets its percentage printed above it.
                plot += ggplot.geom_line(size=20)
                text_data = plot_data[plot_data["Count"] > 0].reset_index(drop=True)
                outcomes = dict(text_data[graph_conf["xlab"]])
                percents = dict(text_data[graph_conf["ylab"]])
                for k in outcomes:
                    percent = "%4.1f%%" % percents[k]
                    x = outcomes[k]
                    y = percents[k] + 4
                    # NOTE(review): the colour is picked by row position, so
                    # this presumably expects no more labelled rows than
                    # colours - confirm against the rulesets.
                    color = graph_conf["color_list"][k]
                    plot += ggplot.geom_text(label=[percent], x=[x, x + 1], y=[y, y - 1], color=color)
            else:
                plot += ggplot.geom_line()
                plot += ggplot.geom_point(alpha=0.3, size=50)

            # Rulesets may add their own decorations to the finished plot.
            if hasattr(rulemod, "update_plot"):
                plot = rulemod.update_plot(gkey, graph_conf, plot, plot_data)

            if args.dumpsave:
                filename = "/dev/null"
            else:
                filename = "%s%s.png" % (graph_conf["file_prefix"], graph_conf["file_suffix"])
            ggplot.ggsave(filename, plot, format="png", dpi=300)

    return 0