def plot_matches(df_in, date, filename_out, x_var='date_time', y_var="shorthand_search_vol"):
    """
    Draw one line per match for ``y_var`` against ``x_var`` and save it.

    A ``date_time`` column is written into ``df_in`` itself: ``date`` is
    joined to the string form of the ``time`` column with a single space and
    the result is parsed with ``pd.to_datetime``.  Values that cannot be
    parsed become ``NaT`` because ``errors="coerce"`` is used.

    Args:
        df_in: frame holding at least ``time``, ``match_id`` and the column
            named by ``y_var``.  The original author notes it is expected to
            be filtered already; nothing here checks that.
        date: date string prepended to every ``time`` value.
        filename_out: destination handed to the plot's ``save`` method.
        x_var: column mapped to the x axis.
        y_var: column mapped to the y axis.
    """
    # Timestamp preparation: combine the calendar date with each row's time.
    stamp_text = date + " " + df_in['time'].astype(str)
    df_in['date_time'] = pd.to_datetime(
        stamp_text, errors="coerce", infer_datetime_format=True
    )

    # Geometry: one coloured line per match_id.
    mapping = aes(x=x_var, y=y_var, group="match_id", color="match_id")
    chart = ggplot(mapping, data=df_in)
    chart += geom_line(size=2)

    # Axis text and a tick every 30 minutes, labelled as HH:MM:SS.
    chart += labs(x="time (gmt)", y="search volume (scaled to 100)")
    chart += scale_x_date(labels=date_format("%H:%M:%S"), date_breaks="30 minutes")

    # Look and feel: gray theme with a small monospace font.
    look = theme_gray()
    look._rcParams['font.size'] = 8
    look._rcParams['font.family'] = 'monospace'
    chart += look

    chart.save(filename_out, width=16, height=8)
def boxplot(self, conn, column, table_chosen, title):
    """
    Print a box plot of a single column, preceded by timing output.

    Args:
        conn: connection forwarded to ``dfile.single_selector``.
        column: column to select; also mapped to the x aesthetic.
        table_chosen: table forwarded to ``dfile.single_selector``.
        title: plot title.
    """
    data_df = dfile.single_selector(conn=conn, table=table_chosen, column=column)

    canvas = ggplot(aes(x=column), data=data_df)
    box_plot = canvas + geom_boxplot() + theme_gray() + labs(title=title)

    finished = datetime.datetime.now()
    print(finished)
    # NOTE(review): `a` is not assigned in this method; it is read from an
    # outer scope and is presumably a start timestamp - confirm.
    print(finished - a)
    print(box_plot)
def line_chart(self, conn, column1, column2, table_chosen, title):
    """
    Print a line chart of ``column2`` against ``column1``, preceded by timing output.

    Args:
        conn: connection forwarded to ``dfile.double_selector``.
        column1: column mapped to the x aesthetic.
        column2: column mapped to the y aesthetic.
        table_chosen: table forwarded to ``dfile.double_selector``.
        title: plot title.
    """
    data_df = dfile.double_selector(
        conn=conn, table=table_chosen, col1=column1, col2=column2
    )

    canvas = ggplot(aes(x=column1, y=column2), data=data_df)
    line_plot = canvas + geom_line() + theme_gray() + labs(title=title)

    finished = datetime.datetime.now()
    print(finished)
    # NOTE(review): `a` is not assigned in this method; it is read from an
    # outer scope and is presumably a start timestamp - confirm.
    print(finished - a)
    print(line_plot)
def plot_predictions(date_times, actual_values, predictions, match_id, feature_set_in, filename):
    """
    Plot actual versus predicted search volume over time and save the figure.

    Two series sharing the same timestamps are stacked into one long frame and
    drawn as separate coloured lines: the observed values, labelled
    ``"actual" + match_id``, and the predictions, labelled
    ``"predictedby_" + feature_set_in + match_id``.

    Args:
        date_times: values parseable by ``pd.to_datetime``.  Entries that
            cannot be parsed become ``NaT`` (``errors="coerce"``).
        actual_values: observed search volumes for the "actual" series.
        predictions: iterable of predicted volumes; materialised with
            ``list()`` before assignment.
        match_id: identifier appended to both series labels.  It is passed
            through ``str()`` so non-string ids (e.g. integers) work too.
        feature_set_in: feature-set identifier embedded in the prediction
            label; also passed through ``str()``.
        filename: destination handed to the plot's ``save`` method.
    """
    # Parse once; both series share the same x values.
    timestamps = pd.to_datetime(date_times, errors="coerce", infer_datetime_format=True)
    # Coerce like feature_set_in so a numeric id does not raise TypeError
    # on string concatenation.
    match_label = str(match_id)

    actual_df = pd.DataFrame()
    actual_df['date_time'] = timestamps
    actual_df['search_vol'] = actual_values
    actual_df['match_id'] = "actual" + match_label

    predict_df = pd.DataFrame()
    predict_df['date_time'] = timestamps
    predict_df['search_vol'] = list(predictions)
    predict_df['match_id'] = "predictedby_" + str(feature_set_in) + match_label

    plotting_df = pd.concat([actual_df, predict_df], axis=0, ignore_index=True)

    # Geometry: one coloured line per series label.
    p = ggplot(aes(x='date_time', y='search_vol', group="match_id", color="match_id"), data=plotting_df)
    p += geom_line(size=2)

    # Axis text and a tick every 30 minutes, labelled as HH:MM:SS.
    p += labs(x="time (gmt)", y="search volume (scaled to 100)")
    p += scale_x_date(labels=date_format("%H:%M:%S"), date_breaks="30 minutes")

    # Look and feel: gray theme with a small monospace font.
    t = theme_gray()
    t._rcParams['font.size'] = 8
    t._rcParams['font.family'] = 'monospace'
    p += t

    p.save(filename, width=16, height=8)
def area_chart(self, conn, column1, column2, table_chosen, title):
    """
    Print an area chart whose vertical extent is entered interactively.

    The user is prompted on stdin for the lower and upper y bounds; each
    answer is converted with ``float()``, so a non-numeric answer raises
    ``ValueError``.

    Args:
        conn: connection forwarded to ``dfile.double_selector``.
        column1: first column to select.
        column2: second column to select; mapped to the x aesthetic.
        table_chosen: table forwarded to ``dfile.double_selector``.
        title: plot title.
    """
    data_df = dfile.double_selector(
        conn=conn, table=table_chosen, col1=column1, col2=column2
    )

    lower_bound = float(input("Enter the minimum value that should be plotted: "))
    upper_bound = float(input("Enter the maximum value that should be plotted: "))

    # NOTE(review): column1 is fetched but never mapped to an aesthetic;
    # only column2 is plotted - confirm this is intended.
    mapping = aes(x=column2, ymin=lower_bound, ymax=upper_bound)
    area_plot = ggplot(mapping, data=data_df) + geom_area() + theme_gray() + labs(title=title)

    finished = datetime.datetime.now()
    print(finished)
    # NOTE(review): `a` is not assigned in this method; it is read from an
    # outer scope and is presumably a start timestamp - confirm.
    print(finished - a)
    print(area_plot)
df = pd.melt(df) df['feature'] = feature dfs_to_concat.append(df) master_df = pd.concat(dfs_to_concat) # histogram p = ggplot(aes(x='value', fill='variable', color='variable'), data=master_df) p += geom_histogram(bins=25, alpha=0.5) p += scale_x_continuous(limits=(-25, 25)) p += ggtitle("sarimax coefficient magnitude distribution") p += facet_wrap("feature", ncol=3, scales="free") p += labs(x=" ", y=" ") # visuals t = theme_gray() t._rcParams['font.size'] = 10 t._rcParams['font.family'] = 'monospace' p += t p.save("arima_1/" + "histogram.png") # boxplot p = ggplot(aes(x='variable', y='value'), data=master_df) p += geom_boxplot() p += scale_y_continuous(limits=(-25, 25)) p += ggtitle("sarimax coefficient magnitudes") p += facet_wrap("feature", ncol=3) p += labs(x=" ", y=" ") # visuals
def _diverging_colors(variable_count):
    """Return a palette centred on black for ``variable_count`` variables.

    One colour is the neutral midpoint; the remaining ``variable_count - 1``
    colours are split between the warm ("lower") and cool ("upper") ramps,
    with the extra one going to the upper ramp when the split is uneven.
    """
    colors_lower = ["#ff0000", "#cc0000", "#993300", "#666600"]
    colors_upper = ["#006666", "#003399", "#0000cc", "#0000ff"]
    colors_mid = ["#000000"]

    extra = max(variable_count - 1, 0)
    lower_n = extra // 2
    upper_n = extra - lower_n

    # ``colors_lower[-0:]`` is the WHOLE list, not an empty one, so the
    # zero case has to be handled explicitly.
    lower = colors_lower[-lower_n:] if lower_n else []
    return lower + colors_mid + colors_upper[:upper_n]


def main():
    """Simulate the selected ruleset and render one graph per ruleset graph.

    Steps:
      1. Parse the command line and look up the ruleset module in
         ``sys.modules``; call its ``prepare`` hook.
      2. Run ``rulemod.simulate_rolls`` over ``rulemod.variables`` in a
         ``multiprocessing.Pool`` and flatten the results.
      3. Load the results into a DataFrame and, for every key in
         ``rulemod.graphs``, assemble a per-graph configuration (graph entry
         first, then module attribute, then default) and, when
         ``args.graph`` is set, build and save the plot.

    print calls use the single-argument form, which produces the same output
    under Python 2's print statement and Python 3's print function.

    Returns:
        0 on completion.

    Raises:
        SystemExit: with status 130 if interrupted while simulating.
    """
    global args, ruleset

    # Arguments Parser
    argparser, subparser = parser_setup()
    register_rules(subparser)
    args = argparser.parse_args()
    rulemod = sys.modules["rpgdice.rulesets.%s" % args.ruleset]
    rulemod.prepare(args, srand)

    if args.debug:
        print("DEBUG: args %s" % (args,))
        print("")

    results = list()
    pool = multiprocessing.Pool()
    try:
        for result in pool.map(rulemod.simulate_rolls, rulemod.variables):
            results.extend(result)
        pool.close()
        pool.join()
    except KeyboardInterrupt:
        sys.exit(130)

    if args.debug:
        print("DEBUG: results:")
        pprint(results)
        print("")

    # Axis/legend labels: module-level attributes override the defaults.
    conf = {"vlab": "Variables", "xlab": "Outcome", "ylab": "Probability %"}
    for item in conf:
        conf[item] = getattr(rulemod, item, conf[item])

    columns = ("Graph", conf["vlab"], conf["xlab"], "Count", conf["ylab"])
    data = pandas.DataFrame.from_records(results, columns=columns)

    # Keys a graph entry (or the ruleset module) may override.
    graph_items = (
        "color_list",
        "file_prefix",
        "file_suffix",
        "graph_type",
        "limits",
        "x_breaks",
        "x_labels",
        "title",
        "vlab",
        "xlab",
        "ylab",
    )

    # Create and save graphs
    for gkey in rulemod.graphs:
        # Graph Defaults
        graph_conf = conf.copy()
        graph_conf["file_prefix"] = "%s%02d" % (args.ruleset, gkey)
        graph_conf["file_suffix"] = str()
        graph_conf["color_list"] = _diverging_colors(len(rulemod.variables))

        # graph_conf from graph: graph entry, then module attribute, then
        # whatever default is already present (None when there is none).
        for item in graph_items:
            try:
                graph_conf[item] = rulemod.graphs[gkey][item]
            except (KeyError, IndexError, TypeError):
                if hasattr(rulemod, item):
                    graph_conf[item] = getattr(rulemod, item)
                else:
                    graph_conf.setdefault(item, None)

        if args.debug:
            print("DEBUG: graph_conf:")
            pprint(graph_conf)
            print("")

        # plot_data: copy AFTER filtering so the in-place rename below acts
        # on an independent frame rather than on a slice of ``data``.
        plot_data = data[data["Graph"] == gkey].copy()
        plot_data.rename(
            columns={
                conf["vlab"]: graph_conf["vlab"],
                conf["xlab"]: graph_conf["xlab"],
                conf["ylab"]: graph_conf["ylab"],
            },
            inplace=True,
        )
        plot_data.index = range(1, len(plot_data) + 1)

        if args.debug:
            print("DEBUG: plot_data:")
            pprint(plot_data)
            print("")

        # Create plot
        if args.graph:
            plot = (
                ggplot.ggplot(
                    ggplot.aes(x=graph_conf["xlab"], y=graph_conf["ylab"], color=graph_conf["vlab"]),
                    data=plot_data,
                )
                + ggplot.ggtitle(graph_conf["title"])
                + ggplot.theme_gray()
                + ggplot.scale_colour_manual(values=graph_conf["color_list"])
            )
            plot.rcParams["font.family"] = "monospace"

            if graph_conf["x_breaks"] and graph_conf["x_labels"]:
                plot += ggplot.scale_x_discrete(breaks=graph_conf["x_breaks"], labels=graph_conf["x_labels"])
            if graph_conf["limits"]:
                plot += ggplot.ylim(graph_conf["limits"][0], graph_conf["limits"][1])

            if graph_conf["graph_type"] == "bars":
                # Very thick line segments stand in for bars; every row with
                # a non-zero count gets a percentage label above it.
                plot += ggplot.geom_line(size=20)
                text_data = plot_data[plot_data["Count"] > 0]
                text_data.index = range(0, len(text_data))
                outcomes = dict(text_data[graph_conf["xlab"]])
                percents = dict(text_data[graph_conf["ylab"]])
                for k in outcomes:
                    percent = "%4.1f%%" % percents[k]
                    x = outcomes[k]
                    y = percents[k] + 4
                    color = graph_conf["color_list"][k]
                    plot += ggplot.geom_text(label=[percent], x=[x, x + 1], y=[y, y - 1], color=color)
            else:
                plot += ggplot.geom_line()
                plot += ggplot.geom_point(alpha=0.3, size=50)

            # Optional ruleset hook for last-minute plot adjustments.
            if hasattr(rulemod, "update_plot"):
                plot = rulemod.update_plot(gkey, graph_conf, plot, plot_data)

            if args.dumpsave:
                filename = "/dev/null"
            else:
                filename = "%s%s.png" % (graph_conf["file_prefix"], graph_conf["file_suffix"])
            ggplot.ggsave(filename, plot, format="png", dpi=300)

    return 0
def density_chart(self, conn, column, table_chosen, title):
    """
    Print a density plot of a single column.

    Args:
        conn: connection forwarded to ``dfile.single_selector``.
        column: column to select; also mapped to the x aesthetic.
        table_chosen: table forwarded to ``dfile.single_selector``.
        title: plot title.
    """
    data_df = dfile.single_selector(conn=conn, table=table_chosen, column=column)

    canvas = ggplot(aes(x=column), data=data_df)
    density_plot = canvas + geom_density() + theme_gray() + labs(title=title)

    print(density_plot)
def hist_chart(self, conn, column, table_chosen, title):
    """
    Print a histogram of a single column.

    Args:
        conn: connection forwarded to ``dfile.single_selector``.
        column: column to select; also mapped to the x aesthetic.
        table_chosen: table forwarded to ``dfile.single_selector``.
        title: plot title.
    """
    data_df = dfile.single_selector(conn=conn, table=table_chosen, column=column)

    canvas = ggplot(aes(x=column), data=data_df)
    hist_plot = canvas + geom_histogram() + theme_gray() + labs(title=title)

    print(hist_plot)
def point_chart(self, conn, column1, column2, table_chosen, title):
    """
    Print a scatter plot of ``column2`` against ``column1``.

    Args:
        conn: connection forwarded to ``dfile.double_selector``.
        column1: column mapped to the x aesthetic.
        column2: column mapped to the y aesthetic.
        table_chosen: table forwarded to ``dfile.double_selector``.
        title: plot title.
    """
    data_df = dfile.double_selector(
        conn=conn, table=table_chosen, col1=column1, col2=column2
    )

    canvas = ggplot(aes(x=column1, y=column2), data=data_df)
    point_plot = canvas + geom_point() + theme_gray() + labs(title=title)

    print(point_plot)