def displacement_plot(centered, limits=None, style=None):
    u"""Draw a displacement plot (one path per object) using ggplot.

    params:
        centered (pd.DataFrame): needs cX, cY, Object, Frame columns,
            probably produced by calling center() above. The caller's
            frame is not modified; a copy is used internally.
        limits (real): Sets the limits of the scales to a square window
            showing ±limits on each axis. A falsy value (None, 0) leaves
            the default limits in place.
        style (Iterable): Collection of strings. The only value acted on
            is 'no-terminal-dot' (which does not label the end of tracks
            which terminate early). 'theme-bw' is accepted but currently
            has no effect: theme_bw is always applied.

    Returns:
        g (gg.ggplot): Plot object
    """
    style = () if style is None else style

    # Work on a copy so the string conversion below does not leak into the
    # DataFrame owned by the caller.
    centered = centered.copy()
    centered['Object'] = centered['Object'].map(str)
    # DataFrame.sort() no longer exists in pandas; sort_values() is its
    # replacement for sorting by columns.
    centered = centered.sort_values(['Frame', 'Object'])

    g = (gg.ggplot(centered, gg.aes(x='cX', y='cY', color='Object')) +
         gg.geom_path(size=0.3))
    # The theme_seaborn alternative is disabled, so 'theme-bw' in style is
    # not consulted here.
    g += gg.theme_bw()

    if limits:
        g = g + gg.ylim(-limits, limits) + gg.xlim(-limits, limits)

    if 'no-terminal-dot' not in style:
        # Mark the last observed position of every track that stops before
        # the final frame of the data set.
        max_frame = centered['Frame'].max()
        endframe = centered.groupby('Object')['Frame'].max()
        endframe = endframe[endframe != max_frame].reset_index()
        endframe = endframe.merge(centered, on=['Object', 'Frame'])
        # we should check if endframe is empty before adding it:
        # https://github.com/yhat/ggplot/issues/425
        if not endframe.empty:
            g += gg.geom_point(data=endframe, color='black', size=1)
    return g
def plot_update_frequency(result):
    """Plot the number of changes per week from aggregated query results.

    Args:
        result: iterable of records. For each record ``res`` the code reads
            ``res['_id']['timestamp']`` (passed to ``pd.Timestamp``) and
            ``res['count']``.

    Returns:
        The ggplot object: weekly sums drawn as blue points with a smoother,
        y axis starting at 0 and x axis labelled as year-month.
    """
    import pandas as pd
    import ggplot

    # Turn the query results into a time series of changes.
    stamps = []
    counts = []
    for res in result:
        # Timestamp.to_datetime() was removed from pandas;
        # to_pydatetime() is the supported spelling.
        stamps.append(pd.Timestamp(res['_id']['timestamp']).to_pydatetime())
        counts.append(res['count'])

    ts = pd.DataFrame(counts, index=stamps, columns=['changes'])
    # resample(..., how='sum') was removed from pandas; aggregate on the
    # resampler object instead.
    ts = ts.resample('W').sum()
    ts.index.names = ['date']

    # Plot the time series of changes.
    p = (
        ggplot.ggplot(ts, ggplot.aes(x=ts.index, y=ts['changes']))
        + ggplot.geom_point(color='blue')
        + ggplot.xlab('Period')
        + ggplot.ylab('Changes')
        + ggplot.geom_smooth()
        + ggplot.ylim(low=0)
        + ggplot.scale_x_date(breaks=ggplot.date_breaks("12 months"),
                              labels=ggplot.date_format('%Y-%m'))
        + ggplot.ggtitle('OpenStreetMaps Denver-Boulder\nChanges per Week')
    )
    return p
def plot_outcomes(self, chart_title=None, use_ggplot=False):
    """ Plot the dose given to, and toxicity outcome of, each observed patient.

    :param chart_title: optional chart title. A verbose default is used when
                        this is empty or None
    :type chart_title: str
    :param use_ggplot: True to use ggplot, else matplotlib
    :type use_ggplot: bool
    :return: the ggplot object when use_ggplot is True and at least one
             patient has been observed; otherwise None (the matplotlib
             version draws on the current figure but does not return it)

    """

    if not chart_title:
        chart_title = "Each point represents a patient\nA circle indicates no toxicity, a cross toxicity"
        chart_title = chart_title + "\n"

    # Nothing to draw until at least one patient has been observed.
    if not self.size() > 0:
        return None

    if use_ggplot:
        from ggplot import (ggplot, ggtitle, geom_text, aes, ylim)
        import numpy as np
        import pandas as pd

        numbering = range(1, self.size()+1)
        # 'X' marks a toxicity, 'O' marks no toxicity.
        symbol = np.where(self.toxicities(), 'X', 'O')
        data = pd.DataFrame({'Patient number': numbering,
                             'Dose level': self.doses(),
                             'DLT': self.toxicities(),
                             'Symbol': symbol})
        text_layer = geom_text(aes(size=20, vjust=-0.07))
        base = ggplot(data, aes(x='Patient number', y='Dose level', label='Symbol'))
        return base + ggtitle(chart_title) + text_layer + ylim(1, 5)

    import matplotlib.pyplot as plt
    import numpy as np

    patient_number = np.arange(1, self.size()+1)
    doses_given = np.array(self.doses())
    tox_loc = np.array(self.toxicities()).astype('bool')

    # Crosses for toxicities first, then circles for non-toxicities; a group
    # is only drawn when it has at least one member.
    for mask, marker in ((tox_loc, 'x'), (~tox_loc, 'o')):
        if mask.any():
            plt.scatter(patient_number[mask], doses_given[mask], marker=marker, s=300,
                        facecolors='none', edgecolors='k')

    plt.title(chart_title)
    plt.ylabel('Dose level')
    plt.xlabel('Patient number')
    plt.yticks(self.dose_levels())

    # Size the current figure to a golden-ratio aspect.
    fig = plt.gcf()
    golden = (np.sqrt(5)+1)/2.
    fig.set_size_inches(12, 12/golden)
def googletrend_command(delta_t, threshold=0.0, inverse=False):
    """the command to run google trend algorithm.

    Runs the classifier for every threshold on a 0.1-spaced grid from 0 up
    to ``threshold`` and for every integer delta_t in
    ``range(1, int(delta_t))``, collects the TPR/FPR of each run and saves
    the resulting ROC plot under ``./plots/``.

    :param delta_t: the upper bound for original delta_t parameter
                    (exclusive: the loop stops at ``int(delta_t) - 1``)
    :param threshold: upper bound for the threshold of differentiating two
                      classes; grid points never exceed this value
    :param inverse: whether to inverse the classifier

    """
    ## handle filepath and title based on parameter inverse
    filename = "googletrend"
    titlename = "ROC of google trend classifier"
    if inverse:
        filename += "_inverse"
        titlename += " (inverse version)"
    filepath = "./plots/%s.jpg" % filename

    ## generate data first
    data = googletrend.preprocess()

    ## store classifier evaluation metrics into dict
    output = {'tpr': [], 'fpr': [], 'plot': []}

    # Build the threshold grid from an integer step count. A float-step
    # np.arange(0, threshold + 0.1, 0.1) can overshoot the bound because of
    # rounding (e.g. threshold=0.2 also yields 0.30000000000000004).
    step = 0.1
    n_steps = int(np.floor(threshold / step + 1e-9)) + 1
    for thre in step * np.arange(n_steps):
        print("==> threshold: %f, inverse: %s" % (thre, inverse))
        for i in range(1, int(delta_t)):
            googletrend.algorithm(data, i, thre, inverse)
            tp_rate, fp_rate = googletrend.evaluate(data)
            output['tpr'].append(tp_rate)
            output['fpr'].append(fp_rate)
            output['plot'].append('thre_' + str(thre))

    ## plot ROC graph
    ## add a y=x baseline for comparison
    output['tpr'].extend([0.0, 1.0])
    output['fpr'].extend([0.0, 1.0])
    output['plot'].extend(['baseline', 'baseline'])
    df = pd.DataFrame(output)
    graph = (
        gg.ggplot(df, gg.aes('fpr', 'tpr', color='plot'))
        + gg.theme_seaborn()
        + gg.ggtitle(titlename)
        + gg.xlab("FPR")
        + gg.ylab("TPR")
        + gg.xlim(0.0, 1.0)
        + gg.ylim(0.0, 1.0)
        + gg.geom_point()
        + gg.geom_line()
    )
    gg.ggsave(plot=graph, filename=filepath, width=6, height=6, dpi=100)
# Label the first (total-based) long-format table with its bias method and
# use it as the start of the stacked table.
dftmp['method'] = ['(Total-Expected Total)/Expected Total'] * dftmp['n_sub'].size
df_stacked = dftmp

# enhancement-based
enhanc_cols = brks[5:10]
dftmp = df[['n_sub'] + enhanc_cols].melt(
    id_vars=['n_sub'],
    value_vars=enhanc_cols,
    var_name='stat',
    value_name='value',
)
dftmp['method'] = ['(Enhanc-Expected Enhanc)/Expected Enhanc'] * dftmp['n_sub'].size
df_stacked = df_stacked.append(dftmp)

# enhancements + full sample background
backgr_cols = brks[10:]
dftmp = df[['n_sub'] + backgr_cols].melt(
    id_vars=['n_sub'],
    value_vars=backgr_cols,
    var_name='stat',
    value_name='value',
)
dftmp['method'] = ['(Enhanc+Expected Backgr-Expected Total)/Expected Total'] * dftmp['n_sub'].size
df_stacked = df_stacked.append(dftmp)

# Characters 1-2 of each stat name become the percentile label.
df_stacked['percentile'] = ['{0}th%'.format(a[1:3]) for a in df_stacked['stat']]

# plots
# compare all 3 methods, one facet per method
plt1 = (
    gg.ggplot(df_stacked, gg.aes(x='n_sub', y='value', color='percentile'))
    + gg.geom_line()
    + gg.xlab('N drives')
    + gg.ylab('Bias (%)')
    + gg.theme_bw()
    + gg.scale_color_manual(values=colors)
    + gg.geom_hline(y=[-25, 25], linetype="dashed", color="gray")
    + gg.geom_vline(x=[10, 15], linetype="dashed", color="gray")
    + gg.facet_wrap('method')
    + gg.ggtitle('Bias comparison {0}'.format(title))
)
plt1.save(filename=r'..\charts\drivebias_laqn_{0}.png'.format(species),
          width=None, height=None, dpi=300)

# plot total alone for presentation
total_rows = df_stacked[df_stacked['method'] == '(Total-Expected Total)/Expected Total']
plt2 = (
    gg.ggplot(total_rows, gg.aes(x='n_sub', y='value', color='percentile'))
    + gg.geom_line()
    + gg.xlab('N drives')
    + gg.ylab('Bias (%)')
    + gg.ylim(-100, 100)
    + gg.scale_color_manual(values=colors)
    + gg.geom_hline(y=[-25, 25], linetype="dashed", color="gray")
    + gg.geom_vline(x=[10, 15], linetype="dashed", color="gray")
    + gg.ggtitle('Bias comparison {0}'.format(title))
)
# Black-and-white theme with a larger font for slides.
t = gg.theme_bw()
t._rcParams['font.size'] = 16
plt2 = plt2 + t
plt2.save(filename=r'..\charts\drivebias_laqn_{0}_total.png'.format(species),
          width=None, height=None, dpi=300)

# plot enhancement alone for presentation
enhanc_rows = df_stacked[
    df_stacked['method'] == '(Enhanc+Expected Backgr-Expected Total)/Expected Total'
]
plt3 = (
    gg.ggplot(enhanc_rows, gg.aes(x='n_sub', y='value', color='percentile'))
    + gg.geom_line()
    + gg.xlab('N drives')
    + gg.ylab('Bias (%)')
    + gg.ylim(-100, 100)
    + gg.scale_color_manual(values=colors)
    + gg.geom_hline(y=[-25, 25], linetype="dashed", color="gray")
    + gg.geom_vline(x=[10, 15], linetype="dashed", color="gray")
    + gg.ggtitle('Bias comparison {0}'.format(title))
)
t = gg.theme_bw()
t._rcParams['font.size'] = 16
plt3 = plt3 + t
plt3.save(filename=r'..\charts\drivebias_laqn_{0}_enhanc.png'.format(species),
          width=None, height=None, dpi=300)

print("Query results loaded into table {0}".format(destinationtable_str))
def plot_toxicity_probabilities(self, chart_title=None, use_ggplot=False):
    """ Plot the prior and posterior dose-toxicity curves against dose level.

    :param chart_title: optional chart title. A verbose default is used when
                        this is empty or None
    :type chart_title: str
    :param use_ggplot: True to use ggplot, else matplotlib
    :type use_ggplot: bool
    :return: the ggplot object when use_ggplot is True; otherwise None (the
             matplotlib version draws on the current figure but does not
             return it)

    """

    if not chart_title:
        chart_title = "Prior (dashed) and posterior (solid) dose-toxicity curves"
        chart_title = chart_title + "\n"

    if use_ggplot:
        from ggplot import (ggplot, ggtitle, geom_line, geom_hline, aes, ylim)
        import numpy as np
        import pandas as pd

        wide = pd.DataFrame({'Dose level': self.dose_levels(),
                             'Prior': self.prior,
                             'Posterior': self.prob_tox(),
                             })
        var_name = 'Type'
        value_name = 'Probability of toxicity'
        # Long format: one row per (dose level, curve type).
        melted_data = pd.melt(wide, id_vars='Dose level', var_name=var_name, value_name=value_name)
        curves = ggplot(melted_data, aes(x='Dose level', y=value_name, linetype=var_name)) + geom_line()
        # Horizontal reference line at the target value.
        target_line = geom_hline(yintercept=self.target, color='black')
        # Can add confidence intervals once I work out linetype=??? in ggplot
        return curves + ggtitle(chart_title) + ylim(0, 1) + target_line

    import matplotlib.pyplot as plt
    import numpy as np

    dl = self.dose_levels()
    prior_tox = self.prior
    post_tox = self.prob_tox()
    post_tox_lower = self.get_tox_prob_quantile(0.05)
    post_tox_upper = self.get_tox_prob_quantile(0.95)

    # Prior dashed, posterior solid, 5% / 95% posterior quantiles dash-dot.
    line_specs = (
        (prior_tox, '--'),
        (post_tox, '-'),
        (post_tox_lower, '-.'),
        (post_tox_upper, '-.'),
    )
    for series, fmt in line_specs:
        plt.plot(dl, series, fmt, c='black')

    # Crosses on the prior curve, circles on the posterior curve.
    for series, marker in ((prior_tox, 'x'), (post_tox, 'o')):
        plt.scatter(dl, series, marker=marker, s=300, facecolors='none', edgecolors='k')

    plt.axhline(self.target)
    plt.ylim(0, 1)
    plt.xlim(np.min(dl), np.max(dl))
    plt.xticks(dl)
    plt.ylabel('Probability of toxicity')
    plt.xlabel('Dose level')
    plt.title(chart_title)

    # Size the current figure to a golden-ratio aspect.
    fig = plt.gcf()
    golden = (np.sqrt(5) + 1) / 2.
    fig.set_size_inches(12, 12 / golden)
import sys
from pandas.plotting import register_matplotlib_converters

# Register the pandas date converters with matplotlib before plotting.
register_matplotlib_converters()

species = 'no2'

# Column types for the melted background table; timestamps are read as
# strings and parsed explicitly below.
csv_dtypes = {
    'timestamp': 'str',
    'vidperiod': 'str',
    'type': 'str',
    'param': 'str',
    'value': 'float64'
}
df = pd.read_csv(r'.\charts\background_data_melted.csv',
                 index_col='idx',
                 dtype=csv_dtypes)
print(df.head(10))
df['timestamp'] = pd.to_datetime(df['timestamp'],
                                 format="%Y-%m-%d %H:%M:%S")

# plots
# One facet per vidperiod, one line colour per type, y axis fixed to 0-100.
plt1 = (
    gg.ggplot(df, gg.aes(x='timestamp', y='value', color='type'))
    + gg.geom_line()
    + gg.xlab('Time')
    + gg.ylab('Concentration')
    + gg.theme_bw()
    + gg.ylim(0, 100)
    + gg.facet_wrap('vidperiod', scales='free')
    + gg.ggtitle('Regional background comparison {0}'.format(species))
)
#+gg.theme(axis_text_x=gg.element_text(angle=20))

# The output file name carries today's date, e.g. 2020Jan31.
date_tag = dt.datetime.today().strftime('%Y%b%d')
plt1.save(filename=r'.\charts\background_{0}_ggtest_{1}.png'.format(species, date_tag),
          width=None,
          height=None,
          dpi=300)
total = len(model2scores[model][plot_key_name]) for value in model2scores[model][plot_key_name]: plot_dataset.append( [model, value, the_mean / total, the_std, the_max, the_min]) plot_dataset_pd = pd.DataFrame( plot_dataset, columns=['model', 'value', 'weight', 'std', 'max', 'min']) if 'logloss' in plot_key_name: p = ggplot.ggplot(ggplot.aes(x = 'model', fill = 'model', weight = 'weight'), data = plot_dataset_pd) +\ ggplot.geom_bar(position = 'stack', width = 4) +\ ggplot.geom_errorbar(ggplot.aes(x = 'model', y = 'value')) +\ ggplot.ylim(0 ,5.05) +\ ggplot.ggtitle(plot_key_name) #print(p) elif 'time' in plot_key_name: p = ggplot.ggplot(ggplot.aes(x = 'model', fill = 'model', weight = 'weight'), data = plot_dataset_pd) +\ ggplot.geom_bar(position = 'stack', width = 4) +\ ggplot.geom_errorbar(ggplot.aes(x = 'model', y = 'value')) +\ ggplot.ggtitle(plot_key_name) #print(p) else:
tile(w_from_figure_wh_ratio, norm(data)), '%s-layer-acts-%s-%s-(i=%s)' % (img_desc, layer, show_tuple_tight(data.shape), batch_i), ) conv_layers = filter(lambda (layer, acts): len(acts.data.shape) == 4, net.blobs.items()) fc_layers = filter(lambda (layer, acts): len(acts.data.shape) != 4, net.blobs.items()) # Plot conv acts for layer, acts in conv_layers: plot_conv_acts(layer, acts) # Plot fc acts df = pd.concat([ pd.DataFrame({'act': acts.data[batch_i], 'layer': layer}).reset_index() for layer, acts in fc_layers ]) plot_gg(gg_layer( gg.ggplot(df, gg.aes(y='act', x='index')), gg.geom_point(alpha=.5), gg.facet_wrap(x='layer', scales='free'), gg.ggtitle('%s layer acts fc/prob points (i=%s)' % (img_desc, batch_i)), )) plot_gg(gg_layer( gg.ggplot(df, gg.aes(x='act')), gg.geom_histogram(bins=25, size=0), gg.facet_wrap(x='layer', scales='free'), gg.scale_y_log(), gg.ylim(low=0.1), gg.ggtitle('%s layer acts fc/prob histo (i=%s)' % (img_desc, batch_i)), ))
value_vars=['p05', 'p25', 'p50', 'p75', 'p95'], var_name='yparam', value_name='value') print(c['name']) #print(df_a) #plots #split percentiles into different charts, all sites #plt1 = gg.ggplot(df_along, gg.aes(x='n_passes',y='value',color='site_str'))+gg.geom_point()+gg.xlab('N drives')+gg.ylab('Bias (%)')+gg.theme_bw()+gg.xlim(0,100)+gg.facet_wrap('yparam',scales='free_y') #plt1.save(filename = r'..\charts\bias_{0}.png'.format(c['name']), width=None, height=None, dpi=200) #n_segments plt2 = gg.ggplot( df_a, gg.aes(x='n_passes', y='n_segments', color='site_str') ) + gg.geom_line() + gg.xlab('n, number drive periods') + gg.ylab( 'Sample size (number of drive patterns)') + gg.theme_bw() + gg.xlim( 0, 35) + gg.ylim(0, 2000) plt2.save(filename=r'..\charts\n_segments_{0}_{1}.png'.format( c['name'], dtstamp), width=None, height=None, dpi=200) #combine percentiles, split sites plt3 = gg.ggplot( df_along, gg.aes(x='n_passes', y='value', color='yparam') ) + gg.geom_line() + gg.xlab('n, number of drive periods') + gg.ylab( 'Sample error (%)') + gg.theme_bw() + gg.xlim(0, 35) + gg.ylim( -100, 100) + gg.geom_hline( y=25, linetype="dashed", color="gray") + gg.geom_hline( y=-25, linetype="dashed", color="gray") + gg.geom_vline( x=[10, 15], linetype="dashed", color="gray") + gg.scale_color_manual(
def main():
    """Simulate the selected ruleset's rolls and save one graph per entry.

    Parses the command line, lets the ruleset module prepare itself, runs
    ``rulemod.simulate_rolls`` over ``rulemod.variables`` in a process pool
    and, when ``args.graph`` is set, renders and saves a PNG for every key
    in ``rulemod.graphs``.

    Returns:
        0 on completion. Exits the process with status 130 if interrupted
        from the keyboard while the pool is running.
    """
    global args, ruleset

    # Arguments Parser
    argparser, subparser = parser_setup()
    register_rules(subparser)
    args = argparser.parse_args()
    rulemod = sys.modules["rpgdice.rulesets.%s" % args.ruleset]
    rulemod.prepare(args, srand)

    if args.debug:
        print("DEBUG: args %s" % (args,))
        print("")

    results = list()
    pool = multiprocessing.Pool()
    try:
        for result in pool.map(rulemod.simulate_rolls, rulemod.variables):
            results.extend(result)
        pool.close()
        pool.join()
    except KeyboardInterrupt:
        sys.exit(130)

    if args.debug:
        print("DEBUG: results:")
        pprint(results)
        print("")

    # Default axis/legend labels; a ruleset module may override any of them
    # with a module-level attribute of the same name.
    conf = {"vlab": "Variables", "xlab": "Outcome", "ylab": "Probability %"}
    for item in conf:
        conf[item] = getattr(rulemod, item, conf[item])

    columns = ("Graph", conf["vlab"], conf["xlab"], "Count", conf["ylab"])
    data = pandas.DataFrame.from_records(results, columns=columns)

    # Create and save graphs
    for gkey in rulemod.graphs:
        # Graph Defaults
        graph_conf = conf.copy()
        graph_conf["file_prefix"] = "%s%02d" % (args.ruleset, gkey)
        graph_conf["file_suffix"] = str()

        # colors
        colors_lower = ["#ff0000", "#cc0000", "#993300", "#666600"]
        colors_upper = ["#006666", "#003399", "#0000cc", "#0000ff"]
        colors_mid = ["#000000"]
        color_count = len(rulemod.variables) - 1
        # Floor division keeps the slice bounds integers on Python 3 as well
        # as Python 2.
        if color_count % 2 == 0:
            lower_slice = (color_count // 2) * -1
            upper_slice = color_count // 2
        else:
            lower_slice = ((color_count - 1) // 2) * -1
            upper_slice = (color_count + 1) // 2
        # NOTE(review): when lower_slice is 0 the slice below keeps all of
        # colors_lower rather than none of it - confirm that is intended.
        graph_conf["color_list"] = colors_lower[lower_slice:] + colors_mid + colors_upper[0:upper_slice]

        # graph_conf from graph: per-graph setting first, then a module-level
        # attribute, then whatever default is already present (else None).
        graph_items = (
            "color_list",
            "file_prefix",
            "file_suffix",
            "graph_type",
            "limits",
            "x_breaks",
            "x_labels",
            "title",
            "vlab",
            "xlab",
            "ylab",
        )
        for item in graph_items:
            try:
                graph_conf[item] = rulemod.graphs[gkey][item]
            except Exception:
                try:
                    graph_conf[item] = getattr(rulemod, item)
                except Exception:
                    if item not in graph_conf:
                        graph_conf[item] = None

        if args.debug:
            print("DEBUG: graph_conf:")
            pprint(graph_conf)
            print("")

        # plot_data: rows of this graph only, with columns renamed to the
        # graph-specific labels and a 1-based index.
        plot_data = data.copy()
        plot_data = plot_data[plot_data["Graph"] == gkey]
        plot_data.rename(
            columns={
                conf["vlab"]: graph_conf["vlab"],
                conf["xlab"]: graph_conf["xlab"],
                conf["ylab"]: graph_conf["ylab"],
            },
            inplace=True,
        )
        plot_data.index = range(1, len(plot_data) + 1)

        if args.debug:
            print("DEBUG: plot_data:")
            pprint(plot_data)
            print("")

        # Create plot
        if args.graph:
            plot = (
                ggplot.ggplot(
                    ggplot.aes(x=graph_conf["xlab"], y=graph_conf["ylab"], color=graph_conf["vlab"]), data=plot_data
                )
                + ggplot.ggtitle(graph_conf["title"])
                + ggplot.theme_gray()
                + ggplot.scale_colour_manual(values=graph_conf["color_list"])
            )
            plot.rcParams["font.family"] = "monospace"
            if graph_conf["x_breaks"] and graph_conf["x_labels"]:
                plot += ggplot.scale_x_discrete(breaks=graph_conf["x_breaks"], labels=graph_conf["x_labels"])
            if graph_conf["limits"]:
                plot += ggplot.ylim(graph_conf["limits"][0], graph_conf["limits"][1])

            if graph_conf["graph_type"] == "bars":
                plot += ggplot.geom_line(size=20)
                # Label every outcome that occurred with its percentage.
                text_data = plot_data[plot_data["Count"] > 0]
                text_data.index = range(0, len(text_data))
                outcomes = dict(text_data[graph_conf["xlab"]])
                percents = dict(text_data[graph_conf["ylab"]])
                for k in outcomes:
                    percent = "%4.1f%%" % percents[k]
                    x = outcomes[k]
                    y = percents[k] + 4
                    color = graph_conf["color_list"][k]
                    plot += ggplot.geom_text(label=[percent], x=[x, x + 1], y=[y, y - 1], color=color)
            else:
                plot += ggplot.geom_line()
                plot += ggplot.geom_point(alpha=0.3, size=50)

            if hasattr(rulemod, "update_plot"):
                plot = rulemod.update_plot(gkey, graph_conf, plot, plot_data)

            if args.dumpsave:
                filename = "/dev/null"
            else:
                filename = "%s%s.png" % (graph_conf["file_prefix"], graph_conf["file_suffix"])
            ggplot.ggsave(filename, plot, format="png", dpi=300)

    return 0
def plot_vol(dates, x, cp, my_domain):
    """Plot message volume over time with change points and segment means.

    Args:
        dates: sequence of date objects (``toordinal()`` is called on them),
            one per volume observation.
        x: numpy array of volumes, same length as ``dates``.
        cp: sequence of change-point records. Element 0 of each record is
            used as the 1-based position of the change point and element 2
            as the mean value drawn for the segment ending there.
        my_domain: name interpolated into the plot title.

    Returns:
        The ggplot object.
    """
    # -------------------- Prepare for Plotting -------------------------- #
    # Volume rows: [date, vol_data, 'Volume']; the constant third column is
    # the label shown in the legend.
    vol_labels = ['Volume'] * x.shape[0]
    ndf_vol = np.transpose(np.array([dates, x, vol_labels]))
    df_vol = pd.DataFrame(ndf_vol, columns=['Date', 'Volume', 'Data'])

    xmin_list = []      # left end of each mean segment (ordinal date)
    xmax_list = []      # right end of each mean segment (ordinal date)
    yint_list = []      # mean value of each segment
    cp_date_list = []   # date of each change point
    cp_value_list = []  # value plotted at each change point

    ref_idx = 0  # first index of the segment currently being described
    for change in cp:
        cp_idx = change[0] - 1  # -1 b/c 1-indexed (includes cp itself)
        # Ordinals are used to match the date scale used by ggplot.
        xmin_list.append(dates[ref_idx].toordinal())
        xmax_list.append(dates[cp_idx].toordinal())
        yint_list.append(change[2])
        cp_date_list.append(dates[cp_idx])
        cp_value_list.append(change[2])
        ref_idx = cp_idx + 1  # +1 b/c moving to next point

    # Change-point rows plus one dummy row whose only purpose is to put
    # 'Volume Mean' into the legend. The dummy is added to copies so the
    # lists handed to the geoms below stay free of it.
    cp_lbl = ['Change Point'] * len(yint_list)
    ndf_cp = np.transpose(np.array([
        cp_date_list + [dates[0]],
        yint_list + [x[0]],
        cp_lbl + ['Volume Mean'],
    ]))
    df_cp = pd.DataFrame(ndf_cp, columns=['Date', 'Volume', 'Data'])
    df_plot = pd.concat((df_vol, df_cp), axis=0)

    # Vertical padding: a quarter margin below the minimum, a full margin
    # above the maximum.
    margin = 0.10 * (np.max(x) - np.min(x))
    p = ggplot.ggplot(aes(x='Date', y='Volume', color='Data'), data=df_plot) + \
        ggplot.geom_line(color='blue', size=2) + \
        ggplot.geom_point(x=xmax_list, y=cp_value_list, color='black',
                          shape='D', size=50) + \
        ggplot.geom_hline(xmin=xmin_list,
                          xmax=xmax_list,
                          yintercept=yint_list, color="red", size=3) + \
        ggplot.scale_x_date(labels=date_format("%Y-%m-%d"), breaks="1 week") + \
        ggplot.scale_colour_manual(values=["black", "blue", "red"]) + \
        ggplot.scale_y_continuous(labels='comma') + \
        ggplot.ylim(low=np.min(x)-margin/4.0, high=np.max(x)+margin) + \
        ggplot.xlab("Week (Marked on Mondays)") + \
        ggplot.ylab("Message Vol") + \
        ggplot.ggtitle("%s\nMessage Volume by Week" % my_domain) + \
        ggplot.theme_seaborn()
    return p
# Boxplot of the per-column maximum for each iteration.
ggplot.ggplot(rr, ggplot.aes(x='Iteration', y='Max')) + ggplot.geom_boxplot()

# Number of entries equal to 0 in every column, grouped by iteration.
vals = []
n_val = []
iterations = list(map(str, range(2, 13)))
for iteration in iterations:
    data_i = data[iteration]
    vals.extend((data_i == 0).sum().tolist())
    # One iteration label per column of this iteration's table.
    n_val.extend([iteration] * data_i.shape[1])
rr = pandas.DataFrame([n_val, vals]).T
rr.columns = ['Iteration', 'Equal to 0']
ggplot.ggplot(rr, ggplot.aes(x='Iteration', y='Equal to 0')) + ggplot.geom_boxplot()

# 0.99 quantile of every column, grouped by iteration.
# NOTE(review): the column is labelled 'Median' although the value is the
# 0.99 quantile - confirm which of the two is intended.
vals = []
n_val = []
iterations = list(map(str, range(2, 13)))
for iteration in iterations:
    data_i = data[iteration]
    vals.extend(data_i.quantile(0.99).tolist())
    n_val.extend([iteration] * data_i.shape[1])
rr = pandas.DataFrame([n_val, vals]).T
rr.columns = ['Iteration', 'Median']
(ggplot.ggplot(rr, ggplot.aes(x='Iteration', y='Median'))
 + ggplot.geom_boxplot()
 + ggplot.ylim(0, 0.00025))