def plot_trend_season(dates, ndf_domain, x, x_trend, season, my_domain):
    """Build a faceted line plot of a volume series and its decomposition.

    ``ndf_domain`` is loaded into a two-column frame (``Date``, ``Volume``)
    whose ``Date`` column is then overwritten with ``dates``.  A third
    ``Data`` column labels every row; the number of rows carrying each label
    is taken from the lengths of ``x``, ``x_trend`` and ``season``, in that
    order.  The plot is faceted on that label with free y scales.

    :param dates: values assigned to the ``Date`` column
    :param ndf_domain: two-column data used to build the frame (presumably
        the observed, trend and seasonal rows stacked in that order --
        verify against the caller)
    :param x: observed series; only its length is used here
    :param x_trend: trend series; only its length is used here
    :param season: seasonal series; only its length is used here
    :param my_domain: name interpolated into the plot title
    :return: the assembled ggplot object
    """
    # ---------------------- Prepare Data Frame ----------------------- #
    frame = pd.DataFrame(ndf_domain, columns=['Date', 'Volume'])
    frame['Date'] = dates

    # One facet label per row, grouped by component.
    labels = (['Observed Volume'] * len(x)
              + ['Overall Trend'] * len(x_trend)
              + ['Repeat Sending Trend'] * len(season))

    # Column-wise concat aligns the label column to the frame by row index.
    df_plot = pd.concat((frame, pd.DataFrame(labels)), axis=1)
    df_plot.columns = ['Date', 'Volume', 'Data']

    # ---------------------- Plot Decomposition ----------------------- #
    p = (
        ggplot.ggplot(aes(x='Date', y='Volume', color='Data'), data=df_plot)
        + ggplot.geom_line(color='blue', size=2)
        + ggplot.scale_x_date(labels=date_format("%Y-%m-%d"), breaks="1 week")
        + ggplot.xlab("Week (Marked on Mondays)")
        + ggplot.ylab("Message Vol")
        + ggplot.ggtitle("%s Message Volume by Week" % my_domain)
        + ggplot.facet_grid('Data', scales='free_y')
        + ggplot.theme_seaborn()
    )
    return p
def plot_bin_dists(df, bin_def="distance_bin <= 500"):
    """Histogram the ``R2`` column, one panel per ``distance_bin``.

    Rows are selected with ``df.query(bin_def)``; the query string is also
    used as the plot title.

    Side effect: overwrites matplotlib's global ``figure.figsize`` rcParam
    with ``(16, 12)`` scaled by 0.65.

    :param df: frame providing the ``R2`` and ``distance_bin`` columns
    :param bin_def: query expression selecting the rows to plot
    :return: the assembled ggplot object
    """
    plt.rcParams['figure.figsize'] = 0.65 * np.array([16, 12])

    selected = df.query(bin_def)
    base = gp.ggplot(gp.aes(x='R2'), data=selected)
    return (
        base
        + gp.geom_histogram(fill='coral')
        + gp.facet_wrap("distance_bin")
        + gp.theme_seaborn(context='talk')
        + gp.ggtitle(bin_def)
    )
def _ggplot(df, out_file):
    """Render a faceted bar plot through the ggplot wrapper and save it.

    XXX Not yet functional

    The ``variant.type``, ``category`` and ``caller`` columns of ``df`` are
    relabelled *in place* before plotting.  Unknown variant types or
    categories raise ``KeyError``; unknown callers become ``None``.

    :param df: frame with ``variant.type``, ``category``, ``caller`` and
        ``value.floor`` columns; modified in place
    :param out_file: destination passed to ``ggsave``
    """
    import ggplot as gg

    # Relabel one column at a time so a failed lookup leaves the later
    # columns untouched.
    pretty_types = [vtype_labels[raw] for raw in df["variant.type"]]
    df["variant.type"] = pretty_types

    pretty_categories = [cat_labels[raw] for raw in df["category"]]
    df["category"] = pretty_categories

    pretty_callers = [caller_labels.get(raw) for raw in df["caller"]]
    df["caller"] = pretty_callers

    base = gg.ggplot(df, gg.aes(x="caller", y="value.floor"))
    plot = (
        base
        + gg.geom_bar()
        + gg.facet_wrap("variant.type", "category")
        + gg.theme_seaborn()
    )
    gg.ggsave(plot, out_file)
def googletrend_command(delta_t, threshold=0.0, inverse=False):
    """the command to run google trend algorithm.

    Runs the classifier for every threshold ``0.0, 0.1, 0.2, ...`` that does
    not exceed ``threshold`` and, for each of those, for every integer
    delta_t in ``1 .. int(delta_t) - 1``.  The TPR/FPR of every run is
    collected, a ``y = x`` baseline is added, and the resulting ROC plot is
    saved under ``./plots/``.

    :param delta_t: the upper bound for original delta_t parameter
        (exclusive: the sweep stops at ``int(delta_t) - 1``)
    :param threshold: upper bound for the threshold of differentiating two
        classes (inclusive; a negative value yields no threshold at all)
    :param inverse: whether to inverse the classifier
    """
    ## handle filepath and title based on parameter inverse
    filename = "googletrend"
    titlename = "ROC of google trend classifier"
    if inverse:
        filename += "_inverse"
        titlename += " (inverse version)"
    filepath = "./plots/%s.jpg" % filename

    ## generate data first
    data = googletrend.preprocess()

    ## store classifier evaluation metrics into dict
    output = {'tpr': [], 'fpr': [], 'plot': []}

    # Build the threshold grid from integer step counts rather than
    # np.arange with a float step: arange(0, threshold + 0.1, 0.1) can
    # overshoot the upper bound (e.g. threshold=0.2 also yields 0.3) and
    # produces values such as 0.30000000000000004 that leak into the legend
    # labels.  The small epsilon absorbs division round-off (0.3 / 0.1 is
    # slightly below 3), and rounding keeps the grid values clean.
    step = 0.1
    n_steps = int(np.floor(threshold / step + 1e-9))
    thresholds = [round(k * step, 10) for k in range(n_steps + 1)]

    for thre in thresholds:
        # Parenthesised single-argument print behaves the same on
        # Python 2 and Python 3.
        print("==> threshold: %f, inverse: %s" % (thre, inverse))
        for i in range(1, int(delta_t)):
            googletrend.algorithm(data, i, thre, inverse)
            tp_rate, fp_rate = googletrend.evaluate(data)
            # One ROC point per (threshold, delta_t) run, grouped by
            # threshold for colouring.
            output['tpr'].append(tp_rate)
            output['fpr'].append(fp_rate)
            output['plot'].append('thre_' + str(thre))

    ## plot ROC graph
    ## add a y=x baseline for comparison
    output['tpr'].extend([0.0, 1.0])
    output['fpr'].extend([0.0, 1.0])
    output['plot'].extend(['baseline', 'baseline'])

    df = pd.DataFrame(output)
    graph = (
        gg.ggplot(df, gg.aes('fpr', 'tpr', color='plot'))
        + gg.theme_seaborn()
        + gg.ggtitle(titlename)
        + gg.xlab("FPR")
        + gg.ylab("TPR")
        + gg.xlim(0.0, 1.0)
        + gg.ylim(0.0, 1.0)
        + gg.geom_point()
        + gg.geom_line()
    )
    gg.ggsave(plot=graph, filename=filepath, width=6, height=6, dpi=100)
def _plot_scat_w_line(self, gp_aes):
    """Add a scatter layer, a smoothed trend line and a theme to ``gp_aes``.

    :param gp_aes: the ggplot object (data + aesthetics) to decorate
    :return: ``gp_aes`` with coral points, a blue smoother (span 0.2, no
        confidence band) and the seaborn ``talk`` theme added
    """
    scatter = gp.geom_point(color='coral')
    smoother = gp.stat_smooth(span=.2, color='blue', se=False)
    theme = gp.theme_seaborn(context='talk')
    return gp_aes + scatter + smoother + theme
def plot_vol(dates, x, cp, my_domain):
    """Plot weekly message volume with its change points and segment means.

    :param dates: indexable sequence of date objects (``toordinal()`` is
        called on the entries that bound each segment)
    :param x: numpy array of volumes (``x.shape[0]`` rows)
    :param cp: sequence of change-point records; element 0 of a record is
        the 1-indexed position of the change point and element 2 is the
        mean used for the segment ending at that change point
    :param my_domain: name interpolated into the plot title
    :return: the assembled ggplot object
    """
    # -------------------- Prepare for Plotting -------------------------- #
    # Observed series: one [date, volume, 'Volume'] row per observation.
    # The label column is what shows up in the legend.
    vol_tags = ['Volume'] * x.shape[0]
    ndf_vol = np.transpose(np.array([dates, x, vol_tags]))
    df_vol = pd.DataFrame(ndf_vol, columns=['Date', 'Volume', 'Data'])

    # Per-segment geometry, one entry per change point.
    seg_left = []     # ordinal of the segment's first date
    seg_right = []    # ordinal of the change-point date
    seg_mean = []     # mean volume drawn across the segment
    cp_dates = []     # date of each change point
    cp_values = []    # value plotted at each change point

    seg_start = 0     # index of the first observation of the current segment
    for k in range(len(cp)):
        change = cp[k]
        cp_idx = change[0] - 1             # -1 b/c positions are 1-indexed
        from_mean = change[2]

        # Ordinals are used so the x positions match ggplot's date axis.
        seg_left.append(dates[seg_start].toordinal())
        seg_right.append(dates[cp_idx].toordinal())
        seg_mean.append(from_mean)
        cp_dates.append(dates[cp_idx])
        cp_values.append(from_mean)

        seg_start = cp_idx + 1             # next segment starts after the cp

    # Change-point rows, plus one dummy row (first observation) whose only
    # job is to put 'Volume Mean' into the legend.  The dummy is added to
    # copies so the segment lists passed to the geoms below stay untouched.
    cp_tags = ['Change Point'] * len(seg_mean)
    ndf_cp = np.transpose(np.array([
        cp_dates + [dates[0]],
        seg_mean + [x[0]],
        cp_tags + ['Volume Mean'],
    ]))
    df_cp = pd.DataFrame(ndf_cp, columns=['Date', 'Volume', 'Data'])
    df_plot = pd.concat((df_vol, df_cp), axis=0)

    # Pad the y range by 10% of the data span above and a quarter of that
    # below.
    lowest = np.min(x)
    highest = np.max(x)
    margin = 0.10 * (highest - lowest)

    p = (
        ggplot.ggplot(aes(x='Date', y='Volume', color='Data'), data=df_plot)
        + ggplot.geom_line(color='blue', size=2)
        + ggplot.geom_point(x=seg_right, y=cp_values, color='black',
                            shape='D', size=50)
        + ggplot.geom_hline(xmin=seg_left,
                            xmax=seg_right,
                            yintercept=seg_mean, color="red", size=3)
        + ggplot.scale_x_date(labels=date_format("%Y-%m-%d"), breaks="1 week")
        + ggplot.scale_colour_manual(values=["black", "blue", "red"])
        + ggplot.scale_y_continuous(labels='comma')
        + ggplot.ylim(low=lowest - margin / 4.0, high=highest + margin)
        + ggplot.xlab("Week (Marked on Mondays)")
        + ggplot.ylab("Message Vol")
        + ggplot.ggtitle("%s\nMessage Volume by Week" % my_domain)
        + ggplot.theme_seaborn()
    )
    return p