예제 #1
0
def plot_trend_season(dates, ndf_domain, x, x_trend, season, my_domain):
    """Build a faceted line plot of the observed, trend and seasonal series.

    :param dates:      values assigned to the 'Date' column of the plot frame
    :param ndf_domain: two-column data used to build the ['Date', 'Volume'] frame
    :param x:          observed series (only its length is used here)
    :param x_trend:    trend series (only its length is used here)
    :param season:     seasonal series (only its length is used here)
    :param my_domain:  name interpolated into the plot title
    :return: the assembled ggplot object; nothing is rendered or saved here
    """
    # ---------------------- Prepare Data Frame ----------------------- #
    frame = pd.DataFrame(ndf_domain, columns=['Date', 'Volume'])
    frame['Date'] = dates

    # One facet/legend label per row: observed first, then trend, then season.
    # NOTE(review): presumably ndf_domain stacks the three series in this same
    # order -- confirm against the caller.
    row_labels = (['Observed Volume'] * len(x)
                  + ['Overall Trend'] * len(x_trend)
                  + ['Repeat Sending Trend'] * len(season))

    df_plot = pd.concat((frame, pd.DataFrame(row_labels)), axis=1)
    df_plot.columns = ['Date', 'Volume', 'Data']

    # ---------------------- Plot Decomposition ----------------------- #
    # Layers are added one at a time, in the same order as a single chained
    # expression would evaluate them.
    p = ggplot.ggplot(aes(x='Date', y='Volume', color='Data'), data=df_plot)
    p = p + ggplot.geom_line(color='blue', size=2)
    p = p + ggplot.scale_x_date(labels=date_format("%Y-%m-%d"), breaks="1 week")
    p = p + ggplot.xlab("Week (Marked on Mondays)")
    p = p + ggplot.ylab("Message Vol")
    p = p + ggplot.ggtitle("%s Message Volume by Week" % my_domain)
    # One panel per 'Data' label, each with its own y scale.
    p = p + ggplot.facet_grid('Data', scales='free_y')
    p = p + ggplot.theme_seaborn()
    return p
예제 #2
0
def plot_bin_dists(df, bin_def="distance_bin <= 500"):
    """Histogram the 'R2' column of ``df``, faceted by 'distance_bin'.

    :param df:      data frame; rows are selected with ``df.query(bin_def)``
    :param bin_def: query expression selecting the rows to plot; it is also
                    used verbatim as the plot title
    :return: the assembled ggplot object
    """
    # Side effect: this changes matplotlib's global default figure size
    # (16 x 12 scaled by 0.65) for everything plotted afterwards.
    plt.rcParams['figure.figsize'] = np.array([16, 12]) * 0.65

    mapping = gp.aes(x='R2')
    selected = df.query(bin_def)

    plot = gp.ggplot(mapping, data=selected)
    plot = plot + gp.geom_histogram(fill='coral')
    plot = plot + gp.facet_wrap("distance_bin")
    plot = plot + gp.theme_seaborn(context='talk')
    plot = plot + gp.ggtitle(bin_def)
    return plot
예제 #3
0
def _ggplot(df, out_file):
    """Draw a faceted bar chart of callers with ggplot and save it.

    XXX Not yet functional

    The "variant.type", "category" and "caller" columns of ``df`` are
    replaced in place by their display labels before plotting, so the
    caller's frame is modified.

    :param df:       data frame with "variant.type", "category", "caller"
                     and "value.floor" columns
    :param out_file: destination handed to ``ggsave``
    """
    import ggplot as gg

    # Unknown variant types and categories raise KeyError; an unknown caller
    # is mapped to None instead.
    df["variant.type"] = [vtype_labels[raw] for raw in df["variant.type"]]
    df["category"] = [cat_labels[raw] for raw in df["category"]]
    df["caller"] = [caller_labels.get(raw) for raw in df["caller"]]

    plot = gg.ggplot(df, gg.aes(x="caller", y="value.floor"))
    plot = plot + gg.geom_bar()
    plot = plot + gg.facet_wrap("variant.type", "category")
    plot = plot + gg.theme_seaborn()
    gg.ggsave(plot, out_file)
예제 #4
0
def _ggplot(df, out_file):
    """Save a ggplot bar chart of callers, faceted by variant type and category.

    XXX Not yet functional

    Note that ``df`` is modified: its "variant.type", "category" and
    "caller" columns are overwritten with display labels.

    :param df:       data frame providing the "caller" (x) and "value.floor"
                     (y) columns plus the two facet columns
    :param out_file: destination handed to ``ggsave``
    """
    import ggplot as gg

    # Strict lookups for variant type and category (KeyError on unknown
    # values); callers without a label fall back to None.
    variant_names = [vtype_labels[code] for code in df["variant.type"]]
    df["variant.type"] = variant_names
    category_names = [cat_labels[code] for code in df["category"]]
    df["category"] = category_names
    caller_names = [caller_labels.get(code) for code in df["caller"]]
    df["caller"] = caller_names

    chart = gg.ggplot(df, gg.aes(x="caller", y="value.floor"))
    chart = chart + gg.geom_bar()
    chart = chart + gg.facet_wrap("variant.type", "category")
    chart = chart + gg.theme_seaborn()
    gg.ggsave(chart, out_file)
예제 #5
0
def googletrend_command(delta_t, threshold=0.0, inverse=False):
    """Run the google trend algorithm over a parameter grid and save its ROC plot.

    For every threshold 0.0, 0.1, ... up to ``threshold`` and every delta_t in
    ``1 .. int(delta_t) - 1`` the classifier is run and evaluated; the
    resulting (FPR, TPR) points are drawn together with a y=x baseline and
    written to ``./plots/googletrend[_inverse].jpg``.

    :param delta_t:   the upper bound (exclusive) for original delta_t parameter
    :param threshold: upper bound (inclusive) for the threshold of
                      differentiating two classes, scanned in steps of 0.1
    :param inverse:   whether to inverse the classifier
    """
    ## handle filepath and title based on parameter inverse
    filename = "googletrend"
    titlename = "ROC of google trend classifier"
    if inverse:
        filename += "_inverse"
        titlename += " (inverse version)"
    filepath = "./plots/%s.jpg" % filename

    ## generate data first
    data = googletrend.preprocess()

    ## threshold grid
    # np.arange with a float step derives its length from
    # ceil((stop - start) / step); rounding in ``threshold + 0.1`` can add an
    # extra element past the requested bound (threshold=0.2 also yields 0.3).
    # Compute the number of steps explicitly so the grid never exceeds
    # ``threshold``; the small epsilon keeps exact multiples of the step.
    step = 0.1
    n_steps = int(np.floor(threshold / step + 1e-9)) + 1
    thresholds = step * np.arange(n_steps)

    ## store classifier evaluation metrics into dict
    output = {'tpr': [], 'fpr': [], 'plot': []}
    for thre in thresholds:
        # Single-argument call form: valid on both Python 2 and Python 3.
        print("==> threshold: %f, inverse: %s" % (thre, inverse))
        for i in range(1, int(delta_t)):
            googletrend.algorithm(data, i, thre, inverse)
            tp_rate, fp_rate = googletrend.evaluate(data)
            output['tpr'].append(tp_rate)
            output['fpr'].append(fp_rate)
            output['plot'].append('thre_' + str(thre))

    ## plot ROC graph
    ## add a y=x baseline for comparison
    output['tpr'].extend([0.0, 1.0])
    output['fpr'].extend([0.0, 1.0])
    output['plot'].extend(['baseline', 'baseline'])
    df = pd.DataFrame(output)
    graph = (gg.ggplot(df, gg.aes('fpr', 'tpr', color='plot'))
             + gg.theme_seaborn()
             + gg.ggtitle(titlename)
             + gg.xlab("FPR")
             + gg.ylab("TPR")
             + gg.xlim(0.0, 1.0)
             + gg.ylim(0.0, 1.0)
             + gg.geom_point()
             + gg.geom_line())
    gg.ggsave(plot=graph, filename=filepath, width=6, height=6, dpi=100)
 def _plot_scat_w_line(self, gp_aes):
     """Add coral points, a blue smoother and the seaborn 'talk' theme to ``gp_aes``.

     :param gp_aes: plot object the layers are added to with ``+``
     :return: the result of adding all three layers
     """
     plot = gp_aes + gp.geom_point(color='coral')
     # Smoothed trend line only; se=False is passed to stat_smooth.
     plot = plot + gp.stat_smooth(span=.2, color='blue', se=False)
     plot = plot + gp.theme_seaborn(context='talk')
     return plot
예제 #7
0
def plot_vol(dates, x, cp, my_domain):
    """Plot message volume with change-point markers and segment mean lines.

    :param dates:     indexable sequence of dates; ``toordinal()`` is called
                      on the entries used as segment end points
    :param x:         numpy array of volumes, one per date
    :param cp:        indexable sequence of change-point records; ``record[0]``
                      is the 1-based position of the change point and
                      ``record[2]`` is the value drawn both as the segment
                      mean line and as the change-point marker
    :param my_domain: name interpolated into the plot title
    :return: the assembled ggplot object
    """
    # -------------------- Prepare for Plotting -------------------------- #
    # Volume rows: [date, volume, 'Volume'], where the constant third column
    # is the legend label.
    volume_tags = ['Volume'] * x.shape[0]
    ndf_vol = np.transpose(np.array([dates, x, volume_tags]))
    df_vol = pd.DataFrame(ndf_vol, columns=['Date', 'Volume', 'Data'])

    xmin_list = []      # left end of each mean segment (date ordinal)
    xmax_list = []      # right end of each mean segment (date ordinal)
    yint_list = []      # height of each mean segment
    cp_date_list = []   # date of each change point
    cp_value_list = []  # value plotted at each change point

    # cp is accessed by integer index only, so any object supporting len()
    # and cp[k] works here.
    segment_start = 0
    for k in range(len(cp)):
        record = cp[k]
        cp_idx = record[0] - 1  # -1 b/c 1-indexed (includes cp itself)
        # Ordinals are used for the x positions to match ggplot's date axis.
        xmin_list.append(dates[segment_start].toordinal())
        xmax_list.append(dates[cp_idx].toordinal())
        yint_list.append(record[2])
        cp_date_list.append(dates[cp_idx])
        cp_value_list.append(record[2])
        segment_start = cp_idx + 1  # next segment starts after the cp

    # Change-point rows plus one dummy row whose only purpose is to put
    # 'Volume Mean' into the legend. The working lists are left untouched
    # because they are reused below for the marker and hline layers.
    marker_dates = cp_date_list + [dates[0]]
    marker_values = yint_list + [x[0]]
    marker_tags = ['Change Point'] * len(yint_list) + ['Volume Mean']
    ndf_cp = np.transpose(np.array([marker_dates, marker_values, marker_tags]))
    df_cp = pd.DataFrame(ndf_cp, columns=['Date', 'Volume', 'Data'])

    df_plot = pd.concat((df_vol, df_cp), axis=0)

    # Pad the y range: a quarter margin below the minimum and a full margin
    # above the maximum, where the margin is 10% of the data range.
    lowest = np.min(x)
    highest = np.max(x)
    margin = 0.10 * (highest - lowest)

    p = (ggplot.ggplot(aes(x='Date', y='Volume', color='Data'), data=df_plot)
         + ggplot.geom_line(color='blue', size=2)
         + ggplot.geom_point(x=xmax_list, y=cp_value_list, color='black',
                             shape='D', size=50)
         + ggplot.geom_hline(xmin=xmin_list,
                             xmax=xmax_list,
                             yintercept=yint_list, color="red", size=3)
         + ggplot.scale_x_date(labels=date_format("%Y-%m-%d"), breaks="1 week")
         + ggplot.scale_colour_manual(values=["black", "blue", "red"])
         + ggplot.scale_y_continuous(labels='comma')
         + ggplot.ylim(low=lowest - margin / 4.0, high=highest + margin)
         + ggplot.xlab("Week (Marked on Mondays)")
         + ggplot.ylab("Message Vol")
         + ggplot.ggtitle("%s\nMessage Volume by Week" % my_domain)
         + ggplot.theme_seaborn())

    return p