Esempio n. 1
0
    def plot(self, what='cumulative_payouts', include_ci=True):
        """Build a ggplot line chart of the results.

        :param what: which chart to draw: ``'cumulative_payouts'``,
            ``'avg_accuracy'`` or ``'all'`` (both charts, one facet each).
        :param include_ci: when true, the ``ymin``/``ymax`` band is drawn as a
            semi-transparent area layer.
        :return: the ggplot object with a ``geom_line`` layer added.
        :raises ValueError: if ``what`` is not one of the options above.
        """
        # Reject unknown options before anything else so the error does not
        # depend on the plotting stack being importable.
        if what not in ('cumulative_payouts', 'avg_accuracy', 'all'):
            raise ValueError('%s is not a valid option' % what)

        import ggplot as gg #This is hacky ... need to DRY out the imports

        if what == 'cumulative_payouts':
            chart = self._plot_cumulative_payouts(include_ci=include_ci)
        elif what == 'avg_accuracy':
            chart = self._plot_avg_accuracy(include_ci=include_ci)
        else:
            import pandas as pd

            # Compute the summary once and share it between both sub-plots.
            summary = self.summary()
            payouts = self._plot_cumulative_payouts(include_ci=include_ci, summary=summary).data
            accuracy = self._plot_avg_accuracy(include_ci=include_ci, summary=summary).data

            # Give both frames a common value column plus a facet label.
            payouts['Outcome'] = payouts['AverageCumulativePayout']
            accuracy['Outcome'] = accuracy['AverageAccuracy']
            payouts['Plot'] = 'Cumulative Payouts'
            accuracy['Plot'] = 'Average Accuracy'

            # DataFrame.append was removed in pandas 2.0; concat is the
            # equivalent that works on old and new versions.
            df = pd.concat([payouts, accuracy], ignore_index=True)

            if include_ci:
                chart = gg.ggplot(gg.aes(x='Round', y='Outcome', ymin='ymin', ymax='ymax'), data=df) + \
                    gg.geom_area(alpha=0.5)
            else:
                chart = gg.ggplot(gg.aes(x='Round', y='Outcome'), data=df)

            chart += gg.facet_grid('Plot', scales='free')

        return chart + gg.geom_line()
Esempio n. 2
0
def plot_line(X,y,title=None,labelx=None,labely=None,save=False, colors=None):
    '''
    Build a plot of every column of ``y`` against ``X``; optionally save it.

    X - x-axis values. The code also reads ``X.columns``, so it is presumably
        a pandas DataFrame (TODO confirm against callers).
    y - table with one column per series (read through ``y.shape`` and
        ``y.iloc``).
    title - plot title; also used to derive the output file name (spaces
        removed, dots replaced by dashes, ``.pdf`` appended).
    labelx, labely - axis labels.
    save - when true the plot is written to the derived file and nothing is
        returned; otherwise the plot object is returned.
    colors - one color per series, or a column name of ``X``. Defaults to
        matplotlib's base and CSS4 color names.

    Raises ValueError when ``save`` is true but no ``title`` is given, because
    the file name is derived from the title.
    '''
    # Previously this combination only failed at the very end with a NameError
    # on the unset file name; fail early with a clear message instead.
    if save and title is None:
        raise ValueError("plot_line: a title is required when save=True "
                         "(the output file name is derived from it)")

    df = pandas.DataFrame()

    if title is not None:
        img_title = title.replace(" ","").replace(".","-") + ".pdf"

    df['X'] = X
    n_series = y.shape[1]
    for i in range(n_series):
        df[str(i)] = y.iloc[:,i].values

    if colors is None:
        colors = list(dict(mcolors.BASE_COLORS, **mcolors.CSS4_COLORS).keys())

    # The last row is dropped before plotting (kept from the original code).
    df = df.iloc[0:df.shape[0]-1, :]
    p = ggplot(df, aes(x='X'))

    for i in range(n_series):
        # NOTE(review): this membership test looks like it is meant for the
        # case where `colors` is a column name of X; with a list of colors
        # it compares element-wise. Left unchanged; confirm the intent.
        if colors not in X.columns.values:
            p = p + geom_line(aes(y=str(i),color = colors[i]))
        else:
            p = p + geom_point(aes(y=str(i),color = colors))

    p = p + xlab(labelx) + ylab(labely) + ggtitle(title)

    if save:
        p.save(img_title)
    else:
        return p
Esempio n. 3
0
def main():
    """Simulate the first loaded series and save three line charts.

    Reads parameters from ``./params.txt`` and the data for 2017-12-20, runs
    ``simulate`` on the first loaded series and writes:

    * ``hoge.png``  - the loaded series from index 6 onwards,
    * ``hoge2.png`` - the simulated series with 12 points trimmed at each end,
    * ``hoge3.png`` - loaded-minus-simulated differences, trimmed the same way.
    """
    params = load_params("./params.txt")
    observed = load_data(datetime.date(2017, 12, 20))[0]
    simulated = simulate(observed, params)

    # Difference between the value three steps ahead and the simulation;
    # the last three slots keep their initial 0.
    residual = [0] * len(observed)
    for idx in range(len(residual) - 3):
        residual[idx] = observed[idx + 3] - simulated[idx]

    observed_df = pandas.DataFrame({
        't': range(6, len(observed)),
        'price': observed[6:],
    })
    simulated_df = pandas.DataFrame({
        't': range(12, len(simulated) - 12),
        'price': simulated[12:-12],
    })
    residual_df = pandas.DataFrame({
        't': range(12, len(simulated) - 12),
        'price': residual[12:-12],
    })

    mapping_a = ggplot.aes(x='t', y='price')
    chart_a = ggplot.ggplot(mapping_a, data=observed_df) + ggplot.geom_line()
    mapping_b = ggplot.aes(x='t', y='price')
    chart_b = ggplot.ggplot(mapping_b, data=simulated_df) + ggplot.geom_line(color='blue')
    mapping_c = ggplot.aes(x='t', y='price')
    chart_c = ggplot.ggplot(mapping_c, data=residual_df) + ggplot.geom_line(color='blue')

    for chart, target in ((chart_a, 'hoge.png'),
                          (chart_b, 'hoge2.png'),
                          (chart_c, 'hoge3.png')):
        chart.save(target)
 def plot_roc(self, experiment_type, to_plot):
     """Save a ROC line chart (TPR against FPR), one colour per parameter.

     The ``parameter`` column of ``to_plot`` is converted to strings in
     place. The image is written to
     ``<results_path><experiment_type>_<mode>.png``.
     """
     # Strings make the colour scale categorical.
     to_plot.loc[:, "parameter"] = list(map(str, to_plot.loc[:, "parameter"]))

     chart = gg.ggplot(data = to_plot, aesthetics = gg.aes(x = "FPR", y = "TPR", colour = "parameter"))
     chart = chart + gg.geom_line(gg.aes(x = "FPR", y = "TPR", colour = "parameter"))
     chart = chart + gg.ggtitle(experiment_type)
     chart = chart + gg.xlab("FPR")
     chart = chart + gg.ylab("TPR")

     target = self.results_path + experiment_type + "_" + self.mode + ".png"
     gg.ggsave(filename = target, plot = chart)
Esempio n. 5
0
    def plot_outcomes(self, chart_title=None, use_ggplot=False):
        """ Plot one marker per observed patient at the dose level received.

        Nothing is drawn, and None is returned, when ``self.size()`` is not
        positive.

        :param chart_title: optional chart title. Default is fairly verbose
        :type chart_title: str
        :param use_ggplot: True to use ggplot, else matplotlib
        :type use_ggplot: bool
        :return: the ggplot object when ``use_ggplot`` is true; the matplotlib
                 branch draws on the current figure and returns None

        """

        if not chart_title:
            chart_title = "Each point represents a patient\nA circle indicates no toxicity, a cross toxicity"
            chart_title += "\n"

        # Guard clause: nothing to plot without patients.
        if not self.size() > 0:
            return None

        if use_ggplot:
            from ggplot import (ggplot, ggtitle, geom_text, aes, ylim)
            import numpy as np
            import pandas as pd
            patient_ids = range(1, self.size()+1)
            markers = np.where(self.toxicities(), 'X', 'O')
            frame = pd.DataFrame({'Patient number': patient_ids,
                                  'Dose level': self.doses(),
                                  'DLT': self.toxicities(),
                                  'Symbol': markers})

            chart = ggplot(frame, aes(x='Patient number', y='Dose level', label='Symbol'))
            chart = chart + ggtitle(chart_title)
            chart = chart + geom_text(aes(size=20, vjust=-0.07))
            chart = chart + ylim(1, 5)
            return chart

        import matplotlib.pyplot as plt
        import numpy as np
        patient_ids = np.arange(1, self.size()+1)
        doses_given = np.array(self.doses())
        had_tox = np.array(self.toxicities()).astype('bool')
        # Crosses for toxicities first, then circles for the rest.
        for mask, marker in ((had_tox, 'x'), (~had_tox, 'o')):
            if sum(mask):
                plt.scatter(patient_ids[mask], doses_given[mask], marker=marker, s=300,
                            facecolors='none', edgecolors='k')

        plt.title(chart_title)
        plt.ylabel('Dose level')
        plt.xlabel('Patient number')
        plt.yticks(self.dose_levels())
        # Size the current figure to a golden-ratio rectangle, 12 inches wide.
        figure = plt.gcf()
        golden = (np.sqrt(5)+1)/2.
        figure.set_size_inches(12, 12/golden)
Esempio n. 6
0
	def bar_chart(self, conn, column1, column2, table_chosen, title):
		"""Print a bar chart of ``column2`` (bar weight) per ``column1`` value.

		The two columns are fetched from ``table_chosen`` over ``conn`` with
		``dfile.double_selector``.
		"""
		# a bar graph only needs the two selected columns
		frame = dfile.double_selector(conn=conn, table=table_chosen, col1=column1, col2=column2)

		chart = ggplot(aes(x=column1, weight=column2), data=frame)
		chart = chart + geom_bar()
		chart = chart + labs(title=title)
		print(chart)
def plot_update_frequency(result):
    """Plot the weekly number of changes from aggregated query results.

    :param result: iterable of records; each record provides a count under
        ``'count'`` and a timestamp under ``['_id']['timestamp']``.
    :return: ggplot object with points, a smoother, a date-formatted x axis
        and a fixed title.
    """
    import pandas as pd
    import ggplot

    # turn the query results into a time series of changes
    stamps = []
    counts = []
    for res in result:
        stamps.append(pd.Timestamp(res['_id']['timestamp']))
        counts.append(res['count'])

    # An explicit DatetimeIndex keeps resample() working for an empty result.
    ts = pd.DataFrame(counts, index=pd.DatetimeIndex(stamps), columns=['changes'])
    # resample(..., how='sum') was removed from pandas; aggregate on the
    # resampler instead.
    ts = ts.resample('W').sum()
    ts.index.names = ['date']

    # plot the time series of changes
    p = ggplot.ggplot(ts, ggplot.aes(x = ts.index, y=ts['changes'])) +\
            ggplot.geom_point(color = 'blue') +\
            ggplot.xlab('Period') +\
            ggplot.ylab('Changes') +\
            ggplot.geom_smooth() +\
            ggplot.ylim(low = 0) +\
            ggplot.scale_x_date(breaks = ggplot.date_breaks("12 months"),  labels = ggplot.date_format('%Y-%m')) +\
            ggplot.ggtitle('OpenStreetMaps Denver-Boulder\nChanges per Week')
    return p
Esempio n. 8
0
    def two_var_intr_effects(self, target, vars, nval=100, plot=True):
        """ Computes two-variable interaction effects with the R function
        ``twovarint``.

        If the R session has no ``null.models`` object yet, null models are
        generated first (n=10).

        Args:
          target - Variable identifier (column name or number) specifying the
                   target variable
          vars - List of variable identifiers (column names or numbers) specifying
                 other selected variables. Must not contain target
          nval - Number of evaluation points used for calculation.
          plot - Determines whether or not to plot results.
        Returns: Pandas dataframe of interaction effects, indexed by ``vars``
                 (numeric identifiers are shifted by one), with columns
                 ``interact_str``, ``exp_null_int`` and ``std_null_int``.
        """
        # Check if null.models have already been generated
        check_str = """
                function(){
                  if(exists("null.models")){
                    return(T)
                  } else {
                    return(F)
                  }
                }
                """
        if not robjects.r(check_str)()[0]:
            self.logger.info(
                'Null models not generated, generating null models '
                '(n=10)')
            self._generate_interaction_null_models(10, quiet=False)

        int_str = """
              function(target, vars, nval){
                interactions <- twovarint(tvar=target, vars=vars, null.models, 
                                          nval=nval, plot=F)
              }
              """
        # Numeric identifiers get one added; names pass through unchanged.
        # isinstance also accepts str subclasses, which `type(x) is str`
        # wrongly sent down the numeric path.
        target = target if isinstance(target, str) else target + 1
        vars = [var if isinstance(var, str) else var + 1 for var in vars]
        r_interact = robjects.r(int_str)(target,
                                         robjects.Vector(np.array(vars)), nval)
        interact = pd.DataFrame(
            {
                'interact_str': list(r_interact[0]),
                'exp_null_int': list(r_interact[1]),
                'std_null_int': list(r_interact[2])
            },
            index=vars)

        if plot:
            # Long format: one row per (variable, measure) for a grouped bar chart.
            int_effects = interact.reset_index().rename(
                columns={'index': 'vars'})
            int_effects_m = pd.melt(
                int_effects,
                id_vars='vars',
                value_vars=['interact_str', 'exp_null_int'])
            p = gg.ggplot(gg.aes(x='vars', fill='variable', weight='value'),
                          data=int_effects_m) \
                + gg.geom_bar() \
                + gg.labs(
                    title='Two-var interaction effects - {}'.format(target))
            print(p)
        return interact
Esempio n. 9
0
 def test_ndim_2_facet_wrap_subplots(self):
     """Wrapping diamonds by cut and clarity yields a 7 x 6 subplot grid."""
     base = gg.ggplot(gg.aes(x='price'), gg.diamonds)
     wrapped = base + gg.facet_wrap('cut', 'clarity')
     _, axes = wrapped.make_facets()
     rows, cols = axes.shape
     self.assertEqual(rows, 7)
     self.assertEqual(cols, 6)
Esempio n. 10
0
 def test_ndim_2_facet_wrap(self):
     """Wrapping diamonds by cut and clarity reports 7 rows, 6 columns, ndim 40."""
     base = gg.ggplot(gg.aes(x='price'), gg.diamonds)
     wrapped = base + gg.facet_wrap('cut', 'clarity')
     rows = wrapped.facets.nrow
     cols = wrapped.facets.ncol
     self.assertEqual(rows, 7)
     self.assertEqual(cols, 6)
     self.assertEqual(wrapped.facets.ndim, 40)
Esempio n. 11
0
def scatter(x, y, filename=""):
    """Scatter-plot ``y`` against ``x``.

    With an empty ``filename`` the plot is printed; otherwise it is saved as
    ``graphs/scatter/<filename>.png``.
    """
    df = pd.DataFrame({ 'x': pd.Series(x), 'y': pd.Series(y) })
    p = gg.ggplot(gg.aes(x='x', y='y'), data=df) + gg.geom_point()
    if filename == "":
        # print() call form works on both Python 2 and Python 3
        print(p)
    else:
        gg.ggsave(filename="graphs/scatter/"+filename+".png", plot=p)
Esempio n. 12
0
def displacement_plot(centered, limits=None, style=None):
    u"""Draws nice displacement plots using ggplot2.

    The input frame is not modified; a sorted copy with ``Object`` converted
    to strings is plotted.

    params:
        centered (pd.DataFrame): needs cX, cY, Object, Frame columns, probably
            produced by calling center() above
        limits (real): Sets the limits of the scales to a square window showing
            ±limits on each axis.
        style (Iterable): Collection of strings. The only value acted on is
            'no-terminal-dot' (which does not label the end of tracks which
            terminate early). theme_bw is currently always applied.

    Returns:
        g (gg.ggplot): Plot object
    """
    style = {} if style is None else style
    # assign() returns a new frame, so the caller's data is left untouched.
    centered = centered.assign(Object=centered['Object'].map(str))
    # DataFrame.sort was removed from pandas; sort_values is the replacement.
    centered = centered.sort_values(['Frame', 'Object'])
    g = (gg.ggplot(centered, gg.aes(x='cX', y='cY', color='Object')) +
         gg.geom_path(size=0.3))
    g += gg.theme_bw()  # if 'theme-bw' in style else gg.theme_seaborn()
    if limits:
        g = g + gg.ylim(-limits, limits) + gg.xlim(-limits, limits)
    if 'no-terminal-dot' not in style:
        # Tracks whose last frame is earlier than the overall last frame
        # ended early; mark their final position with a black dot.
        max_frame = centered['Frame'].max()
        endframe = centered.groupby('Object')['Frame'].max()
        endframe = endframe[endframe != max_frame].reset_index()
        endframe = endframe.merge(centered, on=['Object', 'Frame'])
        # we should check if endframe is empty before adding it:
        # https://github.com/yhat/ggplot/issues/425
        if not endframe.empty:
            g += gg.geom_point(data=endframe, color='black', size=1)
    return g
Esempio n. 13
0
def t_sne_visualize(generated,n_sne,epoch):
    """Embed generated samples with t-SNE and save a labelled scatter plot.

    generated - object whose ``.data.numpy()`` gives the samples; each sample
        is flattened to 28*28*3 values after dividing by 255.
    n_sne - currently ignored: the sample count is fixed at 49 below.
    epoch - used only in the output file name ``tsne<epoch>.png``.

    Labels are ``0..6`` repeated seven times, so 49 samples are expected.
    """
    n_label=7
    X_sample=generated.data.numpy()/255
    y_sample=list(range(n_label))*n_label
    X_sample=np.reshape(np.ravel(X_sample), (X_sample.shape[0], 28*28*3))

    feat_cols = [ 'pixel'+str(i) for i in range(X_sample.shape[1]) ]
    df = pd.DataFrame(X_sample,columns=feat_cols)
    df['label'] = y_sample
    df['label'] = df['label'].apply(str)
    # NOTE(review): the caller's n_sne is overridden, as in the original code;
    # kept so existing callers see no change.
    n_sne=49
    # Plain integer permutation. The previous concatenation with an empty
    # list turned the row labels into floats.
    rndperm = np.random.permutation(df.shape[0])
    chosen = rndperm[:n_sne]
    tsne = TSNE(n_components=2, verbose=1, perplexity=40, n_iter=300)
    print('INITIALIZED')
    tsne_results = tsne.fit_transform(df.loc[chosen,feat_cols].values)
    print('AFTER FITTING')
    df_tsne = df.loc[chosen,:].copy()
    df_tsne['x-tsne'] = tsne_results[:,0]
    df_tsne['y-tsne'] = tsne_results[:,1]

    chart=ggplot( df_tsne, aes(x='x-tsne', y='y-tsne', color='label')) \
            + geom_point(size=70, alpha =0.7) \
            + ggtitle("tSNE dimensions colored by digit")
    chart.save("tsne"+str(epoch)+".png")

    return
Esempio n. 14
0
def graph3(score_data):
    """ Box plot for scores;
        Builds and returns graph 3: score distribution per date.

        ``score_data[0]`` is the header row and the remaining rows are data.
        The date column is the header entry at ``find_time_stamp(score_data)``
        and is parsed as ``%m/%d/%Y``. """

    header = score_data[0]
    date_column = header[find_time_stamp(score_data)]
    frame = DataFrame(score_data[1:], columns=header)

    # Every int64 column is treated as a numerical question
    question_cols = frame.select_dtypes(include=['int64']).columns.values

    # Long format: one row per (date, name, question) with its score
    long_scores = pd.melt(frame,
                          id_vars=[date_column, "Name"],
                          value_vars=question_cols,
                          var_name="Question",
                          value_name="Score")

    # The name is not needed for the plot
    long_scores = long_scores.drop('Name', axis=1)

    # Parse the date strings into real datetimes
    long_scores[date_column] = pd.to_datetime(long_scores[date_column],
                                              format="%m/%d/%Y")

    chart = ggplot.ggplot(ggplot.aes(x=date_column, y='Score'), long_scores)
    chart = chart + ggplot.geom_boxplot()
    chart = chart + ggplot.ggtitle("Distribution of Question Scores over Time")
    return chart
Esempio n. 15
0
    def density_plot(by='dpsi_zscore', categorical=True):
        """Density plot of the ``by`` field of the variants.

        With ``categorical`` the variants are grouped into the 'increase',
        'decrease' and 'constant' buckets; otherwise one group is made per
        entry of ``aaa_changes`` that has more than one matching variant.
        """

        if categorical:
            buckets = (
                ('muts increasing AAA', 'increase'),
                ('muts decreasing AAA', 'decrease'),
                ('muts not changing AAA length', 'constant'),
            )
            data_dict = {}
            for label, key in buckets:
                data_dict[label] = np.array([rec[by] for rec in variants[key]])
        else:
            data_dict = OrderedDict()
            for change in aaa_changes:
                matching = [
                    rec[by] for rec in variants['all']
                    if rec['change'] == change
                ]
                # groups with a single value are skipped
                if len(matching) > 1:
                    data_dict[change] = np.array(matching)

        chart = ggplot(aes(x='value', colour='variable', fill='variable'),
                       data=prepare_data_frame(data_dict))
        chart = chart + ggtitle(
            'Impact of variants affecting poly AAA sequences on %s' % by)
        chart = chart + xlab(by)
        chart = chart + ylab('Kernel density estimate')
        chart = chart + geom_density(alpha=0.6)

        return chart
Esempio n. 16
0
def scatter(x, y, filename=""):
    """Scatter-plot ``y`` against ``x``.

    With an empty ``filename`` the plot is printed; otherwise it is saved as
    ``graphs/scatter/<filename>.png``.
    """
    df = pd.DataFrame({'x': pd.Series(x), 'y': pd.Series(y)})
    p = gg.ggplot(gg.aes(x='x', y='y'), data=df) + gg.geom_point()
    if filename == "":
        # print() call form works on both Python 2 and Python 3
        print(p)
    else:
        gg.ggsave(filename="graphs/scatter/" + filename + ".png", plot=p)
Esempio n. 17
0
def plotSetOfArrays(arrays, names, fileName):
    """Plot a set of column arrays against an evenly spaced 0..1 axis.

    arrays - sequence of 2-D arrays with the same number of rows; all their
        columns go into the ggplot chart, while the interactive chart uses
        only the first column of each.
    names - one column name per column of the concatenated arrays.
    fileName - image name, saved under ``./IMG/``.

    Also writes ``iou_scores.html`` with up to five series (one per palette
    color) and shows it.
    """
    n_rows = arrays[0].shape[0]
    IDS = np.linspace(0, 1, n_rows)
    # Single concatenation instead of growing the matrix one array at a time.
    A = np.concatenate([IDS.reshape(n_rows, 1)] + list(arrays), axis=1)
    Data = pd.DataFrame(A, columns=['noise'] + list(names))
    Melted = pd.melt(Data, id_vars=['noise'])

    pv = ggplot.ggplot(ggplot.aes(x='noise', y='value', colour='variable'),
                       data=Melted) + ggplot.geom_line() + ggplot.geom_point()
    ggplot.ggsave(pv, './IMG/' + fileName)

    output_file("iou_scores.html", title="correlation.py example")

    figure(tools="pan,wheel_zoom,box_zoom,reset,previewsave")
    hold()
    # zip stops at the shortest input: at most five series are drawn, as
    # before, and fewer than five arrays no longer raise IndexError.
    palette = ['#A6CEE3', '#1F78B4', '#B2DF8A', '#33A02C', '#fb9a99']
    for series, label, color in zip(arrays, names, palette):
        line(IDS, series[:, 0], color=color, legend=label)

    curplot().title = "Minimum IOU"
    grid().grid_line_alpha = 0.3
    show()
Esempio n. 18
0
    def _post_density_plot(self, func=None, x_name='', plot_title='', include_doses=None, boot_samps=1000):
        """Density plot, per dose, of a quantity resampled from ``self.pds``.

        :param func: callable ``func(x, samp)`` evaluated for each scaled dose
            ``x`` on the sample matrix; by default the value of ``self.metric``
            on the efficacy and toxicity probabilities.
        :param x_name: name of the plotted column, also used as the x mapping
        :param plot_title: chart title
        :param include_doses: 1-based dose indices to include; all doses when None
        :param boot_samps: number of draws (with replacement) per dose
        :return: ggplot object with one density per dose
        """

        from ggplot import aes, ggplot, geom_density, ggtitle
        import pandas as pd

        if include_doses is None:
            include_doses = range(1, self.num_doses + 1)

        def my_func(x, samp):
            tox_probs = _pi_T(x, mu=samp[:, 0], beta=samp[:, 1])
            eff_probs = _pi_E(x, mu=samp[:, 2], beta1=samp[:, 3], beta2=samp[:, 4])
            u = self.metric(eff_probs, tox_probs)
            return u
        if func is None:
            func = my_func

        x_boot = []
        dose_indices = []
        samp = self.pds._samp
        # Normalise into a new array: the previous in-place `/=` rewrote
        # self.pds._probs as a side effect of plotting.
        raw_probs = self.pds._probs
        p = raw_probs / raw_probs.sum()
        for i, scaled_dose in enumerate(self.scaled_doses()):
            dose_index = i+1
            if dose_index in include_doses:
                values = func(scaled_dose, samp)
                # draw values with the normalised weights
                x_boot.extend(np.random.choice(values, size=boot_samps, replace=True, p=p))
                dose_indices.extend(np.repeat(dose_index, boot_samps))
        df = pd.DataFrame({x_name: x_boot, 'Dose': dose_indices})
        return ggplot(aes(x=x_name, fill='Dose'), data=df) + geom_density(alpha=0.6) + ggtitle(plot_title)
Esempio n. 19
0
    def production_envelope(self,
                            dataframe,
                            grid=None,
                            width=None,
                            height=None,
                            title=None,
                            points=None,
                            points_colors=None,
                            palette=None,
                            x_axis_label=None,
                            y_axis_label=None):
        """Draw a production envelope as an area between ``lb`` and ``ub``.

        :param dataframe: needs the columns ``value`` (x), ``lb``, ``ub`` and
            ``strain``; one color is requested per distinct strain.
        :param title: optional chart title
        :param palette: palette name; the ``palette`` option when None
        :param x_axis_label: optional x axis name
        :param y_axis_label: optional y axis name
        :return: the ggplot object

        ``grid``, ``height``, ``points`` and ``points_colors`` are accepted
        but not used here; ``width`` is resolved from the options but not
        applied.
        """

        palette = self.get_option('palette') if palette is None else palette
        width = self.get_option('width') if width is None else width
        colors = self._palette(palette, len(dataframe.strain.unique()))

        # The plot must start from a ggplot object; the original started from
        # aes(...) and passed the color scale as an aesthetic.
        # NOTE(review): mapping color to "strain" and passing `colors` as the
        # manual scale values is inferred from how `colors` is computed above;
        # confirm against what self._palette returns.
        plot = ggplot(dataframe,
                      aes(x="value", ymin="lb", ymax="ub", color="strain")) \
            + geom_area() \
            + scale_colour_manual(values=colors)
        if title:
            # geom_tile is a layer, not a title; ggtitle sets the chart title.
            plot += ggtitle(title)
        if x_axis_label:
            plot += scale_x_continuous(name=x_axis_label)
        if y_axis_label:
            plot += scale_y_continuous(name=y_axis_label)

        return plot
Esempio n. 20
0
def t_sne_visualize(latent_vectors, labels, epoch):
    """Embed latent vectors with t-SNE and save a scatter plot colored by label.

    latent_vectors - object whose ``.data.numpy()`` gives a 2-D array, one
        row per sample (values are divided by 255 before embedding).
    labels - one label per row; converted to strings for the color scale.
    epoch - used only in the output file name.

    At most 1000 randomly chosen rows are embedded. The output path is built
    from the module-level ``args.dataset`` and ``args.remove_label``.
    """
    print(latent_vectors.shape)
    X_sample = latent_vectors.data.numpy() / 255
    feat_cols = ['pixel' + str(i) for i in range(X_sample.shape[1])]
    nsne = 1000
    df = pd.DataFrame(X_sample, columns=feat_cols)
    df['label'] = labels
    df['label'] = df['label'].apply(str)
    # Plain integer permutation. The previous concatenation with an empty
    # list turned the row labels into floats.
    rndperm = np.random.permutation(df.shape[0])
    chosen = rndperm[:nsne]
    tsne = TSNE(n_components=2, verbose=1, perplexity=30)
    print('INITIALIZED')
    tsne_results = tsne.fit_transform(df.loc[chosen, feat_cols].values)
    print('AFTER FITTING')
    df_tsne = df.loc[chosen, :].copy()
    df_tsne['x-tsne'] = tsne_results[:, 0]
    df_tsne['y-tsne'] = tsne_results[:, 1]

    chart=ggplot( df_tsne, aes(x='x-tsne', y='y-tsne', color='label')) \
            + geom_point(size=70, alpha =0.7) \
            + ggtitle("tSNE dimensions colored by digit")
    chart.save(
        str(args.dataset) + "tsne-vae/2d-vec-miss" + str(args.remove_label) +
        "/tsne" + str(epoch) + ".png")

    return
Esempio n. 21
0
def plot_trend_season(dates, ndf_domain, x, x_trend, season, my_domain):
    """Plot observed volume, trend and seasonal component as stacked facets.

    dates - values for the 'Date' column (replace the first column of
        ``ndf_domain``).
    ndf_domain - two-column data (date, volume) holding the three series
        one after another.
    x, x_trend, season - the three series; only their lengths are used, to
        label the rows of ``ndf_domain``.
    my_domain - name shown in the chart title.

    Returns the ggplot object.
    """
    # ---------------------- Prepare Data Frame ----------------------- #
    df_domain = pd.DataFrame(ndf_domain, columns=['Date', 'Volume'])
    df_domain['Date'] = dates

    # One label per row, in the same order as the stacked series. List
    # repetition replaces the Python 2-only xrange comprehensions.
    labels = (['Observed Volume'] * len(x)
              + ['Overall Trend'] * len(x_trend)
              + ['Repeat Sending Trend'] * len(season))
    col3 = pd.DataFrame(labels)

    df_plot = pd.concat( (df_domain, col3), axis=1)
    df_plot.columns = ['Date', 'Volume', 'Data']


    # ---------------------- Plot Decomposition ----------------------- #
    # NOTE(review): `aes` and `date_format` are used unqualified while the
    # rest goes through the `ggplot` module; left as in the original.
    p = ggplot.ggplot(aes(x='Date', y='Volume', color='Data'), data=df_plot) + \
        ggplot.geom_line(color='blue', size=2) + \
        ggplot.scale_x_date(labels = date_format("%Y-%m-%d"), breaks="1 week") + \
        ggplot.xlab("Week (Marked on Mondays)") + \
        ggplot.ylab("Message Vol") + \
        ggplot.ggtitle("%s Message Volume by Week" % my_domain) + \
        ggplot.facet_grid('Data', scales='free_y') + \
        ggplot.theme_seaborn()

    return p
Esempio n. 22
0
 def length_dist(self, pat_out="genes_lengths.png"):
     '''Plot the density of the lengths of the items in ``self.num`` with
     ggplot and save the chart to ``pat_out``.'''
     lengths = np.array(list(map(len, self.num)))
     frame = pd.DataFrame({"record_length": lengths})
     chart = ggplot(frame, aes(x="record_length"))
     chart = chart + geom_density()
     chart.save(pat_out)
Esempio n. 23
0
def plot_matches(df_in,
                 date,
                 filename_out,
                 x_var='date_time',
                 y_var="shorthand_search_vol"):
    """
    Draw one line per ``match_id`` and save the chart to ``filename_out``.

    A ``date_time`` column is added to ``df_in`` (in place) by joining
    ``date`` with the frame's ``time`` column; values that cannot be parsed
    become NaT. The frame is expected to be filtered already.
    """
    # basic data processing for viz
    df_in['date_time'] = date + " " + df_in['time'].astype(str)
    df_in['date_time'] = pd.to_datetime(df_in['date_time'],
                                        errors="coerce",
                                        infer_datetime_format=True)

    # build layers for plot
    mapping = aes(x=x_var, y=y_var, group="match_id", color="match_id")
    chart = ggplot(mapping, data=df_in)
    chart += geom_line(size=2)

    # axis labels and time-of-day ticks
    chart += labs(x="time (gmt)", y="search volume (scaled to 100)")
    chart += scale_x_date(labels=date_format("%H:%M:%S"), date_breaks="30 minutes")

    # visual: gray theme with a small monospace font
    look = theme_gray()
    for key, value in (('font.size', 8), ('font.family', 'monospace')):
        look._rcParams[key] = value
    chart += look

    # done
    chart.save(filename_out, width=16, height=8)
Esempio n. 24
0
def main(file_path):
    """Print a per-participant histogram of messages from a pickled conversation.

    Logs an error and returns when ``file_path`` does not exist or does not
    end in ``.pkl``. The pickle is expected to hold a ``conversation_name``
    and three-column ``messages`` (timestamp, type, participant).
    """
    # Validate raw data path
    if not os.path.exists(file_path):
        LOG_ERROR('Could not find file: {}'.format(file_path))
        return

    # Validate raw data file type
    if not file_path.endswith('.pkl'):
        LOG_ERROR('File path must be a pickle file')
        return

    with open(file_path, 'rb') as handle:
        LOG_INFO('Parsing pickle file: {}'.format(file_path))
        # NOTE(review): unpickling can execute arbitrary code; only open
        # files from a trusted source.
        conversation = pickle.load(handle)

    conversation_name = conversation['conversation_name']
    LOG_INFO('Found conversation: {}'.format(conversation_name))

    def to_ordinal(raw_timestamp):
        # epoch seconds -> proleptic Gregorian day number
        return datetime.datetime.fromtimestamp(float(raw_timestamp)).toordinal()

    messages = pd.DataFrame(conversation['messages'])
    messages.columns = ['Timestamp', 'Type', 'Participant']
    messages['Datetime'] = messages['Timestamp'].apply(to_ordinal)

    histogram = ggplot.ggplot(messages, ggplot.aes(x='Datetime', fill='Participant'))
    histogram = histogram + ggplot.geom_histogram(alpha=0.6, binwidth=2)
    histogram = histogram + ggplot.scale_x_date(labels='%b %Y')
    histogram = histogram + ggplot.ggtitle(conversation_name)
    histogram = histogram + ggplot.ylab('Number of messages')
    histogram = histogram + ggplot.xlab('Date')

    print(histogram)
Esempio n. 25
0
def extra(dataframe):
    """Show an xkcd-themed path chart of ``Speed`` over ``Time``.

    Sets the global matplotlib figure size to "18, 4" and reverses the y axis.
    """
    mpl.rcParams["figure.figsize"] = "18, 4"
    chart = ggplot.ggplot(dataframe, ggplot.aes(x='Time', y='Speed'))
    chart = chart + ggplot.geom_path(color='lightblue', size=5)
    chart = chart + ggplot.ggtitle('Ports & Speeds')
    chart = chart + ggplot.scale_y_reverse()
    chart = chart + ggplot.theme_xkcd()
    chart.show()
Esempio n. 26
0
def eval(df_in, predicted, method):
    """Print a scatter plot and confusion statistics for a binary prediction.

    df_in - frame with the true ``donation_flag``, the prediction column and
        the plotted ``donation_count`` / ``m_since_donation`` columns. The
        helper columns ``Correct`` and ``Class`` are added to it in place.
    predicted - name of the column holding the 0/1 predictions.
    method - label printed first.

    Prints the plot, the confusion table, accuracy, precision and recall.
    A ratio whose denominator is zero is reported as ``nan``.
    """
    print(method)
    from ggplot import ggplot, aes, geom_point
    df = df_in  # same object as df_in: the new columns land on the caller's frame

    df['Correct'] = df[predicted] == df['donation_flag']
    said_yes = df[predicted] == 1
    said_no = df[predicted] == 0
    wrong = ~df['Correct']

    # Single-step .loc writes replace chained assignment, which can silently
    # write to a temporary copy.
    df['Class'] = 'True Positive'
    df.loc[said_yes & wrong, 'Class'] = 'False Positive'
    df.loc[said_no & df['Correct'], 'Class'] = 'True Negative'
    df.loc[said_no & wrong, 'Class'] = 'False Negative'

    TP = int((df['Class'] == 'True Positive').sum())
    FP = int((df['Class'] == 'False Positive').sum())
    TN = int((df['Class'] == 'True Negative').sum())
    FN = int((df['Class'] == 'False Negative').sum())

    # print() call form works on both Python 2 and Python 3
    print(ggplot(df, aes(x='donation_count', y='m_since_donation', color = 'Class')) + geom_point())

    confusion = pd.DataFrame({'Positive': [FP, TP],
                              'Negative': [TN, FN]},
                              index = ['TrueNeg', 'TruePos'])

    def ratio(numerator, denominator):
        # avoid ZeroDivisionError on empty or one-sided input
        return float(numerator) / float(denominator) if denominator else float('nan')

    accuracy = ratio(TP + TN, TP + TN + FP + FN)
    precision = ratio(TP, TP + FP)
    recall = ratio(TP, TP + FN)

    print(confusion)
    print('accuracy = ' + str(accuracy))
    print('precision = ' + str(precision))
    print('recall = ' + str(recall))
    print('Done')
Esempio n. 27
0
    def _plot_cumulative_payouts(self, include_ci=True, summary=None):
        """Line chart of the average cumulative payout per round.

        :param include_ci: when true, add a band of +/- 1.96 standard
            deviations around the average as a semi-transparent area.
        :param summary: precomputed summary; ``self.summary()`` when None.
        :return: the ggplot object with a ``geom_line`` layer added.
        """
        import ggplot as gg
        stats = self.summary() if summary is None else summary

        frame = pd.DataFrame({'AverageCumulativePayout': stats['CumulativePayout']['Avg'],
                              'Std': stats['CumulativePayout']['Std'],
                              'Round': range(self.n_rounds)})
        if not include_ci:
            chart = gg.ggplot(gg.aes(x='Round', y='AverageCumulativePayout'), data=frame)
        else:
            frame['ymin'] = frame.AverageCumulativePayout - 1.96 * frame.Std
            frame['ymax'] = frame.AverageCumulativePayout + 1.96 * frame.Std
            mapping = gg.aes(x='Round', y='AverageCumulativePayout', ymin='ymin', ymax='ymax')
            chart = gg.ggplot(mapping, data=frame)
            chart = chart + gg.geom_area(alpha=0.5)

        return chart + gg.geom_line()
def plot_cost_history(alpha, cost_history):
   """Return a point-and-line chart of the cost at each iteration.

   ``alpha`` only appears in the title, formatted with three decimals.
   """
   iterations = range(len(cost_history))
   cost_df = pandas.DataFrame({
      'Cost_History': cost_history,
      'Iteration': iterations
   })
   chart = gp.ggplot(cost_df, gp.aes('Iteration', 'Cost_History'))
   chart = chart + gp.geom_point()
   chart = chart + gp.geom_line()
   chart = chart + gp.ggtitle('Cost History for alpha = %.3f' % alpha )
   return chart
Esempio n. 29
0
def render(data, bin_width, plot_density=False):
    """Print a per-conversation chart of messages over time.

    With ``plot_density`` a density curve per conversation is drawn;
    otherwise a stacked histogram with bins of ``bin_width`` and x-axis
    breaks every six months.
    """
    if plot_density:
        plot = ggplot.ggplot(data, ggplot.aes(x='datetime', color='conversationWithName'))
        plot = plot + ggplot.geom_density()
        plot = plot + ggplot.scale_x_date(labels='%b %Y')
        title, y_label = 'Conversation Densities', 'Density'
    else:
        plot = ggplot.ggplot(data, ggplot.aes(x='datetime', fill='conversationWithName'))
        plot = plot + ggplot.geom_histogram(alpha=0.6, binwidth=bin_width)
        plot = plot + ggplot.scale_x_date(labels='%b %Y', breaks='6 months')
        title, y_label = 'Message Breakdown', 'Number of Messages'

    # title and axis labels follow the same pattern in both modes
    plot = plot + ggplot.ggtitle(title)
    plot = plot + ggplot.ylab(y_label)
    plot = plot + ggplot.xlab('Date')

    print(plot)
Esempio n. 30
0
    def plot(self, inputs):
        """Scatter-plot two columns of ``self.dat`` for one year.

        Returns None when the requested year has no rows or when either
        requested column is missing. Optionally labels the points with
        ``self.ID_col`` and adds a red linear-model smoother.
        """
        year_missing = inputs.year not in self.dat.Year.values
        if year_missing or inputs.xvar not in self.dat or inputs.yvar not in self.dat:
            return None

        rows_for_year = self.dat[self.dat.Year == inputs.year]
        chart = ggplot(rows_for_year, aes(x=inputs.xvar, y=inputs.yvar))
        chart = chart + geom_point()

        if inputs.shownames:
            chart = chart + geom_text(aes(label=self.ID_col), vjust=1, hjust=1)
        if inputs.linear:
            chart = chart + stat_smooth(color="red", method="lm")
        return chart
Esempio n. 31
0
    def plot(self, inputs):
        """Build a scatter plot of ``inputs.xvar`` against ``inputs.yvar``.

        Only rows of ``self.dat`` for ``inputs.year`` are used. Nothing is
        returned when that year is absent or a requested column does not
        exist. ``inputs.shownames`` adds text labels taken from
        ``self.ID_col``; ``inputs.linear`` adds a red "lm" smoother.
        """
        if inputs.year not in self.dat.Year.values:
            return None
        columns_present = inputs.xvar in self.dat and inputs.yvar in self.dat
        if not columns_present:
            return None

        selected = self.dat[self.dat.Year == inputs.year]
        result = ggplot(selected, aes(x=inputs.xvar, y=inputs.yvar))
        result = result + geom_point()
        if inputs.shownames:
            result = result + geom_text(aes(label=self.ID_col), vjust=1, hjust=1)
        if inputs.linear:
            result = result + stat_smooth(color="red", method="lm")
        return result
Esempio n. 32
0
 def plot_sfs(self, pat_out):
     """Print the values of ``self.sfs`` as a table and save them as a bar chart.

     The first entry of ``self.sfs`` is skipped; the remaining entries are
     numbered from 1 in the ``freq`` column and used as bar weights. The
     chart is written to ``pat_out``.
     """
     df = pd.DataFrame({
         "freq": list(range(1, len(self.sfs))),
         "sfs": np.array(self.sfs[1:])
     })
     # print() call form works on both Python 2 and Python 3
     print(df)
     pl = ggplot(df, aes(x="freq", weight="sfs")) + geom_bar()
     pl.save(pat_out)
Esempio n. 33
0
def plot_bin_dists(df, bin_def="distance_bin <= 500"):
    """Histogram of ``R2`` per ``distance_bin`` for the rows matching ``bin_def``.

    ``bin_def`` is a ``DataFrame.query`` expression and doubles as the chart
    title. The global matplotlib figure size is set as a side effect.
    """
    plt.rcParams['figure.figsize'] = np.array([16, 12]) * 0.65

    chart = gp.ggplot(gp.aes(x='R2'), data=df.query(bin_def))
    chart = chart + gp.geom_histogram(fill='coral')
    chart = chart + gp.facet_wrap("distance_bin")
    chart = chart + gp.theme_seaborn(context='talk')
    chart = chart + gp.ggtitle(bin_def)

    return chart
Esempio n. 34
0
    def histogram(self, dataframe, bins=100, width=None, height=None, palette=None, title='Histogram', values=None,
                  groups=None, legend=True):
        """Filled histogram of the ``values`` column, split by ``groups``.

        ``palette`` falls back to the ``palette`` entry of
        ``__default_options__``. ``width``, ``height`` and ``legend`` are
        accepted but not used here.
        """
        if palette is None:
            palette = self.__default_options__.get('palette', None)

        chart = ggplot(dataframe, aes(x=values, fill=groups, color=groups))
        chart = chart + geom_histogram(alpha=0.6, breaks=bins, position="fill")
        chart = chart + self._palette(palette)
        chart = chart + ggtitle(title)
        chart = chart + scale_y_continuous(name="Count (%s)" % values)
        return chart
Esempio n. 35
0
def signature_data_plot(sd):
    """Scatter of ``not_exp`` against ``set_exp``, colored by ``pearson_r``.

    Colors run from yellow to red; log scales and fixed continuous limits
    are added on both axes.
    """
    import ggplot as gg

    mapping = gg.aes(x='set_exp', y='not_exp', color='pearson_r')
    chart = gg.ggplot(mapping, data=sd)
    chart = chart + gg.geom_point(size=15)
    chart = chart + gg.scale_color_gradient(low='yellow', high='red')
    chart = chart + gg.scale_x_log()
    chart = chart + gg.scale_x_continuous(limits=(0.5, 10000))
    chart = chart + gg.scale_y_log()
    chart = chart + gg.scale_y_continuous(limits=(0.05, 10000))
    return chart
Esempio n. 36
0
def signature_data_plot(sd):
    """Build the signature scatter plot for ``sd``.

    Points are placed at (``set_exp``, ``not_exp``) and colored by
    ``pearson_r`` on a yellow-to-red gradient. Both axes get a log scale
    followed by a continuous scale with fixed limits.
    """
    import ggplot as gg

    chart = gg.ggplot(gg.aes(x='set_exp', y='not_exp', color='pearson_r'), data=sd)

    # Each layer is built right before it is added, in the original order.
    layer_specs = (
        (gg.geom_point, {'size': 15}),
        (gg.scale_color_gradient, {'low': 'yellow', 'high': 'red'}),
        (gg.scale_x_log, {}),
        (gg.scale_x_continuous, {'limits': (0.5, 10000)}),
        (gg.scale_y_log, {}),
        (gg.scale_y_continuous, {'limits': (0.05, 10000)}),
    )
    for factory, options in layer_specs:
        chart = chart + factory(**options)
    return chart
Esempio n. 37
0
def plot_deg_distrib(G):
	"""Print an overlaid histogram of the in-degree and out-degree values of ``G``.

	G -- object passed unchanged to ``wa.degree_distribution``, which is
	     unpacked into three results; only the first two (in-degrees and
	     out-degrees) are plotted.
	"""
	in_deg, out_deg, _ = wa.degree_distribution(G)
	df = pd.DataFrame({'in_deg': pd.Series(in_deg), 'out_deg': pd.Series(out_deg)})
	# Long format: pd.melt names its output columns 'variable' and 'value',
	# which are exactly the columns the aesthetics below refer to.
	df = pd.melt(df)
	p = gg.ggplot(gg.aes(x='value', color='variable', fill='variable'), data=df) + gg.geom_histogram(alpha=0.6, binwidth=1)
	print(p)
Esempio n. 38
0
 def plotAverageLatency(self):
     """Show a bar chart of the average latency of every entry in ``self.data``.

     Each element of ``self.data`` contributes one bar, obtained from its
     ``averageLatency()`` method; bars are numbered from 1 in list order.
     """
     latencies = [device.averageLatency() for device in self.data]
     frame = pandas.DataFrame({
         "device": range(1, len(latencies) + 1),
         "average": latencies,
     })
     mapping = ggplot.aes(x="device", weight="average")
     chart = (
         ggplot.ggplot(mapping, frame)
         + ggplot.labs(title="Average Latency Per Device")
         + ggplot.ylab("Average Latency (ms)")
         + ggplot.xlab("Device Number")
         + ggplot.geom_bar(stat="identity")
     )
     chart.show()
Esempio n. 39
0
	def plotAverageLatency(self):
		"""Display the per-device average latency as a bar chart.

		One bar is drawn for each element of ``self.data`` using the value
		returned by its ``averageLatency()`` method; devices are numbered
		starting at 1 in list order.
		"""
		per_device = [entry.averageLatency() for entry in self.data]
		device_ids = range(1, len(per_device) + 1)
		table = pandas.DataFrame({"device": device_ids, "average": per_device})

		chart = ggplot.ggplot(ggplot.aes(x="device", weight="average"), table)
		chart = chart + ggplot.labs(title="Average Latency Per Device")
		chart = chart + ggplot.ylab("Average Latency (ms)")
		chart = chart + ggplot.xlab("Device Number")
		chart = chart + ggplot.geom_bar(stat="identity")
		chart.show()
def plot_weather_data(df):  # older version
	"""Return a line plot of summed ``EXITSn_hourly`` per ``DATEn`` value.

	df -- DataFrame with ``DATEn`` and ``EXITSn_hourly`` columns. Its
	      ``DATEn`` column is converted to datetimes in place.

	NOTE(review): the title and x label talk about hours and rain although
	the x axis is ``DATEn`` -- confirm the intended wording.
	"""
	df.DATEn = pd.to_datetime(df.DATEn)
	grouped = df.groupby('DATEn', as_index=False).sum()
	grouped.index.name = 'DATEn'

	plot = gp.ggplot(grouped, gp.aes(x='DATEn', y='EXITSn_hourly'))
	plot = plot + gp.geom_line()
	plot = plot + gp.ggtitle('Subway Ridership by Hour vs Raining')
	plot = plot + gp.xlab('Hour of the Day')
	plot = plot + gp.ylab('Subway Entries')
	return plot
Esempio n. 41
0
def plot_deg_distrib(G):
    """Print an overlaid histogram of the in-degree and out-degree values of ``G``.

    G -- object passed unchanged to ``wa.degree_distribution``, which is
         unpacked into three results; only the first two (in-degrees and
         out-degrees) are plotted.
    """
    in_deg, out_deg, _ = wa.degree_distribution(G)
    df = pd.DataFrame({'in_deg': pd.Series(in_deg),
                       'out_deg': pd.Series(out_deg)})
    # Long format: pd.melt names its output columns 'variable' and 'value',
    # which are exactly the columns the aesthetics below refer to.
    df = pd.melt(df)
    p = gg.ggplot(gg.aes(x='value', color='variable', fill='variable'),
                  data=df) + gg.geom_histogram(alpha=0.6, binwidth=1)
    print(p)
def plot_weather_data(df):
	"""Return a line plot of summed ``EXITSn_hourly`` for each ``DATEn`` value.

	df -- DataFrame with ``DATEn`` and ``EXITSn_hourly`` columns. Its
	      ``DATEn`` column is converted to datetimes in place.
	"""
	df.DATEn = pd.to_datetime(df.DATEn)
	grouped = df.groupby('DATEn', as_index=False).sum()
	grouped.index.name = 'DATEn'

	plot = gp.ggplot(grouped, gp.aes(x='DATEn', y='EXITSn_hourly'))
	layers = (
		gp.geom_line(),
		gp.ggtitle('Subway Ridership by Day'),
		gp.xlab('Date'),
		gp.ylab('Exits'),
	)
	for layer in layers:
		plot += layer
	return plot
Esempio n. 43
0
	def area_chart(self, conn, column1 , column2, table_chosen, title):
		"""Prompt for a y-range, then print an area chart of the selected table data.

		conn         -- connection forwarded to ``dfile.double_selector``.
		column1      -- first column to select (used for the query only).
		column2      -- second column to select; mapped to the x axis.
		table_chosen -- table name forwarded to ``dfile.double_selector``.
		title        -- plot title.

		The minimum and maximum y values are read interactively from stdin
		and converted with ``float``.
		"""
		data_df = dfile.double_selector(conn=conn, table=table_chosen, col1=column1, col2=column2)

		lower = float(input("Enter the minimum value that should be plotted:  "))
		upper = float(input("Enter the maximum value that should be plotted:  "))

		mapping = aes(x=column2, ymin=lower, ymax=upper)
		area_plot = ggplot(mapping, data=data_df)
		area_plot = area_plot + geom_area()
		area_plot = area_plot + theme_gray()
		area_plot = area_plot + labs(title=title)
		print(area_plot)
Esempio n. 44
0
def lineplot(hr_year_csv):
    """Return a red point-and-line plot of home runs per year.

    hr_year_csv -- path (or anything ``pandas.read_csv`` accepts) of a CSV
                   with ``yearID`` and ``HR`` columns.

    ``yearID`` is on the x axis and ``HR`` on the y axis; the axis labels
    are set to match that mapping.
    """
    df = pandas.read_csv(hr_year_csv)
    gg = (
        gp.ggplot(df, gp.aes(x="yearID", y="HR"))
        + gp.geom_point(color="red")
        + gp.geom_line(color="red")
        + gp.ggtitle("Homeruns by Year")
        + gp.xlab("Year")
        + gp.ylab("Homeruns")
    )
    return gg
Esempio n. 45
0
 def plot(self):
     """Print a scatter plot of the data points together with the cluster centers.

     Works on a copy of ``self.data``: the first ``self.num_clusters``
     entries of ``self.cluster_centers`` are appended as extra rows, and a
     ``class_label`` column is added holding ``self.class_label`` for the
     data rows and the literal "center" for the appended rows.
     """
     frame = self.data.copy()
     for center_idx in range(self.num_clusters):
         # Append at the label equal to the current row count.
         frame.loc[frame.shape[0]] = self.cluster_centers[center_idx]

     labels = list(self.class_label)
     labels = labels + ["center"] * self.num_clusters
     frame["class_label"] = labels

     mapping = gg.aes(x= "x1", y="x2", colour="class_label")
     p = gg.ggplot(frame, mapping)
     p = p + gg.geom_point()
     p = p + gg.ggtitle("EM cluster assignments")
     print(p)
Esempio n. 46
0
def lineplot_compare(filename):
    """Return a point-and-line plot of home runs per year, one colour per team.

    filename -- path (or anything ``pd.read_csv`` accepts) of a CSV with
                ``yearID``, ``HR`` and ``teamID`` columns.

    ``yearID`` is on the x axis and ``HR`` on the y axis; the axis labels
    are set to match that mapping.
    """
    df = pd.read_csv(filename)
    gg = (
        gp.ggplot(df, gp.aes(x="yearID", y="HR", color="teamID"))
        + gp.geom_point()
        + gp.geom_line()
        + gp.ggtitle("Homeruns by Year by Team")
        + gp.xlab("Year")
        + gp.ylab("Homeruns")
    )
    return gg
Esempio n. 47
0
def visualize_segmentation(X, var):
    '''
    Build a ggplot scatter of ``var`` over the index of ``X``, coloured by segment.

    X   - DataFrame providing the ``var`` column and a ``segmento`` column;
          its index supplies the x values.
    var - name of the column to plot on the y axis.

    Returns the plot object (nothing is printed here).
    '''
    frame = pandas.DataFrame(index = X.index)
    frame['fecha'] = X.index.values
    frame[var] = X[var]
    # Segment ids are turned into strings before being mapped to colour.
    frame['Segmento'] = X['segmento'].astype(str)

    title = 'Segmentacion de la variable "' + var + '"'
    plot = ggplot(aes(x="fecha", y=var, color="Segmento"), frame)
    plot = plot + geom_point()
    plot = plot + xlab("Fecha")
    plot = plot + ylab(var)
    plot = plot + ggtitle(title)
    # NOTE(review): an all-zero colour presumably hides the x tick labels -- confirm.
    plot = plot + theme(axis_text_x  = element_text(color=[0,0,0,0]))
    return plot
Esempio n. 48
0
    def heatmap(self, dataframe, y=None, x=None, values=None, width=None, height=None,
                max_color=None, min_color=None, mid_color=None, title='Heatmap'):
        """Return a tile plot of ``values`` over ``x`` and ``y``.

        dataframe -- data passed to ``ggplot``.
        x, y      -- columns mapped to the axes.
        values    -- column mapped to the tile fill.
        max_color, min_color, mid_color
                  -- gradient colours; each one that is None is looked up
                     under the same key in ``self.__default_options__``.
        width     -- resolved from the defaults when None, but not used
                     further in this method.

        ``height`` and ``title`` are accepted but not used here.
        """
        if max_color is None:
            max_color = self.__default_options__.get('max_color', None)
        if min_color is None:
            min_color = self.__default_options__.get('min_color', None)
        if mid_color is None:
            mid_color = self.__default_options__.get('mid_color', None)
        if width is None:
            width = self.__default_options__.get('width', None)

        palette = gradient(min_color, mid_color, max_color)
        plot = ggplot(dataframe, aes(x=x, y=y, fill=values))
        plot = plot + geom_tile()
        plot = plot + self._palette(palette, "div")
        return plot
Esempio n. 49
0
def _ggplot(df, out_file):
    """Save a faceted bar plot of ``value.floor`` per caller using ggplot.

    XXX Not yet functional

    df       -- DataFrame whose ``variant.type``, ``category`` and
                ``caller`` columns are relabelled in place before plotting.
    out_file -- destination passed to ``gg.ggsave``.
    """
    import ggplot as gg

    # Unknown variant types / categories raise KeyError; unknown callers become None.
    relabel = (
        ("variant.type", lambda key: vtype_labels[key]),
        ("category", lambda key: cat_labels[key]),
        ("caller", lambda key: caller_labels.get(key, None)),
    )
    for column, translate in relabel:
        df[column] = [translate(value) for value in df[column]]

    p = gg.ggplot(df, gg.aes(x="caller", y="value.floor"))
    p = p + gg.geom_bar()
    p = p + gg.facet_wrap("variant.type", "category")
    p = p + gg.theme_seaborn()
    gg.ggsave(p, out_file)
Esempio n. 50
0
    def flux_variability_analysis(self, dataframe, grid=None, width=None, height=None,
                                  title=None, palette=None, x_axis_label=None,
                                  y_axis_label=None):
        """Return ``aes`` called with ``dataframe`` as its ``data`` keyword.

        Only ``dataframe`` is used; every other parameter is accepted but
        ignored by this body.
        """
        mapping = aes(data=dataframe)
        return mapping
Esempio n. 51
0
def prob231cd_recover(initialization):
    """Reload pickled results for one initialization, re-save the plot, and summarise objectives.

    initialization -- string appended to ``"results/prob231cd"`` to form the
                      base file name; also used in the plot title.

    Reads ``<base>.pkl``, which must hold a sequence whose first three items
    are the plot DataFrame, the list of k-means call objects, and the number
    of trials. Writes the scatter plot to ``<base>.png``.

    Returns a dict with the ``mean``, ``sd`` and ``min`` of the ``obj``
    attribute over the first ``num_trials`` call objects.
    """
    filename = "results/prob231cd" + initialization
    # NOTE: unpickling can execute arbitrary code; only load trusted result files.
    # The context manager closes the file even if loading fails.
    with open(filename + ".pkl", "rb") as pickle_file:
        tuple_in = pkl.load(pickle_file)
    prob231c_plot_df = tuple_in[0]
    kmcalls = tuple_in[1]
    num_trials = tuple_in[2]
    p = gg.ggplot(prob231c_plot_df, gg.aes(x= "x1", y="x2", colour="data")) + \
        gg.geom_point() + gg.ggtitle(initialization + " initialization")
    gg.ggsave(filename + ".png", plot = p)
    obj = [kmcalls[i].obj for i in range(num_trials)]
    obj_stats = {"mean":np.mean(obj), "sd":np.std(obj), "min":np.min(obj)}
    return obj_stats
Esempio n. 52
0
def plotHistogramMeans(hist,fileName):
  """Plot every row of ``hist`` as a line in its own facet and save the image.

  hist     -- 2-D array with a ``shape``; each row becomes one facet
              (rows presumably are cluster means -- confirm with callers).
  fileName -- file name appended to ``'./IMG/'`` for the saved plot.
  """
  num_clust = hist.shape[0]
  # Column vector of row ids 0..num_clust-1, prepended as the 'ID' column.
  IDS = np.arange(num_clust).reshape(num_clust,1)

  histD = np.concatenate((IDS,hist),axis=1)

  # list(...) keeps the concatenation valid where range() is not a list.
  Data = pd.DataFrame(histD,columns = ['ID']+list(range(0,hist.shape[1])))
  # Long format: one (ID, variable, value) row per original cell.
  Melted = pd.melt(Data,id_vars=['ID'])
  pv =  ggplot.ggplot( ggplot.aes(x='variable',y='value'),data=Melted) +  ggplot.geom_line()  + ggplot.facet_wrap("ID")
  print("Saving mean histograms")
  ggplot.ggsave(pv,'./IMG/'+fileName)
Esempio n. 53
0
def _ggplot(df, out_file):
    """Write a faceted bar plot of ``value.floor`` by caller via ggplot.

    XXX Not yet functional

    df       -- DataFrame; its ``variant.type``, ``category`` and ``caller``
                columns are overwritten in place with display labels.
    out_file -- target handed to ``gg.ggsave``.
    """
    import ggplot as gg

    def _strict(table):
        # Lookup that raises KeyError for unknown keys.
        return lambda key: table[key]

    def _lenient(table):
        # Lookup that yields None for unknown keys.
        return lambda key: table.get(key, None)

    for column, lookup in (("variant.type", _strict(vtype_labels)),
                           ("category", _strict(cat_labels)),
                           ("caller", _lenient(caller_labels))):
        df[column] = [lookup(raw) for raw in df[column]]

    base = gg.ggplot(df, gg.aes(x="caller", y="value.floor"))
    p = (base + gg.geom_bar()
         + gg.facet_wrap("variant.type", "category")
         + gg.theme_seaborn())
    gg.ggsave(p, out_file)
Esempio n. 54
0
    def scatter(self, dataframe, x=None, y=None, width=None, height=None, color=None, title='Scatter', xaxis_label=None,
                yaxis_label=None):
        """Return a scatter plot of ``y`` against ``x``.

        dataframe   -- data passed to ``ggplot``.
        x, y        -- columns mapped to the axes.
        color       -- point colour; when None the 'palette' entry of
                       ``self.__default_options__`` is used.
        title       -- plot title.
        xaxis_label -- optional name for the x axis.
        yaxis_label -- optional name for the y axis.

        ``width`` and ``height`` are accepted but not used here.
        """
        color = self.__default_options__.get('palette', None) if color is None else color

        plot = ggplot(dataframe, aes(x, y)) + geom_point(color=color, alpha=0.6) + ggtitle(title)
        if xaxis_label:
            plot += scale_x_continuous(name=xaxis_label)
        if yaxis_label:
            # The y scale takes the y label.
            plot += scale_y_continuous(name=yaxis_label)

        return plot
Esempio n. 55
0
    def _plot_avg_accuracy(self, include_ci=True, summary=None):
        """Return a line plot of average accuracy per round.

        include_ci -- when true, add a shaded band between ``ymin`` and
                      ``ymax``, the 0.95 interval of a Beta(successes + 1,
                      failures + 1) distribution where successes is the
                      average accuracy times ``self.n_sim``.
        summary    -- precomputed summary; ``self.summary()`` is called when
                      None. Its ``['Accuracy']['Avg']`` entry supplies the
                      per-round values.
        """
        import ggplot as gg

        if summary is None:
            summary = self.summary()

        df = pd.DataFrame({'AverageAccuracy': summary['Accuracy']['Avg'], 'Round': range(self.n_rounds)})

        if not include_ci:
            base = gg.ggplot(gg.aes(x='Round', y='AverageAccuracy'), data=df)
            return base + gg.geom_line()

        from scipy import stats
        successes = df.AverageAccuracy * self.n_sim
        failures = self.n_sim - successes
        bounds = stats.beta(successes + 1, failures + 1).interval(0.95)

        df['ymin'] = bounds[0]
        df['ymax'] = bounds[1]
        mapping = gg.aes(x='Round', y='AverageAccuracy', ymin='ymin', ymax='ymax')
        banded = gg.ggplot(mapping, data=df) + gg.geom_area(alpha=0.5)
        return banded + gg.geom_line()
Esempio n. 56
0
def prob231b(initialization = "regular"):
    """Run k-means for several cluster counts and save one scatter plot per run.

    initialization -- forwarded to ``KmeansCall`` as its third argument.

    For each k in 2, 3, 5, 10, 15, 20 a ``KmeansCall`` on the module-level
    ``features_only`` data is run quietly, its data copied and labelled with
    the resulting ``class_label``, and the plot written to
    ``results/k=<k>_<datestring>.png``.
    """
    kmcalls = []
    for num_clusters in [2,3,5,10,15,20]:
        call = KmeansCall(features_only, num_clusters, initialization)
        kmcalls.append(call)
        call.run_kmeans(verbose = False)

        df_to_plot = call.data.copy()
        df_to_plot["class_label"] = list(call.class_label)

        mapping = gg.aes(x= "x1", y="x2", colour="class_label")
        p = gg.ggplot(df_to_plot, mapping)
        p = p + gg.geom_point()
        p = p + gg.ggtitle("Synth. data, k=" + str(num_clusters))

        metadata = "k=" + str(num_clusters) + "_" + datestring
        gg.ggsave(filename = "results/" + metadata +".png", plot = p)
Esempio n. 57
0
def lineplot_compare(filename):  # Cleaner version with string vars
    """Return a point-and-line plot of home runs per year, one colour per team.

    filename -- path (or anything ``pd.read_csv`` accepts) of a CSV with
                ``yearID``, ``HR`` and ``teamID`` columns.

    ``yearID`` is on the x axis and ``HR`` on the y axis; the axis labels
    are set to match that mapping.
    """
    df = pd.read_csv(filename)
    p_title = "Homeruns by Year by Team"
    # Labels follow the aesthetics: x is yearID, y is HR.
    p_xlab = "Year"
    p_ylab = "Homeruns"
    gg = (
        gp.ggplot(df, gp.aes(x="yearID", y="HR", color="teamID"))
        + gp.geom_point()
        + gp.geom_line()
        + gp.ggtitle(p_title)
        + gp.xlab(p_xlab)
        + gp.ylab(p_ylab)
    )
    return gg
Esempio n. 58
0
def main(log):
    """Record audio until Ctrl-C, then build a plot of the per-chunk maxima.

    log -- logger-like object; only its ``debug`` method is used.

    Opens a PyAudio input stream configured by the module-level ``FORMAT``,
    ``CHANNELS``, ``SAMPLE_RATE`` and ``CHUNK_SIZE`` constants and reads
    chunks forever, keeping the maximum sample of each chunk. On
    ``KeyboardInterrupt`` the stream is closed, a line plot of the maxima
    is built, a debugger session is started so the plot can be inspected,
    and the process exits with status 1. Any other exception propagates.
    """
    log.debug('initializing app')
    p = pyaudio.PyAudio()

    # Open audio input stream
    stream = p.open(format = FORMAT,
        channels = CHANNELS,
        rate = SAMPLE_RATE,
        input = True,
        frames_per_buffer = CHUNK_SIZE)

    log.debug('opened stream <{}>'.format(stream))
    log.debug('reading audio input at rate <{}>'.format(SAMPLE_RATE))

    recorded = []

    # The whole loop sits inside the try so that Ctrl-C is handled no matter
    # which statement it interrupts, not only the stream read.
    loops = 0
    try:
        while True:
            loops += 1
            if loops % 25 == 0:
                log.debug('recorded <{}> loops'.format(loops))

            # Decode one chunk of raw bytes as 32-bit floats (no copy needed,
            # the samples are only read).
            data = stream.read(CHUNK_SIZE)
            decoded = np.frombuffer(data, dtype=np.float32)
            recorded.append(max(decoded))

    # On <C-c>, plot max of recorded data
    except KeyboardInterrupt:
        log.debug('closing stream and ending PyAudio')
        stream.close()
        p.terminate()
        df = pd.DataFrame(columns = ['mx', 'time'])
        df['mx'] = recorded
        df['time'] = range(len(recorded))
        plt = ggplot.ggplot(ggplot.aes(x='time', y='mx'), data=df) +\
                    ggplot.geom_line()
        # Intentional breakpoint: `plt` is only reachable from the debugger.
        pdb.set_trace()
        log.debug('quitting')
        sys.exit(1)
Esempio n. 59
0
def prob231g():
    """Run EM with three clusters and save the likelihood curve, cluster plot, and fitted object.

    Uses the module-level ``features_only`` and ``labels_only`` data.
    Writes three files under ``results/``:
      * ``prob231g_loglike.png``  -- ``log_likelihood_record`` over iterations
      * ``prob231g_clusters.png`` -- scatter of x1/x2 coloured by class label
      * ``prob231g_a.pkl``        -- the pickled ``EMCall`` object
    """
    filename = "results/prob231g"

    num_clusters_231g = 3
    emcall = EMCall(features_only, labels_only, num_clusters_231g)
    emcall.run_em()

    plt.plot(emcall.log_likelihood_record)
    plt.title("Likelihood over EM iterations")
    plt.savefig(filename + "_loglike.png")

    prob231g_plot_df = emcall.data.copy()
    prob231g_plot_df["class_label"] = [label for label in emcall.class_label]
    p = gg.ggplot(prob231g_plot_df, gg.aes(x= "x1", y="x2", colour="class_label")) + \
        gg.geom_point() + gg.ggtitle("EM cluster assignments")
    gg.ggsave(filename + "_clusters.png", plot = p)

    # Close the file deterministically so the pickle is fully flushed to disk.
    with open(filename + "_a.pkl", "wb") as pickle_file:
        pkl.dump(obj = emcall, file = pickle_file)
    print("Done with 231g.")
    return
Esempio n. 60
0
def data_output(data, chart_title):
		print "Good News! You're data has been returned. I'm happy to show it to you."
		print "Just tell me how you want it - Table or Line Graph?"

		data_output = raw_input("Choose table or line > ")

		if data_output[0].lower() == "t":
			print "Ok, here's your data."
			print data
		elif data_output[0] == "l" or data_output[0].lower() =="g":
			import ggplot as gg 

			plot = gg.ggplot(gg.aes(x='Month, Year', y='Value'), data=data) + \
    			gg.geom_point(color='black') + \
    			gg.geom_line(color='green') + \
    			gg.ggtitle(chart_title) + \
    			gg.xlab("Month, Year") + \
    			gg.ylab("Value") 
    			gg.scale_x_date(breaks = gg.date_breaks('1 month'), labels= gg.date_format("%B"))

			print (plot + gg.theme_xkcd())