def trellis_plot_histogram():
    """Render histograms of TIPS_DATA on a trellis grid keyed by 'sex' and 'smoker'.

    An ``RPlot`` is built over the module-level ``TIPS_DATA`` with
    ``total_bill`` as x and ``tip`` as y. A ``TrellisGrid`` on the ``sex``
    and ``smoker`` columns and a ``GeomHistogram`` layer are added, in that
    order, and the result is rendered onto the current matplotlib figure.
    """
    trellis = rplot.RPlot(TIPS_DATA, x='total_bill', y='tip')

    # Grid layer first, histogram geometry second.
    facets = rplot.TrellisGrid(['sex', 'smoker'])
    trellis.add(facets)
    histogram = rplot.GeomHistogram()
    trellis.add(histogram)

    # Draw onto whichever figure is currently active.
    target = plt.gcf()
    trellis.render(target)
def cond_hists(df, plot_cols, grid_col):
    """Plot one conditioned-histogram figure per column and return ``grid_col``.

    For every name in ``plot_cols`` a new 14x4 figure is created and an
    ``RPlot`` histogram of that column is rendered with a ``TrellisGrid``
    keyed by ``['.', grid_col]``.

    Args:
        df: Data handed straight to ``rplot.RPlot``.
        plot_cols: Iterable of column names; one figure is drawn per entry.
        grid_col: Column name used for the trellis grid and in each title.

    Returns:
        ``grid_col``, unchanged.
    """
    import matplotlib.pyplot as plt
    import pandas.tools.rplot as rplot

    for column in plot_cols:
        # New figure, cleared, with an axes object that carries the title.
        figure = plt.figure(figsize=(14, 4))
        figure.clf()
        axes = figure.gca()

        # Build the plot: histogram of `column` on a grid keyed by grid_col.
        # NOTE(review): '.' is presumably rplot's "no column" placeholder
        # for the y axis and the first grid dimension -- confirm.
        trellis = rplot.RPlot(df, x=column, y='.')
        facets = rplot.TrellisGrid(['.', grid_col])
        trellis.add(facets)
        histogram = rplot.GeomHistogram()
        trellis.add(histogram)

        heading = 'Histograms of ' + column + ' conditioned by ' + grid_col + '\n'
        axes.set_title(heading)

        # No figure is passed here; presumably rplot falls back to the
        # current figure (the one created above) -- confirm.
        trellis.render()

    return grid_col
# Example #3
# 0
def main():
    """Plot histograms of project focus areas split by the ``is_exciting`` outcome.

    Steps:
      1. Read ``projects.csv`` and ``outcomes.csv`` from ``../../dataset/``
         and merge them on ``projectid``.
      2. Fill missing values with ``''`` and keep only ``projectid``,
         ``primary_focus_area`` and ``is_exciting``.
      3. Add ``cat_primary_focus_area``: the integer codes returned by
         ``pd.factorize`` for ``primary_focus_area``.
      4. Print the resulting frame, then render a trellis histogram of the
         codes on a grid keyed by ``['.', 'is_exciting']`` and show it.

    Relies on ``pd``, ``plt``, ``rplot`` and ``pylab`` being available at
    module level. Returns ``None``.
    """
    # Context managers make sure both CSV handles are closed, even when
    # parsing raises; the files are opened exactly as before (text mode).
    with open('../../dataset/projects.csv', 'r') as projects_file:
        projects_all = pd.read_csv(projects_file)
    with open('../../dataset/outcomes.csv', 'r') as outcomes_file:
        outcomes = pd.read_csv(outcomes_file)

    projects = pd.merge(projects_all, outcomes, on='projectid')

    # Restrict to the three columns of interest after blanking out NaNs.
    projects1 = pd.DataFrame(
        projects.fillna(''),
        columns=['projectid', 'primary_focus_area', 'is_exciting'])

    # factorize() returns (codes, uniques); only the integer codes are kept
    # so the categorical focus area can be used as a numeric x value.
    projects1['cat_primary_focus_area'] = pd.factorize(
        projects1.primary_focus_area)[0]
    print(projects1)

    plt.figure()
    plot = rplot.RPlot(
        projects1,
        x='cat_primary_focus_area',
    )
    plot.add(rplot.TrellisGrid(['.', 'is_exciting']))
    plot.add(rplot.GeomHistogram())
    plot.render(plt.gcf())
    pylab.show()
# Pie chart of the number of rows per tweet_type.
# NOTE(review): `plt.pyplot.pie` only resolves if `plt` is the top-level
# matplotlib package, whereas `plt.figure()` / `plt.gcf()` further down need
# `plt` to be matplotlib.pyplot -- confirm which alias is intended.
plt.pyplot.pie(tweetdf.tweet_type.value_counts())

# Cross-tabulate tweet_day against tweet_type and draw it as stacked bars.
crosstabs = pd.crosstab(tweetdf.tweet_day, tweetdf.tweet_type)
crosstabs.plot(kind='bar', stacked=True)
# Histogram of a single column using 25 bins.
df['col'].hist(bins=25)

# Scatter-plot matrix (SPLOM); `diagonal` can be 'hist' instead of 'kde'.
pd.scatter_matrix(trans_data, diagonal = 'kde', color = 'k', alpha=0.3)

# Trellis plot: histograms of tips.csv (x='total_bill', y='tip') on a grid
# keyed by the 'sex' and 'smoker' columns, rendered onto a new figure.
tips_data = pd.read_csv('tips.csv')
import pandas.tools.rplot as rplot
plt.figure()
plot = rplot.RPlot(tips_data, x='total_bill', y='tip')
plot.add(rplot.TrellisGrid(['sex', 'smoker']))
plot.add(rplot.GeomHistogram())
plot.render(plt.gcf())

#---------------------------------------------------------------------------
# Stats - from scipy
# NOTE(review): the calls below go through the `np` alias (presumably numpy),
# not scipy -- confirm the heading.


# Some useful functions.
# Random array of shape (4, 3); the result is not stored here.
np.random.randn(4, 3)
years = range(1880,2011)  # 1880 up to and including 2010

# NOTE(review): `shape` is not defined anywhere in the visible code; this is
# presumably a reminder of the `.shape` attribute / `np.shape` -- confirm.
shape()
# True when every value of x is within np.allclose's tolerance of 1; handy
# for checking that series values are close to, but not exactly, 1.
np.allclose(x, 1)