def trellis_plot_histogram():
    """Render a trellis plot of histograms for ``TIPS_DATA``.

    A trellis plot arranges data in a rectangular grid of panels according
    to the values of selected attributes.  The plot is built with
    ``x='total_bill'`` and ``y='tip'``, gridded on the ``sex`` and ``smoker``
    attributes, given a histogram layer, and rendered onto the figure
    returned by ``plt.gcf()``.
    """
    chart = rplot.RPlot(TIPS_DATA, x='total_bill', y='tip')

    # Layers are attached in order: the grid first, then the histogram.
    layers = (
        rplot.TrellisGrid(['sex', 'smoker']),
        rplot.GeomHistogram(),
    )
    for layer in layers:
        chart.add(layer)

    target_figure = plt.gcf()
    chart.render(target_figure)
def cond_hists(df, plot_cols, grid_col):
    """Draw one conditioned-histogram figure per column in ``plot_cols``.

    Args:
        df: Data handed to ``rplot.RPlot`` (presumably a pandas DataFrame --
            confirm against callers).
        plot_cols: Iterable of column names; each one gets its own figure.
            Names are concatenated into the title, so they must be strings.
        grid_col: Column name used as the second ``TrellisGrid`` entry and
            mentioned in each title.

    Returns:
        ``grid_col``, unchanged.
    """
    import matplotlib.pyplot as plt
    import pandas.tools.rplot as rplot

    for column in plot_cols:
        # Start from a new, cleared 14x4 figure and keep its axes for the title.
        figure = plt.figure(figsize=(14, 4))
        figure.clf()
        axes = figure.gca()

        # Build the plot: grid layer first, then the histogram layer.
        trellis = rplot.RPlot(df, x=column, y='.')
        for layer in (rplot.TrellisGrid(['.', grid_col]),
                      rplot.GeomHistogram()):
            trellis.add(layer)

        title = 'Histograms of ' + column + ' conditioned by ' + grid_col + '\n'
        axes.set_title(title)

        # No figure is passed, so render() chooses its own target.
        trellis.render()

    return grid_col
def main():
    """Plot histograms of project focus areas gridded by ``is_exciting``.

    Reads ``projects.csv`` and ``outcomes.csv`` from ``../../dataset/``,
    merges them on ``projectid``, keeps the ``projectid``,
    ``primary_focus_area`` and ``is_exciting`` columns (missing values
    replaced by empty strings), adds an integer-coded
    ``cat_primary_focus_area`` column, prints the resulting frame, and
    renders a trellis histogram of the coded column on a new figure before
    showing it.
    """
    # Context managers guarantee the CSV handles are closed once parsed;
    # the files are opened with the same mode and in the same order as before.
    with open('../../dataset/projects.csv', 'r') as projects_file:
        projects_all = pd.read_csv(projects_file)
    with open('../../dataset/outcomes.csv', 'r') as outcomes_file:
        outcomes = pd.read_csv(outcomes_file)

    projects = pd.merge(projects_all, outcomes, on='projectid')
    projects1 = pd.DataFrame(
        projects.fillna(''),
        columns=['projectid', 'primary_focus_area', 'is_exciting'])

    # factorize() returns (codes, uniques); only the integer codes are kept.
    projects1['cat_primary_focus_area'] = pd.factorize(
        projects1.primary_focus_area)[0]
    print(projects1)

    plt.figure()
    plot = rplot.RPlot(
        projects1,
        x='cat_primary_focus_area',
    )
    plot.add(rplot.TrellisGrid(['.', 'is_exciting']))
    plot.add(rplot.GeomHistogram())
    plot.render(plt.gcf())
    pylab.show()
# Scratch collection of plotting / stats snippets.
# NOTE(review): `tweetdf`, `df`, `trans_data` and `x` are not defined in the
# visible code -- presumably placeholders for the reader's own data; confirm.

# Pie chart of the value counts of tweet_type.
# NOTE(review): `plt.pyplot.pie` implies `plt` is the top-level matplotlib
# package here, while `plt.figure()` / `plt.gcf()` below imply `plt` is
# matplotlib.pyplot -- confirm which import this file actually uses.
plt.pyplot.pie(tweetdf.tweet_type.value_counts())

# Stacked bar chart of the tweet_day x tweet_type cross-tabulation.
crosstabs = pd.crosstab(tweetdf.tweet_day, tweetdf.tweet_type)
crosstabs.plot(kind='bar', stacked=True)

# Histogram of a single column with 25 bins.
df['col'].hist(bins=25)

# splom (scatter-plot matrix) - diagonal can be 'hist'
pd.scatter_matrix(trans_data, diagonal = 'kde', color = 'k', alpha=0.3)

# trellis: histograms of the tips data gridded by 'sex' and 'smoker',
# rendered onto the figure returned by plt.gcf().
tips_data = pd.read_csv('tips.csv')
import pandas.tools.rplot as rplot
plt.figure()
plot = rplot.RPlot(tips_data, x='total_bill', y='tip')
plot.add(rplot.TrellisGrid(['sex', 'smoker']))
plot.add(rplot.GeomHistogram())
plot.render(plt.gcf())

#---------------------------------------------------------------------------
# Stats - from scipy

# some useful functions
# randn(4, 3): presumably `np` is numpy, giving a 4x3 array of random samples.
np.random.randn(4, 3)
# 1880 through 2010 inclusive (the range end is exclusive).
years = range(1880,2011)
# NOTE(review): bare `shape()` is not defined in the visible code -- looks like
# a reminder about a `.shape` attribute rather than a working call; confirm.
shape()

# check that series values are close to but not exactly 1
np.allclose(x, 1)