def scat(**kwds):
    """Draw a scatter-plot matrix of the module-level ``df``.

    All keyword arguments are forwarded unchanged to
    ``pandas.plotting.scatter_matrix``, and whatever that call returns is
    returned to the caller. ``df`` is looked up at call time, so it must be
    defined before ``scat`` is invoked.
    """
    # scatter_matrix is provided by pandas; pyplot has no function of that name.
    return pd.plotting.scatter_matrix(df, **kwds)
# Bimodal sample: concatenate draws from two normal distributions.
comp1 = np.random.normal(0, 1, size=200)  # N(0, 1)
comp2 = np.random.normal(10, 2, size=200)  # N(10, 4): std 2, i.e. variance 4
values = pd.Series(np.concatenate([comp1, comp2]))
# `density=True` is the current spelling of the removed `normed=True` keyword;
# it scales the histogram to unit area so it lines up with the KDE overlay.
values.hist(bins=100, alpha=0.3, color='g', density=True)
values.plot(kind='kde', style='r--')

# create a scatter plot
macro = pd.read_csv(
    '/Users/pirminlemberger/PycharmProjects/PythonBook/text files/ch08/macrodata.csv'
)
data = macro[['cpi', 'm1', 'tbilrate', 'unemp']]
# Work with period-over-period changes of the log series; the first row is
# NaN after diff() and is dropped.
trans_data = np.log(data).diff().dropna()
plt.scatter(trans_data['m1'], trans_data['unemp'])
plt.title('Changes in log %s vs. log %s' % ('m1', 'unemp'))

# make a scatter plot for each pair of variables in a DataFrame to show their correlations
# (scatter_matrix lives in pandas.plotting; pyplot has no such function, so a
# single pandas call draws the whole grid.)
pd.plotting.scatter_matrix(trans_data, diagonal='kde', color='k', alpha=0.3)

# ===============================================
# chapter 9 Data Aggregation and Group Operations
# ===============================================

# apply an aggregation function to a DataFrame
df = pd.DataFrame({
    'key1': ['a', 'a', 'b', 'b', 'a'],
    'key2': ['one', 'two', 'one', 'two', 'one'],
    'data1': np.random.randn(5),
    'data2': np.random.randn(5),
})