Esempio n. 1
0
 def scat(**kwds):
     """Build a scatter matrix of ``df`` and return the plotting call's result.

     ``df`` and ``plt`` are not parameters; both are looked up in the
     enclosing scope each time the function is called.  Every keyword
     argument is forwarded unchanged to ``plt.scatter_matrix``.
     """
     # NOTE(review): ``plt`` has to be a module exposing ``scatter_matrix``;
     # matplotlib.pyplot does not provide one -- confirm the import.
     result = plt.scatter_matrix(df, **kwds)
     return result
Esempio n. 2
0
 def scat(**kwds):
     """Build a scatter matrix of ``df`` and return the plotting call's result.

     ``df`` and ``plt`` are not parameters; both are looked up in the
     enclosing scope each time the function is called.  Every keyword
     argument is forwarded unchanged to ``plt.scatter_matrix``.
     """
     # NOTE(review): ``plt`` has to be a module exposing ``scatter_matrix``;
     # matplotlib.pyplot does not provide one -- confirm the import.
     result = plt.scatter_matrix(df, **kwds)
     return result
Esempio n. 3
0
# Bimodal sample: 200 draws from each of two normal distributions.
# np.random.normal takes the standard deviation as its second argument,
# so the second component has variance 2 ** 2 = 4.
comp1 = np.random.normal(0, 1, size=200)  # N(0, 1)
comp2 = np.random.normal(10, 2, size=200)  # N(10, 4)
values = pd.Series(np.concatenate([comp1, comp2]))

# Normalised histogram of the sample, then a kernel density estimate of the
# same data drawn with the 'r--' (red dashed) line style.
# `density=True` replaces `normed=True`: the keyword is forwarded to
# matplotlib's hist, which deprecated `normed` and later removed it.
values.hist(bins=100, alpha=0.3, color='g', density=True)
values.plot(kind='kde', style='r--')

# create a scatter plot
# NOTE: the CSV path below is absolute and specific to one machine.
macro = pd.read_csv(
    '/Users/pirminlemberger/PycharmProjects/PythonBook/text files/ch08/macrodata.csv'
)
data = macro[['cpi', 'm1', 'tbilrate', 'unemp']]
# Log-difference every column. diff() leaves the first row empty, and
# dropna() then discards every row that contains a NaN.
trans_data = np.log(data).diff().dropna()
plt.scatter(trans_data['m1'], trans_data['unemp'])
plt.title('Changes in log %s vs. log %s' % ('m1', 'unemp'))

# make a scatter plot for each pair of variables in a DataFrame to show their correlations
# scatter_matrix is a pandas function, not a matplotlib.pyplot one, so the
# former `plt.scatter_matrix(...)` call (which raised AttributeError) is gone.
# `pd.plotting.scatter_matrix` is the supported location; the top-level
# `pd.scatter_matrix` alias was deprecated and removed from pandas.
pd.plotting.scatter_matrix(trans_data, diagonal='kde', color='k', alpha=0.3)

# ===============================================
# chapter 9 Data Aggregation and Group Operations
# ===============================================

# Sample DataFrame to aggregate: two string key columns plus two numeric
# columns, each filled with five draws from np.random.randn.
df = pd.DataFrame(dict(
    key1=['a', 'a', 'b', 'b', 'a'],
    key2=['one', 'two', 'one', 'two', 'one'],
    data1=np.random.randn(5),
    data2=np.random.randn(5),
))