import pandas as pd
import numpy as np
from IPython import InteractiveShell

# Stand-alone IPython shell so the %timeit magic can be driven from a script.
inter = InteractiveShell()
pd.options.display.max_columns = 50

college = pd.read_csv('data/college.csv')

# Same data indexed by state abbreviation, in file order.
college2 = college.set_index('STABBR')
# `Index.is_monotonic` was removed in pandas 2.0; `is_monotonic_increasing`
# is the equivalent attribute and exists in older releases as well.
print(college2.index.is_monotonic_increasing)

# Sorted copy of the state-indexed frame.
college3 = college2.sort_index()
print(college3.index.is_monotonic_increasing)

# %timeit prints its own report and returns None unless -o is passed, so the
# calls are not wrapped in print() (that only emitted a stray "None" line).
inter.get_ipython().run_line_magic('timeit',
                                   "college[college['STABBR'] == 'TX']")
inter.get_ipython().run_line_magic('timeit', "college2.loc['TX']")
inter.get_ipython().run_line_magic('timeit', "college3.loc['TX']")

# Index by institution name and report whether the labels are unique.
college_unique = college.set_index('INSTNM')
print(college_unique.index.is_unique)

# The boolean selection used to be a bare expression whose result was
# discarded; print it like the label-based lookup that follows.
print(college[college['INSTNM'] == 'Stanford University'])
print(college_unique.loc['Stanford University'])
inter.get_ipython().run_line_magic(
    'timeit', "college[college['INSTNM'] == 'Stanford University']")
inter.get_ipython().run_line_magic(
    'timeit', "college_unique.loc['Stanford University']")

# Replace the index with a "CITY, STABBR" label and sort by it.
college.index = college['CITY'] + ', ' + college['STABBR']
college = college.sort_index()
print(college.head())
# Example #2
# 0
import numpy as np
import matplotlib.pyplot as plt
from IPython import InteractiveShell

# Stand-alone IPython shell so the %timeit magic can be driven from a script.
inter = InteractiveShell()
pd.options.display.max_columns = 50

movie = pd.read_csv('data/movie.csv', index_col='movie_title')

# Rows to blank out: title_year of 2010 or later, or title_year missing.
c1 = movie['title_year'] >= 2010
c2 = movie['title_year'].isnull()
criteria = c1 | c2
print(movie.mask(criteria).head())

# Drop the rows that mask() turned entirely into missing values.
movie_mask = movie.mask(criteria).dropna(how='all')
print(movie_mask.head())

# The same selection expressed with plain boolean indexing.
movie_boolean = movie[movie['title_year'] < 2010]
print(movie_boolean.head())
print(movie_mask.equals(movie_boolean))

# Compare shape and per-column dtypes separately from the values.
print(movie_mask.shape == movie_boolean.shape)
print(movie_mask.dtypes == movie_boolean.dtypes)

# Value comparison that ignores dtype differences; raises on mismatch.
from pandas.testing import assert_frame_equal
assert_frame_equal(movie_boolean, movie_mask, check_dtype=False)

# %timeit prints its own report and returns None unless -o is passed, so the
# calls are not wrapped in print() (that only emitted a stray "None" line).
inter.get_ipython().run_line_magic(
    'timeit', "movie.mask(criteria).dropna(how='all')")
inter.get_ipython().run_line_magic('timeit',
                                   "movie[movie['title_year'] < 2010]")
# Example #3
# 0
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from IPython import InteractiveShell

# Stand-alone IPython shell; used here to run the `%matplotlib inline` magic.
inter = InteractiveShell()
inter.get_ipython().run_line_magic('matplotlib', 'inline')
pd.set_option('display.max_columns', 50)

# Load the price history with the parsed `Date` column as the index.
amzn = pd.read_csv(
    'data/amzn_stock.csv', index_col='Date', parse_dates=['Date'])
print(amzn.head())

# Day-over-day percentage change of the closing price.
amzn_daily_return = amzn['Close'].pct_change()
print(amzn_daily_return.head())

# Remove missing values, then draw a 20-bin histogram of the returns.
amzn_daily_return = amzn_daily_return.dropna()
print(amzn_daily_return.hist(bins=20))

# Absolute z-score of every daily return.
mean = amzn_daily_return.mean()
std = amzn_daily_return.std()
abs_z_score = (amzn_daily_return - mean).abs() / std

# Fraction of returns strictly within 1, 2 and 3 standard deviations.
pcts = [(abs_z_score < limit).mean() for limit in (1, 2, 3)]
summary = ('{:.3f} fall within 1 standard deviation. '
           '{:.3f} within 2 and {:.3f} within 3')
print(summary.format(*pcts))


def test_return_normality(stock_data):
    """Print the share of daily returns within 1, 2 and 3 standard deviations.

    Takes the daily percentage change of the ``'Close'`` prices, converts
    every return to an absolute z-score and prints the fraction of returns
    whose z-score is strictly below 1, 2 and 3.

    Args:
        stock_data: Table-like object (e.g. a pandas DataFrame) whose
            ``'Close'`` entry is a pandas Series of prices.

    Returns:
        None. The summary line is written to standard output.

    Raises:
        KeyError: If ``stock_data`` has no ``'Close'`` entry.
    """
    close = stock_data['Close']
    # The first pct_change() value has no predecessor and is NaN; drop it.
    daily_return = close.pct_change().dropna()

    # Previously the returns were computed and then discarded; summarise them
    # the same way the module-level AMZN analysis does.
    mean = daily_return.mean()
    std = daily_return.std()
    abs_z_score = daily_return.sub(mean).abs().div(std)
    pcts = [abs_z_score.lt(i).mean() for i in range(1, 4)]
    print('{:.3f} fall within 1 standard deviation. '
          '{:.3f} within 2 and {:.3f} within 3'.format(*pcts))


# Analysis helper rather than a unit test: keep pytest-style collectors from
# picking it up because of the ``test_`` prefix.
test_return_normality.__test__ = False
# Example #4
# 0
import pandas as pd
import numpy as np
from IPython import InteractiveShell

# Stand-alone IPython shell so the %timeit magic can be driven from a script.
inter = InteractiveShell()

college = pd.read_csv('data/college.csv', index_col='INSTNM')
cn = 'Texas A & M University-College Station'

# Label-based scalar lookup: general-purpose .loc versus scalar-only .at.
print(college.loc[cn, 'UGDS_WHITE'])
print(college.at[cn, 'UGDS_WHITE'])
# %timeit prints its own report and returns None unless -o is passed, so the
# calls are not wrapped in print() (that only emitted a stray "None" line).
inter.get_ipython().run_line_magic('timeit', "college.loc[cn, 'UGDS_WHITE']")
inter.get_ipython().run_line_magic('timeit', "college.at[cn, 'UGDS_WHITE']")

# Translate the row and column labels into integer positions.
row_num = college.index.get_loc(cn)
col_num = college.columns.get_loc('UGDS_WHITE')
print(row_num, col_num)

# Position-based scalar lookup: .iloc versus scalar-only .iat.
inter.get_ipython().run_line_magic('timeit', 'college.iloc[row_num, col_num]')
inter.get_ipython().run_line_magic('timeit', 'college.iat[row_num, col_num]')
inter.get_ipython().run_line_magic('timeit', 'college.iloc[5, col_num]')
inter.get_ipython().run_line_magic('timeit', 'college.iat[5, col_num]')

# The scalar accessors are available on a single column (Series) too.
state = college['STABBR']
print(state.iat[1000])
print(state.at['Stanford University'])
import pandas as pd
import numpy as np
from IPython import InteractiveShell

# Stand-alone IPython shell so the %timeit magic can be driven from a script.
inter = InteractiveShell()
pd.options.display.max_columns = 50

college = pd.read_csv('data/college.csv')
# Select the Texas rows with a boolean mask on the STABBR column.
print(college[college['STABBR'] == 'TX'].head())

# Same selection through the index after moving STABBR into it.
college2 = college.set_index('STABBR')
print(college2.loc['TX'].head())

# %timeit prints its own report and returns None unless -o is passed, so the
# calls are not wrapped in print() (that only emitted a stray "None" line).
inter.get_ipython().run_line_magic('timeit',
                                   "college[college['STABBR'] == 'TX']")
inter.get_ipython().run_line_magic('timeit', "college2.loc['TX']")
# Time the set_index() step itself as well.
inter.get_ipython().run_line_magic(
    'timeit', "college2 = college.set_index('STABBR')")

# Several states at once: isin() on the column versus a list of index labels.
states = ['TX', 'CA', 'NY']
print(college[college['STABBR'].isin(states)])
print(college2.loc[states].head())