# Compare boolean-mask selection with .loc selection on the college data,
# using an unsorted state index, a sorted state index, and a unique
# institution-name index. Each approach is timed with IPython's %timeit.
import pandas as pd
import numpy as np
from IPython import InteractiveShell

inter = InteractiveShell()
pd.options.display.max_columns = 50

college = pd.read_csv('data/college.csv')

# State abbreviation as the index, in original file order.
college2 = college.set_index('STABBR')
# Index.is_monotonic was removed in pandas 2.0; is_monotonic_increasing
# reports the same property and is also available in older releases.
print(college2.index.is_monotonic_increasing)

# Same index, sorted.
college3 = college2.sort_index()
print(college3.index.is_monotonic_increasing)

# Timed statements are passed as strings and must keep using the
# module-level names defined above.
print(inter.get_ipython().run_line_magic('timeit', "college[college['STABBR'] == 'TX']"))
print(inter.get_ipython().run_line_magic('timeit', "college2.loc['TX']"))
print(inter.get_ipython().run_line_magic('timeit', "college3.loc['TX']"))

# Institution name as the index.
college_unique = college.set_index('INSTNM')
print(college_unique.index.is_unique)

# The boolean-mask lookup was a bare expression whose result was discarded;
# print it so it is shown alongside the equivalent .loc lookup.
print(college[college['INSTNM'] == 'Stanford University'])
print(college_unique.loc['Stanford University'])

print(inter.get_ipython().run_line_magic(
    'timeit', "college[college['INSTNM'] == 'Stanford University']"))
print(inter.get_ipython().run_line_magic(
    'timeit', "college_unique.loc['Stanford University']"))

# Build a "CITY, STABBR" string index and sort by it.
college.index = college['CITY'] + ', ' + college['STABBR']
college = college.sort_index()
print(college.head())
# Select movies released before 2010 in two ways -- DataFrame.mask followed by
# dropna, and plain boolean indexing -- then compare the results and time both.
import pandas as pd  # was missing: the script uses pd.options and pd.read_csv
import numpy as np
import matplotlib.pyplot as plt
from IPython import InteractiveShell
from pandas.testing import assert_frame_equal

inter = InteractiveShell()
pd.options.display.max_columns = 50

movie = pd.read_csv('data/movie.csv', index_col='movie_title')

# Rows to blank out: released in 2010 or later, or with no release year.
c1 = movie['title_year'] >= 2010
c2 = movie['title_year'].isnull()
criteria = c1 | c2

print(movie.mask(criteria).head())

# Masked rows become all-missing, so dropping all-NaN rows removes them.
movie_mask = movie.mask(criteria).dropna(how='all')
print(movie_mask.head())

# The same selection with a boolean filter.
movie_boolean = movie[movie['title_year'] < 2010]
print(movie_boolean.head())

# Compare the two results: strict equality, shape, then per-column dtypes.
print(movie_mask.equals(movie_boolean))
print(movie_mask.shape == movie_boolean.shape)
print(movie_mask.dtypes == movie_boolean.dtypes)

# Value comparison that ignores dtype differences; raises on mismatch.
assert_frame_equal(movie_boolean, movie_mask, check_dtype=False)

# Timed statements are passed as strings and must keep using the
# module-level names defined above.
print(inter.get_ipython().run_line_magic(
    'timeit', "movie.mask(criteria).dropna(how='all')"))
print(inter.get_ipython().run_line_magic('timeit', "movie[movie['title_year'] < 2010]"))
# Daily returns of the Amazon closing price: histogram, then the share of
# returns whose absolute z-score is below 1, 2 and 3.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from IPython import InteractiveShell

inter = InteractiveShell()
inter.get_ipython().run_line_magic('matplotlib', 'inline')
pd.options.display.max_columns = 50

amzn = pd.read_csv('data/amzn_stock.csv', index_col='Date', parse_dates=['Date'])
print(amzn.head())

# Percent change of the close from one row to the next.
amzn_daily_return = amzn['Close'].pct_change()
print(amzn_daily_return.head())

# Drop missing values before plotting and computing statistics.
amzn_daily_return = amzn_daily_return.dropna()
print(amzn_daily_return.hist(bins=20))

mean = amzn_daily_return.mean()
std = amzn_daily_return.std()

# Absolute distance from the mean, measured in standard deviations.
abs_z_score = (amzn_daily_return - mean).abs() / std

# Fraction of observations strictly below each threshold 1, 2, 3.
pcts = [(abs_z_score < threshold).mean() for threshold in range(1, 4)]

summary = ('{:.3f} fall within 1 standard deviation. '
           '{:.3f} within 2 and {:.3f} within 3').format(*pcts)
print(summary)


def test_return_normality(stock_data):
    """Compute the daily percent change of ``stock_data['Close']``, NaNs dropped.

    The result is only bound to a local name; nothing is returned or printed.
    NOTE(review): the body looks unfinished -- confirm against the original.
    """
    daily_return = stock_data['Close'].pct_change().dropna()
# Single-cell lookups on the college data: .loc/.iloc versus the scalar
# accessors .at/.iat, each timed with IPython's %timeit.
import pandas as pd
import numpy as np
from IPython import InteractiveShell

inter = InteractiveShell()

college = pd.read_csv('data/college.csv', index_col='INSTNM')
cn = 'Texas A & M University-College Station'

# Label-based access to one cell, two ways.
print(college.loc[cn, 'UGDS_WHITE'])
print(college.at[cn, 'UGDS_WHITE'])

# The timed statements are strings that refer to the module-level names
# above, so the magic is invoked directly at module level.
for timed_expr in ("college.loc[cn, 'UGDS_WHITE']",
                   "college.at[cn, 'UGDS_WHITE']"):
    print(inter.get_ipython().run_line_magic('timeit', timed_expr))

# Integer positions of the same row and column.
row_num, col_num = (college.index.get_loc(cn),
                    college.columns.get_loc('UGDS_WHITE'))
print(row_num, col_num)

# Position-based access, by computed position and by a literal row 5.
for timed_expr in ('college.iloc[row_num, col_num]',
                   'college.iat[row_num, col_num]',
                   'college.iloc[5, col_num]',
                   'college.iat[5, col_num]'):
    print(inter.get_ipython().run_line_magic('timeit', timed_expr))

# The scalar accessors are also used on a single column.
state = college['STABBR']
print(state.iat[1000])
print(state.at['Stanford University'])
# Select colleges by state with a boolean mask and with a state-abbreviation
# index, timing both approaches and the cost of building the index.
import pandas as pd
import numpy as np
from IPython import InteractiveShell

inter = InteractiveShell()
pd.options.display.max_columns = 50

college = pd.read_csv('data/college.csv')

# Boolean-mask selection of the Texas rows.
in_texas = college['STABBR'] == 'TX'
print(college[in_texas].head())

# Index-based selection of the same rows.
college2 = college.set_index('STABBR')
print(college2.loc['TX'].head())

# The timed statements are strings that refer to the module-level names
# above, so the magic is invoked directly at module level.
for timed_expr in ("college[college['STABBR'] == 'TX']",
                   "college2.loc['TX']",
                   "college2 = college.set_index('STABBR')"):
    print(inter.get_ipython().run_line_magic('timeit', timed_expr))

# Several states at once: isin on the column versus a list passed to .loc.
states = ['TX', 'CA', 'NY']
in_states = college['STABBR'].isin(states)
print(college[in_states])
print(college2.loc[states].head())