def test_scout():
    '''(3 points) scout

    Checks that scout() returns a list of exactly three distinct player ID
    strings that are not already on the roster below, that the combined
    team stays within the salary budget, and that runs() for the combined
    team is at least 700.
    '''
    # new players to hire
    hires = scout()

    # current members of the team
    roster = [
        'bradfch01',  # OAK 2002
        'chaveer01',
        'colanmi01',
        'dyeje01',
        'hernara02',
        'hiljuer01',
        'holtzmi01',
        'hudsoti01',
        'kochbi01',
        'lidleco01',
        'longte01',
        'magnami01',
        'mecirji01',
        'menecfr01',
        'muldema01',
        'myersgr01',
        'penaca01',
        'saenzol01',
        'tamje01',
        'tejadmi01',
        'valdema02',
        'velarra01',
        'venafmi01',
        'zitoba01',
    ]

    assert isinstance(hires, list)
    # Check the size before looking at any element, so that an empty answer
    # fails this assertion instead of raising IndexError further down.
    assert len(hires) == 3  # only choose 3 players
    # every hire (not only the first one) must be a player ID string
    assert all(isinstance(player, str) for player in hires)

    # the hires should not be in the team already
    assert all(player not in roster for player in hires)

    # cannot hire the same player twice
    assert len(set(hires)) == len(hires)

    # the new team
    team = hires + roster

    # check budget
    batting = load_csv('Batting2001AJS.csv')
    total_salary = sum_salaries(team, batting)
    print('salaries:', total_salary)
    assert total_salary <= 40004167  # too expensive, exceed budget

    # the expected number of runs by the team should be at least 700;
    # compute it once and reuse the value for both the report and the check
    expected_runs = runs(team, batting)
    print('runs:', expected_runs)
    assert expected_runs >= 700
def test_join_batting():
    '''(3 points) join_batting

    Joins the table loaded from 'Batting2001A.csv' with the one loaded from
    'People.csv', spot-checks the result, then saves it for later tests.
    '''
    batting = load_csv('Batting2001A.csv')
    people = load_csv('People.csv')
    joined = join_batting(batting, people)

    # overall shape and the dtypes of two counting columns
    assert joined.shape == (1220, 43)
    assert joined.dtypes.H == int
    assert joined.dtypes.AB == int

    # first matching row for player 'abreubo01'
    abreu = joined[joined.playerID == 'abreubo01'].iloc[0]
    assert abreu.G == 162
    assert abreu.birthYear == 1974

    # first matching row for player 'bradlmi01'
    bradley = joined[joined.playerID == 'bradlmi01'].iloc[0]
    assert bradley.G == 77
    assert bradley.birthYear == 1978
    assert bradley.nameLast == 'Bradley'
    assert bradley.nameFirst == 'Milton'

    # Now let's save the result into a CSV file, for further analysis
    save_csv(joined, 'Batting2001AJ.csv')
def test_join_salary():
    '''(3 points) join_salary

    Joins the table loaded from 'Batting2001AJ.csv' with the one loaded from
    'Salaries2002.csv', spot-checks three players, then saves the result.
    '''
    batting = load_csv('Batting2001AJ.csv')
    salaries = load_csv('Salaries2002.csv')
    joined = join_salary(batting, salaries)
    assert joined.shape == (786, 47)

    # (playerID, expected G, expected nameLast, expected salary)
    expected_rows = [
        ('anderga01', 161, 'Anderson', 5000000),
        ('miltoer01', 35, 'Milton', 4000000),
        ('woodwch01', 37, "Woodward", 235000),
    ]
    for player_id, games, last_name, salary in expected_rows:
        # only the first matching row of each player is inspected
        row = joined[joined.playerID == player_id].iloc[0]
        assert row.G == games
        assert row.nameLast == last_name
        assert row.salary == salary

    # Now let's save the result into a CSV file, for further analysis
    save_csv(joined, 'Batting2001AJS.csv')
def test_runs():
    '''(2 points) runs

    Compares runs() against reference values (absolute tolerance 0.01) for
    a one-player team and a two-player team.
    '''
    batting = load_csv('Batting2001AJS.csv')

    # test with a team of one player
    single_player_runs = runs(['hernara02'], batting)
    assert np.allclose(single_player_runs, 57.3877, atol=0.01)

    # test with a team of two players
    two_player_runs = runs(['hernara02', 'hattesc01'], batting)
    assert np.allclose(two_player_runs, 88.7553, atol=0.01)
def test_rank_OBP():
    '''(3 points) rank_OBP

    Checks the type and length of the list returned by rank_OBP() and its
    first five entries.
    '''
    batting = load_csv('Batting2001AJS.csv')
    ranking = rank_OBP(batting)

    assert type(ranking) == list
    assert len(ranking) == 786

    # expected leading entries of the returned list, in order
    expected_top = (
        'berkmla01',
        'pujolal01',
        'giambje01',  # OAK team hired Jeremy Giambi in 2002
        'mientdo01',
        'pierrju01',
    )
    for position, player_id in enumerate(expected_top):
        assert ranking[position] == player_id
def test_rank_BA():
    '''(3 points) rank_BA

    Checks the type and length of the list returned by rank_BA() and its
    first five entries.
    '''
    batting = load_csv('Batting2001AJS.csv')
    ranking = rank_BA(batting)

    assert type(ranking) == list
    assert len(ranking) == 786

    # expected leading entries of the returned list, in order
    expected_top = (
        'berkmla01',
        'pujolal01',
        'pierrju01',
        'loducpa01',
        'millake01',
    )
    for position, player_id in enumerate(expected_top):
        assert ranking[position] == player_id
def test_sum_salaries():
    '''(2 points) sum_salaries

    Checks sum_salaries() for a one-player team and a three-player team.
    '''
    batting = load_csv('Batting2001AJS.csv')

    # test with a team of one player
    one_player_total = sum_salaries(['bradfch01'], batting)
    assert one_player_total == 235000

    # test with a team of three players
    three_player_total = sum_salaries(
        ['bradfch01', 'chaveer01', 'colanmi01'], batting
    )
    print(three_player_total)
    assert three_player_total == 2660000
def load_batting(filename='Batting.csv'):
    '''
        Read the batting data stored in a CSV file.
        Input:
            filename: a string, the name of the CSV file to read.
        Output:
            the object returned by load_csv(filename); the assignment
            template describes it as a pandas dataframe.
    '''
    #########################################
    ## INSERT YOUR CODE HERE
    # the whole job is delegated to load_csv
    return load_csv(filename)
    #########################################

    ''' TEST: Now you can test the correctness of your code above by typing `nosetests -v test4.py:test_load_batting' in the terminal. '''
def test_search_salary():
    '''(2 points) search_salary

    Runs search_salary() on the table loaded from 'Salaries.csv', saves the
    result, then checks its shape and three players' rows.
    '''
    all_salaries = load_csv('Salaries.csv')
    selected = search_salary(all_salaries)

    # Now let's save the result into a CSV file, for further analysis
    # (done before the checks below, so the file is written even if one fails)
    save_csv(selected, 'Salaries2002.csv')

    assert selected.shape == (846, 5)

    # playerID -> expected salary; every checked row must be for year 2002
    expected_salary = [
        ('anderga01', 5000000),
        ('miltoer01', 4000000),
        ('woodwch01', 235000),
    ]
    for player_id, salary in expected_salary:
        row = selected[selected.playerID == player_id].iloc[0]
        assert row.yearID == 2002
        assert row.salary == salary
def test_sum_stat():
    '''(2 points) sum_stat

    Checks sum_stat() for the 'H' and 'G' keys on one- and two-player teams.
    '''
    batting = load_csv('Batting2001AJS.csv')

    # test with a team of one player
    solo_team = ['hernara02']
    hits = sum_stat(solo_team, batting, key='H')
    assert hits == 115

    # test with a team of two players: totals are the per-player sums
    duo_team = ['hernara02', 'hattesc01']
    hits = sum_stat(duo_team, batting, key='H')
    assert hits == (115 + 68)
    games = sum_stat(duo_team, batting, key='G')
    assert games == (136 + 94)
def test_aggregate_batting():
    '''(3 points) aggregate_batting

    Runs aggregate_batting() on the table loaded from 'Batting2001.csv',
    spot-checks individual cells, then saves the result for later tests.
    '''
    raw = load_csv('Batting2001.csv')
    aggregated = aggregate_batting(raw)

    # after removing the column 'teamID' and 'lgID' (string cannot be added in aggregation), we have 20 columns
    assert aggregated.shape == (1220, 20)
    assert aggregated.dtypes.H == int
    assert aggregated.dtypes.AB == int

    # (row position, column position, expected value)
    expected_cells = [
        (1, 3, 28),     # the 'G' column for player 'abbotje01'
        (1, 4, 42),     # the 'AB' column for player 'abbotje01'
        (7, 3, 59),     # the 'G' column for player 'aceveju01'
        (7, 4, 3),      # the 'AB' column for player 'aceveju01'
        (84, 3, 46),    # the 'G' column for player 'bennega01'
        (84, 4, 131),   # the 'AB' column for player 'bennega01'
        (84, -1, 1),    # the 'GIDP' column for player 'bennega01'
    ]
    for row_pos, col_pos, value in expected_cells:
        assert aggregated.iloc[row_pos, col_pos] == value

    # Now let's save the result into a CSV file, for further analysis
    save_csv(aggregated, 'Batting2001A.csv')
def test_search_batting():
    '''(2 points) search_batting

    Checks search_batting() for year=1999 and for its default year argument,
    saves the default result to 'Batting2001.csv', and verifies that the
    saved file loads back with the same checked properties.
    '''
    def check_2001_frame(frame):
        # Assertions shared by the freshly computed frame and the frame
        # reloaded from disk; column 1 is expected to hold 2001 in the first
        # and last rows.
        assert type(frame) == pd.DataFrame
        assert frame.shape == (1339, 22)
        assert frame.dtypes.H == int
        assert frame.dtypes.AB == int
        assert frame.iloc[0, 1] == 2001
        assert frame.iloc[1338, 1] == 2001
        assert frame.iloc[0, 6] == 1
        assert frame.iloc[1, 6] == 42

    batting = load_batting()

    # explicit year
    season_1999 = search_batting(batting, year=1999)
    assert season_1999.shape == (1299, 22)
    print(season_1999)
    assert season_1999.dtypes.H == int
    assert season_1999.dtypes.AB == int
    assert season_1999.iloc[0, 1] == 1999
    assert season_1999.iloc[1200, 1] == 1999
    assert season_1999.iloc[0, 6] == 57
    assert season_1999.iloc[1, 6] == 21

    # default year argument
    season_default = search_batting(batting)
    check_2001_frame(season_default)

    # Now let's save the dataset of year 2001 into a CSV file, for further analysis
    save_csv(season_default, 'Batting2001.csv')

    # check the correctness of the saved data
    reloaded = load_csv('Batting2001.csv')
    check_2001_frame(reloaded)