def team_preprocessing():
    """
    Load the all-situations team game log and keep the home-team rows.

    Reads ``./data/teams_all_sits.csv``, passes the ``Team``, ``Opponent``
    and ``Venue`` columns through ``helpers.fix_team``, builds an integer
    ``game_id`` from ``Season`` and ``Game.ID``, sorts by it, merges in the
    outcomes via ``helpers.merge_outcomes`` and finally keeps only the rows
    where ``Team`` equals ``Venue``.

    :return: DataFrame
    """
    cols = [
        "Team", "Season", "Game.ID", "Date", "Opponent", "Venue", "TOI", "GF",
        "GA",
    ]

    # Get Game Data for Teams
    df = pd.read_csv("./data/teams_all_sits.csv")
    for team_col in ("Team", "Opponent", "Venue"):
        # presumably normalizes team abbreviations -- confirm in helpers
        df = helpers.fix_team(df, team_col)

    # Take an explicit copy: a new column is assigned below, and writing into
    # a column subset of another frame can raise SettingWithCopyWarning.
    df = df[cols].copy()

    # Get "correct" game id: str(Season) + "0" + str(Game.ID), as an int.
    # Built column-wise with Series.map rather than a row-wise
    # DataFrame.apply, which constructs a Series object for every row.
    game_id_text = df["Season"].map(str) + "0" + df["Game.ID"].map(str)
    df["game_id"] = game_id_text.map(int)
    df = df.drop(["Game.ID"], axis=1)

    # Sort by game_id...lowest is first
    df = df.sort_values(by=["game_id"])

    # Merge in game outcomes
    df = helpers.merge_outcomes(df)

    # Only keeps games from the home team perspective (Team == Venue)
    df = df[df["Team"] == df["Venue"]]

    return df
def get_pp_data():
    """
    Load the team power play game log and shape it for merging.

    Reads ``../projection_data/teams/teams_pp.csv``, passes ``Team`` through
    ``helpers.fix_team``, sorts by season / game / team, derives ``wshF`` and
    returns the selected columns with every stat column (``TOI`` onwards)
    suffixed with ``_pp``.

    :return: DataFrame of data
    """
    keep = [
        "Team", "Season", "Game.ID", "Date",
        "TOI", 'GF', 'FF', 'xGF', 'CF', 'wshF',
    ]

    pp = helpers.fix_team(
        pd.read_csv("../projection_data/teams/teams_pp.csv"), "Team")
    pp = pp.sort_values(by=['Season', 'Game.ID', 'Team'])

    # wshF: goals count in full, the remaining CF (CF - GF) count at 0.2
    # (presumably a "weighted shots for" measure -- confirm naming)
    non_goal_cf = pp['CF'] - pp['GF']
    pp['wshF'] = (non_goal_cf * .2 + pp['GF'])

    pp = pp[keep]

    # The first four columns are identifiers; everything after gets "_pp"
    suffixed = {}
    for name in keep[4:]:
        suffixed[name] = name + "_pp"

    return pp.rename(index=str, columns=suffixed)
def get_all_sits_data():
    """
    Load and combine the per-season skater All Situations data.

    For every season in ``range(2007, 2018)`` and for both ``forwards`` and
    ``defensemen``, reads
    ``../projection_data/skaters/<pos>_all_sits_<season>.json`` and builds a
    frame from its ``'data'`` entry. The frames are concatenated, the team
    columns are passed through ``helpers.fix_team``, ``toi_on`` is cast to
    float and renamed to ``toi_on_all``, and ``game_id`` is replaced by the
    string ``str(season) + "0" + str(game_id)``.

    NOTE(review): another ``get_all_sits_data`` (team CSV version) appears in
    this source; if both live in the same module the later one shadows this.

    :return: Processed All Situations DataFrame
    """
    cols = [
        'player', 'player_id', 'season', 'game_id', 'date', 'position',
        'team', 'opponent', 'venue', 'toi_on', 'goals', 'a1', 'a2', 'icors',
        'iblocks', 'pend', 'pent', 'ifac_win', 'ifac_loss', 'games',
    ]

    # Get All Data
    dfs = []
    for season in range(2007, 2018):
        for pos in ['forwards', 'defensemen']:
            print("all", pos, season)
            path = "../projection_data/skaters/{}_all_sits_{}.json".format(
                pos, season)
            with open(path) as file:
                dfs.append(pd.DataFrame(json.load(file)['data'], columns=cols))

    # Combine All Forward and Defensemen Data
    df = pd.concat(dfs)

    # Fix the team names (presumably normalizes abbreviations -- confirm in
    # helpers)
    for team_col in ['team', "opponent", "venue"]:
        df = helpers.fix_team(df, team_col)

    # Make sure time on ice is numeric
    df['toi_on'] = df['toi_on'].astype(float)

    # Change over some names for merging (index labels also become strings)
    df = df.rename(index=str, columns={"toi_on": "toi_on_all"})

    # Get the correct game_id. Built column-wise with Series.map rather than
    # a row-wise DataFrame.apply, which constructs a Series object for every
    # row; each element is still formatted with plain str().
    df['game_id'] = df['season'].map(str) + "0" + df['game_id'].map(str)

    return df
def get_all_sits_data():
    """
    Load the team All Situations game log and shape it for merging.

    Reads ``./data/teams_all_sits.csv``, sorts by season / game / team,
    passes the team columns through ``helpers.fix_team``, renames ``TOI``,
    ``PENT`` and ``PEND`` with an ``_all`` suffix and returns only the
    selected columns.

    NOTE(review): another ``get_all_sits_data`` (skater JSON version) appears
    in this source; if both live in the same module this later one wins.

    :return: DataFrame of data
    """
    wanted = [
        "Team", "Season", "Game.ID", "Date", "Opponent", "Venue",
        "TOI_all", "PENT_all", "PEND_all",
    ]
    renamed = {
        "TOI": "TOI_all",
        "PENT": "PENT_all",
        "PEND": "PEND_all",
    }

    frame = pd.read_csv("./data/teams_all_sits.csv")
    frame = frame.sort_values(by=['Season', 'Game.ID', 'Team'])

    # presumably normalizes team abbreviations -- confirm in helpers
    for name in ("Team", "Opponent", "Venue"):
        frame = helpers.fix_team(frame, name)

    # index=str also turns the index labels into strings
    frame = frame.rename(index=str, columns=renamed)

    return frame[wanted]