def get_pairings(season, game, homeroad='H'):
    """
    Lists the 5v5 defense pairs one team used in a game, with the seconds each pair spent on ice together.

    :param season: int, the season
    :param game: int, the game
    :param homeroad: str, 'H' for home or 'R' for road

    :return: pandas dataframe with columns PlayerID1, PlayerID2, Secs (assuming the positions table holds only
        ID and Pos). Every pair shows up twice, once in each order.
    """
    shifts = sg.get_parsed_toi(season, game)
    positions = get_player_positions()

    # Restrict to seconds where both sides are listed at strength "5"
    is_5v5 = (shifts.HomeStrength == "5") & (shifts.RoadStrength == "5")
    skater_cols = ['{0:s}{1:d}'.format(homeroad, slot) for slot in range(1, 6)]
    on_ice = shifts[is_5v5][['Time'] + skater_cols]

    # One row per (second, skater), then keep only the defensemen
    long_form = on_ice.melt(id_vars='Time', var_name='P', value_name='PlayerID')
    long_form = long_form.drop('P', axis=1)
    with_pos = long_form.merge(positions, how='left', left_on='PlayerID', right_on='ID')
    defense = with_pos.query('Pos == "D"').drop({'Pos', 'ID'}, axis=1)

    # Self-join on the second to pair up defensemen, discarding a player matched with himself
    paired = defense.merge(defense, how='inner', on='Time', suffixes=['1', '2'])
    paired = paired.query('PlayerID1 != PlayerID2')

    # Each remaining row is one shared second, so counting rows gives seconds together
    tallies = paired.groupby(['PlayerID1', 'PlayerID2']).count().reset_index()
    return tallies.rename(columns={'Time': 'Secs'})
def get_game_h2h_toi(season, game):
    """
    Computes head-to-head 5v5 time on ice between every pair of skaters in the given game.

    :param season: int, the season
    :param game: int, the game

    :return: a df built from columns PlayerID1, PlayerID2, Team1, Team2, Secs, plus Min (Secs / 60), after being
        passed through _convert_to_all_combos. Each pairing appears in both orders, and each player is also
        paired with himself.
    """
    # TODO add strength arg
    shifts = sg.get_parsed_toi(season, game)
    even = shifts[(shifts.HomeStrength == "5") & (shifts.RoadStrength == "5")]

    def _long_form(prefix):
        # One row per (second, skater) for the side whose columns start with prefix
        slots = ['Time'] + [prefix + str(n) for n in range(1, 6)]
        melted = even[slots].melt(id_vars='Time', var_name='P', value_name='PlayerID')
        return melted.drop('P', axis=1).assign(Team=prefix)

    home = _long_form('H')
    road = _long_form('R')

    # Join every side against every side on the shared second: HH, HR, RH, RR
    matchups = ((home, home), (home, road), (road, home), (road, road))
    joined = [left.merge(right, how='inner', on='Time', suffixes=['1', '2']) for left, right in matchups]

    # Each joined row is one shared second, so counting rows per pairing yields seconds together
    pairs = pd.concat(joined).assign(Secs=1).drop('Time', axis=1)
    pairs = pairs.groupby(['PlayerID1', 'PlayerID2', 'Team1', 'Team2']).count().reset_index()

    # One last to-do: make sure I have all possible pairs of players covered
    allpairs = _convert_to_all_combos(pairs, 0, ('PlayerID1', 'Team1'), ('PlayerID2', 'Team2'))
    allpairs.loc[:, 'Min'] = allpairs.Secs / 60
    return allpairs
def get_player_toi(season, game, pos=None, homeroad='H'):
    """
    Tallies 5v5 ice time per player for one team in a game, sorted from most to least.

    :param season: int, the season
    :param game: int, the game
    :param pos: a position code to keep (e.g. 'L', 'C', 'R', 'D'), 'F' for everyone who is not a 'D',
        or None for all
    :param homeroad: str, 'H' for home or 'R' for road

    :return: pandas df with PlayerID and Secs, plus whatever non-ID columns the positions table supplies (Pos at least)
    """
    # TODO this isn't working properly for in-progress games. Or maybe it's my scraping earlier.
    shifts = sg.get_parsed_toi(season, game)
    positions = get_player_positions()

    is_5v5 = (shifts.HomeStrength == "5") & (shifts.RoadStrength == "5")
    skater_cols = ['{0:s}{1:d}'.format(homeroad, slot) for slot in range(1, 6)]

    # One row per (second, skater); counting rows per skater gives seconds played
    long_form = shifts[is_5v5][['Time'] + skater_cols] \
        .melt(id_vars='Time', var_name='P', value_name='PlayerID') \
        .drop('P', axis=1)
    seconds = long_form.groupby('PlayerID').count().reset_index().rename(columns={'Time': 'Secs'})

    players = seconds.merge(positions, how='left', left_on='PlayerID', right_on='ID') \
        .drop('ID', axis=1) \
        .sort_values('Secs', ascending=False)

    if pos is None:
        return players
    if pos == 'F':
        # Forwards are defined by exclusion: anyone not listed as a defenseman
        return players.query('Pos != "D"')
    return players.query('Pos == "{0:s}"'.format(pos))
def _join_on_ice_players_to_pbp(season, game, pbp=None, toi=None):
    """
    Attaches the shift (on-ice) columns to each play of a game by left-joining on Time.

    :param season: int, the season
    :param game: int, the game
    :param pbp: df, the plays. If None, loaded via sg.get_parsed_pbp.
    :param toi: df, the shifts to join to plays. If None, loaded via sg.get_parsed_toi.

    :return: df, every row of pbp with the toi columns for the matching Time (missing where no Time matches)
    """
    plays = sg.get_parsed_pbp(season, game) if pbp is None else pbp
    shifts = sg.get_parsed_toi(season, game) if toi is None else toi
    return plays.merge(shifts, how='left', on='Time')
def _get_home_adv_for_timeline(season, game):
    """
    Finds the seconds in which the home team was up one or two skaters, for highlighting on a timeline.

    Up one means strengths of 5-on-4 or 4-on-3 (home first); up two means 5-on-3.

    :param season: int, the season
    :param game: int, the game

    :return: a dictionary with keys 'PP+1' and 'PP+2'. Each value is whatever _get_contiguous_times returns for
        the sorted list of matching times (presumably (start, end) spans).
    """
    # TODO add functionality for extra attacker
    shifts = sg.get_parsed_toi(season, game)
    home = shifts.HomeStrength
    road = shifts.RoadStrength

    up_one = ((home == "5") & (road == "4")) | ((home == "4") & (road == "3"))
    up_two = (home == "5") & (road == "3")

    advantages = {}
    advantages['PP+1'] = _get_contiguous_times(sorted(shifts[up_one].Time))
    advantages['PP+2'] = _get_contiguous_times(sorted(shifts[up_two].Time))
    return advantages
def get_game_h2h_corsi(season, game):
    """
    This method gets H2H Corsi for the given game (intended for 5v5; any strength filtering is left to
    filter_for_corsi, which receives the HomeStrength and RoadStrength columns).

    :param season: int, the season
    :param game: int, the game

    :return: a df built from columns PlayerID1, PlayerID2, Team1, Team2, HomeCorsi, after being passed through
        _convert_to_all_combos. HomeCorsi is the net shot-attempt differential from the perspective of
        PlayerID1's team (the sign is flipped for road players). Entries will be duplicated, as with
        get_game_h2h_toi.
    """
    # TODO add strength arg
    toi = sg.get_parsed_toi(season, game)
    pbp = sg.get_parsed_pbp(season, game)

    toi_cols = ['Time', 'R1', 'R2', 'R3', 'R4', 'R5', 'H1', 'H2', 'H3', 'H4', 'H5',
                'HomeStrength', 'RoadStrength']
    pbp = pbp[['Time', 'Event', 'Team']].merge(toi[toi_cols], how='inner', on='Time')
    corsi = filter_for_corsi(pbp).drop(['HomeStrength', 'RoadStrength'], axis=1)

    hometeam = ss.get_home_team(season, game)

    # HomeCorsi is +1 for an attempt credited to the home team and -1 otherwise. Blocks need no special
    # handling here: per the original author, block attribution was switched at parse time.
    home_sign = corsi.Team.apply(lambda x: 1 if x == hometeam else -1)

    # Give every event its own key. Joining the player frames on Time would cross-multiply events that share a
    # second (k events -> k * k rows per pair, times k again when HomeCorsi is attached), overcounting them.
    corsi = corsi.assign(HomeCorsi=home_sign, EventID=list(range(len(corsi))))
    corsipm = corsi[['EventID', 'HomeCorsi']]

    home = corsi[['EventID', 'H1', 'H2', 'H3', 'H4', 'H5']] \
        .melt(id_vars='EventID', var_name='P', value_name='PlayerID') \
        .drop('P', axis=1)
    road = corsi[['EventID', 'R1', 'R2', 'R3', 'R4', 'R5']] \
        .melt(id_vars='EventID', var_name='P', value_name='PlayerID') \
        .drop('P', axis=1)

    hh = home.merge(home, how='inner', on='EventID', suffixes=['1', '2']).assign(Team1='H', Team2='H')
    hr = home.merge(road, how='inner', on='EventID', suffixes=['1', '2']).assign(Team1='H', Team2='R')
    rh = road.merge(home, how='inner', on='EventID', suffixes=['1', '2']).assign(Team1='R', Team2='H')
    rr = road.merge(road, how='inner', on='EventID', suffixes=['1', '2']).assign(Team1='R', Team2='R')

    pairs = pd.concat([hh, hr, rh, rr]) \
        .merge(corsipm, how='inner', on='EventID') \
        .drop('EventID', axis=1) \
        .groupby(['PlayerID1', 'PlayerID2', 'Team1', 'Team2']).sum().reset_index()

    # Totals so far are from the home team's point of view; flip them for rows whose first player is on the road
    pairs.loc[pairs.Team1 == 'R', 'HomeCorsi'] = pairs.loc[pairs.Team1 == 'R', 'HomeCorsi'] * -1

    allpairs = _convert_to_all_combos(pairs, 0, ('PlayerID1', 'Team1'), ('PlayerID2', 'Team2'))
    return allpairs
def _get_cf_for_timeline(season, game, homeroad, granularity='min'):
    """
    Returns a dataframe with columns for time and cumulative CF

    :param season: int, the season
    :param game: int, the game
    :param homeroad: str, 'H' for home and 'R' for road
    :param granularity: 'min' buckets the result by elapsed minute; any other value leaves it in elapsed seconds

    :return: a dataframe with two columns, Time and CumCF. Time is shifted by one so that Time = 0 always has
        CumCF = 0.

    :raises ValueError: if homeroad is neither 'H' nor 'R'
    """
    # Validate before loading anything: an unknown side used to surface later as an unbound-variable error
    if homeroad not in ('H', 'R'):
        raise ValueError("homeroad must be 'H' or 'R', got {0!r}".format(homeroad))

    pbp = sg.get_parsed_pbp(season, game)
    pbp = manip.filter_for_corsi(pbp)

    if homeroad == 'H':
        teamid = ss.get_home_team(season, game)
    else:
        teamid = ss.get_road_team(season, game)
    pbp = pbp[pbp.Team == teamid]

    # The number of rows in the TOI table sets how many time points the timeline covers
    maxtime = len(sg.get_parsed_toi(season, game))
    df = pd.DataFrame({'Time': list(range(maxtime))})
    df = df.merge(pbp[['Time']].assign(CF=1), how='left', on='Time')
    df.loc[:, 'CF'] = df.CF.fillna(0)
    df.loc[:, 'CumCF'] = df.CF.cumsum()
    df = df.drop('CF', axis=1)

    # Now let's shift things down. Right now a shot at 30 secs will mean Time = 0 has CumCF = 1.
    if granularity == 'min':
        df.loc[:, 'Time'] = df.Time // 60
        df = df.groupby('Time').max().reset_index()

    # I want it soccer style, so Time = 0 always has CumCF = 0, and that first shot at 30sec will register for Time=1
    df = pd.concat([pd.DataFrame({'Time': [-1], 'CumCF': [0]}), df])
    df.loc[:, 'Time'] = df.Time + 1

    return df
def add_on_ice_players_to_df(df, season_colname=None, game_colname='Game', period_colname='Period',
                             time_colname='Time', time_format='elapsed', faceoff_indicator=False):
    """
    A method to add on-ice players to each row.

    :param df: a dataframe
    :param season_colname: str, name of column containing season. If None or not a column of df, a 'Season' column
        is added holding the current season.
    :param game_colname: str, name of column containing game
    :param period_colname: str, name of column containing period (ints)
    :param time_colname: str, name of column containing time in m:ss
    :param time_format: str, use 'elapsed' (preferred) or 'remaining' (latter may not work for regular season)
    :param faceoff_indicator: bool. This is suitable for events like shots. For faceoffs, specify True.
        This is because TOI is recorded in a non-overlapping way. At the time of the faceoff, the on-ice players
        listed are the players still on from the previous time. To get new players coming onto the ice, if this arg
        is True, will join to players on ice one second after.

    :return: a new dataframe: the rows of df, game by game, with that game's TOI columns joined on and then passed
        through player_columns_to_name
    """
    # period_time_to_elapsed is expected to supply the 'Elapsed' column used as the join key below
    df = period_time_to_elapsed(df, period_colname, time_colname, time_format)
    if season_colname is None or season_colname not in df.columns:
        df.loc[:, 'Season'] = ss.get_current_season()
        season_colname = 'Season'

    seasons_and_games = df[[season_colname, game_colname]].drop_duplicates()

    frames = []
    # itertuples yields the (season, game) values themselves; iterrows would yield (index label, row Series)
    for season, game in seasons_and_games.itertuples(index=False, name=None):
        tempdf = df[(df[season_colname] == season) & (df[game_colname] == game)]
        toi = sg.get_parsed_toi(season, game)
        if faceoff_indicator:
            # Shift onto a copy so the frame handed back by get_parsed_toi is never modified in place
            toi = toi.assign(Time=toi.Time - 1)
        tempdf = tempdf.merge(toi.rename(columns={'Time': 'Elapsed'}), how='left', on='Elapsed')
        frames.append(tempdf)

    newdf = pd.concat(frames)
    newdf = player_columns_to_name(newdf)

    return newdf