def add_onice_players_to_df(df, focus_team, season, gamecol, player_output='ids'):
    """
    Uses the _Secs column in df, the season, and the gamecol to join onto on-ice players.

    Players are taken from the focus team's TOI log first. A game with no matching rows in that log is
    looked up in its individually parsed TOI file instead. A message is printed for each game that
    stays partly or wholly unmatched.

    :param df: dataframe. Needs a '_Secs' column and the column named by gamecol.
    :param focus_team: str or int, team to focus on. Its players will be listed first, in six columns
        named with the team string followed by 1 to 6; opponents go in Opp1 to Opp6.
    :param season: int, the season
    :param gamecol: str, the column with game IDs
    :param player_output: str, use 'names' or 'nums' or 'ids'. Currently 'nums' is not supported and,
        like 'ids', leaves the joined values untouched.

    :return: dataframe with team and opponent players, with '_Secs' dropped
    """
    # Column prefixes and the home-team comparison need the team as a string. The value the caller
    # gave is still what is handed to the TOI reader.
    if isinstance(focus_team, str):
        focus_name = focus_team
    else:
        focus_name = team_info.team_as_str(focus_team)

    slots = [str(i) for i in range(1, 7)]
    team_cols = ['Team' + slot for slot in slots]
    opp_cols = ['Opp' + slot for slot in slots]
    focus_cols = [focus_name + slot for slot in slots]

    toi = teams.get_team_toi(season, focus_team) \
        .rename(columns={'Time': '_Secs'}) \
        .drop_duplicates()
    toi = toi[['Game', '_Secs'] + team_cols + opp_cols] \
        .rename(columns={'Game': gamecol}) \
        .rename(columns=dict(zip(team_cols, focus_cols)))

    joined = df.merge(toi, how='left', on=['_Secs', gamecol])

    # Find games lacking players by looking for nulls in Opp1. The game IDs live in gamecol, which is
    # not necessarily called 'Game'.
    unmatched = pd.isnull(joined.Opp1)
    missings = set(joined.loc[unmatched, gamecol].unique())
    hassome = set(joined.loc[~unmatched, gamecol].unique())

    for game in missings:
        if pd.isnull(game):
            # Rows without a game ID cannot be matched to anything
            continue
        gameid = int(round(game))

        if game in hassome:
            print('Missing some (not all) data to join on-ice players for {0:d}'.format(gameid))
            continue

        # The TOI may exist even though it did not make it into the team log (e.g. b/c of missing PBP)
        try:
            gametoi = parse_toi.get_parsed_toi(season, gameid)
        except OSError:
            print('Missing all data to join on-ice players for {0:d}'.format(gameid))
            print('Check scrape / parse status and game number')
            continue

        gametoi = gametoi.rename(columns={'Time': '_Secs'}) \
            .drop_duplicates() \
            .drop(['HomeStrength', 'RoadStrength', 'HG', 'RG'], axis=1)

        # Switch H1..H6 / R1..R6 to focus-team / Opp naming depending on who was at home
        hname = team_info.team_as_str(schedules.get_home_team(season, gameid))
        focus_prefix, opp_prefix = ('H', 'R') if hname == focus_name else ('R', 'H')
        renames = {}
        for slot in slots:
            renames[focus_prefix + slot] = focus_name + slot
            renames[opp_prefix + slot] = 'Opp' + slot
        gametoi = gametoi.rename(columns=renames).assign(**{gamecol: gameid})

        joined = helpers.fill_join(joined, gametoi, on=['_Secs', gamecol])

    # Now convert to names or numbers. 'ids' needs no work, and 'nums' is still a TODO.
    if player_output == 'names':
        for col in focus_cols + opp_cols:
            # Plain column replacement: the names are strings going into a numeric column
            joined[col] = players.playerlst_as_str(pd.to_numeric(joined[col]))

    return joined.drop('_Secs', axis=1)
def score_state_graph(season):
    """
    Generates a horizontal stacked bar graph showing how much 5v5 TOI each team has played in each
    score state for given season.

    Score states run from -3 to 3 and are labelled 'Trail 3+' through 'Up 3+'.

    :param season: int, the season

    :return: nothing; the graph is shown on screen
    """
    # TODO make kwargs match other methods: startseason, startdate, etc
    state_toi = manip.team_5v5_score_state_summary_by_game(season) \
        .drop('Game', axis=1) \
        .groupby(['Team', 'ScoreState'], as_index=False).sum()

    bar_positions = _score_state_graph_bar_positions(state_toi)
    # assign() swaps the whole column, so turning team values into strings is not an in-place dtype
    # change
    bar_positions = bar_positions.assign(Team=bar_positions.Team.apply(team_info.team_as_str))

    plt.clf()

    # Tied, leading and trailing each get a base color from the cycle; bigger margins get
    # progressively lighter shades of the neighboring state.
    tiedcolor, leadcolor, trailcolor = plt.rcParams['axes.prop_cycle'].by_key()['color'][:3]
    colors = {0: tiedcolor, 1: leadcolor, -1: trailcolor}
    for margin in range(2, 4):
        colors[margin] = vhelper.make_color_lighter(colors[margin - 1])
        colors[-margin] = vhelper.make_color_lighter(colors[-margin + 1])

    for score in range(-3, 4):
        if score == 0:
            label = 'Tied'
        elif abs(score) == 3:
            label = 'Up 3+' if score > 0 else 'Trail 3+'
        elif score > 0:
            label = 'Up {0:d}'.format(score)
        else:
            label = 'Trail {0:d}'.format(-score)

        temp = bar_positions[bar_positions.ScoreState == score]
        # Positions go in positionally: matplotlib 3 removed the old ``bottom`` keyword of barh
        plt.barh(temp.Y.values, temp.Width.values, left=temp.Left.values,
                 label=label, alpha=0.5, color=colors[score])

    # Team names at x=0 of each bar
    for _, y, team in bar_positions[['Y', 'Team']].drop_duplicates().itertuples():
        plt.annotate(team, xy=(0, y), ha='center', va='center', fontsize=6)

    plt.ylim(-1, len(bar_positions.Team.unique()))
    plt.legend(loc='lower right', fontsize=8)
    plt.yticks([])
    for spine in ['right', 'left', 'top', 'bottom']:
        plt.gca().spines[spine].set_visible(False)
    plt.title(get_score_state_graph_title(season))

    # x tick labels are shown as absolute percentages on both sides of zero
    lst = list(np.arange(-0.6, 0.61, 0.2))
    plt.xticks(lst, ['{0:d}%'.format(abs(int(round(100 * x)))) for x in lst])

    plt.show()
def _h2h_pair_value(df, pid1, pid2, col):
    """Returns col as an int from the first row of df for the (PlayerID1, PlayerID2) pair, or 0 if the
    pair has no row."""
    rows = df[(df.PlayerID1 == pid1) & (df.PlayerID2 == pid2)]
    if len(rows) == 0:
        return 0
    return int(rows[col].iloc[0])


def _h2h_margin_labels(order, idcol, corsi, toi, sign):
    """Returns one 'corsi in m:ss' label per player in order, from Secs and HomeCorsi summed over
    idcol. sign flips the Corsi total (use -1 for the road team)."""
    totals = pd.DataFrame({idcol: order}) \
        .merge(toi[[idcol, 'Secs']].groupby(idcol).sum().reset_index(), how='left', on=idcol) \
        .merge(corsi[[idcol, 'HomeCorsi']].groupby(idcol).sum().reset_index(), how='left', on=idcol)
    # Sums are divided by 5 -- presumably because each event appears once per opposing skater;
    # TODO confirm against how the corsi/toi frames are built.
    corsilabels = totals.HomeCorsi.fillna(0).apply(
        lambda x: visualization_helper.format_number_with_plus(sign * int(x / 5)))
    toilabels = totals.Secs.apply(lambda x: manip.time_to_mss(x / 5))
    return ['{0:s} in {1:s}'.format(c, t) for c, t in zip(list(corsilabels), list(toilabels))]


def _game_h2h_chart(season, game, corsi, toi, orderh, orderr, numf_h=None, numf_r=None, save_file=None):
    """
    This method actually does the plotting for game_h2h

    :param season: int, the season
    :param game: int, the game
    :param corsi: df of P1, P2, Corsi +/- for P1 (columns PlayerID1, PlayerID2, HomeCorsi)
    :param toi: df of P1, P2, H2H TOI (columns PlayerID1, PlayerID2, Min, Secs)
    :param orderh: list of float, player order on y-axis, top to bottom
    :param orderr: list of float, player order on x-axis, left to right
    :param numf_h: int. Number of forwards for home team. Used to add horizontal bold line between F and D
    :param numf_r: int. Number of forwards for road team. Used to add vertical bold line between F and D.
    :param save_file: str of file to save the figure to, 'fig' to return the figure, or None to simply
        display

    :return: nothing, or the figure if save_file is 'fig'
    """
    home = schedules.get_home_team(season, game)
    road = schedules.get_road_team(season, game)
    hname = team_info.team_as_str(home, True)
    homename = team_info.team_as_str(home, False)
    rname = team_info.team_as_str(road, True)
    roadname = team_info.team_as_str(road, False)

    fig, ax = plt.subplots(1, figsize=[11, 7])

    nhome = len(orderh)
    nroad = len(orderr)

    # Convert dataframes to coordinates
    horderdf = pd.DataFrame({'PlayerID1': orderh[::-1], 'Y': list(range(nhome))})
    rorderdf = pd.DataFrame({'PlayerID2': orderr, 'X': list(range(nroad))})
    plotdf = toi.merge(corsi, how='left', on=['PlayerID1', 'PlayerID2']) \
        .merge(horderdf, how='left', on='PlayerID1') \
        .merge(rorderdf, how='left', on='PlayerID2')

    # Hist2D of TOI
    # The bin edges sit at half-integers so the integer coordinates are centered in the cells.
    # Otherwise, they're all on the edges.
    _, _, _, image = ax.hist2d(x=plotdf.X, y=plotdf.Y,
                               bins=(np.arange(-0.5, nroad + 0.5, 1),
                                     np.arange(-0.5, nhome + 0.5, 1)),
                               weights=plotdf.Min, cmap=plt.cm.summer)

    # Convert IDs to names and label axes and axes ticks
    ax.set_xlabel(roadname)
    ax.set_ylabel(homename)
    xorder = players.playerlst_as_str(orderr)
    yorder = players.playerlst_as_str(orderh)[::-1]  # need to go top to bottom, so reverse order
    ax.set_xticks(range(len(xorder)))
    ax.set_yticks(range(len(yorder)))
    ax.set_xticklabels(xorder, fontsize=10, rotation=45, ha='right')
    ax.set_yticklabels(yorder, fontsize=10)
    ax.set_xlim(-0.5, nroad - 0.5)
    ax.set_ylim(-0.5, nhome - 0.5)

    # Hide the little ticks on the axes by setting their length to 0
    ax.tick_params(axis='both', which='both', length=0)

    # Add dividing lines between rows and columns
    for x in np.arange(0.5, nroad - 0.5, 1):
        ax.plot([x, x], [-0.5, nhome - 0.5], color='k')
    for y in np.arange(0.5, nhome - 0.5, 1):
        ax.plot([-0.5, nroad - 0.5], [y, y], color='k')

    # Add a bold line between F and D.
    if numf_r is not None:
        ax.plot([numf_r - 0.5, numf_r - 0.5], [-0.5, nhome - 0.5], color='k', lw=3)
    if numf_h is not None:
        ax.plot([-0.5, nroad - 0.5], [nhome - numf_h - 0.5, nhome - numf_h - 0.5], color='k', lw=3)

    # Colorbar for TOI
    cbar = fig.colorbar(image, pad=0.1)
    cbar.ax.set_ylabel('TOI (min)')

    # Add trademark
    cbar.ax.set_xlabel('Muneeb Alam\n@muneebalamcu', labelpad=20)

    # Add labels for Corsi and circle negatives
    neg_x = []
    neg_y = []
    for y, hpid in enumerate(orderh[::-1]):
        for x, rpid in enumerate(orderr):
            # A pair with no row was never on ice together for a corsi event, so it counts as 0
            cf = _h2h_pair_value(corsi, hpid, rpid, 'HomeCorsi')
            if cf > 0:
                cflabel = '+' + str(cf)  # Easier to pick out positives with plus sign
            else:
                cflabel = str(cf)
                if cf < 0:
                    neg_x.append(x)
                    neg_y.append(y)
            ax.annotate(cflabel, xy=(x, y), ha='center', va='center')

    # Circle negative numbers by making a scatterplot with black edges and transparent faces
    ax.scatter(neg_x, neg_y, marker='o', edgecolors='k', s=200, facecolors='none')

    # Add TOI and Corsi totals at end of rows/columns
    # HomeCorsi is from the home side's view, hence the sign flip for road players on the top axis
    topax = ax.twiny()
    topax.set_xticks(range(len(xorder)))
    topax.set_xticklabels(_h2h_margin_labels(orderr, 'PlayerID2', corsi, toi, -1),
                          fontsize=6, rotation=45, ha='left')
    topax.set_xlim(-0.5, nroad - 0.5)
    topax.tick_params(axis='both', which='both', length=0)

    rightax = ax.twinx()
    rightax.set_yticks(range(len(yorder)))
    rightax.set_yticklabels(_h2h_margin_labels(orderh[::-1], 'PlayerID1', corsi, toi, 1), fontsize=6)
    rightax.set_ylim(-0.5, nhome - 0.5)
    rightax.tick_params(axis='both', which='both', length=0)

    # Add brief explanation for the top left cell at the bottom
    row1name = yorder.iloc[-1]
    col1name = xorder.iloc[0]
    # The top-left pair may never have shared the ice (or a corsi event); treat that as zero rather
    # than failing on an empty selection.
    timeh2h = _h2h_pair_value(toi, orderh[0], orderr[0], 'Secs')
    shoth2h = _h2h_pair_value(corsi, orderh[0], orderr[0], 'HomeCorsi')

    explanation = [
        'The top left cell indicates {0:s} (row 1) faced {1:s} (column 1) for {2:s}.'.format(
            row1name, col1name, manip.time_to_mss(timeh2h))
    ]
    if shoth2h == 0:
        explanation.append(
            'During that time, {0:s} and {1:s} were even in attempts.'.format(hname, rname))
    elif shoth2h > 0:
        explanation.append(
            'During that time, {0:s} out-attempted {1:s} by {2:d}.'.format(hname, rname, shoth2h))
    else:
        explanation.append(
            'During that time, {1:s} out-attempted {0:s} by {2:d}.'.format(hname, rname, -1 * shoth2h))
    explanation = '\n'.join(explanation)

    # Hacky way to annotate: add this to x-axis label
    ax.set_xlabel(ax.get_xlabel() + '\n\n' + explanation)

    plt.subplots_adjust(bottom=0.27, left=0.17, top=0.82, right=1.0)

    # Add title
    # Game totals are divided by 25 -- presumably 5 skaters x 5 opponents per event; TODO confirm.
    plt.title(_get_game_h2h_chart_title(season, game,
                                        corsi.HomeCorsi.sum() / 25,
                                        toi.Secs.sum() / 25),
              y=1.1, va='bottom')

    # The window title lives on the figure manager; FigureCanvas.set_window_title is gone from
    # recent matplotlib releases.
    manager = getattr(plt.gcf().canvas, 'manager', None)
    if manager is not None:
        manager.set_window_title('{0:d} {1:d} H2H.png'.format(season, game))

    if save_file is None:
        plt.show()
    elif save_file == 'fig':
        return plt.gcf()
    else:
        plt.savefig(save_file)
    return None
def team_score_shot_rate_parallel(team, startseason, endseason=None, save_file=None):
    """
    Draws a parallel coordinates graph of 5v5 CF% by score state, from trailing by 3 to leading by 3,
    for every team over the given seasons. The given team's rows are passed to the plotting helper
    separately from the full set.

    Score states beyond +/-3 are folded into +/-3 before CF and CA are summed.

    :param team: team to pick out; anything team_info.team_as_str accepts
    :param startseason: int, first season to include
    :param endseason: int, last season to include (inclusive). If None, only startseason is used.
    :param save_file: str, file to save the figure to. If None, merely shows on screen.

    :return: nothing
    """
    if endseason is None:
        endseason = startseason

    df = pd.concat([manip.team_5v5_shot_rates_by_score(season)
                    for season in range(startseason, endseason + 1)])

    # Reduce to +/- 3, then total everything by team and score state
    df = df.assign(ScoreState=df.ScoreState.clip(-3, 3)) \
        .drop('Game', axis=1) \
        .groupby(['Team', 'ScoreState'], as_index=False) \
        .sum()
    df = df.assign(**{'CF%': df.CF / (df.CF + df.CA)})
    df = df[['Team', 'ScoreState', 'CF%']].sort_values('ScoreState')

    statelabels = {x: 'Lead{0:d}'.format(x) if x >= 1 else 'Trail{0:d}'.format(abs(x))
                   for x in range(-3, 4)}
    statelabels[0] = 'Tied'
    # assign() replaces the column outright, so numeric states can become string labels
    df = df.assign(ScoreState=df.ScoreState.map(statelabels))

    # Go to wide
    df = df.pivot_table(index='Team', columns='ScoreState', values='CF%').reset_index()

    # Reorder columns. reindex (rather than plain selection) leaves a score state that never
    # occurred in the data as an all-NaN column instead of raising KeyError.
    statecols = ['Trail3', 'Trail2', 'Trail1', 'Tied', 'Lead1', 'Lead2', 'Lead3']
    df = df.reindex(columns=['Team'] + statecols)

    # Teams to strings
    df = df.assign(Team=df.Team.apply(team_info.team_as_str))

    # filter for own team
    teamdf = df[df.Team == team_info.team_as_str(team)]

    # Make parallel coords
    vhelper.parallel_coords(df, teamdf, 'Team')

    # Set yticklabels
    ys = (0.4, 0.5, 0.6)
    plt.yticks(ys, ['{0:d}%'.format(int(y * 100)) for y in ys])
    plt.ylim(0.35, 0.65)

    plt.title(_team_score_shot_rate_parallel_title(team, startseason, endseason))

    for direction in ['right', 'top', 'bottom', 'left']:
        plt.gca().spines[direction].set_visible(False)

    if save_file is None:
        plt.show()
    else:
        plt.savefig(save_file)
def game_timeline(season, game, save_file=None):
    """
    Creates a shot attempt timeline as seen on @muneebalamcu

    Plots each team's cumulative CF against game time, marks goals with diamonds, separates periods
    with bold lines, and shades each team's man-advantage spans.

    :param season: int, the season
    :param game: int, the game
    :param save_file: str, specify a valid filepath to save to file. If None, merely shows on screen.
        Specify 'fig' to return the figure

    :return: nothing, or the figure
    """
    hname = team_info.team_as_str(schedules.get_home_team(season, game))
    rname = team_info.team_as_str(schedules.get_road_team(season, game))

    cf = {hname: _get_home_cf_for_timeline(season, game),
          rname: _get_road_cf_for_timeline(season, game)}
    pps = {hname: _get_home_adv_for_timeline(season, game),
           rname: _get_road_adv_for_timeline(season, game)}
    gs = {hname: _get_home_goals_for_timeline(season, game),
          rname: _get_road_goals_for_timeline(season, game)}

    palette = plt.rcParams['axes.prop_cycle'].by_key()['color']
    colors = {hname: palette[0], rname: palette[1]}
    darkercolors = {team: visualization_helper.make_color_darker(hex=col)
                    for team, col in colors.items()}

    # Create two axes. Use bottom (mins) for labeling but top (secs) for plotting
    ax = plt.gca()
    ax2 = ax.twiny()

    # Corsi lines
    for team in cf:
        ax2.plot(cf[team].Time, cf[team].CumCF, label=team, color=colors[team])

    # Label goal counts when scored with diamonds
    for team in gs:
        xs, ys = _goal_times_to_scatter_for_timeline(gs[team], cf[team])
        ax2.scatter(xs, ys, edgecolors='k', marker='D', label='{0:s} goal'.format(team),
                    zorder=3, color=colors[team])

    # Last time value in the home CF data; the int form is for range(), which needs an integer
    endtime = cf[hname].Time.max()
    endsecs = int(endtime)

    # Bold lines to separate periods
    _, ymax = ax2.get_ylim()
    for x in range(0, endsecs, 1200):
        ax2.plot([x, x], [0, ymax], color='k', lw=2)

    # PP highlighting
    # Note that axvspan works in relative coords (0 to 1), so need to divide by ymax
    for team in pps:
        for pptype in pps[team]:
            # '+1' advantages get the team color, anything else the darker shade
            colors_to_use = colors if pptype[-2:] == '+1' else darkercolors
            for i, (start, end) in enumerate(pps[team][pptype]):
                teamcf = cf[team]
                cf_at_time_min = teamcf.loc[teamcf.Time == start].CumCF.max()  # in case there are multiple
                cf_at_time_max = teamcf[teamcf.Time == end].CumCF.max()

                # Every span of a type uses the same shade; only the first carries the legend label
                # so the type is listed once.
                span_kwargs = dict(ymin=cf_at_time_min / ymax, ymax=cf_at_time_max / ymax,
                                   alpha=0.5, facecolor=colors_to_use[team])
                if i == 0:
                    span_kwargs['label'] = '{0:s} {1:s}'.format(team, pptype)
                ax2.axvspan(start, end, **span_kwargs)

                # Thin strip along the bottom of the plot for the same span
                ax2.axvspan(start, end, ymin=0, ymax=0.05, alpha=0.5, facecolor=colors_to_use[team])

    # Set limits
    ax2.set_xlim(0, endtime)
    ax2.set_ylim(0, ymax)
    ax.set_ylabel('Cumulative CF')
    plt.legend(loc=2, framealpha=0.5, fontsize=8)

    # Ticks every 10 min on bottom axis; none on top axis
    ax.set_xlim(0, endtime / 60)
    ax.set_xticks(range(0, endsecs // 60 + 1, 10))
    ax.set_xlabel('Time elapsed in game (min)')
    ax2.set_xticks([])

    # Set title
    plt.title(_get_corsi_timeline_title(season, game))

    # The window title lives on the figure manager; FigureCanvas.set_window_title is gone from
    # recent matplotlib releases.
    manager = getattr(plt.gcf().canvas, 'manager', None)
    if manager is not None:
        manager.set_window_title('{0:d} {1:d} TL.png'.format(season, game))

    if save_file is None:
        plt.show()
    elif save_file == 'fig':
        return plt.gcf()
    else:
        plt.savefig(save_file)
    # Drawing starts from plt.gca(), so close the figure to keep the next call from reusing it
    plt.close()
    return None
def update_team_logs(season, force_overwrite=False, force_games=None):
    """
    This method looks at the schedule for the given season and writes pbp for scraped games to file.
    It also adds the strength at each pbp event to the log.

    For each team, games with status "Final" that are not yet in the team's pbp log are read from
    their parsed pbp and toi files, relabelled from home/road to team/opp, and appended to the team's
    pbp and toi logs. Games whose parsed files are missing or empty are skipped.

    :param season: int, the season
    :param force_overwrite: bool, whether to generate from scratch
    :param force_games: None or iterable of games to force_overwrite specifically

    :return: nothing
    """
    sch = schedules.get_season_schedule(season).query('Status == "Final"')
    # NOTE(review): 20001-30417 presumably spans regular season and playoff game numbers -- confirm
    new_games_to_do = sch[(sch.Game >= 20001) & (sch.Game <= 30417)]

    # Materialize once: force_games may be a one-shot iterable and is needed again for every team
    forced_games = None if force_games is None else list(force_games)
    if forced_games is not None:
        # A forced game that is already in range would otherwise be listed, and processed, twice
        new_games_to_do = pd.concat([new_games_to_do,
                                     sch.merge(pd.DataFrame({'Game': forced_games}),
                                               how='inner', on='Game')]) \
            .drop_duplicates(subset='Game') \
            .sort_values('Game')

    allteams = sorted(pd.concat([new_games_to_do.Home, new_games_to_do.Road]).unique())

    # For each team
    for teami, team in enumerate(allteams):
        teamname = team_info.team_as_str(team)
        print('Updating team log for {0:d} {1:s}'.format(season, teamname))

        newgames = new_games_to_do[(new_games_to_do.Home == team) | (new_games_to_do.Road == team)]

        pbpdf = None
        toidf = None
        if not force_overwrite:
            # Read currently existing ones for each team and anti join to schedule to find missing
            # games. Forced games are removed from the existing logs so they get regenerated.
            try:
                pbpdf = get_team_pbp(season, team)
                if forced_games is not None:
                    pbpdf = helpers.anti_join(pbpdf, pd.DataFrame({'Game': forced_games}), on='Game')
                newgames = newgames.merge(pbpdf[['Game']].drop_duplicates(),
                                          how='outer', on='Game', indicator=True)
                newgames = newgames[newgames._merge == "left_only"].drop('_merge', axis=1)
            except FileNotFoundError:
                pbpdf = None
            except pyarrow.lib.ArrowIOError:  # pyarrow (feather) FileNotFoundError equivalent
                pbpdf = None

            try:
                toidf = get_team_toi(season, team)
                if forced_games is not None:
                    toidf = helpers.anti_join(toidf, pd.DataFrame({'Game': forced_games}), on='Game')
            except FileNotFoundError:
                toidf = None
            except pyarrow.lib.ArrowIOError:  # pyarrow (feather) FileNotFoundError equivalent
                toidf = None

        # Collect per-game frames and concatenate once at the end instead of growing the logs game
        # by game
        pbp_parts = [] if pbpdf is None else [pbpdf]
        toi_parts = [] if toidf is None else [toidf]

        for _, gamerow in newgames.iterrows():
            game = gamerow.Game
            home = gamerow.Home
            road = gamerow.Road

            # load parsed pbp and toi
            try:
                gamepbp = parse_pbp.get_parsed_pbp(season, game)
                gametoi = parse_toi.get_parsed_toi(season, game)
            except FileNotFoundError:
                continue

            # TODO 2016 20779 why does pbp have 0 rows?
            # Also check for other errors in parsing etc
            if len(gamepbp) == 0 or len(gametoi) == 0:
                continue

            # Rename score and strength columns from home/road to team/opp
            team_side, opp_side = ('Home', 'Road') if team == home else ('Road', 'Home')
            gametoi = gametoi.assign(TeamStrength=gametoi[team_side + 'Strength'],
                                     OppStrength=gametoi[opp_side + 'Strength']) \
                .drop(['HomeStrength', 'RoadStrength'], axis=1)
            gamepbp = gamepbp.assign(TeamScore=gamepbp[team_side + 'Score'],
                                     OppScore=gamepbp[opp_side + 'Score']) \
                .drop(['HomeScore', 'RoadScore'], axis=1)

            # add scores to toi and strengths to pbp
            gamepbp = gamepbp.merge(gametoi[['Time', 'TeamStrength', 'OppStrength']],
                                    how='left', on='Time')
            gametoi = gametoi.merge(gamepbp[['Time', 'TeamScore', 'OppScore']],
                                    how='left', on='Time')
            # Times without a pbp event carry the last known score forward
            gametoi = gametoi.assign(TeamScore=gametoi.TeamScore.ffill(),
                                     OppScore=gametoi.OppScore.ffill())

            # Switch TOI column labeling from H1/R1 to Team1/Opp1 as appropriate.
            # Two-character column names (e.g. H1) are taken to be the player columns.
            if team == home:
                swapping_dict = {'H': 'Team', 'R': 'Opp'}
            else:
                swapping_dict = {'H': 'Opp', 'R': 'Team'}
            colchanges = {c: swapping_dict[c[0]] + c[1] for c in gametoi.columns if len(c) == 2}
            gametoi = gametoi.rename(columns=colchanges)

            # finally, add game, home, and road to both dfs
            pbp_parts.append(gamepbp.assign(Game=game, Home=home, Road=road))
            toi_parts.append(gametoi.assign(Game=game, Home=home, Road=road))

        pbpdf = pd.concat(pbp_parts) if pbp_parts else None
        toidf = pd.concat(toi_parts) if toi_parts else None

        # write to file
        if pbpdf is not None:
            pbpdf = pbpdf.assign(FocusTeam=team)
        if toidf is not None:
            toidf = toidf.assign(FocusTeam=team)

        # NOTE(review): the writers are called even when a log is None, as before -- confirm they
        # handle that.
        write_team_pbp(pbpdf, season, team)
        write_team_toi(toidf, season, team)
        print('Done with team logs for {0:d} {1:s} ({2:d}/{3:d})'.format(
            season, teamname, teami + 1, len(allteams)))