def yards_to_go(gid, pid, prechecked_gid=False, prechecked_pid=False):
    """
    Looks up the distance (in yards) the offense needs for a first down

    Parameters
    ----------
    gid: an integer of a game_id
    pid: an integer of a play_id
    prechecked_gid: a boolean; when True the game ID is treated as already
        validated and is not passed through check.game_id()
    prechecked_pid: a boolean; when True the play ID is treated as already
        validated and is not passed through check.play_id()

    Returns
    -------
    yds_to_go: the 'yds_to_go' value of the first row that load.plays_data()
        returns for this game and play
    """
    # Run only the validations the caller has not already performed
    gid = gid if prechecked_gid else check.game_id(gid)
    pid = pid if prechecked_pid else check.play_id(gid, pid)

    # Both IDs are validated at this point, so the loader may skip its checks
    play_row = load.plays_data(gid, pid, True, True)

    return play_row['yds_to_go'].iloc[0]
def line_of_scrimmage(gid, pid, prechecked_gid=False, prechecked_pid=False):
    """
    Looks up the line of scrimmage for a specified play

    Parameters
    ----------
    gid: an integer of a game_id
    pid: an integer of a play_id
    prechecked_gid: a boolean; when True the game ID is treated as already
        validated and is not passed through check.game_id()
    prechecked_pid: a boolean; when True the play ID is treated as already
        validated and is not passed through check.play_id()

    Returns
    -------
    los: the 'absolute_yard_line' value of the first row that
        load.plays_data() returns for this game and play
    """
    # Run only the validations the caller has not already performed
    gid = gid if prechecked_gid else check.game_id(gid)
    pid = pid if prechecked_pid else check.play_id(gid, pid)

    # Both IDs are validated at this point, so the loader may skip its checks
    play_row = load.plays_data(gid, pid, True, True)

    return play_row['absolute_yard_line'].iloc[0]
def n_frames(gid, pid, tracking=pd.DataFrame(), prechecked_gid=False,
             prechecked_pid=False):
    """
    Reports how many frames were recorded for a particular play

    Parameters
    ----------
    gid: an integer of a game_id
    pid: an integer of a play_id
    tracking: a dataframe of tracking information for the play. When it is
        empty (the default), the play's tracking data is loaded through
        load.tracking_data() using the week returned by game_week()
    prechecked_gid: a boolean; when True the game ID is treated as already
        validated
    prechecked_pid: a boolean; when True the play ID is treated as already
        validated

    Returns
    -------
    num_frames: the largest value found in the 'frame_id' column of the
        tracking data
    """
    # Run only the validations the caller has not already performed
    gid = gid if prechecked_gid else check.game_id(gid)
    pid = pid if prechecked_pid else check.play_id(gid, pid)

    # Fall back to loading the play's tracking data when none was handed in
    if tracking.empty:
        tracking = load.tracking_data(gid, pid, game_week(gid), True, True,
                                      prechecked_week=True)

    # The highest frame ID is taken as the frame count
    return tracking['frame_id'].max()
def game_week(gid):
    """
    Looks up the week in which a particular game was played

    Parameters
    ----------
    gid: an integer of a game_id

    Returns
    -------
    week: the 'week' value of the first schedule row whose 'game_id' equals
        the validated game ID
    """
    # The game ID is always validated here
    gid = check.game_id(gid)

    # Pull the schedule and pick out the row(s) for this game
    schedule = load.games_data()
    is_this_game = schedule['game_id'] == gid

    return schedule.loc[is_this_game, 'week'].iloc[0]
def game_teams(gid):
    """
    Looks up the two teams that played in a specified game (via game_id)

    Parameters
    ----------
    gid: an integer of a game_id

    Returns
    -------
    home: the 'home' value of the first schedule row for the game
    away: the 'away' value of the first schedule row for the game
    """
    # The game ID is always validated here
    gid = check.game_id(gid)

    # Pull the schedule and build the row selector for this game once
    schedule = load.games_data()
    is_this_game = schedule['game_id'] == gid

    # Read the home code first, then the away code, from the matching row
    home, away = (schedule.loc[is_this_game, side].iloc[0]
                  for side in ('home', 'away'))

    return home, away
def plays_and_games(gid=0, home='', away='', prechecked_gid=False):
    """
    Merges play and game data together to better illustrate what plays are
    being run by which team and against which opponent

    Parameters
    ----------
    gid: an integer of a game_id. When non-zero it takes precedence over the
        home/away team codes
    home: a string representing the home team's team code. Only used to look
        up a game ID when gid is 0
    away: a string representing the away team's team code. Only used to look
        up a game ID when gid is 0
    prechecked_gid: a boolean of whether or not the game ID has been
        prechecked

    Returns
    -------
    plays_from_game: a merged dataframe of play and game data. On top of the
        plays columns it carries 'home', 'away', 'week', 'offensive_team'
        (a copy of 'possession_team') and 'defensive_team' (whichever of
        home/away is not the offensive team)
    """
    if gid != 0:
        # If the game ID is not already checked, check the game ID first
        if not prechecked_gid:
            gid = check.game_id(gid)
            prechecked_gid = True

    # Only when no game ID is passed, try to get a game ID based on the home
    # and away team. This used to be an independent `if`, which silently
    # replaced an explicitly supplied game ID with the team-based lookup
    elif home != '' or away != '':
        home = check.team_code(home)
        away = check.team_code(away)
        gid = find.game_id(home, away)
        prechecked_gid = True

    # Load in plays from the identified game, or from all games if game ID = 0
    plays_from_game = load.plays_data(gid=gid, prechecked_gid=prechecked_gid)

    # Load in the games data to merge
    games_data = load.games_data(
        gid, prechecked_gid)[['game_id', 'home', 'away', 'week']]

    plays_from_game = pd.merge(left=plays_from_game,
                               right=games_data,
                               how='inner',
                               on='game_id')

    # The team with possession is on offense; the other team in the game is
    # on defense
    plays_from_game['offensive_team'] = plays_from_game['possession_team']
    plays_from_game['defensive_team'] = np.where(
        plays_from_game['offensive_team'] == plays_from_game['home'],
        plays_from_game['away'], plays_from_game['home'])

    # Fix the column order of the returned frame
    column_order = [
        'game_id', 'play_id', 'play_description', 'quarter', 'down',
        'yds_to_go', 'possession_team', 'play_type', 'yardline_side',
        'yardline_number', 'offense_formation', 'personnel_offense',
        'defenders_in_box', 'n_pass_rushers', 'personnel_defense',
        'type_dropback', 'presnap_away_score', 'presnap_home_score',
        'game_clock', 'absolute_yard_line', 'penalty_code', 'penalty_player',
        'pass_result', 'offensive_play_result', 'play_result', 'epa',
        'is_defensive_pi', 'down_dist_summary', 'home', 'away',
        'offensive_team', 'defensive_team', 'week'
    ]
    plays_from_game = plays_from_game[column_order]

    return plays_from_game
def field(gid=0, home='nfl', away='', show=False, unit='yd', zero='l'):
    """
    Draws a football field decorated for the two participating teams. The
    teams come either from the home and away arguments or, when a game_id is
    given, from find.game_teams()

    Parameters
    ----------
    gid: an int of a game_id for which to draw the field
    home: a string of the home team's code. Ignored if a game_id is provided
    away: a string of the away team's code. Ignored if a game_id is provided
    show: a boolean of whether or not to show the plot
    unit: a string for the units of the field. Default is 'yd'. In this
        function it only selects the extent used for the midfield logo: any
        other value uses the extent centred on (0, 0)
    zero: a string for where the origin of the plot should be. Default is
        'l'. It is accepted but not read by this function

    Returns
    -------
    fig, ax: the figure and axes objects (respectively) when show is False.
        When show is True the plot is displayed and None is returned
    """
    # Resolve the two team codes, preferring the game ID when there is one
    if gid != 0:
        gid = check.game_id(gid)
        home, away = find.game_teams(gid)
    else:
        home = home.upper()
        away = away.upper()
        if home == 'NFL':
            # Generic field: conference names stand in for the teams
            home, away = 'NFC', 'AFC'
        else:
            home = check.team_code(home)
            away = check.team_code(away)

    # Team records supply the end zone text and colors
    team_info = load.teams_data()
    home_info = team_info[team_info['team_code'] == home]
    away_info = team_info[team_info['team_code'] == away]

    # Outlines of every marking on the field
    (sidelines, endlines, goal_lines, midline, minor_yd_lines_b,
     minor_yd_lines_t, minor_yd_lines_l, minor_yd_lines_u, major_yd_lines,
     hashes_l, hashes_u, extra_pt_mark, arrow_40_l, arrow_40_u, arrow_30_l,
     arrow_30_u, arrow_20_l, arrow_20_u, arrow_10_l, arrow_10_u,
     field_marks) = load.football_field_coords()

    # Canvas setup
    fig, ax = plt.subplots()
    ax.set_aspect('equal')
    fig.set_size_inches(50, 22.2)
    ax.xaxis.set_visible(False)
    ax.yaxis.set_visible(False)
    ax.set_facecolor('#196f0c')

    # Midfield logo: the league logo for generic fields, else the home team's
    generic_codes = ['', 'nfl', 'nfc', 'afc']
    logo_name = 'nfl.png' if home.lower() in generic_codes else f'{home}.png'
    logo = plt.imread(os.path.join('img', 'logos', logo_name))
    logo_extent = ([52., 68., 18.65, 34.65] if unit == 'yd'
                   else [-18., 18., -18., 18.])
    ax.imshow(logo, extent=logo_extent, zorder=10)

    paint = '#ffffff'

    # Boundary lines, yard lines, hash marks and extra point markers, in the
    # order they are layered onto the axes
    line_shapes = (sidelines, endlines, goal_lines, midline,
                   minor_yd_lines_b, minor_yd_lines_t, minor_yd_lines_l,
                   minor_yd_lines_u, major_yd_lines, hashes_l, hashes_u,
                   extra_pt_mark)
    for shape in line_shapes:
        ax.fill(shape['x'], shape['y'], paint)

    # Yardage numbers
    for _, mark in field_marks.iterrows():
        ax.text(x=mark['x'],
                y=mark['y'],
                s=mark['text'],
                fontsize=50,
                color=paint,
                fontweight='bold',
                rotation=mark['rotation'],
                fontname='Impact')

    # Directional arrows beside the yardage numbers
    arrow_shapes = (arrow_40_l, arrow_40_u, arrow_30_l, arrow_30_u,
                    arrow_20_l, arrow_20_u, arrow_10_l, arrow_10_u)
    for shape in arrow_shapes:
        ax.fill(shape['x'], shape['y'], paint)

    # End zone lettering: home team at x=5, away team at x=114
    endzones = ((5, home_info, 90), (114, away_info, -90))
    for x_pos, info, angle in endzones:
        ax.text(x=x_pos,
                y=26.65,
                s=f'{info.nickname.iloc[0]}',
                fontdict={
                    'ha': 'center',
                    'va': 'center'
                },
                fontsize=100,
                fontweight='bold',
                fontname='Impact',
                color=f'{info.endzone_text.iloc[0]}',
                rotation=angle,
                path_effects=[
                    pe.withStroke(
                        linewidth=20,
                        foreground=f'{info.endzone_shadow.iloc[0]}')
                ])

    if not show:
        return fig, ax

    plt.show()
    return None
def play_gif(gid=0, pid=0, home='', away='', prechecked_gid=False,
             prechecked_pid=False, tracking=pd.DataFrame()):
    """
    Renders every frame of a play as a static image and stitches the images
    into a gif

    Parameters
    ----------
    gid: an int representing the game_id. When 0, a game ID is looked up from
        the home and away team codes (unless home is 'NFL')
    pid: an int representing the play_id
    home: a string of the home team's code. Not necessary if a game_id is
        provided
    away: a string of the away team's code. Not necessary if a game_id is
        provided
    prechecked_gid: a boolean of whether or not the game ID has been checked
        before being passed to the function
    prechecked_pid: a boolean of whether or not the play ID has been checked
        before being passed to the function
    tracking: a dataframe of tracking data for the play. When empty (the
        default) it is loaded through merge.tracking_and_plays()

    Returns
    -------
    None. Frames are written under img/temp/<gid>_<pid>/, combined by
    file_ops.make_gif(), and the temporary directory is then removed
    """
    # If a game ID is provided, get the home and away team from the provided
    # game ID
    if gid != 0:
        # Start by checking the game ID if it is provided but not yet checked
        if not prechecked_gid:
            gid = check.game_id(gid)
            prechecked_gid = True

        # Get the home and away teams for the game
        home, away = find.game_teams(gid)

    # If no game ID provided, and the home team is 'NFL', set home and away
    # to NFC and AFC respectively. Otherwise, check to make sure the teams are
    # legit
    else:
        home = home.upper()
        away = away.upper()
        if home == 'NFL':
            home = 'NFC'
            away = 'AFC'
        else:
            home = check.team_code(home)
            away = check.team_code(away)
            gid = find.game_id(home, away)

    # Next, check the play ID if it has not already been checked
    if not prechecked_pid:
        pid = check.play_id(gid, pid, prechecked_gid)
        prechecked_pid = True

    # If tracking isn't supplied, load all relevant tracking data
    if tracking.empty:
        tracking = merge.tracking_and_plays(gid, pid)

    # Get the number of frames in the play
    n_frames = find.n_frames(gid=gid,
                             pid=pid,
                             tracking=tracking,
                             prechecked_gid=True,
                             prechecked_pid=True)

    # Make the temporary directory to hold static images
    file_ops.make_gif_temp_dir(gid, pid)
    frame_dir = os.path.join('img', 'temp', f'{gid}_{pid}')

    # Make each frame as a static image
    for i in np.arange(1, n_frames + 1):
        print(f'Processing frame {i} of {n_frames}')
        fig, ax = play_frame(gid,
                             pid,
                             frame_no=i,
                             prechecked_gid=True,
                             prechecked_pid=True,
                             tracking=tracking,
                             prechecked_frame=True)

        # Zero-pad the frame number to four digits so that the file names
        # sort in frame order
        fname = os.path.join(frame_dir, f'{gid}_{pid}_{int(i):04d}.png')
        fig.savefig(fname, bbox_inches='tight', pad_inches=0)

        # Release the figure; otherwise one open figure per frame piles up
        # in memory for the lifetime of the process
        plt.close(fig)

    # Name the gif after the play's down-and-distance summary when the
    # tracking data carries a usable one, otherwise after the play ID
    try:
        gif_fname = tracking['down_dist_summary'].values[0] + '.gif'
    except (KeyError, IndexError, TypeError):
        gif_fname = str(pid) + '.gif'

    # Collect the static images
    images = file_ops.collect_gif_play_frames(gid, pid)

    # Make and save the gif
    file_ops.make_gif(gid, pid, images, fname=gif_fname)

    # Delete the temporary directory that holds all static images.
    file_ops.remove_temp_static_frame_directory(gid, pid)

    return None
def _yardline_marker(yardline):
    """Build the closed outline of a thin vertical bar centred on yardline."""
    half_width = 2 / 12
    left = yardline - half_width
    right = yardline + half_width
    return pd.DataFrame({
        'x': [left, right, right, left, left],
        'y': [1 / 9, 1 / 9, 53 + (2 / 9), 53 + (2 / 9), 1 / 9]
    })


def _plot_team_frame(ax, team_frame, base, highlight, number,
                     number_highlight, plot_arrows):
    """Draw one team's markers, jersey numbers and orientation arrows."""
    team_frame.plot(x='player_x',
                    y='player_y',
                    kind='scatter',
                    ax=ax,
                    color=base,
                    s=800,
                    edgecolor=highlight,
                    linewidth=2,
                    zorder=15)

    for _, player in team_frame.iterrows():
        # Jersey number drawn on top of the player's marker
        ax.text(
            x=player['player_x'],
            y=player['player_y'],
            s=str(int(player['player_no'])),
            fontsize=15,
            color=number,
            path_effects=[
                pe.withStroke(linewidth=3, foreground=number_highlight)
            ],
            fontweight='bold',
            rotation=player['jersey_num_orientation'],
            zorder=20,
            fontdict={
                'ha': 'center',
                'va': 'center'
            },
        )

        # Arrow of length 3 along the player's orientation angle, drawn just
        # beneath the marker
        if plot_arrows:
            ax.arrow(x=player['player_x'],
                     y=player['player_y'],
                     dx=3 * math.cos(player['player_orientation']),
                     dy=3 * math.sin(player['player_orientation']),
                     length_includes_head=True,
                     width=0.3,
                     color=highlight,
                     zorder=14)


def play_frame(gid=0, pid=0, home='', away='', frame_no=0, plot_los=True,
               plot_first_down_marker=True, plot_arrows=True,
               prechecked_gid=False, prechecked_pid=False,
               prechecked_frame=False, tracking=pd.DataFrame()):
    """
    Draw a frame of a given play. Teams are either supplied via the home and
    away arguments, or by looking them up from the game_id provided by the gid
    argument

    Parameters
    ----------
    gid: an int representing the game_id
    pid: an int representing the play_id
    home: a string of the home team's code. Not necessary if a game_id is
        provided
    away: a string of the away team's code. Not necessary if a game_id is
        provided
    frame_no: the number of the frame to plot
    plot_los: a boolean of whether or not to plot the line of scrimmage on
        the plot
    plot_first_down_marker: a boolean of whether or not to plot the first
        down line on the plot
    plot_arrows: a boolean of whether or not to draw an arrow from each
        player along their 'player_orientation' angle
    prechecked_gid: a boolean of whether or not the game ID has been checked
        before being passed to the function
    prechecked_pid: a boolean of whether or not the play ID has been checked
        before being passed to the function
    prechecked_frame: a boolean indicating whether or not it's okay to skip
        the frame validation. Defaulting to False, but should be set to True
        when called from play_gif()
    tracking: a dataframe of tracking data that can be used to speed up
        plotting. A 'jersey_num_orientation' column is written onto it

    Returns
    -------
    fig, ax: the figure and axes objects (respectively)
    """
    if gid != 0:
        # Start by checking the game ID if it is provided but not yet checked
        if not prechecked_gid:
            gid = check.game_id(gid)
            prechecked_gid = True

        # Get the home and away teams for the game
        home, away = find.game_teams(gid)

    # If no game ID provided, and the home team is 'NFL', set home and away
    # to NFC and AFC respectively. Otherwise, check to make sure the teams are
    # legit
    else:
        home = home.upper()
        away = away.upper()
        if home == 'NFL':
            home = 'NFC'
            away = 'AFC'
        else:
            home = check.team_code(home)
            away = check.team_code(away)
            gid = find.game_id(home, away)

    # Next, check the play ID if it has not already been checked
    if not prechecked_pid:
        pid = check.play_id(gid, pid, prechecked_gid)
        prechecked_pid = True

    # If tracking isn't supplied, load all relevant tracking data
    if tracking.empty:
        tracking = merge.tracking_and_plays(gid, pid)

    if not prechecked_frame:
        frame_no = check.frame_no(gid, pid, frame_no, tracking)

    # Start prepping the data for the plot. Primarily, the jersey numbers'
    # rotation angle based on team and play direction
    tracking['jersey_num_orientation'] = orient_jersey_num(
        gid, pid, prechecked_gid, prechecked_pid, tracking)

    # Split the frame's data into the home team, the away team, and the ball's
    # data (respectively)
    in_frame = tracking['frame_id'] == frame_no
    home_frame = tracking[(tracking['team'] == 'home') & in_frame]
    away_frame = tracking[(tracking['team'] == 'away') & in_frame]
    ball_frame = tracking[(tracking['team'] == 'football') & in_frame]

    # Get the hex color information about each team to use to make the plot
    teams_info = load.teams_data()
    home_info = teams_info[teams_info['team_code'] == home]
    away_info = teams_info[teams_info['team_code'] == away]

    home_uni_base = home_info['home_uni_base'].iloc[0]
    home_uni_highlight = home_info['home_uni_highlight'].iloc[0]
    home_uni_number = home_info['home_uni_number'].iloc[0]
    home_uni_number_highlight = home_info['home_uni_number_highlight'].iloc[0]

    away_uni_base = away_info['away_uni_base'].iloc[0]
    away_uni_highlight = away_info['away_uni_highlight'].iloc[0]
    away_uni_number = away_info['away_uni_number'].iloc[0]
    away_uni_number_highlight = away_info['away_uni_number_highlight'].iloc[0]

    # The two markers stay None unless they were requested, so the drawing
    # step further down can skip them instead of hitting an unbound name
    los = None
    first_down_marker = None

    # If the line of scrimmage is to be plotted, determine its position
    if plot_los:
        los = _yardline_marker(find.line_of_scrimmage(gid, pid))

    # If the first down line is to be plotted, determine its position
    if plot_first_down_marker:
        first_down = find.first_down_line(gid, pid, tracking, prechecked_gid,
                                          prechecked_pid)
        first_down_marker = _yardline_marker(first_down)

    # Draw the field
    fig, ax = field(gid)

    # Plot the home team's players, then the away team's players
    _plot_team_frame(ax, home_frame, home_uni_base, home_uni_highlight,
                     home_uni_number, home_uni_number_highlight, plot_arrows)
    _plot_team_frame(ax, away_frame, away_uni_base, away_uni_highlight,
                     away_uni_number, away_uni_number_highlight, plot_arrows)

    # Plot the ball
    ball_frame.plot('player_x',
                    'player_y',
                    kind='scatter',
                    ax=ax,
                    color='#624a2e',
                    s=100,
                    edgecolor='#000000',
                    linewidth=2,
                    zorder=15)

    # Draw only the markers that were requested
    if los is not None:
        ax.fill(los['x'], los['y'], '#183ec1')
    if first_down_marker is not None:
        ax.fill(first_down_marker['x'], first_down_marker['y'], '#ffcb05')

    return fig, ax
def orient_jersey_num(gid, pid, prechecked_gid=False, prechecked_pid=False,
                      tracking=pd.DataFrame()):
    """
    Works out the rotation to apply to each jersey number when the players
    involved in a play are plotted

    Parameters
    ----------
    gid: an integer of a game_id
    pid: an integer of a play_id
    prechecked_gid: a boolean; when True the game ID is treated as already
        validated
    prechecked_pid: a boolean; when True the play ID is treated as already
        validated
    tracking: a dataframe of tracking data for the play. When empty (the
        default) it is loaded through merge.tracking_and_plays(). A supplied
        frame is modified in place: a 'jersey_num_orientation' column is
        written onto it

    Returns
    -------
    The 'jersey_num_orientation' column of the tracking data: 0 for the
    football, -90 for home going right and away going left, 90 for away
    going right and home going left
    """
    # Validate the game ID first; the play ID check depends on it
    if not prechecked_gid:
        gid = check.game_id(gid)
        prechecked_gid = True

    # The game ID is known to be checked by now, whatever was passed in
    if not prechecked_pid:
        pid = check.play_id(gid, pid, prechecked_gid)

    # Load the play's tracking data only when none was handed in
    if tracking.empty:
        tracking = merge.tracking_and_plays(gid, pid)

    column = 'jersey_num_orientation'

    # The ball carries no rotation
    tracking.loc[tracking['team'] == 'football', column] = 0

    # (team, play direction) -> rotation, applied in this order
    rotations = (
        ('home', 'right', -90),
        ('away', 'right', 90),
        ('home', 'left', 90),
        ('away', 'left', -90),
    )
    for side, direction, angle in rotations:
        selector = ((tracking['team'] == side) &
                    (tracking['play_direction'] == direction))
        tracking.loc[selector, column] = angle

    return tracking[column]
def plays_data(gid = 0, pid = 0, prechecked_gid = False,
               prechecked_pid = False):
    """
    Loads the plays information provided

    Parameters
    ----------
    gid: an integer of a game_id. 0 (the default) means all games
    pid: an integer of a play_id. 0 (the default) means all plays
    prechecked_gid: a boolean of whether or not the game ID has been checked
        before being passed to the function
    prechecked_pid: a boolean of whether or not the play ID has been checked
        before being passed to the function. When False and the play cannot
        be found, a message is printed and every play in scope (the game, or
        all games when no game ID is given) is returned instead

    Returns
    -------
    plays: a data frame containing a cleaned, renamed copy of plays
        information, including a 'down_dist_summary' column
    """
    # Remember whether a game was requested before validation can change gid
    game_given = gid != 0

    # If the game ID has not yet been checked, check it now
    if game_given and not prechecked_gid:
        gid = check.game_id(gid)

    # Read the file once; every branch below only filters this frame
    all_plays = pd.read_csv(fp.plays_data_file)

    # Narrow to the requested game, if any, and pick the matching message
    if game_given:
        scope = all_plays[all_plays['gameId'] == gid]
        missing_msg = (f'Play ID {pid} does not exist for game {gid}. All '
                       f'plays for game {gid} will be returned.')
    else:
        scope = all_plays
        missing_msg = (f'Play ID {pid} does not exist. All plays for all '
                       'games will be returned.')

    if pid == 0:
        plays = scope
    else:
        # The play filter is applied whether or not the ID was prechecked.
        # Previously the no-game-ID / prechecked-play-ID combination left
        # `plays` unassigned and crashed further down
        plays = scope[scope['playId'] == pid]

        # Only an unchecked play ID falls back to the wider scope
        if plays.empty and not prechecked_pid:
            plays = scope
            print(missing_msg)

    # Work on an independent copy so the edits below never write into a
    # slice of the frame that was read from disk
    plays = plays.copy()

    # Rename columns
    plays.columns = [
        'game_id', 'play_id', 'play_description', 'quarter', 'down',
        'yds_to_go', 'possession_team', 'play_type', 'yardline_side',
        'yardline_number', 'offense_formation', 'personnel_offense',
        'defenders_in_box', 'n_pass_rushers', 'personnel_defense',
        'type_dropback', 'presnap_away_score', 'presnap_home_score',
        'game_clock', 'absolute_yard_line', 'penalty_code', 'penalty_player',
        'pass_result', 'offensive_play_result', 'play_result', 'epa',
        'is_defensive_pi'
    ]

    # Drop the last three characters of the game clock
    plays['game_clock'] = plays['game_clock'].str[:-3]

    # Create a pre-play down and distance summary with relevant game info
    plays['down_str'] = plays['down'].astype(str)
    ordinals = {'1': '1st', '2': '2nd', '3': '3rd', '4': '4th'}
    for digit, ordinal in ordinals.items():
        plays.loc[plays['down_str'] == digit, 'down_str'] = ordinal

    plays['qtr'] = 'Q' + plays['quarter'].astype(str)

    plays['down_dist_summary'] = plays['qtr'] + ' - ' + \
        plays['game_clock'].astype(str) + ' - ' + plays['possession_team'] + \
        ' - ' + plays['down_str'] + ' & ' + plays['yds_to_go'].astype(str) + \
        ' from ' + plays['yardline_side'] + ' ' + \
        plays['yardline_number'].astype(str)

    # Remove the 'play_type_' prefix from all play_type records
    plays['play_type'] = plays['play_type'].str.replace('play_type_', '')

    # Change coding of pass_result column
    pass_results = {
        'C': 'COMPLETE',
        'I': 'INCOMPLETE',
        'S': 'SACK',
        'IN': 'INTERCEPTION',
        'R': 'SCRAMBLE',
    }
    for code, label in pass_results.items():
        plays.loc[plays['pass_result'] == code, 'pass_result'] = label
    plays.loc[plays['pass_result'].isna(), 'pass_result'] = None

    # Keep only the necessary columns
    plays = plays[[
        'game_id', 'play_id', 'play_description', 'quarter', 'down',
        'yds_to_go', 'possession_team', 'play_type', 'yardline_side',
        'yardline_number', 'offense_formation', 'personnel_offense',
        'defenders_in_box', 'n_pass_rushers', 'personnel_defense',
        'type_dropback', 'presnap_away_score', 'presnap_home_score',
        'game_clock', 'absolute_yard_line', 'penalty_code', 'penalty_player',
        'pass_result', 'offensive_play_result', 'play_result', 'epa',
        'is_defensive_pi', 'down_dist_summary'
    ]]

    return plays
def tracking_data(gid = 0, pid = 0, week = 0, prechecked_gid = False,
                  prechecked_pid = False, prechecked_week = False):
    """
    Loads the tracking information provided for a specified week

    Parameters
    ----------
    gid: an integer of a game_id
    pid: an integer of a play_id
    week: an integer of which week's tracking data to return. A value of 0
        together with a game ID means the week is looked up from the game; a
        value of 0 without a game ID means all weeks are loaded (in which
        case pid is not applied). The default is 0.
    prechecked_gid: a boolean of whether or not the game ID has been checked
        before being passed to the function
    prechecked_pid: a boolean of whether or not the play ID has been checked
        before being passed to the function
    prechecked_week: a boolean of whether or not the week number has been
        checked before being passed to the function

    Returns
    -------
    trk: a data frame containing a cleaned, renamed copy of tracking
        information for the specified week, with the orientation and
        direction angles converted to radians
    """
    # Check which week to load. If neither the game ID nor week number are
    # passed to the function, load all weeks (this is slow)
    if gid == 0 and week == 0:
        # NOTE(review): range(1, 17) stops at week 16; if the data directory
        # also holds a week17.csv it is never read - confirm the intended
        # upper bound
        weekly = []
        for wk in range(1, 17):
            print(f'Loading week {wk}...', end = '\r')
            weekly.append(
                pd.read_csv(os.path.join(fp.data_dir, f'week{wk}.csv')))

        # Concatenate once rather than growing a frame inside the loop
        trk = pd.concat(weekly)

    else:
        # If the game ID is provided, but not checked, check the game ID first
        if gid != 0:
            if not prechecked_gid:
                gid = check.game_id(gid)

            # With no week given, use the week the game was played in
            if week == 0:
                week = find.game_week(gid)
                prechecked_week = True

        if week != 0:
            # If the week is prechecked, that's great. If not, check the week
            if not prechecked_week:
                week = check.week_number(week)

        # If the week number is still not set, let the week check handle it
        elif gid != 0:
            week = check.week_number(week)

        # If the play ID is provided, but not checked, check the play ID next
        if pid != 0:
            if not prechecked_pid:
                pid = check.play_id(gid, pid)

        # Now that the relevant data has all been checked, read the week's
        # file once and narrow it by whichever IDs were supplied
        week_file = os.path.join(fp.data_dir, f'week{week}.csv')
        trk = pd.read_csv(week_file)

        if gid != 0:
            trk = trk[trk['gameId'] == gid]

        if pid != 0:
            # The play-only branch used to filter on 'playID', a column name
            # no other branch uses; 'playId' is the spelling used throughout
            trk = trk[trk['playId'] == pid]

        # Detach from the frame read from disk before editing columns
        trk = trk.copy()

    # Rename columns
    trk.columns = [
        'time', 'player_x', 'player_y', 'player_speed', 'player_acceleration',
        'distance', 'player_orientation', 'player_direction', 'event_str',
        'player_id', 'player_name', 'player_no', 'player_position',
        'frame_id', 'team', 'game_id', 'play_id', 'play_direction',
        'route_type'
    ]

    # Correct the angular variables to be plottable (needs to be in radians)
    trk['player_orientation'] = np.mod(90 - trk['player_orientation'], 360)
    trk['player_orientation'] *= math.pi / 180
    trk['player_direction'] = np.mod(90 - trk['player_direction'], 360)
    trk['player_direction'] *= math.pi / 180

    return trk
def plays_matching(gid=0, home='', away='', play_info={},
                   prechecked_gid=False):
    """
    Finds the plays that match a set of criteria

    Parameters
    ----------
    gid: an integer of a game_id. When non-zero it takes precedence over the
        home/away team codes
    home: a string representing the home team's team code
    away: a string representing the away team's team code
    play_info: a dictionary of parameters to use for subsetting. The keys
        MUST be columns in the plays data to be used. If not, they will be
        ignored. A value may be a single value or a list/tuple/set of
        accepted values. The dictionary is never modified
    prechecked_gid: a boolean of whether or not the game ID has been
        prechecked

    Returns
    -------
    plays_from_game: a dataframe of plays that match the passed criteria
    """
    # Game ID should be the primary lookup tool, so start with loading the
    # game's data if this is passed
    if gid != 0:
        # If the game ID is not already checked, check the game ID first
        if not prechecked_gid:
            gid = check.game_id(gid)
            prechecked_gid = True

    # Only when the game ID is not passed, try to get a game ID based on the
    # home and away team. If this yields nothing, then load all games
    elif home != '' or away != '':
        home = check.team_code(home)
        away = check.team_code(away)
        gid = game_id(home, away)
        prechecked_gid = True

    # Load in plays data
    plays_from_game = merge.plays_and_games(gid, home, away, prechecked_gid)

    # Work on a private copy so neither the caller's dictionary nor the
    # shared default argument is ever written to
    criteria = dict(play_info) if play_info else {}

    # Columns whose values are matched in upper case
    upper_case_keys = ('offensive_team', 'defensive_team', 'possession_team',
                       'home', 'away', 'pass_result', 'type_dropback')

    def _upper(value):
        # Upper-case a string, or each string inside a collection of values
        if isinstance(value, str):
            return value.upper()
        if isinstance(value, (list, tuple, set)):
            return [v.upper() if isinstance(v, str) else v for v in value]
        return value

    for key in upper_case_keys:
        if key in criteria:
            criteria[key] = _upper(criteria[key])

    # Subset by the information about the play in the parameter play_info
    for key, val in criteria.items():
        # If the desired parameter is not in the columns of the plays data,
        # alert user and skip this subsetting parameter
        if key not in plays_from_game.columns:
            print(f'{key} is not a valid column to use for subsetting as'
                  ' it does not appear in the dataset.')
            continue

        # If the value is a collection, use the .isin() method for subsetting
        if isinstance(val, (list, tuple, set)):
            plays_from_game = plays_from_game[
                plays_from_game[key].isin(list(val))]

        # Otherwise, use the key and value alone
        else:
            plays_from_game = plays_from_game[plays_from_game[key] == val]

    # Return all plays that match the criteria
    return plays_from_game
def first_down_line(gid, pid, tracking=pd.DataFrame(), prechecked_gid=False,
                    prechecked_pid=False):
    """
    Finds what yardline is needed to be gained to achieve a first down

    Parameters
    ----------
    gid: an integer of a game_id
    pid: an integer of a play_id
    tracking: a set of tracking information pertaining to a particular play.
        If it is empty (the default), the tracking data for this play is
        loaded from the week in which the game was played
    prechecked_gid: a boolean of whether or not the game ID has been checked
        before being passed to the function
    prechecked_pid: a boolean of whether or not the play ID has been checked
        before being passed to the function

    Returns
    -------
    first_down_yardline: the line of scrimmage plus the yards to go when the
        play direction is 'right', otherwise the line of scrimmage minus the
        yards to go
    """
    if not prechecked_gid:
        # Validate the game ID
        gid = check.game_id(gid)
        prechecked_gid = True

    if not prechecked_pid:
        # Validate the play ID
        pid = check.play_id(gid, pid)
        prechecked_pid = True

    # Get the line of scrimmage and number of yards needed to achieve a first
    # down. Both IDs are validated by now, so the lookups skip re-validation
    los = line_of_scrimmage(gid, pid, prechecked_gid=True,
                            prechecked_pid=True)
    distance_to_first = yards_to_go(gid, pid, prechecked_gid=True,
                                    prechecked_pid=True)

    # Load in the appropriate tracking data if none was supplied. The
    # schedule is only needed to find the week to load, so it is only read
    # on this path
    if tracking.empty:
        games = load.games_data(gid, prechecked_gid)
        week = games.loc[games['game_id'] == gid, 'week'].iloc[0]
        tracking = load.tracking_data(gid,
                                      pid,
                                      week,
                                      prechecked_gid=True,
                                      prechecked_pid=True,
                                      prechecked_week=True)

    # Get the direction of play. If the play is going right, yards will be
    # added, otherwise they will be subtracted
    play_direction = tracking['play_direction'].iloc[0]

    # Calculate the yardline needed to be gained to achieve a first down
    if play_direction == 'right':
        first_down_yardline = los + distance_to_first
    else:
        first_down_yardline = los - distance_to_first

    return first_down_yardline
def play_id(gid=0, home='', away='', play_info={}, prechecked_gid=False):
    """
    Finds the play ID of a particular play

    The game is identified by ``gid`` when it is given; otherwise by the
    home/away team codes when at least one is given; otherwise plays are
    loaded with a game ID of 0. The plays are then filtered by ``play_info``.
    If exactly one play remains its ID is returned. In every other case the
    remaining plays are printed and the user is prompted on standard input
    for the game ID and play ID.

    Parameters
    ----------
    gid: an integer of a game_id
    home: a string representing the home team's team code
    away: a string representing the away team's team code
    play_info: a dictionary of parameters to use for subsetting. The keys MUST
        be columns in the plays data to be used. If not, they will be ignored
        (a message is printed for each ignored key). A list value matches any
        of its elements; any other value must match exactly. The dictionary
        is only read, never modified
    prechecked_gid: a boolean of whether or not the game ID has been
        prechecked

    Returns
    -------
    pid: an integer of a play_id
    """
    # Game ID should be the primary lookup tool, so start with the game ID if
    # this is passed
    if gid != 0:
        # If the game ID is not already checked, check the game ID first
        if not prechecked_gid:
            gid = check.game_id(gid)
            prechecked_gid = True

    # Only if the game ID is not passed, try to get a game ID based on the
    # home and away team. This must be an elif: otherwise an explicitly
    # passed game ID would be silently overwritten by the team-based lookup
    elif home != '' or away != '':
        home = check.team_code(home)
        away = check.team_code(away)

        gid = game_id(home, away)
        prechecked_gid = True

    # Load in plays from the identified game, or from all games if game ID = 0
    plays_from_game = load.plays_data(gid=gid, prechecked_gid=prechecked_gid)

    # Subset by the information about the play in the parameter play_info
    if play_info:
        for key, val in play_info.items():
            # If the desired parameter is not in the columns of the plays
            # data, alert user and skip this subsetting parameter
            if key not in plays_from_game.columns:
                print(f'{key} is not a valid column to use for subsetting as'
                      ' it does not appear in the dataset.')
                continue

            # If the value passed in the play_info dictionary is a list, use
            # the .isin() method for subsetting
            if isinstance(val, list):
                keep = plays_from_game[key].isin(val)

            # Otherwise, use the key and value alone
            else:
                keep = plays_from_game[key] == val

            plays_from_game = plays_from_game[keep]

    # If the passed parameters are enough to identify the play, there should
    # only be one play ID remaining. Return this value
    if len(plays_from_game) == 1:
        pid = plays_from_game['play_id'].values[0]

    # Otherwise list whatever is left and let the user pick the play
    else:
        for _, play in plays_from_game.iterrows():
            print(f'{play.game_id} -- {play.play_id} -- '
                  f'{play.down_dist_summary}')

        gid = input('Which game ID were you looking for?\nGame ID: ')
        pid = input('Which play of the above are you looking for?\nPlay ID: ')

        # input() returns raw strings, so run both answers through the
        # validators before returning
        gid = check.game_id(gid)
        prechecked_gid = True
        pid = check.play_id(gid, pid, prechecked_gid)

    return pid