Esempio n. 1
0
def yards_to_go(gid, pid, prechecked_gid=False, prechecked_pid=False):
    """
    Looks up the distance (in yards) needed for a first down on a play

    Parameters
    ----------
    gid: an integer of a game_id
    pid: an integer of a play_id
    prechecked_gid: a boolean; when falsy the game ID is validated here
        before any data is loaded
    prechecked_pid: a boolean; when falsy the play ID is validated here
        (against the game ID) before any data is loaded

    Returns
    -------
    The 'yds_to_go' value taken from the first record of the plays data
    loaded for the game and play
    """
    # Only run the validators for IDs the caller has not already vetted. The
    # flags are flipped afterwards so the loader does not validate again
    if not prechecked_gid:
        gid, prechecked_gid = check.game_id(gid), True

    if not prechecked_pid:
        pid, prechecked_pid = check.play_id(gid, pid), True

    # Pull the play's record and read the distance straight off of it
    play_record = load.plays_data(gid, pid, prechecked_gid, prechecked_pid)

    return play_record['yds_to_go'].iloc[0]
Esempio n. 2
0
def line_of_scrimmage(gid, pid, prechecked_gid=False, prechecked_pid=False):
    """
    Looks up the line of scrimmage for a specified play

    Parameters
    ----------
    gid: an integer of a game_id
    pid: an integer of a play_id
    prechecked_gid: a boolean; when falsy the game ID is validated here
        before any data is loaded
    prechecked_pid: a boolean; when falsy the play ID is validated here
        (against the game ID) before any data is loaded

    Returns
    -------
    The 'absolute_yard_line' value taken from the first record of the plays
    data loaded for the game and play
    """
    # Only run the validators for IDs the caller has not already vetted. The
    # flags are flipped afterwards so the loader does not validate again
    if not prechecked_gid:
        gid, prechecked_gid = check.game_id(gid), True

    if not prechecked_pid:
        pid, prechecked_pid = check.play_id(gid, pid), True

    # Pull the play's record and read the yard line straight off of it
    play_record = load.plays_data(gid, pid, prechecked_gid, prechecked_pid)

    return play_record['absolute_yard_line'].iloc[0]
Esempio n. 3
0
def n_frames(gid,
             pid,
             tracking=pd.DataFrame(),
             prechecked_gid=False,
             prechecked_pid=False):
    """
    Counts the frames recorded for a play by taking the largest frame ID

    Parameters
    ----------
    gid: an integer of a game_id
    pid: an integer of a play_id
    tracking: a dataframe of tracking information with a 'frame_id' column.
        When it is empty (the default), tracking data is loaded for the week
        in which the game was played
    prechecked_gid: a boolean; when falsy the game ID is validated here
    prechecked_pid: a boolean; when falsy the play ID is validated here

    Returns
    -------
    The maximum value of the 'frame_id' column of the tracking data
    """
    # Only run the validators for IDs the caller has not already vetted
    if not prechecked_gid:
        gid, prechecked_gid = check.game_id(gid), True

    if not prechecked_pid:
        pid, prechecked_pid = check.play_id(gid, pid), True

    # Without caller-supplied tracking, fall back to loading it. The week is
    # looked up from the game ID, so it is passed along as already checked
    if tracking.empty:
        tracking = load.tracking_data(gid,
                                      pid,
                                      game_week(gid),
                                      prechecked_gid,
                                      prechecked_pid,
                                      prechecked_week=True)

    # The highest frame ID is treated as the frame count
    return tracking['frame_id'].max()
Esempio n. 4
0
def game_week(gid):
    """
    Looks up the week in which a particular game was played

    Parameters
    ----------
    gid: an integer of a game_id

    Returns
    -------
    The 'week' value of the first schedule record whose 'game_id' matches
    the validated game ID
    """
    # The game ID is always validated here; there is no precheck flag
    gid = check.game_id(gid)

    # Find the schedule row(s) belonging to this game
    schedule = load.games_data()
    is_this_game = schedule['game_id'] == gid

    return schedule.loc[is_this_game, 'week'].iloc[0]
Esempio n. 5
0
def game_teams(gid):
    """
    Looks up the two teams that played in a specified game (via game_id)

    Parameters
    ----------
    gid: an integer of a game_id

    Returns
    -------
    home: the 'home' value of the first schedule record for the game
    away: the 'away' value of the first schedule record for the game
    """
    # The game ID is always validated here; there is no precheck flag
    gid = check.game_id(gid)

    # Find the schedule row(s) belonging to this game
    schedule = load.games_data()
    is_this_game = schedule['game_id'] == gid

    # Read both team codes off of the first matching row
    home = schedule.loc[is_this_game, 'home'].iloc[0]
    away = schedule.loc[is_this_game, 'away'].iloc[0]

    return home, away
def plays_and_games(gid=0, home='', away='', prechecked_gid=False):
    """
    Merges play and game data together to better illustrate what plays are
    being run by which team and against which opponent

    Parameters
    ----------
    gid: an integer of a game_id. A value of 0 (the default) means no game ID
        was supplied
    home: a string representing the home team's team code. Only consulted
        when no game ID is supplied
    away: a string representing the away team's team code. Only consulted
        when no game ID is supplied
    prechecked_gid: a boolean of whether or not the game ID has been prechecked

    Returns
    -------
    plays_from_game: a merged dataframe of play and game data, extended with
        'offensive_team' and 'defensive_team' columns
    """
    if gid != 0:
        # A game ID was supplied: validate it unless the caller already did
        if not prechecked_gid:
            gid = check.game_id(gid)
            prechecked_gid = True

    # Only when the game ID is NOT passed, try to get a game ID based on the
    # home and away team. Previously this branch also ran when a game ID was
    # supplied, silently replacing the validated game ID
    elif home != '' or away != '':
        home = check.team_code(home)
        away = check.team_code(away)

        gid = find.game_id(home, away)
        prechecked_gid = True

    # Load in plays from the identified game, or from all games if game ID = 0
    plays_from_game = load.plays_data(gid=gid, prechecked_gid=prechecked_gid)

    # Load in the games data to merge
    games_data = load.games_data(
        gid, prechecked_gid)[['game_id', 'home', 'away', 'week']]

    plays_from_game = pd.merge(left=plays_from_game,
                               right=games_data,
                               how='inner',
                               on='game_id')

    # The team with possession is the offense; the defense is whichever of
    # the home/away teams is not the offense
    plays_from_game['offensive_team'] = plays_from_game['possession_team']
    plays_from_game['defensive_team'] = np.where(
        plays_from_game['offensive_team'] == plays_from_game['home'],
        plays_from_game['away'], plays_from_game['home'])

    # Fix the column order of the returned frame
    plays_from_game = plays_from_game[[
        'game_id', 'play_id', 'play_description', 'quarter', 'down',
        'yds_to_go', 'possession_team', 'play_type', 'yardline_side',
        'yardline_number', 'offense_formation', 'personnel_offense',
        'defenders_in_box', 'n_pass_rushers', 'personnel_defense',
        'type_dropback', 'presnap_away_score', 'presnap_home_score',
        'game_clock', 'absolute_yard_line', 'penalty_code', 'penalty_player',
        'pass_result', 'offensive_play_result', 'play_result', 'epa',
        'is_defensive_pi', 'down_dist_summary', 'home', 'away',
        'offensive_team', 'defensive_team', 'week'
    ]]

    return plays_from_game
def field(gid=0, home='nfl', away='', show=False, unit='yd', zero='l'):
    """
    Renders a football field decorated for the two teams in a game. The teams
    come from the game_id when one is given, otherwise from the home and away
    arguments

    Parameters
    ----------
    gid: an int of a game_id for which to draw the field. A value of 0 (the
        default) means the home and away arguments are used instead
    home: a string of the home team's code. Ignored if a game_id is provided.
        The value 'nfl' (any casing) selects the NFC/AFC pairing
    away: a string of the away team's code. Ignored if a game_id is provided
    show: a boolean of whether or not to show the plot
    unit: a string for the plot units. Default is 'yd'. Within this function
        it only selects where the midfield logo is placed; any value other
        than 'yd' uses the alternate placement
    zero: a string for where the origin of the plot should be. Accepted but
        not used by this function

    Returns
    -------
    fig, ax: the figure and axes objects (respectively) when show is False.
        When show is True the plot is displayed and None is returned
    """
    if gid == 0:
        # No game ID: work from the supplied team codes. 'NFL' is shorthand
        # for an NFC (home) vs. AFC (away) field; anything else is validated
        home = home.upper()
        away = away.upper()
        if home == 'NFL':
            home, away = 'NFC', 'AFC'
        else:
            home = check.team_code(home)
            away = check.team_code(away)
    else:
        # A game ID was supplied: validate it and look the teams up from it
        gid = check.game_id(gid)
        home, away = find.game_teams(gid)

    # Pull the styling records for both teams
    team_info = load.teams_data()
    home_info = team_info[team_info['team_code'] == home]
    away_info = team_info[team_info['team_code'] == away]

    # Load the coordinates of every feature painted on the field
    (sidelines, endlines, goal_lines, midline, minor_yd_lines_b,
     minor_yd_lines_t, minor_yd_lines_l, minor_yd_lines_u, major_yd_lines,
     hashes_l, hashes_u, extra_pt_mark, arrow_40_l, arrow_40_u,
     arrow_30_l, arrow_30_u, arrow_20_l, arrow_20_u, arrow_10_l,
     arrow_10_u, field_marks) = load.football_field_coords()

    # Set up the canvas: equal aspect, fixed size, hidden axes, green turf
    fig, ax = plt.subplots()

    ax.set_aspect('equal')
    fig.set_size_inches(50, 22.2)
    ax.xaxis.set_visible(False)
    ax.yaxis.set_visible(False)
    ax.set_facecolor('#196f0c')

    # Midfield logo: league-level codes share the NFL logo, every other team
    # uses the image named after its code
    if home.lower() in ['', 'nfl', 'nfc', 'afc']:
        logo_file = 'nfl.png'
    else:
        logo_file = f'{home}.png'

    logo = plt.imread(os.path.join('img', 'logos', logo_file))

    if unit == 'yd':
        logo_extent = [52., 68., 18.65, 34.65]
    else:
        logo_extent = [-18., 18., -18., 18.]

    ax.imshow(logo, extent=logo_extent, zorder=10)

    white = '#ffffff'

    # Boundary lines, yard lines, hash marks and extra point markers
    line_shapes = (sidelines, endlines, goal_lines, midline,
                   minor_yd_lines_b, minor_yd_lines_t, minor_yd_lines_l,
                   minor_yd_lines_u, major_yd_lines, hashes_l, hashes_u,
                   extra_pt_mark)
    for shape in line_shapes:
        ax.fill(shape['x'], shape['y'], white)

    # Yard numbers
    for _, mark in field_marks.iterrows():
        ax.text(x=mark['x'],
                y=mark['y'],
                s=mark['text'],
                fontsize=50,
                color=white,
                fontweight='bold',
                rotation=mark['rotation'],
                fontname='Impact')

    # Directional arrows next to the yard numbers
    arrow_shapes = (arrow_40_l, arrow_40_u, arrow_30_l, arrow_30_u,
                    arrow_20_l, arrow_20_u, arrow_10_l, arrow_10_u)
    for shape in arrow_shapes:
        ax.fill(shape['x'], shape['y'], white)

    # Team nicknames: home at x=5 rotated 90, away at x=114 rotated -90
    endzone_labels = ((5, home_info, 90), (114, away_info, -90))
    for x_pos, info, angle in endzone_labels:
        ax.text(x=x_pos,
                y=26.65,
                s=f'{info.nickname.iloc[0]}',
                fontdict={
                    'ha': 'center',
                    'va': 'center'
                },
                fontsize=100,
                fontweight='bold',
                fontname='Impact',
                color=f'{info.endzone_text.iloc[0]}',
                rotation=angle,
                path_effects=[
                    pe.withStroke(linewidth=20,
                                  foreground=f'{info.endzone_shadow.iloc[0]}')
                ])

    # Either hand the plot back to the caller or display it
    if not show:
        return fig, ax

    plt.show()
    return None
def play_gif(gid=0,
             pid=0,
             home='',
             away='',
             prechecked_gid=False,
             prechecked_pid=False,
             tracking=pd.DataFrame()):
    """
    Builds a gif of a play by drawing every frame as a static image in a
    temporary directory, stitching the images together, and then removing
    the temporary directory

    Parameters
    ----------
    gid: an int representing the game_id. A value of 0 (the default) means
        the game is looked up from the home and away arguments
    pid: an int representing the play_id
    home: a string of the home team's code. Ignored if a game_id is provided
    away: a string of the away team's code. Ignored if a game_id is provided
    prechecked_gid: a boolean of whether or not the game ID has been checked
        before being passed to the function
    prechecked_pid: a boolean of whether or not the play ID has been checked
        before being passed to the function
    tracking: a dataframe of tracking data for the play. When it is empty
        (the default), the merged tracking and plays data is loaded

    Returns
    -------
    None. The gif is written through file_ops.make_gif()
    """
    # If a game ID is provided, get the home and away team from the provided
    # game ID
    if gid != 0:
        # Start by checking the game ID if it is provided but not yet checked
        if not prechecked_gid:
            gid = check.game_id(gid)
            prechecked_gid = True

        # Get the home and away teams for the game
        home, away = find.game_teams(gid)

    # If no game ID provided, and the home team is 'NFL', set home and away
    # to NFC and AFC respectively. Otherwise, check to make sure the teams are
    # legit and look up their game
    else:
        home = home.upper()
        away = away.upper()
        if home == 'NFL':
            home = 'NFC'
            away = 'AFC'
        else:
            home = check.team_code(home)
            away = check.team_code(away)
            gid = find.game_id(home, away)

    # Next, check the play ID if it has not already been checked
    if not prechecked_pid:
        pid = check.play_id(gid, pid, prechecked_gid)
        prechecked_pid = True

    # If tracking isn't supplied, load all relevant tracking data
    if tracking.empty:
        tracking = merge.tracking_and_plays(gid, pid)

    # Get the number of frames in the play
    n_frames = find.n_frames(gid=gid,
                             pid=pid,
                             tracking=tracking,
                             prechecked_gid=True,
                             prechecked_pid=True)

    # Make the temporary directory to hold static images
    file_ops.make_gif_temp_dir(gid, pid)
    frame_dir = os.path.join('img', 'temp', f'{gid}_{pid}')

    # Make each frame as a static image
    for i in np.arange(1, n_frames + 1):
        print(f'Processing frame {i} of {n_frames}')
        fig, _ = play_frame(gid,
                            pid,
                            frame_no=i,
                            prechecked_gid=True,
                            prechecked_pid=True,
                            tracking=tracking,
                            prechecked_frame=True)

        # Left-pad the frame number with zeros (same scheme as before: three
        # zeros below 10, two below 100, one otherwise)
        if i < 10:
            frame_tag = f'000{i}'
        elif i < 100:
            frame_tag = f'00{i}'
        else:
            frame_tag = f'0{i}'

        fname = os.path.join(frame_dir, f'{gid}_{pid}_{frame_tag}.png')

        # Save this frame's figure, then always close it. pyplot keeps every
        # figure alive until it is explicitly closed, so leaving them open
        # accumulates one full-size figure per frame
        try:
            fig.savefig(fname, bbox_inches='tight', pad_inches=0)
        finally:
            plt.close(fig)

    # Name the gif after the play's down and distance summary when it is
    # available and is a string; otherwise fall back to the play ID
    try:
        gif_fname = tracking['down_dist_summary'].values[0] + '.gif'

    except (KeyError, IndexError, TypeError):
        gif_fname = str(pid) + '.gif'

    # Collect the static images
    images = file_ops.collect_gif_play_frames(gid, pid)

    # Make and save the gif
    file_ops.make_gif(gid, pid, images, fname=gif_fname)

    # Delete the temporary directory that holds all static images.
    file_ops.remove_temp_static_frame_directory(gid, pid)

    return None
def play_frame(gid=0,
               pid=0,
               home='',
               away='',
               frame_no=0,
               plot_los=True,
               plot_first_down_marker=True,
               plot_arrows=True,
               prechecked_gid=False,
               prechecked_pid=False,
               prechecked_frame=False,
               tracking=pd.DataFrame()):
    """
    Draw a frame of a given play. Teams are either supplied via the home and
    away arguments, or by looking them up from the game_id provided by the gid
    argument

    Parameters
    ----------
    gid: an int representing the game_id
    pid: an int representing the play_id
    home: a string of the home team's code. Not necessary if a game_id is
        provided
    away: a string of the away team's code. Not necessary if a game_id is
        provided
    frame_no: the number of the frame to plot
    plot_los: a boolean of whether or not to plot the line of scrimmage on the
        plot
    plot_first_down_marker: a boolean of whether or not to plot the first
        down line on the plot
    plot_arrows: a boolean of whether or not to draw an arrow from each
        player based on the player's 'player_orientation' value
    prechecked_gid: a boolean of whether or not the game ID has been checked
        before being passed to the function
    prechecked_pid: a boolean of whether or not the play ID has been checked
        before being passed to the function
    prechecked_frame: a boolean indicating whether or not it's okay to
        skip the frame validation. Defaulting to False, but should be set to
        True when the frame number is already known to be valid (as
        play_gif() does)
    tracking: a dataframe of tracking data that can be used to speed up
        plotting. Note that a 'jersey_num_orientation' column is written onto
        the dataframe that is passed in

    Returns
    -------
    fig, ax: the figure and axes objects (respectively)
    """
    if gid != 0:
        # Start by checking the game ID if it is provided but not yet checked
        if not prechecked_gid:
            gid = check.game_id(gid)
            prechecked_gid = True

        # Get the home and away teams for the game
        home, away = find.game_teams(gid)

    # If no game ID provided, and the home team is 'NFL', set home and away
    # to NFC and AFC respectively. Otherwise, check to make sure the teams are
    # legit
    else:
        home = home.upper()
        away = away.upper()
        if home == 'NFL':
            home = 'NFC'
            away = 'AFC'
        else:
            home = check.team_code(home)
            away = check.team_code(away)
            gid = find.game_id(home, away)

    # Next, check the play ID if it has not already been checked
    if not prechecked_pid:
        pid = check.play_id(gid, pid, prechecked_gid)
        prechecked_pid = True

    # If tracking isn't supplied, load all relevant tracking data
    if tracking.empty:
        tracking = merge.tracking_and_plays(gid, pid)

    if not prechecked_frame:
        frame_no = check.frame_no(gid, pid, frame_no, tracking)

    # Start prepping the data for the plot. Primarily, the jersey numbers'
    # rotation angle based on team and play direction
    tracking['jersey_num_orientation'] = orient_jersey_num(
        gid, pid, prechecked_gid, prechecked_pid, tracking)

    # Split the frame's data into the home team, the away team, and the ball's
    # data (respectively)
    in_frame = tracking['frame_id'] == frame_no
    home_frame = tracking[(tracking['team'] == 'home') & in_frame]
    away_frame = tracking[(tracking['team'] == 'away') & in_frame]
    ball_frame = tracking[(tracking['team'] == 'football') & in_frame]

    # Get the hex color information about each team to use to make the plot
    teams_info = load.teams_data()
    home_info = teams_info[teams_info['team_code'] == home]
    away_info = teams_info[teams_info['team_code'] == away]

    home_uni_base = home_info['home_uni_base'].iloc[0]
    home_uni_highlight = home_info['home_uni_highlight'].iloc[0]
    home_uni_number = home_info['home_uni_number'].iloc[0]
    home_uni_number_highlight = home_info['home_uni_number_highlight'].iloc[0]

    away_uni_base = away_info['away_uni_base'].iloc[0]
    away_uni_highlight = away_info['away_uni_highlight'].iloc[0]
    away_uni_number = away_info['away_uni_number'].iloc[0]
    away_uni_number_highlight = away_info['away_uni_number_highlight'].iloc[0]

    # If the line of scrimmage is to be plotted, determine its position.
    # Stays None otherwise so the drawing step below can be skipped safely
    los = None
    if plot_los:
        los = _play_frame_stripe(find.line_of_scrimmage(gid, pid))

    # If the first down line is to be plotted, determine its position
    first_down_line = None
    if plot_first_down_marker:
        first_down = find.first_down_line(gid, pid, tracking, prechecked_gid,
                                          prechecked_pid)
        first_down_line = _play_frame_stripe(first_down)

    # Draw the field
    fig, ax = field(gid)

    # Plot both teams' players, jersey numbers and (optionally) arrows
    _play_frame_draw_team(ax, home_frame, home_uni_base, home_uni_highlight,
                          home_uni_number, home_uni_number_highlight,
                          plot_arrows)
    _play_frame_draw_team(ax, away_frame, away_uni_base, away_uni_highlight,
                          away_uni_number, away_uni_number_highlight,
                          plot_arrows)

    # Plot the ball
    ball_frame.plot('player_x',
                    'player_y',
                    kind='scatter',
                    ax=ax,
                    color='#624a2e',
                    s=100,
                    edgecolor='#000000',
                    linewidth=2,
                    zorder=15)

    # Only draw the markers that were requested. These fills used to run
    # unconditionally, which failed on an unassigned variable whenever
    # plot_los or plot_first_down_marker was False
    if los is not None:
        ax.fill(los['x'], los['y'], '#183ec1')
    if first_down_line is not None:
        ax.fill(first_down_line['x'], first_down_line['y'], '#ffcb05')

    return fig, ax


def _play_frame_stripe(x_center):
    """
    Builds the closed outline of a thin vertical stripe centered on x_center,
    as a dataframe of 'x' and 'y' coordinates
    """
    half_width = 2 / 12
    left = x_center - half_width
    right = x_center + half_width

    return pd.DataFrame({
        'x': [left, right, right, left, left],
        'y': [1 / 9, 1 / 9, 53 + (2 / 9), 53 + (2 / 9), 1 / 9]
    })


def _play_frame_draw_team(ax, frame, uni_base, uni_highlight, uni_number,
                          uni_number_highlight, plot_arrows):
    """
    Draws one team's players for a single frame onto ax: a marker per player,
    the jersey number on top of it, and optionally an orientation arrow
    """
    frame.plot(x='player_x',
               y='player_y',
               kind='scatter',
               ax=ax,
               color=uni_base,
               s=800,
               edgecolor=uni_highlight,
               linewidth=2,
               zorder=15)

    for _, player in frame.iterrows():
        # Jersey number, centered on the player's marker
        ax.text(
            x=player['player_x'],
            y=player['player_y'],
            s=str(int(player['player_no'])),
            fontsize=15,
            color=uni_number,
            path_effects=[
                pe.withStroke(linewidth=3, foreground=uni_number_highlight)
            ],
            fontweight='bold',
            rotation=player['jersey_num_orientation'],
            zorder=20,
            fontdict={
                'ha': 'center',
                'va': 'center'
            },
        )

        if plot_arrows:
            # NOTE(review): math.cos/math.sin take radians; confirm that
            # 'player_orientation' is stored in radians rather than degrees
            ax.arrow(x=player['player_x'],
                     y=player['player_y'],
                     dx=3 * math.cos(player['player_orientation']),
                     dy=3 * math.sin(player['player_orientation']),
                     length_includes_head=True,
                     width=0.3,
                     color=uni_highlight,
                     zorder=14)
def orient_jersey_num(gid,
                      pid,
                      prechecked_gid=False,
                      prechecked_pid=False,
                      tracking=pd.DataFrame()):
    """
    Works out the rotation angle to use for each jersey number on a plotted
    play, based on the team and the direction of the play

    Parameters
    ----------
    gid: an integer of a game_id
    pid: an integer of a play_id
    prechecked_gid: a boolean; when falsy the game ID is validated here
    prechecked_pid: a boolean; when falsy the play ID is validated here
    tracking: a dataframe of tracking data with 'team' and 'play_direction'
        columns. When it is empty (the default), the merged tracking and
        plays data is loaded instead. The dataframe is modified in place: a
        'jersey_num_orientation' column is written onto it

    Returns
    -------
    The 'jersey_num_orientation' column of the tracking data: 0 for the
    football, and 90 or -90 for players depending on team and play direction
    """
    # Validate the game ID first; from then on it counts as checked, which is
    # what gets passed along to the play ID validation
    if not prechecked_gid:
        gid = check.game_id(gid)
        prechecked_gid = True

    if not prechecked_pid:
        pid = check.play_id(gid, pid, prechecked_gid)

    # Fall back to loading the play's tracking data when none was supplied
    if tracking.empty:
        tracking = merge.tracking_and_plays(gid, pid)

    angle_col = 'jersey_num_orientation'

    # The ball is never rotated
    tracking.loc[tracking['team'] == 'football', angle_col] = 0

    # (team, play direction) -> rotation angle. The two teams are rotated
    # opposite to each other, and the angles swap with the play direction
    angle_rules = (
        ('home', 'right', -90),
        ('away', 'right', 90),
        ('home', 'left', 90),
        ('away', 'left', -90),
    )

    for side, direction, angle in angle_rules:
        selector = ((tracking['team'] == side)
                    & (tracking['play_direction'] == direction))
        tracking.loc[selector, angle_col] = angle

    return tracking[angle_col]
Esempio n. 11
0
def plays_data(gid = 0, pid = 0, prechecked_gid = False,
               prechecked_pid = False):
    """
    Loads the plays information provided

    The plays file is read once and narrowed to the requested game and/or
    play. When a play ID is supplied without having been prechecked and no
    matching play exists, a message is printed and the broader set of plays
    (all plays of the game, or all plays when no game ID is given) is
    returned instead.

    Parameters
    ----------
    gid: an integer of a game_id. A value of 0 (the default) means all games
    pid: an integer of a play_id. A value of 0 (the default) means all plays
    prechecked_gid: a boolean of whether or not the game ID has been checked
        before being passed to the function
    prechecked_pid: a boolean of whether or not the play ID has been checked
        before being passed to the function. A prechecked play ID is filtered
        on directly, without the fallback described above

    Returns
    -------
    plays: a data frame containing a cleaned, renamed copy of plays
        information
    """
    # If the game ID is supplied but has not yet been checked, check it now
    if gid != 0 and not prechecked_gid:
        gid = check.game_id(gid)

    # Read the file a single time; every subset below is a filter of it
    all_plays = pd.read_csv(fp.plays_data_file)

    # Narrow to the requested game (or keep everything when gid is 0)
    if gid != 0:
        game_plays = all_plays[all_plays['gameId'] == gid]
    else:
        game_plays = all_plays

    plays = game_plays

    if pid != 0:
        plays = game_plays[game_plays['playId'] == pid]

        # An unchecked play ID that matches nothing falls back to the broader
        # set and alerts the user. A prechecked play ID is trusted as-is;
        # previously that combination with gid == 0 left `plays` unassigned
        # and the function failed
        if plays.empty and not prechecked_pid:
            plays = game_plays
            if gid == 0:
                print(f'Play ID {pid} does not exist. All plays for all '
                       'games will be returned.')
            else:
                print(f'Play ID {pid} does not exist for game {gid}. All '
                      f'plays for game {gid} will be returned.')

    # Work on an independent copy so the column edits below never write into
    # a filtered view of the full table
    plays = plays.copy()

    # Rename columns. This assumes the file holds exactly these 27 columns in
    # this order
    renamed_columns = [
        'game_id', 'play_id', 'play_description', 'quarter', 'down',
        'yds_to_go', 'possession_team', 'play_type', 'yardline_side',
        'yardline_number', 'offense_formation', 'personnel_offense',
        'defenders_in_box', 'n_pass_rushers', 'personnel_defense',
        'type_dropback', 'presnap_away_score', 'presnap_home_score',
        'game_clock', 'absolute_yard_line', 'penalty_code', 'penalty_player',
        'pass_result', 'offensive_play_result', 'play_result', 'epa',
        'is_defensive_pi'
    ]
    plays.columns = renamed_columns

    # Drop the last three characters of the game clock
    plays['game_clock'] = plays['game_clock'].str[:-3]

    # Create a pre-play down and distance summary with relevant game info
    down_str = plays['down'].astype(str)
    for digit, ordinal in (('1', '1st'), ('2', '2nd'), ('3', '3rd'),
                           ('4', '4th')):
        down_str.loc[down_str == digit] = ordinal

    qtr = 'Q' + plays['quarter'].astype(str)

    plays['down_dist_summary'] = qtr + ' - ' + \
        plays['game_clock'].astype(str) + ' - ' + plays['possession_team'] + \
        ' - ' + down_str + ' & ' + plays['yds_to_go'].astype(str) + \
        ' from ' + plays['yardline_side'] + ' ' + \
        plays['yardline_number'].astype(str)

    # Remove the 'play_type_' prefix from all play_type records
    plays['play_type'] = plays['play_type'].str.replace('play_type_', '')

    # Change coding of pass_result column
    for code, label in (('C', 'COMPLETE'), ('I', 'INCOMPLETE'),
                        ('S', 'SACK'), ('IN', 'INTERCEPTION'),
                        ('R', 'SCRAMBLE')):
        plays.loc[plays['pass_result'] == code, 'pass_result'] = label
    plays.loc[plays['pass_result'].isna(), 'pass_result'] = None

    # Keep only the necessary columns
    plays = plays[renamed_columns + ['down_dist_summary']]

    return plays
Esempio n. 12
0
def tracking_data(gid = 0, pid = 0, week = 0, prechecked_gid = False,
                  prechecked_pid = False, prechecked_week = False):
    """
    Loads the tracking information for a specified week, game and/or play

    Parameters
    ----------
    gid: an integer of a game_id. A value of 0 means that no game filter is
        applied. The default is 0.
    pid: an integer of a play_id. A value of 0 means that no play filter is
        applied. The default is 0.
    week: an integer of which week's tracking data to return. A value of
        0 implies to return all weeks' tracking (unless a game ID is given,
        in which case the week of that game is looked up). The default is 0.
    prechecked_gid: a boolean of whether or not the game ID has been checked
        before being passed to the function
    prechecked_pid: a boolean of whether or not the play ID has been checked
         before being passed to the function
    prechecked_week: a boolean of whether or not the week number has been
        checked before being passed to the function

    Returns
    -------
    trk: a data frame containing a cleaned, renamed copy of tracking
        information for the specified week, with the angular columns
        converted to radians
    """
    if gid == 0 and week == 0:
        # Neither the game ID nor the week number were passed, so load all
        # weeks (this is slow). The play ID is not used on this path.
        # NOTE(review): range(1, 17) loads week1.csv through week16.csv only;
        # confirm whether a week17.csv should be included as well.
        weekly_frames = []
        for wk in range(1, 17):
            print(f'Loading week {wk}...', end = '\r')
            week_file = os.path.join(fp.data_dir, f'week{wk}.csv')
            weekly_frames.append(pd.read_csv(week_file))

        # Concatenate once at the end rather than re-copying the growing
        # frame on every iteration
        trk = pd.concat(weekly_frames)
    else:
        # If the game ID is provided, but not checked, check the game ID first
        if gid != 0:
            if not prechecked_gid:
                gid = check.game_id(gid)

            # Without an explicit week, use the week the game was played in
            if week == 0:
                week = find.game_week(gid)
                prechecked_week = True

        if week != 0:
            # If the week is prechecked, that's great. If not, check the week
            if not prechecked_week:
                week = check.week_number(week)

        # Only reachable if the week lookup for the game yielded 0; let the
        # week checker deal with that value
        elif gid != 0:
            week = check.week_number(week)

        # If the play ID is provided, but not checked, check the play ID next
        if pid != 0 and not prechecked_pid:
            pid = check.play_id(gid, pid)

        # Now that the relevant data has all been checked, load the week's
        # tracking file once and subset it according to what was supplied
        week_file = os.path.join(fp.data_dir, f'week{week}.csv')
        trk = pd.read_csv(week_file)

        if gid != 0 and pid != 0:
            # Only the tracking information for this play in this game
            trk = trk[(trk['gameId'] == gid) & (trk['playId'] == pid)]
        elif gid != 0:
            # All tracking data from all plays of this game
            trk = trk[trk['gameId'] == gid]
        elif pid != 0:
            # All tracking data in the week with a matching play ID. The raw
            # column is 'playId', the same one used in the game + play filter
            trk = trk[trk['playId'] == pid]
        # Otherwise keep all tracking data for the week

    # Rename columns (positional, so the raw file must have these 19 columns
    # in this order)
    trk.columns = [
        'time', 'player_x', 'player_y', 'player_speed', 'player_acceleration',
        'distance', 'player_orientation', 'player_direction', 'event_str',
        'player_id', 'player_name', 'player_no', 'player_position', 'frame_id',
        'team', 'game_id', 'play_id', 'play_direction', 'route_type'
    ]

    # Correct the angular variables to be plottable (needs to be in radians)
    for angle_col in ('player_orientation', 'player_direction'):
        trk[angle_col] = np.mod(90 - trk[angle_col], 360)
        trk[angle_col] *= math.pi / 180

    return trk
Esempio n. 13
0
def plays_matching(gid=0,
                   home='',
                   away='',
                   play_info={},
                   prechecked_gid=False):
    """
    Finds all plays that match a set of game and play criteria

    Parameters
    ----------
    gid: an integer of a game_id
    home: a string representing the home team's team code
    away: a string representing the away team's team code
    play_info: a dictionary of parameters to use for subsetting. The keys MUST
        be columns in the plays data to be used. If not, they will be ignored.
        Values may be a single value or a list of acceptable values. The
        passed dictionary is not modified
    prechecked_gid: a boolean of whether or not the game ID has been prechecked

    Returns
    -------
    plays_from_game: a dataframe of plays that match the passed criteria
    """
    # Game ID should be the primary lookup tool, so start with checking the
    # game ID if this is passed and not already checked
    if gid != 0 and not prechecked_gid:
        gid = check.game_id(gid)
        prechecked_gid = True

    # If a home and/or away team is passed, look the game ID up from the teams.
    # NOTE(review): this replaces a game ID that was passed explicitly;
    # confirm whether the team lookup should only run when gid == 0
    if home != '' or away != '':
        home = check.team_code(home)
        away = check.team_code(away)

        gid = game_id(home, away)
        prechecked_gid = True

    # Load in plays data
    plays_from_game = merge.plays_and_games(gid, home, away, prechecked_gid)

    # Subset by the information about the play in the parameter play_info
    if play_info:
        # Work on a shallow copy so that the caller's dictionary (and the
        # shared default) is never modified by the upper-casing below
        play_info = dict(play_info)

        # Fix all strings for these keys to be upper case
        upper_case_keys = (
            'offensive_team', 'defensive_team', 'possession_team', 'home',
            'away', 'pass_result', 'type_dropback'
        )

        for key in upper_case_keys:
            if key not in play_info:
                continue

            val = play_info[key]

            if isinstance(val, list):
                # Lists are valid subsetting values, so upper-case each string
                # element instead of calling .upper() on the list itself
                play_info[key] = [
                    item.upper() if isinstance(item, str) else item
                    for item in val
                ]
            else:
                play_info[key] = val.upper()

        for key, val in play_info.items():
            # If the desired parameter is not in the columns of the plays data,
            # alert user and skip this subsetting parameter
            if key not in plays_from_game.columns:
                print(f'{key} is not a valid column to use for subsetting as'
                      ' it does not appear in the dataset.')
                continue

            # If the value passed in the plays_info dictionary is a list, use
            # the .isin() method for subsetting
            if isinstance(val, list):
                plays_from_game = plays_from_game[
                    plays_from_game[f'{key}'].isin(val)]

            # Otherwise, use the key and value alone
            else:
                plays_from_game = plays_from_game[plays_from_game[f'{key}'] ==
                                                  val]

    # Return all plays that match the criteria
    return plays_from_game
Esempio n. 14
0
def first_down_line(gid,
                    pid,
                    tracking=pd.DataFrame(),
                    prechecked_gid=False,
                    prechecked_pid=False):
    """
    Finds what yardline is needed to be gained to achieve a first down

    Parameters
    ----------
    gid: an integer of a game_id
    pid: an integer of a play_id
    tracking: a set of tracking information pertaining to a particular play.
        If none is provided (or an empty data frame is passed), the tracking
        data for this game and play will be loaded. This is the default
    prechecked_gid: a boolean of whether or not the game ID has been checked
        before being passed to the function
    prechecked_pid: a boolean of whether or not the play ID has been checked
         before being passed to the function

    Returns
    -------
    first_down_yardline: a float representing the absolute yardline needed
        to achieve a first down
    """
    if not prechecked_gid:
        # Validate the game ID
        gid = check.game_id(gid)
        prechecked_gid = True

    if not prechecked_pid:
        # Validate the play ID
        pid = check.play_id(gid, pid)
        prechecked_pid = True

    # Get the line of scrimmage and number of yards needed to achieve a first
    # down. Both IDs are validated by this point, so tell the helpers not to
    # validate them a second time
    los = line_of_scrimmage(gid,
                            pid,
                            prechecked_gid=True,
                            prechecked_pid=True)
    distance_to_first = yards_to_go(gid,
                                    pid,
                                    prechecked_gid=True,
                                    prechecked_pid=True)

    # Load in the appropriate tracking data for the desired play if it was
    # not supplied. The default data frame is only ever rebound here, never
    # modified in place
    if tracking.empty:
        # The week of the game is only needed to pick the correct tracking
        # file, so only load the schedule data when tracking must be loaded
        games = load.games_data(gid, prechecked_gid)
        week = games.loc[games['game_id'] == gid, 'week'].iloc[0]

        tracking = load.tracking_data(gid,
                                      pid,
                                      week,
                                      prechecked_gid=True,
                                      prechecked_pid=True,
                                      prechecked_week=True)

    # Get the direction of play from the first tracking record. If the play is
    # going right, yards will be added, otherwise they will be subtracted
    play_direction = tracking['play_direction'].iloc[0]

    # Calculate the yardline needed to be gained to achieve a first down
    if play_direction == 'right':
        first_down_yardline = los + distance_to_first
    else:
        first_down_yardline = los - distance_to_first

    return first_down_yardline
Esempio n. 15
0
def play_id(gid=0, home='', away='', play_info={}, prechecked_gid=False):
    """
    Looks up the play ID of a single play from game and play criteria

    Parameters
    ----------
    gid: an integer of a game_id
    home: a string representing the home team's team code
    away: a string representing the away team's team code
    play_info: a dictionary of parameters to use for subsetting. The keys MUST
        be columns in the plays data to be used. If not, they will be ignored.
        Values may be a single value or a list of acceptable values
    prechecked_gid: a boolean of whether or not the game ID has been prechecked

    Returns
    -------
    pid: an integer of a play_id. If the criteria do not narrow the plays
        down to exactly one, the remaining plays are printed and the user is
        prompted for the game ID and play ID instead
    """
    # A supplied game ID is validated first, unless it was already checked
    if gid != 0 and not prechecked_gid:
        gid = check.game_id(gid)
        prechecked_gid = True

    # When either team code is supplied, validate both codes and look the
    # game ID up from the pair of teams
    if not (home == '' and away == ''):
        home = check.team_code(home)
        away = check.team_code(away)

        gid = game_id(home, away)
        prechecked_gid = True

    # Plays of the identified game, or of all games when the game ID is 0
    candidates = load.plays_data(gid=gid, prechecked_gid=prechecked_gid)

    # Narrow the candidate plays down with each usable entry of play_info
    if play_info:
        for column, wanted in play_info.items():
            # Entries that are not columns of the plays data are reported to
            # the user and skipped
            if column not in candidates.columns:
                print(f'{column} is not a valid column to use for subsetting as'
                      ' it does not appear in the dataset.')
                continue

            # Lists match any of their members, anything else must be equal
            if type(wanted) is list:
                keep = candidates[f'{column}'].isin(wanted)
            else:
                keep = candidates[f'{column}'] == wanted

            candidates = candidates[keep]

    # Exactly one remaining play means the criteria identified it
    if len(candidates) == 1:
        return candidates['play_id'].values[0]

    # Otherwise show what is left and let the user pick the game and play
    for _, play in candidates.iterrows():
        print(f'{play.game_id} -- {play.play_id} -- '
              f'{play.down_dist_summary}')

    gid = input('Which game ID were you looking for?\nGame ID: ')
    pid = input('Which play of the above are you looking for?\nPlay ID: ')

    # The typed values go through the same checks as any other IDs
    gid = check.game_id(gid)
    pid = check.play_id(gid, pid, True)

    return pid