Esempio n. 1
0
def game_id(gid):
    """
    Checks a game ID exists in the provided dataset. Will prompt until
    a valid game ID is provided

    Non-numeric answers at the interactive prompts never raise: an
    unparseable week falls back to listing every game, and an unparseable
    game ID is simply reported as invalid and asked for again.

    Parameters
    ----------
    gid: a game_id, either an integer or any value int() can convert
        (e.g. a numeric string)

    Returns
    -------
    gid: a validated integer of a game_id
    """
    # Load in the schedule information
    games = load.games_data()

    # Get a list of all possible game IDs
    valid_game_ids = games['game_id'].tolist()

    while True:
        # Normalise the candidate to a plain int where possible. Only the
        # errors int() raises for unusable input are ignored; the value is
        # then left as-is and rejected by the membership test below
        try:
            gid = int(gid)
        except (TypeError, ValueError, OverflowError):
            pass

        # If the game ID is a valid game ID, we are done
        if gid in valid_game_ids:
            return gid

        # If not, force the user to supply a new game ID
        print(f'{gid} is not a valid game ID.')
        try:
            week = int(
                input('If the week number is known, enter it now, or '
                      'else enter 0 to see a list of all games: '))
        except ValueError:
            # Anything that is not a number is treated like 0 so the user
            # still gets a list of games to pick from
            week = 0

        # If user knows what week the game took place, allow them to only
        # see games from that week. Otherwise, show all games
        if week != 0:
            week = week_number(week)
            listing = games[games['week'] == week]
            print(f'\nGAMES IN WEEK {week}:\n')
        else:
            listing = games
            print('\nALL GAMES:\n')

        for _, game in listing.iterrows():
            print(f'{game.game_id} -- {game.away} @ {game.home}')

        # Keep the raw answer; the conversion at the top of the loop turns
        # it into an int or leaves it to be reported as invalid
        gid = input('Game ID: ')
Esempio n. 2
0
def game_id(home, away):
    """
    Finds the game_id of a game between the home and away team. The function
    checks whether or not the teams are valid, and if the teams are reversed,
    will provide the correct game_id for the meeting of these two teams

    Parameters
    ----------
    home: a string of the home team's code
    away: a string of the away team's code

    Returns
    -------
    desired_game_id: the game_id of the game in which home hosted away, or,
        if no such game exists, of the game in which away hosted home
    """
    # Validate that the home and away team codes supplied are valid team codes
    home = check.team_code(home)
    away = check.team_code(away)

    # Bring in the schedule information once; it is the same for every
    # attempt, so there is no need to reload it on each retry
    games = load.games_data()

    while True:
        # Check if the game existed as supplied
        desired_game = games[(games['home'] == home) & (games['away'] == away)]
        if len(desired_game) == 1:
            break

        # Otherwise, alert user that the home team did not host the away team
        print(f'{home} did not host {away}. Checking if {away} hosted '
              f'{home}')

        # Check if away team hosted home team
        desired_game = games[(games['home'] == away)
                             & (games['away'] == home)]
        if len(desired_game) == 1:
            break

        # Neither fixture matched exactly one game, so prompt the user to
        # supply two new team codes and try again
        print(f'{home} and {away} did not play each other in this '
              'dataset')
        home = check.team_code('')
        away = check.team_code('')

    # Once a game has been identified, give back the game ID for the game
    desired_game_id = desired_game['game_id'].iloc[0]

    return desired_game_id
Esempio n. 3
0
def game_week(gid):
    """
    Looks up the week of the schedule in which a game took place

    Parameters
    ----------
    gid: an integer of a game_id

    Returns
    -------
    week: the value of the 'week' column for the first schedule row whose
        game_id matches the validated gid
    """
    # The ID is validated first so the lookup below has a row to return
    gid = check.game_id(gid)

    schedule = load.games_data()

    # Boolean mask selecting the schedule row(s) for this game
    is_requested_game = schedule['game_id'] == gid
    weeks_for_game = schedule.loc[is_requested_game, 'week']

    return weeks_for_game.iloc[0]
Esempio n. 4
0
def game_teams(gid):
    """
    Looks up which two teams met in a given game (via game_id)

    Parameters
    ----------
    gid: an integer of a game_id

    Returns
    -------
    home: the 'home' value of the first schedule row matching the game
    away: the 'away' value of the first schedule row matching the game
    """
    # The ID is validated first so the lookups below have a row to return
    gid = check.game_id(gid)

    schedule = load.games_data()

    # Build the row selector once and reuse it for both columns
    is_requested_game = schedule['game_id'] == gid

    home_code = schedule.loc[is_requested_game, 'home'].iloc[0]
    away_code = schedule.loc[is_requested_game, 'away'].iloc[0]

    return home_code, away_code
def tracking_and_plays(gid=0,
                       pid=0,
                       tracking=pd.DataFrame(),
                       play=pd.DataFrame()):
    """
    Combines tracking, play-level and schedule data into a single dataframe

    Any dataframe that is not supplied (i.e. is empty) is loaded, narrowed
    by whichever of gid / pid is non-zero.

    Parameters
    ----------
    gid: an integer of a game_id; 0 means "not specified"
    pid: an integer of a play_id; 0 means "not specified"
    tracking: a dataframe of tracking data; loaded when empty
    play: a dataframe of play-level data; loaded when empty

    Returns
    -------
    tracking_and_plays: tracking inner-joined to plays on game_id and
        play_id, then inner-joined to the schedule's home / away / week
        columns on game_id, with offensive_team and defensive_team columns
        added
    """
    game_given = gid != 0
    play_given = pid != 0

    # Load tracking data only when the caller did not hand any over
    if tracking.empty:
        if game_given:
            # The game's week is passed to the loader in both game branches
            week = find.game_week(gid)
            if play_given:
                tracking = load.tracking_data(gid=gid, pid=pid, week=week)
            else:
                tracking = load.tracking_data(gid=gid, week=week)
        elif play_given:
            # Play ID without a game: the loader is called with week=0
            tracking = load.tracking_data(pid=pid, week=0)
        else:
            # Nothing specified: call the loader with its defaults
            tracking = load.tracking_data()

    # Same idea for the play-level data
    if play.empty:
        if game_given and play_given:
            play = load.plays_data(gid=gid, pid=pid)
        elif game_given:
            play = load.plays_data(gid=gid)
        elif play_given:
            play = load.plays_data(pid=pid)
        else:
            play = load.plays_data()

    # Tracking rows joined to their play
    merged = pd.merge(left=tracking,
                      right=play,
                      how='inner',
                      on=['game_id', 'play_id'])

    # Attach the schedule columns for each game
    schedule_columns = ['game_id', 'home', 'away', 'week']
    schedule = load.games_data()[schedule_columns]
    merged = pd.merge(left=merged,
                      right=schedule,
                      how='inner',
                      on='game_id')

    # The offense is the possession team; the defense is the other of
    # home / away
    merged['offensive_team'] = merged['possession_team']
    offense_is_home = merged['offensive_team'] == merged['home']
    merged['defensive_team'] = np.where(offense_is_home, merged['away'],
                                        merged['home'])

    return merged
def plays_and_games(gid=0, home='', away='', prechecked_gid=False):
    """
    Joins play-level data to schedule data so each play carries the teams
    involved, which of them was on offense / defense, and the week

    Parameters
    ----------
    gid: an integer of a game_id; 0 means "not specified"
    home: a string representing the home team's team code
    away: a string representing the away team's team code
    prechecked_gid: a boolean of whether or not the game ID has been prechecked

    Returns
    -------
    plays_from_game: a merged dataframe of play and game data, restricted to
        a fixed set of columns

    """
    # A non-zero game ID is validated unless the caller already did so
    if gid != 0 and not prechecked_gid:
        gid = check.game_id(gid)
        prechecked_gid = True

    # When either team code is supplied, the game ID is looked up from the
    # two teams and replaces whatever gid was passed in
    teams_supplied = home != '' or away != ''
    if teams_supplied:
        home = check.team_code(home)
        away = check.team_code(away)

        gid = find.game_id(home, away)
        prechecked_gid = True

    # Plays for the identified game (gid is passed through as-is, 0 included)
    plays = load.plays_data(gid=gid, prechecked_gid=prechecked_gid)

    # Schedule columns needed for the join
    schedule = load.games_data(gid, prechecked_gid)
    schedule = schedule[['game_id', 'home', 'away', 'week']]

    combined = pd.merge(left=plays,
                        right=schedule,
                        how='inner',
                        on='game_id')

    # The offense is the possession team; the defense is the other of
    # home / away
    combined['offensive_team'] = combined['possession_team']
    offense_is_home = combined['offensive_team'] == combined['home']
    combined['defensive_team'] = np.where(offense_is_home, combined['away'],
                                          combined['home'])

    # Columns to return, in this order
    output_columns = [
        'game_id', 'play_id', 'play_description', 'quarter', 'down',
        'yds_to_go', 'possession_team', 'play_type', 'yardline_side',
        'yardline_number', 'offense_formation', 'personnel_offense',
        'defenders_in_box', 'n_pass_rushers', 'personnel_defense',
        'type_dropback', 'presnap_away_score', 'presnap_home_score',
        'game_clock', 'absolute_yard_line', 'penalty_code', 'penalty_player',
        'pass_result', 'offensive_play_result', 'play_result', 'epa',
        'is_defensive_pi', 'down_dist_summary', 'home', 'away',
        'offensive_team', 'defensive_team', 'week'
    ]

    return combined[output_columns]
Esempio n. 7
0
def first_down_line(gid,
                    pid,
                    tracking=pd.DataFrame(),
                    prechecked_gid=False,
                    prechecked_pid=False):
    """
    Computes the yardline the offense must reach to earn a first down

    Parameters
    ----------
    gid: an integer of a game_id
    pid: an integer of a play_id
    tracking: tracking data for the play. When empty (the default), the
        tracking data for this game, play and week is loaded instead
    prechecked_gid: a boolean of whether or not the game ID has been checked
        before being passed to the function
    prechecked_pid: a boolean of whether or not the play ID has been checked
        before being passed to the function

    Returns
    -------
    first_down_yardline: the line of scrimmage plus the yards to go when the
        play direction is 'right', otherwise the line of scrimmage minus the
        yards to go
    """
    # Validate whichever identifiers the caller has not already checked
    if not prechecked_gid:
        gid = check.game_id(gid)
        prechecked_gid = True

    if not prechecked_pid:
        pid = check.play_id(gid, pid)

    # The schedule gives the week, which the tracking loader needs
    schedule = load.games_data(gid, prechecked_gid)
    is_requested_game = schedule['game_id'] == gid
    week = schedule.loc[is_requested_game, 'week'].iloc[0]

    # Starting spot of the play and the distance required from there
    scrimmage = line_of_scrimmage(gid, pid)
    yards_needed = yards_to_go(gid, pid)

    # Fall back to loading the play's tracking data when none was supplied.
    # Everything has been validated by now, hence the prechecked flags
    if tracking.empty:
        tracking = load.tracking_data(gid,
                                      pid,
                                      week,
                                      prechecked_gid=True,
                                      prechecked_pid=True,
                                      prechecked_week=True)

    # The first tracking row decides the direction: yards are added for a
    # play going right and subtracted otherwise
    heading_right = tracking['play_direction'].iloc[0] == 'right'

    if heading_right:
        return scrimmage + yards_needed
    return scrimmage - yards_needed