Esempio n. 1
0
def update_player_logs_from_page(pbp, season, game):
    """
    Takes the game play by play and adds players to the master player log file, noting that they were on the roster
    for this game, which team they played for, and their status (P for played, S for scratch).

    If the boxscore has no players or scratches entry for a team, that team is treated as having an empty
    list rather than raising a TypeError.

    :param pbp: json, the pbp of the game
    :param season: int, the season
    :param game: int, the game

    :return: nothing
    """

    # Collect (played, scratches) for the home side first, then the road side
    rosters = []
    for side in ('home', 'away'):
        # The accessor yields None when the path is missing; fall back to empty containers
        dressed = helpers.try_to_access_dict(pbp, 'liveData', 'boxscore', 'teams', side, 'players') or {}
        scratches = helpers.try_to_access_dict(pbp, 'liveData', 'boxscore', 'teams', side, 'scratches') or []

        # The players entry is keyed by a two-character prefix followed by the numeric player id
        # (presumably 'ID<playerid>' -- confirm against the API), so strip the prefix.
        # It may include scratches, so make sure to remove them.
        played = list({int(pid[2:]) for pid in dressed}.difference(scratches))
        rosters.append((played, scratches))

    # Get home and road names
    gameinfo = schedules.get_game_data_from_schedule(season, game)

    # Update player logs: home played, home scratches, road played, road scratches
    for (played, scratches), team_key in zip(rosters, ('Home', 'Road')):
        update_player_log_file(played, season, game, gameinfo[team_key], 'P')
        update_player_log_file(scratches, season, game, gameinfo[team_key], 'S')
Esempio n. 2
0
def get_player_info_from_url(playerid):
    """
    Downloads the player page for this ID and pulls out ID, Name, Hand, Pos, DOB, Height, Weight, and Nationality.

    Every field is read from the first entry of the 'people' list in the response. Height has its
    internal spaces stripped when present.

    :param playerid: int, the player id

    :return: dict with keys ID, Name, Hand, Pos, DOB, Height, Weight, Nationality
    """
    raw_page = helpers.try_url_n_times(get_player_url(playerid))
    payload = json.loads(raw_page)

    # Output key -> path below payload['people'][0]
    field_paths = (
        ('ID', ('id',)),
        ('Name', ('fullName',)),
        ('Hand', ('shootsCatches',)),
        ('Pos', ('primaryPosition', 'code')),
        ('DOB', ('birthDate',)),
        ('Height', ('height',)),
        ('Weight', ('weight',)),
        ('Nationality', ('nationality',)),
    )
    info = {
        label: helpers.try_to_access_dict(payload, 'people', 0, *path)
        for label, path in field_paths
    }

    # Remove the space in the middle of height
    height = info['Height']
    if height is not None:
        info['Height'] = height.replace(' ', '')
    return info
def update_schedule_with_coaches(pbp, season, game):
    """
    Reads the first listed coach of each team from the PbP boxscore and records both on the schedule.

    :param pbp: json, the pbp for this game
    :param season: int, the season
    :param game: int, the game

    :return: nothing
    """

    # Home is looked up first, then away (road)
    coach_names = [
        helpers.try_to_access_dict(pbp, 'liveData', 'boxscore', 'teams', side,
                                   'coaches', 0, 'person', 'fullName')
        for side in ('home', 'away')
    ]
    home_name, road_name = coach_names
    _update_schedule_with_coaches(season, game, home_name, road_name)
def update_schedule_with_result_using_pbp(pbp, season, game):
    """
    Works out the result of this game from the home team's perspective and writes it to the schedule.

    Possible results are W, L, OTW, OTL, SOW, SOL, or None when the winner cannot be determined
    (e.g. equal scores, like 2006 10009 which has incomplete data and shows 0-0).

    :param pbp: json, the pbp for this game
    :param season: int, the season
    :param game: int, the game

    :return: False if the game is not final yet (schedule is left untouched), otherwise nothing
    """

    gameinfo = schedules.get_game_data_from_schedule(season, game)

    # If game is not final yet, don't do anything
    if gameinfo['Status'] != 'Final':
        return False

    home_goals = gameinfo['HomeScore']
    road_goals = gameinfo['RoadScore']

    if home_goals >= road_goals + 2:
        # If one team won by at least two, we know it was a regulation win
        outcome = 'W'
    elif road_goals >= home_goals + 2:
        outcome = 'L'
    else:
        outcome = None
        # One-goal (or tied) game: the last period tells us SO vs OT vs regulation
        last_period = helpers.try_to_access_dict(pbp, 'liveData', 'linescore', 'currentPeriodOrdinal')
        if last_period is not None:
            if last_period == 'SO':
                prefix = 'SO'
            elif last_period[-2:] == 'OT':
                prefix = 'OT'
            else:
                prefix = ''

            if home_goals > road_goals:
                outcome = prefix + 'W'
            elif road_goals > home_goals:
                outcome = prefix + 'L'

    update_schedule_with_result(season, game, outcome)
Esempio n. 5
0
def check_default_player_id(playername):
    """
    Looks up the default player ID to assume for an ambiguous player name.

    E.g. For Mike Green, I should automatically assume we mean 8471242 (WSH/DET), not 8468436.
    Returns None if not in dict.
    Ideally improve code so this isn't needed.

    :param playername: str

    :return: int, or None
    """
    # TODO gradually add to this
    default_ids = {
        'Mike Green': 8471242,
        'Francois Beauchemin': 8467400,
        'Erik Karlsson': 8474578,
        'Mike Hoffman': 8474884,
        'Tyler Johnson': 8474870,
        'Josh Anderson': 8476981,
        'Sebastian Aho': 8478427,
    }
    try:
        return default_ids.get(playername)
    except TypeError:
        # Unhashable input (e.g. a list) can never be a known name
        return None
Esempio n. 6
0
def read_events_from_page(rawpbp, season, game):
    """
    Converts the json pbp into a pandas dataframe of events. The base columns are:

    * Index: int, index of event
    * Period: str, period of event. In regular season, could be 1, 2, 3, OT, or SO. In playoffs, 1, 2, 3, 4, 5...
    * MinSec: str, m:ss, time elapsed in period
    * Time: int, time elapsed in game
    * Event: str, the event name
    * Team: int, the team id. For blocked shots this is swapped to the other team to ease Corsi calculations.
    * Actor: int, the acting player id. Switched with recipient for blocks (see above)
    * ActorRole: str, e.g. for faceoffs there is a "Winner" and "Loser". Switched with recipient for blocks (see above)
    * Recipient: int, the receiving player id. Switched with actor for blocks (see above)
    * RecipientRole: str, e.g. for faceoffs there is a "Winner" and "Loser". Switched with actor for blocks (see above)
    * X: int, the x coordinate of event (or NaN)
    * Y: int, the y coordinate of event (or NaN)
    * Note: str, additional notes, which may include penalty duration, assists on a goal, etc.

    Score and time information is then added, but only when there is at least one event.

    :param rawpbp: json, the raw json pbp
    :param season: int, the season
    :param game: int, the game

    :return: pandas dataframe, the pbp in a nicer format; None if the json has no list of plays
    """
    plays = helpers.try_to_access_dict(rawpbp, 'liveData', 'plays', 'allPlays')
    if plays is None:
        return None

    schedule_row = schedules.get_game_data_from_schedule(season, game)
    events = _create_pbp_df_json(plays, schedule_row)

    # Nothing to enrich when there are no events; hand back the empty frame as-is
    if not len(events):
        return events

    with_scores = _add_scores_to_pbp(events, schedule_row)
    return _add_times_to_pbp(with_scores)
Esempio n. 7
0
def read_shifts_from_page(rawtoi, season, game):
    """
    Turns JSON shift start-ends into a dataframe of shifts (PlayerID, Period, Start, End, Team, Duration)
    and passes it on for the final TOI manipulations.

    Start and End are seconds elapsed in the game, assuming 1200-second periods. Start has 1 added to it
    (see comment in the body); End is taken as-is. Duration is End minus the adjusted Start.

    :param rawtoi: dict, json from NHL API; shifts are read from its 'data' entry
    :param season: int, the season
    :param game: int, the game

    :return: dataframe, or None if there are no shifts
    """

    def _elapsed_seconds(period, clock):
        """Converts a period number and an 'm:ss' clock into seconds elapsed in the game."""
        minutes, seconds = clock.split(':', 1)
        return 1200 * (period - 1) + 60 * int(minutes) + int(seconds)

    toi = rawtoi['data']
    if not toi:
        # Covers both an empty list and a null 'data' entry
        return None

    ids, periods, starts, ends, teams = [], [], [], [], []
    for shift in toi:
        ids.append(helpers.try_to_access_dict(shift, 'playerId', default_return=''))
        periods.append(helpers.try_to_access_dict(shift, 'period', default_return=0))
        starts.append(helpers.try_to_access_dict(shift, 'startTime', default_return='0:00'))
        ends.append(helpers.try_to_access_dict(shift, 'endTime', default_return='0:00'))
        teams.append(helpers.try_to_access_dict(shift, 'teamId', default_return=''))

    # I originally took start times at face value and subtracted 1 from end times.
    # This caused problems with joining events--when there's a shot and the goalie freezes immediately
    # then, when you join this to the pbp, you'll get the players on the ice for the next draw as having
    # been on ice for the shot.
    # So I switched to adding 1 to start times, and leaving end times as-are.
    # That means that when joining on faceoffs, add 1 to faceoff times.
    # Note that a start time of 1 is NOT mapped back to 0 here.
    starttimes = [_elapsed_seconds(p, clock) + 1 for p, clock in zip(periods, starts)]
    endtimes = [_elapsed_seconds(p, clock) for p, clock in zip(periods, ends)]

    # Computed from the adjusted times rather than taken from the json 'duration' field
    durationtime = [end - start for start, end in zip(starttimes, endtimes)]

    df = pd.DataFrame({
        'PlayerID': ids,
        'Period': periods,
        'Start': starttimes,
        'End': endtimes,
        'Team': teams,
        'Duration': durationtime
    })

    return _finish_toidf_manipulations(df, season, game)
Esempio n. 8
0
def _create_pbp_df_json(pbp, gameinfo):
    """
    Creates a pandas dataframe from the pbp, making use of gameinfo (from schedule file) as well

    :param pbp: dict, from pbp json
    :param gameinfo: dict, single row from schedule file

    :return: dataframe
    """

    index = [i for i in range(len(pbp))]
    period = ['' for _ in range(len(pbp))]
    times = ['0:00' for _ in range(len(pbp))]
    event = ['NA' for _ in range(len(pbp))]

    team = [-1 for _ in range(len(pbp))]
    p1 = [-1 for _ in range(len(pbp))]
    p1role = ['' for _ in range(len(pbp))]
    p2 = [-1 for _ in range(len(pbp))]
    p2role = ['' for _ in range(len(pbp))]
    xs = [np.NaN for _ in range(len(pbp))]
    ys = [np.NaN for _ in range(len(pbp))]
    note = ['' for _ in range(len(pbp))]

    for i in range(len(pbp)):
        period[i] = helpers.try_to_access_dict(pbp,
                                               i,
                                               'about',
                                               'period',
                                               default_return='')
        times[i] = helpers.try_to_access_dict(pbp,
                                              i,
                                              'about',
                                              'periodTime',
                                              default_return='0:00')
        event[i] = helpers.try_to_access_dict(pbp,
                                              i,
                                              'result',
                                              'event',
                                              default_return='NA')

        xs[i] = float(
            helpers.try_to_access_dict(pbp,
                                       i,
                                       'coordinates',
                                       'x',
                                       default_return=np.NaN))
        ys[i] = float(
            helpers.try_to_access_dict(pbp,
                                       i,
                                       'coordinates',
                                       'y',
                                       default_return=np.NaN))
        team[i] = helpers.try_to_access_dict(pbp,
                                             i,
                                             'team',
                                             'id',
                                             default_return=-1)

        p1[i] = helpers.try_to_access_dict(pbp,
                                           i,
                                           'players',
                                           0,
                                           'player',
                                           'id',
                                           default_return=-1)
        p1role[i] = helpers.try_to_access_dict(pbp,
                                               i,
                                               'players',
                                               0,
                                               'playerType',
                                               default_return='')
        p2[i] = helpers.try_to_access_dict(pbp,
                                           i,
                                           'players',
                                           1,
                                           'player',
                                           'id',
                                           default_return=-1)
        p2role[i] = helpers.try_to_access_dict(pbp,
                                               i,
                                               'players',
                                               1,
                                               'playerType',
                                               default_return='')

        note[i] = helpers.try_to_access_dict(pbp,
                                             i,
                                             'result',
                                             'description',
                                             default_return='')
        if event[i] == 'Goal':
            # Two changes to make
            # First, make the recipient of this goal the opposing goalie
            # Second, replace player names with player IDs in the description of this goal (scorer and assists)
            p2[i] = None
            p2role[i] = None
            for j in range(len(pbp[i]['players'])):
                pid = helpers.try_to_access_dict(pbp,
                                                 i,
                                                 'players',
                                                 j,
                                                 'player',
                                                 'id',
                                                 default_return=None)
                pname = helpers.try_to_access_dict(pbp,
                                                   i,
                                                   'players',
                                                   j,
                                                   'player',
                                                   'fullName',
                                                   default_return=None)
                prole = helpers.try_to_access_dict(pbp,
                                                   i,
                                                   'players',
                                                   j,
                                                   'playerType',
                                                   default_return='')
                if prole == 'Goalie':
                    p2[i] = pid
                    p2role[i] = prole
                elif pid is not None:
                    note[i] = note[i].replace(pname, str(int(pid)))

    # Switch blocked shots from being an event for player who blocked, to player who took shot that was blocked
    # That means switching team attribution and actor/recipient.
    # TODO: why does schedule have str, not int, home and road here?
    switch_teams = {
        gameinfo['Home']: gameinfo['Road'],
        gameinfo['Road']: gameinfo['Home']
    }
    team_sw = [
        team[i] if event[i] != "Blocked Shot" else switch_teams[team[i]]
        for i in range(len(team))
    ]
    p1_sw = [
        p1[i] if event[i] != "Blocked Shot" else p2[i] for i in range(len(p1))
    ]
    p2_sw = [
        p2[i] if event[i] != "Blocked Shot" else p1[i] for i in range(len(p2))
    ]
    p1role_sw = [
        p1role[i] if event[i] != "Blocked Shot" else p2role[i]
        for i in range(len(p1role))
    ]
    p2role_sw = [
        p2role[i] if event[i] != "Blocked Shot" else p1role[i]
        for i in range(len(p2role))
    ]

    pbpdf = pd.DataFrame({
        'Index': index,
        'Period': period,
        'MinSec': times,
        'Event': event,
        'Team': team_sw,
        'Actor': p1_sw,
        'ActorRole': p1role_sw,
        'Recipient': p2_sw,
        'RecipientRole': p2role_sw,
        'X': xs,
        'Y': ys,
        'Note': note
    })
    return pbpdf
Esempio n. 9
0
def _create_schedule_dataframe_from_json(jsondict):
    """
    Reads game, game type, status, visitor ID, home ID, visitor score, and home score for each game in this dict

    :param jsondict: a dictionary formed from season schedule json

    :return: pandas dataframe
    """
    dates = []
    games = []
    gametypes = []
    statuses = []
    vids = []
    vscores = []
    hids = []
    hscores = []
    venues = []
    for datejson in jsondict['dates']:
        try:
            date = datejson.get('date', None)
            for gamejson in datejson['games']:
                game = int(
                    str(helpers.try_to_access_dict(gamejson, 'gamePk'))[-5:])
                gametype = helpers.try_to_access_dict(gamejson, 'gameType')
                status = helpers.try_to_access_dict(gamejson, 'status',
                                                    'detailedState')
                vid = helpers.try_to_access_dict(gamejson, 'teams', 'away',
                                                 'team', 'id')
                vscore = int(
                    helpers.try_to_access_dict(gamejson, 'teams', 'away',
                                               'score'))
                hid = helpers.try_to_access_dict(gamejson, 'teams', 'home',
                                                 'team', 'id')
                hscore = int(
                    helpers.try_to_access_dict(gamejson, 'teams', 'home',
                                               'score'))
                venue = helpers.try_to_access_dict(gamejson, 'venue', 'name')

                dates.append(date)
                games.append(game)
                gametypes.append(gametype)
                statuses.append(status)
                vids.append(vid)
                vscores.append(vscore)
                hids.append(hid)
                hscores.append(hscore)
                venues.append(venue)
        except KeyError:
            pass
    df = pd.DataFrame({
        'Date': dates,
        'Game': games,
        'Type': gametypes,
        'Status': statuses,
        'Road': vids,
        'RoadScore': vscores,
        'Home': hids,
        'HomeScore': hscores,
        'Venue': venues
    }).sort_values('Game')
    return df