Esempio n. 1
0
def parse_event(event):
    """
    Parse a single play-by-play event given as decoded json.

    Reads ``event['result']`` and ``event['about']`` and, when present,
    ``event['players']`` and ``event['coordinates']``.

    :param event: json (dict) of the event
    :return: dictionary with the info. 'Secondary_Type', 'Penl_Length',
             the 'P<n>_Name'/'P<n>_Id' pairs and 'xC'/'yC' are only present
             when the event provides them ('xC'/'yC' are '' when players are
             listed but coordinates are missing).
    """
    result = event['result']
    about = event['about']
    type_id = result['eventTypeId']

    play = dict()
    play['Event_Type'] = str(change_event_name(type_id))
    play['Event_Desc'] = result['description']
    # 'Event_Num' is deliberately not taken from about['eventIdx']: the json
    # index does not match the event number from the html report.
    play['Period'] = about['period']
    play['Time_Elapsed'] = convert_to_seconds(about['periodTime'])
    play['Away_Score'] = about['goals']['away']
    play['Home_Score'] = about['goals']['home']

    if type_id == "SHOT":
        # The feed does not always carry a secondaryType; treat a missing or
        # null value as "no shot type" instead of raising.
        shot_desc = result.get('secondaryType') or ''
        # NOTE(review): "Wrist" is not in this list, so wrist shots get no
        # Secondary_Type here - confirm whether that is intentional.
        for label in ("Snap", "Slap", "Backhand", "Tip-In", "Wrap-around",
                      "Deflected"):
            if label in shot_desc:
                play["Secondary_Type"] = label
                break
    elif type_id == "MISSED_SHOT":
        # Takes the text after the first '-' of the description.
        play["Secondary_Type"] = result['description'].split('-')[1]
    elif type_id == "PENALTY":
        if 'secondaryType' in result:
            play["Secondary_Type"] = result['secondaryType']
        play["Penl_Length"] = result['penaltyMinutes']
    elif type_id == "GOAL":
        if 'secondaryType' in result:
            play["Secondary_Type"] = result['secondaryType']

    # If there's a players key that means an event occurred on the play.
    if 'players' in event:
        players = event['players']
        if players:
            # Coordinates aren't always there. Look them up once rather than
            # once per player.
            try:
                x_coord = event['coordinates']['x']
                y_coord = event['coordinates']['y']
            except KeyError:
                x_coord = y_coord = ''

        for slot, entry in enumerate(players, start=1):
            person = entry['player']
            play['P{}_Name'.format(slot)] = fix_name(person['fullName'].upper())
            play['P{}_Id'.format(slot)] = str(person['id'])

            # Assigned inside the loop so the key order of the result (and
            # the absence of xC/yC when no player is listed) stays unchanged.
            play['xC'] = x_coord
            play['yC'] = y_coord

    return play
Esempio n. 2
0
def analyze_shifts(shift, name, team):
    """
    Turn one scraped html shift entry into a flat record.

    Only used for the html source, where every player already has a list of
    shift entries.

    :param shift: indexable shift entry; positions 0-4 are read as shift
                  number, period, start, end and duration
    :param name: player name
    :param team: given team
    :return: dict with info for shift
    """

    def clock_seconds(field):
        # Only the part before the first '/' is converted.
        return convert_to_seconds(field.split('/')[0])

    record = {'Player': name.upper()}

    period = shift[1]
    # Overtime is reported as 'OT'; it is stored as period '4'.
    record['Period'] = '4' if period == 'OT' else period

    record['Team'] = fix_team(team.strip(' '))
    record['Shift'] = shift[0]
    record['Start'] = clock_seconds(shift[2])
    record['End'] = clock_seconds(shift[3])
    record['Duration'] = clock_seconds(shift[4])

    return record
Esempio n. 3
0
def parse_shift(shift):
    """
    Parse shift for json

    Entries whose 'eventDescription' is not None are skipped: an empty dict
    is returned for them without touching any other field.
    (presumably those entries mark game events rather than shifts - TODO
    confirm against the feed)

    :param shift: json (dict) for shift
    :return: dict with shift info, or an empty dict for skipped entries
    """
    # Guard first so skipped entries cost nothing and cannot fail on
    # missing/odd name or time fields that would be thrown away anyway.
    if shift['eventDescription'] is not None:
        return dict()

    name = fix_name(' '.join([shift['firstName'].strip(' ').upper(),
                              shift['lastName'].strip(' ').upper()]))

    return {
        'Period': shift['period'],
        'Player': name,
        'Team': shift['teamAbbrev'],
        'Shift': shift['shiftNumber'],
        'Start': convert_to_seconds(shift['startTime']),
        'End': convert_to_seconds(shift['endTime']),
        'Duration': convert_to_seconds(shift['duration']),
    }
Esempio n. 4
0
def parse_event(event):
    """
    Parse one event string whose fields are separated by '~'.

    Fields used: the 1st and 2nd are the x and y coordinates, the 4th is the
    time elapsed, the 5th is the period and the 9th is the event type.

    :param event: string with info

    :return: dict with relevant info, or None for period '5'
    """
    parts = event.split('~')
    period = parts[4]

    # Shootouts screw everything up so don't bother...coordinates don't matter there either way
    if period == '5':
        return None

    return {
        'xC': int(parts[0]),
        'yC': int(parts[1]),
        'Time_Elapsed': convert_to_seconds(parts[3]),
        'Period': period,
        'Event_Type': event_type(parts[8].upper()),
    }
Esempio n. 5
0
# Event codes (as they appear in the cleaned html rows) that parse_pbp keeps.
_PBP_EVENT_CODES = (
    'PSTR', 'FAC', 'BLOCK', 'GEND', 'GIVE', 'GOAL', 'HIT', 'MISS', 'PEND',
    'SHOT', 'STOP', 'TAKE', 'PENL', 'EISTR', 'EIEND', 'SOC',
)

# Substring found in a penalty description -> Secondary_Type label.
# Order matters: every entry is tested and the LAST match wins, so the more
# specific phrases come after the generic ones they contain.
# NOTE(review): 'Match penalty' maps to 'Game Misconduct' (capital M) while
# the other entries use 'Game misconduct' - kept as-is, confirm intent.
_PBP_PENALTY_LABELS = {
    'Hooking': 'Hooking',
    'Holding': 'Holding',
    'Holding the stick': 'Holding the stick',
    'Interference': 'Interference',
    'Roughing': 'Roughing',
    'Tripping': 'Tripping',
    'Unsportsmanlike conduct': 'Unsportsmanlike conduct',
    'Illegal equipment': 'Illegal equipment',
    'Diving': 'Embellishment',
    'Embellishment': 'Embellishment',
    'Broken stick': 'Broken stick',
    'Delaying Game-Puck over glass': 'Delaying Game-Puck over glass',
    'Delay Gm - Face-off Violation': 'Delay Gm - Face-off Violation',
    'Delay of game': 'Delay of game',
    'Delaying the game': 'Delay of game',
    'Delay of game - bench': 'Delay of game - bench',
    'Delaying Game-Ill. play goalie': 'Delaying Game-Ill. play goalie',
    'Delaying Game-Smothering puck': 'Delaying Game-Smothering puck',
    'Goalie leave crease': 'Goalie leave crease',
    'Face-off violation-bench': 'Face-off violation-bench',
    'Bench': 'Bench',
    'Illegal stick': 'Illegal stick',
    'Closing hand on puck': 'Closing hand on puck',
    'Throwing stick': 'Throwing stick',
    'Too many men/ice - bench': 'Too many men/ice - bench',
    'Abusive language - bench': 'Abuse of officials - bench',
    'Abuse of officials - bench': 'Abuse of officials - bench',
    'Interference on goalkeeper': 'Interference on goalkeeper',
    'Hi-sticking': 'High-sticking',
    'Hi stick - double minor': 'High-sticking',
    'Cross checking': 'Cross checking',
    'Cross check - double minor': 'Cross checking',
    'Slashing': 'Slashing',
    'Charging': 'Charging',
    'Boarding': 'Boarding',
    'Kneeing': 'Kneeing',
    'Clipping': 'Clipping',
    'Elbowing': 'Elbowing',
    'Spearing': 'Spearing',
    'Butt ending': 'Butt ending',
    'Head butting': 'Head butting',
    'Illegal check to head': 'Illegal check to head',
    'Fighting': 'Fighting',
    'Instigator': 'Instigator',
    'Abuse of officials': 'Abuse of officials',
    'Aggressor': 'Aggressor',
    'PS-': "Penalty Shot",
    'PS -': "Penalty Shot",
    'Misconduct': 'Misconduct',
    'Game misconduct': 'Game misconduct',
    'Game Misconduct': 'Game misconduct',
    'Match penalty': 'Game Misconduct',
    'Checking from behind': 'Checking from behind',
}

# (substring, minutes) pairs; the FIRST match wins.
_PBP_PENALTY_LENGTHS = (('2 min', 2), ('4 min', 4), ('5 min', 5), ('10 min', 10))

_PBP_SHOT_TYPES = (
    'Wrist', 'Snap', 'Slap', 'Deflected', 'Tip-In', 'Backhand', 'Wrap-around',
)

_PBP_MISS_TYPES = ('Over Net', 'Wide of Net', 'Crossbar', 'Goalpost')

# Final column order of the returned frame (before the adjustment columns).
_PBP_COLUMNS = [
    'Game_Id', 'Period', 'Event_Num', 'Event_Type', 'Secondary_Type',
    'Tertiary_Type', 'Penl_Length', 'Event_Desc', 'Time_Elapsed',
    'P1_Team', 'P1_Num', 'P2_Team', 'P2_Num', 'P3_Team', 'P3_Num',
    'P4_Team', 'P4_Num', 'Home_Score', 'Away_Score', 'Home_Zone',
    'Away_Zone', 'Home_Strength', 'Away_Strength', 'G_Pulled_Home',
    'G_Pulled_Away', 'Dist', 'H1Name', 'H1Num', 'H1Pos', 'H2Name', 'H2Num',
    'H2Pos', 'H3Name', 'H3Num', 'H3Pos', 'H4Name', 'H4Num', 'H4Pos',
    'H5Name', 'H5Num', 'H5Pos', 'H6Name', 'H6Num', 'H6Pos', 'A1Name',
    'A1Num', 'A1Pos', 'A2Name', 'A2Num', 'A2Pos', 'A3Name', 'A3Num',
    'A3Pos', 'A4Name', 'A4Num', 'A4Pos', 'A5Name', 'A5Num', 'A5Pos',
    'A6Name', 'A6Num', 'A6Pos', 'Penalty_Shot',
]

# Home-minus-away score difference (clamped to [-3, 3]) ->
# (home coefficient, away coefficient).
_PBP_SCORE_ADJUSTMENTS = {
    3: (1.132, 0.895),
    2: (1.074, 0.936),
    1: (1.026, 0.975),
    0: (0.970, 1.032),
    -1: (0.915, 1.103),
    -2: (0.882, 1.154),
    -3: (0.850, 1.214),
}


def _pbp_set_last_match(play, key, candidates, text):
    """Store under key the last candidate contained in text, if any."""
    for candidate in candidates:
        if candidate in text:
            play[key] = candidate


def _pbp_set_other_team(play, key, teams, lead_team):
    """Store under key the team abbreviation that is not lead_team, if any."""
    for abbrev in teams.values():
        if abbrev != lead_team:
            play[key] = abbrev


def _pbp_set_numbers(play, pairs, lead_team, lead_key, other_key):
    """Store each (team, number) pair under lead_key or other_key by team."""
    for team, number in pairs:
        play[lead_key if team == lead_team else other_key] = number


def _pbp_set_facing_player(play, teams, lead_team, on_ice):
    """Store as P4_Num the number of the last on-ice player of the other side."""
    for side, abbrev in teams.items():
        if abbrev != lead_team:
            play['P4_Num'] = on_ice[side][-1][1]


def _pbp_zones(desc, code, lead_team, teams):
    """Return (home zone, away zone) for the description, or None if unknown."""
    by_home = lead_team == teams['home']
    by_away = lead_team == teams['away']

    if 'Off. Zone' in desc and (by_home or by_away):
        home_attacking = by_home
    elif 'Def. Zone' in desc and (by_home or by_away):
        # For BLOCK events the zone is read the other way round.
        # (presumably the zone is then reported from the other team's point
        # of view - TODO confirm against the report format)
        home_attacking = by_home == (code == 'BLOCK')
    elif 'Neu. Zone' in desc:
        return 'NZ', 'NZ'
    else:
        return None

    return ('OZ', 'DZ') if home_attacking else ('DZ', 'OZ')


def _pbp_score_adjustment(home_score, away_score):
    """Return the (home, away) adjustment coefficients for a score state."""
    diff = home_score - away_score
    if diff >= 3:
        return _PBP_SCORE_ADJUSTMENTS[3]
    if diff <= -3:
        return _PBP_SCORE_ADJUSTMENTS[-3]
    # Anything that is not a whole-number difference (e.g. NaN) gets NaN.
    return _PBP_SCORE_ADJUSTMENTS.get(diff, (np.nan, np.nan))


def parse_pbp(gameid):
    """
    Parse the cleaned html play-by-play of a game into a pd DataFrame.

    Each cleaned row is read positionally: [0] event number ('#' rows are
    skipped), [1] period, [3] time, [4] event code, [5] description,
    [6] away on-ice players and [7] home on-ice players, each player being
    (name, number, position). The team abbreviations are the first three
    characters of fields [6] and [7] of the first row.

    Home_Score / Away_Score hold the running score *before* the event, so a
    GOAL row still shows the score prior to that goal.

    :param gameid: game id
    :return: df where each row is an event in pbp, with the columns of
             _PBP_COLUMNS plus 'Home_Adjustment' and 'Away_Adjustment'
    """
    html = get_pbp(gameid)
    events = clean_html_pbp(html)

    teams = {'away': events[0][6][:3], 'home': events[0][7][:3]}
    score = {'home': 0, 'away': 0}

    # Compiled once instead of once per event.
    team_number_re = re.compile(r'(.{3})\s+#(\d+)')  # -> [(team, num), ...]
    number_re = re.compile(r'#(\d+)')
    goal_number_re = re.compile(r'#(\d+)\s+')  # scorer plus up to 2 assists
    digits_re = re.compile(r'\d+')

    pbp_events = []

    for event in events:
        code = event[4]
        if code not in _PBP_EVENT_CODES or event[0] == '#':
            continue

        desc = event[5]
        lead_team = desc[:3]  # descriptions start with a team abbreviation
        on_ice = {'away': event[6], 'home': event[7]}

        play = {
            'Home_Score': score['home'],
            'Away_Score': score['away'],
            'Event_Num': event[0],
            'Period': event[1],
            'Time_Elapsed': convert_to_seconds(event[3]),
            'Event_Type': code,
            'Event_Desc': desc,
        }

        zones = _pbp_zones(desc, code, lead_team, teams)
        if zones is not None:
            play['Home_Zone'], play['Away_Zone'] = zones

        for side, label, prefix in (('away', 'Away', 'A'),
                                    ('home', 'Home', 'H')):
            players = on_ice[side]

            if len(players) > 0:
                # The last listed player is checked for position 'G'; when it
                # is, that player is not counted in the strength.
                goalie_on_ice = players[-1][2] == 'G'
                play['{}_Strength'.format(label)] = (
                    len(players) - 1 if goalie_on_ice else len(players))
                play['G_Pulled_{}'.format(label)] = not goalie_on_ice

            # enumerate keeps one slot per listed player even when two
            # entries compare equal (list.index would collapse them).
            for slot, player in enumerate(players, start=1):
                play['{}{}Name'.format(prefix, slot)] = fix_name(player[0])
                play['{}{}Num'.format(prefix, slot)] = player[1]
                play['{}{}Pos'.format(prefix, slot)] = player[2]

        play['Penalty_Shot'] = 'Penalty Shot' in desc

        if code in ('FAC', 'HIT'):
            # e.g. MTL won Neu. Zone - MTL #11 GOMEZ vs TOR #37 BRENT
            play['P1_Team'] = lead_team
            _pbp_set_other_team(play, 'P2_Team', teams, lead_team)
            _pbp_set_numbers(play, team_number_re.findall(desc), lead_team,
                             'P1_Num', 'P2_Num')

        elif code == 'PENL':
            # Unrecognised penalties are labelled 'misc'; a recognised one
            # overwrites it (last match wins).
            play['Secondary_Type'] = 'misc'
            for phrase, label in _PBP_PENALTY_LABELS.items():
                if phrase in desc:
                    play['Secondary_Type'] = label

            for phrase, minutes in _PBP_PENALTY_LENGTHS:
                if phrase in desc:
                    play['Penl_Length'] = minutes
                    break

            play['P1_Team'] = lead_team
            _pbp_set_other_team(play, 'P2_Team', teams, lead_team)
            if 'TEAM' in desc:
                # Team penalty: the team abbreviation stands in for a number.
                play['P1_Num'] = lead_team
            else:
                _pbp_set_numbers(play, team_number_re.findall(desc),
                                 lead_team, 'P1_Num', 'P2_Num')

        elif code == 'BLOCK':
            # For blocks the leading team is stored as P2 and the other as P1.
            play['P2_Team'] = lead_team
            _pbp_set_other_team(play, 'P1_Team', teams, lead_team)
            _pbp_set_last_match(play, 'Secondary_Type', _PBP_SHOT_TYPES, desc)
            _pbp_set_numbers(play, team_number_re.findall(desc), lead_team,
                             'P2_Num', 'P1_Num')

        elif code in ('SHOT', 'MISS'):
            play['P1_Team'] = lead_team
            _pbp_set_other_team(play, 'P4_Team', teams, lead_team)
            _pbp_set_last_match(play, 'Secondary_Type', _PBP_SHOT_TYPES, desc)
            if code == 'MISS':
                _pbp_set_last_match(play, 'Tertiary_Type', _PBP_MISS_TYPES,
                                    desc)
            play['P1_Num'] = number_re.search(desc).group(1)
            _pbp_set_facing_player(play, teams, lead_team, on_ice)
            # Distance: first run of digits in the last comma-separated part.
            play['Dist'] = digits_re.findall(desc.split(',')[-1][1:])[0]

        elif code in ('GIVE', 'TAKE'):
            play['P1_Team'] = lead_team
            play['P1_Num'] = number_re.search(desc).group(1)

        elif code == 'GOAL':
            play['P1_Team'] = lead_team
            _pbp_set_other_team(play, 'P4_Team', teams, lead_team)
            _pbp_set_last_match(play, 'Secondary_Type', _PBP_SHOT_TYPES, desc)
            _pbp_set_facing_player(play, teams, lead_team, on_ice)

            numbers = goal_number_re.findall(desc)  # 1 to 3 entries
            play['P1_Num'] = numbers[0]
            if len(numbers) >= 2:
                play['P2_Num'] = numbers[1]
                play['P2_Team'] = lead_team
                if len(numbers) == 3:
                    play['P3_Num'] = numbers[2]
                    play['P3_Team'] = lead_team

            for part in desc.split(','):
                if 'ft' in part:
                    play['Dist'] = digits_re.findall(part[1:])[0]

            # Update the running score only after the play was recorded.
            for side, abbrev in teams.items():
                if abbrev == lead_team:
                    score[side] += 1

        elif code == 'STOP':
            if 'ICING' in desc:
                play['Secondary_Type'] = 'Icing'
            elif 'OFFSIDE' in desc:
                play['Secondary_Type'] = 'Offside'

        pbp_events.append(play)

    pbp = pd.DataFrame(pbp_events)
    pbp['Game_Id'] = gameid
    # Coordinates (xC / yC) are not attached to the frame here.

    # DataFrame.reindex_axis was removed from pandas; reindex(columns=...)
    # is the equivalent call.
    pbp = pbp.reindex(columns=_PBP_COLUMNS)

    # add S/V adjustment coefficients for each pbp event (computed once per
    # row, then split into the two columns)
    adjustments = [
        _pbp_score_adjustment(home, away)
        for home, away in zip(pbp['Home_Score'], pbp['Away_Score'])
    ]
    pbp['Home_Adjustment'] = [home for home, _ in adjustments]
    pbp['Away_Adjustment'] = [away for _, away in adjustments]

    return pbp