def parse_event(event):
    """
    Parses a single event when the info is in a json format

    NOTE(review): a second function named parse_event is defined further down
    in this file. If both really live in the same module the later definition
    replaces this one - confirm they belong to separate modules.

    :param event: json of event. The 'result' and 'about' entries are
                  required, 'players' and 'coordinates' are optional

    :return: dictionary with the info
    """
    result = event['result']
    about = event['about']
    event_type_id = result['eventTypeId']

    play = {
        'Event_Type': str(change_event_name(event_type_id)),
        'Event_Desc': result['description'],
        # 'Event_Num' is deliberately not taken from about['eventIdx']:
        # eventIdx in the json does not match the event num from the html
        'Period': about['period'],
        'Time_Elapsed': convert_to_seconds(about['periodTime']),
        'Away_Score': about['goals']['away'],
        'Home_Score': about['goals']['home'],
    }

    if event_type_id == "SHOT":
        # 'Wrist' is included so wrist shots get a Secondary_Type too.
        shot_types = ("Wrist", "Snap", "Slap", "Backhand", "Tip-In",
                      "Wrap-around", "Deflected")
        # Looked up leniently: when nothing matches no Secondary_Type is
        # recorded anyway, so a missing value should not abort the parse.
        shot_desc = result.get('secondaryType') or ''
        for shot_type in shot_types:
            if shot_type in shot_desc:
                play["Secondary_Type"] = shot_type
                break
    elif event_type_id == "MISSED_SHOT":
        # The miss type follows the LAST dash of the description. Splitting
        # on the first dash would return part of the name for hyphenated
        # players. The text is stored exactly as it appears after the dash
        # (surrounding whitespace is not stripped).
        _, dash, miss_type = result['description'].rpartition('-')
        if dash:
            play["Secondary_Type"] = miss_type
    elif event_type_id == "PENALTY":
        play["Secondary_Type"] = result['secondaryType']
        play["Penl_Length"] = result['penaltyMinutes']
    elif event_type_id == "GOAL":
        play["Secondary_Type"] = result['secondaryType']

    # If there's a players key that means an event occurred on the play.
    if 'players' in event:
        for slot, entry in enumerate(event['players'], start=1):
            player = entry['player']
            play['P{}_Name'.format(slot)] = fix_name(player['fullName'].upper())
            play['P{}_Id'.format(slot)] = str(player['id'])

    # Coordinates aren't always there. If either one is missing both are blanked.
    try:
        coordinates = event['coordinates']
        play['xC'] = coordinates['x']
        play['yC'] = coordinates['y']
    except KeyError:
        play['xC'] = ''
        play['yC'] = ''

    return play
def analyze_shifts(shift, name, team):
    """
    Turn one shift of a player into a dictionary (only used for the html).

    Before this is called each player (in a dictionary) has a list whose
    entries are the individual shifts.

    :param shift: info on shift, read by position: [0] shift number,
                  [1] period ('OT' is stored as '4'), [2] start, [3] end and
                  [4] duration. For the last three only the text in front of
                  the first '/' is converted to seconds
    :param name: player name
    :param team: given team

    :return: dict with info for shift
    """
    def to_seconds(raw_time):
        # Only the part before the first '/' is handed to convert_to_seconds
        return convert_to_seconds(raw_time.split('/')[0])

    return {
        'Player': name.upper(),
        'Period': shift[1] if shift[1] != 'OT' else '4',
        'Team': fix_team(team.strip(' ')),
        'Shift': shift[0],
        'Start': to_seconds(shift[2]),
        'End': to_seconds(shift[3]),
        'Duration': to_seconds(shift[4]),
    }
def parse_shift(shift):
    """
    Parse shift for json

    :param shift: json for shift

    :return: dict with shift info. An empty dict is returned when the entry
             carries an 'eventDescription' (i.e. it is not None)
    """
    first_name = shift['firstName'].strip(' ').upper()
    last_name = shift['lastName'].strip(' ').upper()
    player = fix_name('{} {}'.format(first_name, last_name))

    parsed = {
        'Period': shift['period'],
        'Player': player,
        'Team': shift['teamAbbrev'],
        'Shift': shift['shiftNumber'],
    }

    # Entries with an event description are dropped entirely
    if shift['eventDescription'] is not None:
        return {}

    for column, field in (('Start', 'startTime'),
                          ('End', 'endTime'),
                          ('Duration', 'duration')):
        parsed[column] = convert_to_seconds(shift[field])

    return parsed
def parse_event(event):
    """
    Parse one event given as a '~' separated string.

    Fields used here (0-based): 0 and 1 are the x and y coordinates, 3 is the
    time elapsed, 4 is the period and 8 is the event type.

    NOTE(review): another function named parse_event is defined earlier in
    this file. If both really live in the same module this definition
    replaces the earlier one - confirm they belong to separate modules.

    :param event: string with info

    :return: dict with relevant info, or None for period '5' (shootout)
    """
    pieces = event.split('~')
    period = pieces[4]

    # Shootouts screw everything up so don't bother...coordinates don't matter there either way
    if period == '5':
        return None

    return {
        'xC': int(pieces[0]),
        'yC': int(pieces[1]),
        'Time_Elapsed': convert_to_seconds(pieces[3]),
        'Period': period,
        'Event_Type': event_type(pieces[8].upper()),
    }
# Long event names mapped to the short codes found in the html play-by-play.
# Only the codes (the values) are used, to decide which rows are kept.
_PBP_EVENT_TYPES = {
    'PERIOD_START': 'PSTR',
    'FACEOFF': 'FAC',
    'BLOCKED_SHOT': 'BLOCK',
    'GAME_END': 'GEND',
    'GIVEAWAY': 'GIVE',
    'GOAL': 'GOAL',
    'HIT': 'HIT',
    'MISSED_SHOT': 'MISS',
    'PERIOD_END': 'PEND',
    'SHOT': 'SHOT',
    'STOP': 'STOP',
    'TAKEAWAY': 'TAKE',
    'PENALTY': 'PENL',
    'Early Intermission Start': 'EISTR',
    'Early Intermission End': 'EIEND',
    'Shootout Completed': 'SOC',
}
_PBP_EVENT_CODES = tuple(_PBP_EVENT_TYPES.values())

# (text looked for in the description, penalty name stored). The order
# matters: every entry is tried and the LAST one found in the description
# wins, e.g. 'Holding the stick' overrides 'Holding'.
_PBP_PENALTIES = (
    ('Hooking', 'Hooking'),
    ('Holding', 'Holding'),
    ('Holding the stick', 'Holding the stick'),
    ('Interference', 'Interference'),
    ('Roughing', 'Roughing'),
    ('Tripping', 'Tripping'),
    ('Unsportsmanlike conduct', 'Unsportsmanlike conduct'),
    ('Illegal equipment', 'Illegal equipment'),
    ('Diving', 'Embellishment'),
    ('Embellishment', 'Embellishment'),
    ('Broken stick', 'Broken stick'),
    ('Delaying Game-Puck over glass', 'Delaying Game-Puck over glass'),
    ('Delay Gm - Face-off Violation', 'Delay Gm - Face-off Violation'),
    ('Delay of game', 'Delay of game'),
    ('Delaying the game', 'Delay of game'),
    ('Delay of game - bench', 'Delay of game - bench'),
    ('Delaying Game-Ill. play goalie', 'Delaying Game-Ill. play goalie'),
    ('Delaying Game-Smothering puck', 'Delaying Game-Smothering puck'),
    ('Goalie leave crease', 'Goalie leave crease'),
    ('Face-off violation-bench', 'Face-off violation-bench'),
    ('Bench', 'Bench'),
    ('Illegal stick', 'Illegal stick'),
    ('Closing hand on puck', 'Closing hand on puck'),
    ('Throwing stick', 'Throwing stick'),
    ('Too many men/ice - bench', 'Too many men/ice - bench'),
    ('Abusive language - bench', 'Abuse of officials - bench'),
    ('Abuse of officials - bench', 'Abuse of officials - bench'),
    ('Interference on goalkeeper', 'Interference on goalkeeper'),
    ('Hi-sticking', 'High-sticking'),
    ('Hi stick - double minor', 'High-sticking'),
    ('Cross checking', 'Cross checking'),
    ('Cross check - double minor', 'Cross checking'),
    ('Slashing', 'Slashing'),
    ('Charging', 'Charging'),
    ('Boarding', 'Boarding'),
    ('Kneeing', 'Kneeing'),
    ('Clipping', 'Clipping'),
    ('Elbowing', 'Elbowing'),
    ('Spearing', 'Spearing'),
    ('Butt ending', 'Butt ending'),
    ('Head butting', 'Head butting'),
    ('Illegal check to head', 'Illegal check to head'),
    ('Fighting', 'Fighting'),
    ('Instigator', 'Instigator'),
    ('Abuse of officials', 'Abuse of officials'),
    ('Aggressor', 'Aggressor'),
    ('PS-', 'Penalty Shot'),
    ('PS -', 'Penalty Shot'),
    ('Misconduct', 'Misconduct'),
    ('Game misconduct', 'Game misconduct'),
    ('Game Misconduct', 'Game misconduct'),
    # NOTE(review): capital 'M' differs from the 'Game misconduct' label used
    # just above - confirm whether a separate category is intended.
    ('Match penalty', 'Game Misconduct'),
    ('Checking from behind', 'Checking from behind'),
)

# First entry found in the description decides the penalty length
_PBP_PENALTY_LENGTHS = (('2 min', 2), ('4 min', 4), ('5 min', 5), ('10 min', 10))

_PBP_SHOT_TYPES = ('Wrist', 'Snap', 'Slap', 'Deflected', 'Tip-In', 'Backhand',
                   'Wrap-around')
_PBP_MISS_TYPES = ('Over Net', 'Wide of Net', 'Crossbar', 'Goalpost')

_PBP_TEAM_NUM_RE = re.compile(r'(.{3})\s+#(\d+)')   # -> [(Team, num), ...]
_PBP_NUM_RE = re.compile(r'#(\d+)')
_PBP_GOAL_NUM_RE = re.compile(r'#(\d+)\s+')
_PBP_DIGITS_RE = re.compile(r'\d+')

_PBP_COLUMNS = [
    'Game_Id', 'Period', 'Event_Num', 'Event_Type', 'Secondary_Type',
    'Tertiary_Type', 'Penl_Length', 'Event_Desc', 'Time_Elapsed',
    'P1_Team', 'P1_Num', 'P2_Team', 'P2_Num', 'P3_Team', 'P3_Num',
    'P4_Team', 'P4_Num', 'Home_Score', 'Away_Score', 'Home_Zone',
    'Away_Zone', 'Home_Strength', 'Away_Strength', 'G_Pulled_Home',
    'G_Pulled_Away', 'Dist',
    'H1Name', 'H1Num', 'H1Pos', 'H2Name', 'H2Num', 'H2Pos',
    'H3Name', 'H3Num', 'H3Pos', 'H4Name', 'H4Num', 'H4Pos',
    'H5Name', 'H5Num', 'H5Pos', 'H6Name', 'H6Num', 'H6Pos',
    'A1Name', 'A1Num', 'A1Pos', 'A2Name', 'A2Num', 'A2Pos',
    'A3Name', 'A3Num', 'A3Pos', 'A4Name', 'A4Num', 'A4Pos',
    'A5Name', 'A5Num', 'A5Pos', 'A6Name', 'A6Num', 'A6Pos',
    'Penalty_Shot',
]

# (home coefficient, away coefficient) keyed by home score minus away score.
# Differences beyond +/-3 use the +/-3 entry.
_PBP_SCORE_ADJUSTMENTS = {
    3: (1.132, 0.895),
    2: (1.074, 0.936),
    1: (1.026, 0.975),
    0: (0.970, 1.032),
    -1: (0.915, 1.103),
    -2: (0.882, 1.154),
    -3: (0.850, 1.214),
}


def _pbp_set_if_found(play, key, value):
    """Store value under key unless the lookup that produced it found nothing (None)."""
    if value is not None:
        play[key] = value


def _pbp_last_match(candidates, text):
    """Return the last entry of candidates contained in text, or None."""
    found = None
    for candidate in candidates:
        if candidate in text:
            found = candidate
    return found


def _pbp_opponent(teams, team):
    """Return the abbreviation that differs from team (home wins if both differ), or None."""
    opponent = None
    for abbr in (teams['away'], teams['home']):
        if abbr != team:
            opponent = abbr
    return opponent


def _pbp_zones(desc, code, teams):
    """Return (home zone, away zone) for an event, or None when no zone applies."""
    team = desc[:3]
    is_home = team == teams['home']
    known_team = is_home or team == teams['away']

    if 'Off. Zone' in desc and known_team:
        home_attacking = is_home
    elif 'Def. Zone' in desc and known_team:
        # A 'Def. Zone' BLOCK is mapped the opposite way from every other
        # 'Def. Zone' event
        home_attacking = is_home == (code == 'BLOCK')
    elif 'Neu. Zone' in desc:
        return 'NZ', 'NZ'
    else:
        return None

    return ('OZ', 'DZ') if home_attacking else ('DZ', 'OZ')


def _pbp_strength(on_ice):
    """Return (skater count, goalie pulled) for an on-ice list, or None when it is empty."""
    if not on_ice:
        return None
    # The last listed player decides: position 'G' means the goalie is in net
    if on_ice[-1][2] == 'G':
        return len(on_ice) - 1, False
    return len(on_ice), True


def _pbp_add_on_ice(play, prefix, on_ice):
    """Write <prefix><n>Name/Num/Pos for every on-ice player, numbered from 1."""
    for slot, player in enumerate(on_ice, start=1):
        play['{}{}Name'.format(prefix, slot)] = fix_name(player[0])
        play['{}{}Num'.format(prefix, slot)] = player[1]
        play['{}{}Pos'.format(prefix, slot)] = player[2]


def _pbp_assign_numbers(play, desc, team, same_key, other_key):
    """Store each 'TEAM #num' found in desc under same_key if TEAM is team, else other_key."""
    for abbr, number in _PBP_TEAM_NUM_RE.findall(desc):
        play[same_key if abbr == team else other_key] = number


def _pbp_opposing_last_num(row, teams, team):
    """
    Return the number of the last listed on-ice player of the side(s) whose
    abbreviation differs from team (home wins if both differ), or None.

    NOTE(review): this is stored as P4_Num and is presumably meant to be the
    goalie, but the position is not checked - confirm for pulled goalies.
    """
    number = None
    for side, column in (('away', 6), ('home', 7)):
        if teams[side] != team and row[column]:
            number = row[column][-1][1]
    return number


def _pbp_first_number(text):
    """Return the first run of digits in text (as a string), or None."""
    digits = _PBP_DIGITS_RE.findall(text)
    return digits[0] if digits else None


def _pbp_penalty_name(desc):
    """Return the penalty name for a description, 'misc' when none is recognised."""
    name = None
    for needle, label in _PBP_PENALTIES:
        if needle in desc:
            name = label
    return 'misc' if name is None else name


def _pbp_fill_shot(play, row, teams, with_miss_type):
    """Fill shooter, opposing team/player, shot type and distance for SHOT and MISS rows."""
    desc = row[5]
    team = desc[:3]

    play['P1_Team'] = team
    _pbp_set_if_found(play, 'P4_Team', _pbp_opponent(teams, team))
    _pbp_set_if_found(play, 'Secondary_Type', _pbp_last_match(_PBP_SHOT_TYPES, desc))
    if with_miss_type:
        _pbp_set_if_found(play, 'Tertiary_Type', _pbp_last_match(_PBP_MISS_TYPES, desc))

    # A description without '#<num>' leaves P1_Num empty instead of aborting the game
    shooter = _PBP_NUM_RE.search(desc)
    if shooter:
        play['P1_Num'] = shooter.group(1)

    _pbp_set_if_found(play, 'P4_Num', _pbp_opposing_last_num(row, teams, team))

    # Distance is read from the last comma separated part, skipping its first character
    _pbp_set_if_found(play, 'Dist', _pbp_first_number(desc.split(',')[-1][1:]))


def _pbp_fill_goal(play, row, teams, score):
    """Fill scorer, assists, opposing team/player, shot type, distance and bump the score."""
    desc = row[5]
    team = desc[:3]

    play['P1_Team'] = team
    _pbp_set_if_found(play, 'P4_Team', _pbp_opponent(teams, team))
    _pbp_set_if_found(play, 'Secondary_Type', _pbp_last_match(_PBP_SHOT_TYPES, desc))
    _pbp_set_if_found(play, 'P4_Num', _pbp_opposing_last_num(row, teams, team))

    numbers = _PBP_GOAL_NUM_RE.findall(desc)   # [num] -> ranging from 1 to 3 indices
    if numbers:
        play['P1_Num'] = numbers[0]
    if len(numbers) >= 2:
        play['P2_Num'] = numbers[1]
        play['P2_Team'] = team
    if len(numbers) == 3:
        play['P3_Num'] = numbers[2]
        play['P3_Team'] = team

    # The last comma separated part mentioning 'ft' (and holding a number) wins
    for part in desc.split(','):
        if 'ft' in part:
            _pbp_set_if_found(play, 'Dist', _pbp_first_number(part[1:]))

    for side in ('away', 'home'):
        if teams[side] == team:
            score[side] += 1


def _pbp_score_adjustment(diff):
    """Return the (home, away) adjustment for a score difference, NaNs if it is not usable."""
    if diff >= 3:
        return _PBP_SCORE_ADJUSTMENTS[3]
    if diff <= -3:
        return _PBP_SCORE_ADJUSTMENTS[-3]
    return _PBP_SCORE_ADJUSTMENTS.get(diff, (np.nan, np.nan))


def parse_pbp(gameid):
    """
    parse cleaned html and create pd Dataframe

    The first cleaned row supplies the team abbreviations (first three
    characters of its 7th and 8th entries). A row is kept when its event code
    (5th entry) is one of the known codes and its event number (1st entry) is
    not '#'. Home_Score/Away_Score hold the score before the row itself is
    counted, so a GOAL row shows the score prior to that goal.

    Rows whose description lacks an expected piece (player number, distance,
    on-ice players) simply leave the matching column empty. Penalties that
    match no known name get Secondary_Type 'misc'.

    Coordinates (xC/yC) are not added here; an earlier get_coords based merge
    was disabled.

    :param gameid: game id

    :return: df where each row is an event in pbp, with the fixed set of
             columns in _PBP_COLUMNS plus Home_Adjustment and Away_Adjustment
    """
    html = get_pbp(gameid)
    events = clean_html_pbp(html)

    teams = {'away': events[0][6][:3], 'home': events[0][7][:3]}
    score = {'home': 0, 'away': 0}
    pbp_events = []

    for row in events:
        code = row[4]
        if code not in _PBP_EVENT_CODES or row[0] == '#':
            continue

        desc = row[5]
        team = desc[:3]

        play = {
            'Home_Score': score['home'],
            'Away_Score': score['away'],
            'Event_Num': row[0],
            'Period': row[1],
            'Time_Elapsed': convert_to_seconds(row[3]),
            'Event_Type': code,
            'Event_Desc': desc,
            'Penalty_Shot': 'Penalty Shot' in desc,
        }

        zones = _pbp_zones(desc, code, teams)
        if zones is not None:
            play['Home_Zone'], play['Away_Zone'] = zones

        away_strength = _pbp_strength(row[6])
        if away_strength is not None:
            play['Away_Strength'], play['G_Pulled_Away'] = away_strength
        home_strength = _pbp_strength(row[7])
        if home_strength is not None:
            play['Home_Strength'], play['G_Pulled_Home'] = home_strength

        _pbp_add_on_ice(play, 'A', row[6])
        _pbp_add_on_ice(play, 'H', row[7])

        if code in ('FAC', 'HIT'):
            # e.g. MTL won Neu. Zone - MTL #11 GOMEZ vs TOR #37 BRENT
            play['P1_Team'] = team
            _pbp_set_if_found(play, 'P2_Team', _pbp_opponent(teams, team))
            _pbp_assign_numbers(play, desc, team, 'P1_Num', 'P2_Num')
        elif code == 'PENL':
            play['Secondary_Type'] = _pbp_penalty_name(desc)
            for needle, minutes in _PBP_PENALTY_LENGTHS:
                if needle in desc:
                    play['Penl_Length'] = minutes
                    break
            play['P1_Team'] = team
            _pbp_set_if_found(play, 'P2_Team', _pbp_opponent(teams, team))
            if 'TEAM' in desc:
                # Descriptions containing 'TEAM' store the abbreviation as the number
                play['P1_Num'] = team
            else:
                _pbp_assign_numbers(play, desc, team, 'P1_Num', 'P2_Num')
        elif code == 'BLOCK':
            # The team named first is stored as P2, the other team as P1
            play['P2_Team'] = team
            _pbp_set_if_found(play, 'P1_Team', _pbp_opponent(teams, team))
            _pbp_set_if_found(play, 'Secondary_Type',
                              _pbp_last_match(_PBP_SHOT_TYPES, desc))
            _pbp_assign_numbers(play, desc, team, 'P2_Num', 'P1_Num')
        elif code == 'SHOT':
            _pbp_fill_shot(play, row, teams, with_miss_type=False)
        elif code == 'MISS':
            _pbp_fill_shot(play, row, teams, with_miss_type=True)
        elif code in ('GIVE', 'TAKE'):
            play['P1_Team'] = team
            player = _PBP_NUM_RE.search(desc)
            if player:
                play['P1_Num'] = player.group(1)
        elif code == 'GOAL':
            _pbp_fill_goal(play, row, teams, score)
        elif code == 'STOP':
            if 'ICING' in desc:
                play['Secondary_Type'] = 'Icing'
            elif 'OFFSIDE' in desc:
                play['Secondary_Type'] = 'Offside'

        pbp_events.append(play)

    pbp = pd.DataFrame(pbp_events)
    pbp['Game_Id'] = gameid

    # Fixed column set and order; columns never filled come out empty.
    # (DataFrame.reindex_axis no longer exists in current pandas.)
    pbp = pbp.reindex(columns=_PBP_COLUMNS)

    # add S/V adjustment coefficients for each pbp event
    adjustments = [_pbp_score_adjustment(diff)
                   for diff in pbp['Home_Score'] - pbp['Away_Score']]
    pbp['Home_Adjustment'] = [home for home, _ in adjustments]
    pbp['Away_Adjustment'] = [away for _, away in adjustments]

    return pbp