def parse_event(event):
    """
    Flatten a single play from the json play-by-play into a dictionary

    The event id, period, event type and elapsed seconds are always filled in. The players involved
    (p1, p2, ...) and the coordinates (xC, yC) are only added when the event carries a 'players' key.

    :param event: json (dict) of a single event

    :return: dictionary with the info
    """
    about = event['about']

    play = {
        'event_id': about['eventIdx'],
        'period': about['period'],
        'event': str(change_event_name(event['result']['eventTypeId'])),
        'seconds_elapsed': shared.convert_to_seconds(about['periodTime']),
    }

    # No players key -> nothing else to record for this play
    if 'players' not in event:
        return play

    involved = event['players']

    # The first listed player is always recorded as p1 (name as given, not uppercased here)
    first_player = involved[0]['player']
    play['p1_name'] = shared.fix_name(first_player['fullName'])
    play['p1_ID'] = first_player['id']

    # Every non-goalie is stored under his (1-based) position in the list. This overwrites the p1 entry
    # above with the uppercase name whenever the first player isn't a goalie.
    for slot, participant in enumerate(involved, start=1):
        if participant['playerType'] == 'Goalie':
            continue

        person = participant['player']
        play['p{}_name'.format(slot)] = shared.fix_name(person['fullName'].upper())
        play['p{}_ID'.format(slot)] = person['id']

    # Coordinates aren't always there...when either one is missing both are left blank
    try:
        coordinates = event['coordinates']
        play['xC'], play['yC'] = coordinates['x'], coordinates['y']
    except KeyError:
        play['xC'] = play['yC'] = ''

    return play
def _populate_goalie(event_dict, players, on_ice, venue):
    """
    Fill in '<venue>_Goalie' and '<venue>_Goalie_Id' for one side

    :param event_dict: dict with event info (modified in place)
    :param players: all players in game and info
    :param on_ice: players on the ice for that side; index 0 is the name and index 2 the position
    :param venue: 'Home' or 'Away'

    :return: None
    """
    # With nobody listed for the side the keys are left untouched
    if not on_ice:
        return

    goalie_name = ''
    goalie_id = ''

    # The goalie may be anywhere in the list, so a skater listed after him must not wipe him out
    for skater in on_ice:
        if skater[2] != 'G':
            continue

        goalie_name = shared.fix_name(skater[0].upper())
        try:
            goalie_id = players[venue][goalie_name]['id']
        except KeyError:
            goalie_id = 'NA'

    event_dict['{}_Goalie'.format(venue)] = goalie_name
    event_dict['{}_Goalie_Id'.format(venue)] = goalie_id


def populate_players(event_dict, players, away_players, home_players):
    """
    Populate away and home player info (and num skaters on each side)

    For each side the first six on-ice players are written to '<venue>Player<n>' and '<venue>Player<n>_id'
    (venue being 'home' or 'away'). A player who can't be found in 'players' gets an id of 'NA' and slots
    with no player are filled with empty strings. The goalie for each side is stored under
    'Away_Goalie'/'Home_Goalie' (and the matching '_Id' key) wherever he appears in the list; they are blank
    when no goalie is on the ice. The number of players on each side goes in 'Away_Players'/'Home_Players'.

    :param event_dict: dict with event info (modified in place)
    :param players: all players in game and info
    :param away_players: players for away team
    :param home_players: players for home team

    :return: None
    """
    sides = {'home': ('Home', home_players), 'away': ('Away', away_players)}

    for venue in ['home', 'away']:
        roster_key, on_ice = sides[venue]

        for j in range(6):
            name_key = '{}Player{}'.format(venue, j + 1)
            id_key = '{}Player{}_id'.format(venue, j + 1)

            try:
                name = shared.fix_name(on_ice[j][0].upper())
                event_dict[name_key] = name
                event_dict[id_key] = players[roster_key][name]['id']
            except KeyError:
                # Player is on the ice but isn't in the players dict
                event_dict[id_key] = 'NA'
            except IndexError:
                # Fewer than six players on the ice for that side
                event_dict[name_key] = ''
                event_dict[id_key] = ''

    # Did this because above method assumes the goalie is at end of player list
    _populate_goalie(event_dict, players, away_players, 'Away')
    _populate_goalie(event_dict, players, home_players, 'Home')

    event_dict['Away_Players'] = len(away_players)
    event_dict['Home_Players'] = len(home_players)
def combine_players_lists(json_players, roster_players, game_id):
    """
    Merge the players from the json (which carry the id's) with the players listed in the roster html

    :param json_players: dict of all players with id's
    :param roster_players: dict with home and away keys for players
    :param game_id: id of game

    :return: dict containing home and away keys -> each maps a player's name to his id and number
    """
    players = {'Home': dict(), 'Away': dict()}

    for venue, combined in players.items():
        for player in roster_players[venue]:
            try:
                name = shared.fix_name(player[2])
                combined[name] = {'id': json_players[name]['id'], 'number': player[0]}
            except KeyError:
                # No id for him in the json. Goalies are skipped here (check_goalie deals with goalies).
                # player[3] appears to be the scratch flag - as a whole the scratch list shouldn't be
                # trusted, but a player who is both missing an id and flagged there is assumed to not have
                # played and is left out.
                if player[3] or player[1] == 'G':
                    continue

                # Tag the roster entry with the game and record him as missing an id
                player.append(game_id)
                players_missing_ids.extend([[player[2], player[4]]])
                combined[name] = {'id': 'NA', 'number': player[0]}

    return players
def get_players_json(players_json):
    """
    Build a lookup of every player in the game from the json

    :param players_json: players section of json

    :return: dict of players -> keyed by name (in uppercase), each holding the player's 'id' and 'last_name'
    """
    players = dict()

    for info in players_json.values():
        name = shared.fix_name(info['fullName'].upper())

        entry = {'id': ' ', 'last_name': info['lastName'].upper()}
        players[name] = entry

        # The id is occasionally missing...warn about it and mark it as 'NA'
        try:
            entry['id'] = info['id']
        except KeyError:
            shared.print_warning(
                '{name} is missing an ID number in the pbp json'.format(name=name))
            entry['id'] = 'NA'

    return players
def parse_shift(shift):
    """
    Turn a single shift from the json into a dictionary

    :param shift: json for shift

    :return: dict with shift info. It's empty when the entry has an eventDescription (see below)
    """
    first_name = shift['firstName'].strip(' ').upper()
    last_name = shift['lastName'].strip(' ').upper()

    parsed = {
        'Player': shared.fix_name(' '.join([first_name, last_name])),
        'Player_Id': shift['playerId'],
        'Period': shift['period'],
        'Team': fix_team_tricode(shift['teamAbbrev']),
    }

    # At the end of the json they list when all the goal events happened. They are the only ones which have
    # a non-null eventDescription...they aren't shifts so nothing is returned for them
    if shift['eventDescription'] is not None:
        return dict()

    parsed['Start'] = shared.convert_to_seconds(shift['startTime'])
    parsed['End'] = shared.convert_to_seconds(shift['endTime'])
    parsed['Duration'] = shared.convert_to_seconds(shift['duration'])

    return parsed
def parse_html(html, player_ids, game_id):
    """
    Parse the html shift report into a DataFrame

    The cells returned by get_soup are walked in order: a cell containing a comma starts a new player and
    every other cell is collected as part of that player's shifts. The collected cells are then grouped five
    at a time, each group is handed to analyze_shifts and the results are appended to the DataFrame.

    Note: The parsing relies on the exact order of the cells in the report and the original author
    cautioned against modifying it - change it with care.

    :param html: cleaned up html
    :param player_ids: dict of home and away players
    :param game_id: id for game

    :return: DataFrame with info
    """
    columns = [
        'Game_Id', 'Player', 'Player_Id', 'Period', 'Team', 'Start', 'End',
        'Duration'
    ]
    df = pd.DataFrame(columns=columns)

    td, teams = get_soup(html)

    # teams[0] is passed to analyze_shifts as the team and teams[1] as the home team
    team = teams[0]
    home_team = teams[1]
    players = dict()

    # The list 'td' is laid out with player name followed by every component of each shift. Each shift contains:
    # shift #, Period, begin, end, and duration. The shift event isn't included.
    for t in td:
        t = t.get_text()
        if ',' in t:  # If it has a comma in it we know it's a player's name...so add player to dict
            name = t
            # Just format the name normally...it's coded as: 'num last_name, first_name'
            name = name.split(',')
            name = ' '.join([name[1].strip(' '), name[0][2:].strip(' ')])
            name = shared.fix_name(name)
            players[name] = dict()
            # NOTE(review): 'name' has already been rebuilt as 'first_name last_name' at this point, so this
            # presumably stores the first character of the name rather than the jersey number. 'number' isn't
            # read anywhere else in this function - confirm before relying on it.
            players[name]['number'] = name[0][:2].strip()
            players[name]['Shifts'] = []
        else:
            # Here we add all the shifts to whatever player we are up to
            # NOTE(review): 'name' is unbound if the very first cell has no comma - presumably the report
            # always starts with a player's name; confirm.
            players[name]['Shifts'].extend([t])

    for key in players.keys():
        # Create a list of lists (each length 5)...corresponds to 5 columns in html shifts
        players[key]['Shifts'] = [
            players[key]['Shifts'][i:i + 5]
            for i in range(0, len(players[key]['Shifts']), 5)
        ]

        # Parse each shift
        shifts = [
            analyze_shifts(shift, key, team, home_team, player_ids)
            for shift in players[key]['Shifts']
        ]
        # NOTE(review): DataFrame.append was removed in pandas 2.0 - confirm the pandas version this runs on
        df = df.append(shifts, ignore_index=True)

    # Only the part of the game id after the first five characters is stored
    df['Game_Id'] = str(game_id)[5:]
    return df
def get_players_json(json):
    """
    Build a lookup of every player in the game from the json

    :param json: gameData section of json

    :return: dict of players -> keyed by name (in uppercase), each holding the player's 'id'
    """
    players = dict()

    for info in json['players'].values():
        name = shared.fix_name(info['fullName'].upper())

        # The id is occasionally missing...report it and mark it as 'NA'
        try:
            player_id = info['id']
        except KeyError:
            print(name, ' is missing an ID number in the pbp json')
            player_id = 'NA'

        players[name] = {'id': player_id}

    return players