def populate_players(event_dict, players, away_players, home_players):
    """
    Populate away and home player info (and num skaters on each side)

    Keys written to ``event_dict``:
        1. ``homePlayer1..6`` / ``awayPlayer1..6`` and the matching ``*_id`` keys
        2. ``Away_Goalie`` / ``Home_Goalie`` and the matching ``*_Id`` keys
        3. ``Away_Players`` / ``Home_Players`` (length of each list)

    :param event_dict: dict with event info (filled in place)
    :param players: all players in game and info, read as players["Home" | "Away"][name]["id"]
    :param away_players: players for away team; entry[0] is read as the name, entry[2] as the position
    :param home_players: players for home team (same layout as away_players)

    :return: None
    """
    sides = (("home", "Home", home_players), ("away", "Away", away_players))
    for prefix, venue, on_ice in sides:
        for slot in range(1, 7):
            name_key = "{}Player{}".format(prefix, slot)
            id_key = "{}Player{}_id".format(prefix, slot)
            try:
                name = shared.fix_name(on_ice[slot - 1][0].upper())
                event_dict[name_key] = name
                event_dict[id_key] = players[venue][name]["id"]
            except KeyError:
                # Name is known but there is no id on record for it
                event_dict[id_key] = "NA"
            except IndexError:
                # Fewer than `slot` players listed for this side
                event_dict[name_key] = ""
                event_dict[id_key] = ""

    # The goalie is looked up separately because the slots above only cover the first six entries.
    # Start from blank values and only overwrite them when a goalie is found: resetting the fields
    # on every non-goalie entry would erase a goalie that isn't the last entry, and an empty list
    # would leave the keys missing altogether.
    # Away is handled before Home to keep the order in which the keys are inserted.
    for venue, on_ice in (("Away", away_players), ("Home", home_players)):
        goalie_key = "{}_Goalie".format(venue)
        goalie_id_key = "{}_Goalie_Id".format(venue)
        event_dict[goalie_key] = ""
        event_dict[goalie_id_key] = ""

        for player in on_ice:
            if player[2] != "G":
                continue

            event_dict[goalie_key] = shared.fix_name(player[0].upper())
            try:
                event_dict[goalie_id_key] = players[venue][event_dict[goalie_key]]["id"]
            except KeyError:
                event_dict[goalie_id_key] = "NA"

    event_dict["Away_Players"] = len(away_players)
    event_dict["Home_Players"] = len(home_players)
def parse_event(event):
    """
    Flatten one play of the json feed into a dict

    The result always holds 'event_id', 'period', 'event' and 'seconds_elapsed'. When the play has a
    'players' entry it also holds 'p<N>_name' / 'p<N>_ID' for the listed players plus 'xC' / 'yC'.

    :param event: json of event

    :return: dictionary with the info
    """
    about = event['about']
    play = {
        'event_id': about['eventIdx'],
        'period': about['period'],
        'event': str(change_event_name(event['result']['event'])),
        'seconds_elapsed': shared.convert_to_seconds(about['periodTime']),
    }

    # No players key -> nothing else to record for this play
    if 'players' not in event:
        return play

    involved = event['players']

    # The first listed player always takes the p1 slot (whatever the player type)
    first = involved[0]['player']
    play['p1_name'] = shared.fix_name(first['fullName'])
    play['p1_ID'] = first['id']

    # Everyone else keeps their position in the list as the slot number; goalies are skipped
    for slot, entry in enumerate(involved, start=1):
        if entry['playerType'] == 'Goalie':
            continue
        person = entry['player']
        play['p{}_name'.format(slot)] = shared.fix_name(person['fullName'])
        play['p{}_ID'.format(slot)] = person['id']

    coordinates = event['coordinates']
    play['xC'] = coordinates.get('x')
    play['yC'] = coordinates.get('y')

    return play
def parse_event(event):
    """
    Parses a single event when the info is in a json format

    The returned dict always has "event_id", "period", "event" and "seconds_elapsed". When the
    event carries a "players" list it also gets "p<N>_name" / "p<N>_ID" for the listed players
    and the "xC" / "yC" coordinates (None when a coordinate is absent).

    :param event: json of event

    :return: dictionary with the info
    """
    play = dict()

    play["event_id"] = event["about"]["eventIdx"]
    play["period"] = event["about"]["period"]
    play["event"] = str(change_event_name(event["result"]["event"]))
    play["seconds_elapsed"] = shared.convert_to_seconds(event["about"]["periodTime"])

    # If there's a players key that means an event occurred on the play.
    if "players" in event:
        involved = event["players"]

        # The first listed player always fills the p1 slot, even when the loop below skips it for
        # being a goalie. Upper-case the name here as well so that p1 is normalized the same way
        # as every name written by the loop.
        play["p1_name"] = shared.fix_name(involved[0]["player"]["fullName"].upper())
        play["p1_ID"] = involved[0]["player"]["id"]

        # Slot number = position in the list (1-based); goalies are not given a slot
        for slot, entry in enumerate(involved, start=1):
            if entry["playerType"] != "Goalie":
                play["p{}_name".format(slot)] = shared.fix_name(
                    entry["player"]["fullName"].upper()
                )
                play["p{}_ID".format(slot)] = entry["player"]["id"]

        play["xC"] = event["coordinates"].get("x")
        play["yC"] = event["coordinates"].get("y")

    return play
def populate_players(event_dict, players, away_players, home_players):
    """
    Populate away and home player info (and num skaters on each side)

    Keys written to ``event_dict``:
        1. ``homePlayer1..6`` / ``awayPlayer1..6`` and the matching ``*_id`` keys
        2. ``Away_Goalie`` / ``Home_Goalie`` and the matching ``*_Id`` keys
        3. ``Away_Players`` / ``Home_Players`` (length of each list)

    :param event_dict: dict with event info (filled in place)
    :param players: all players in game and info, read as players['Home' | 'Away'][name]['id']
    :param away_players: players for away team; entry[0] is read as the name, entry[2] as the position
    :param home_players: players for home team (same layout as away_players)

    :return: None
    """
    for prefix, venue, on_ice in (('home', 'Home', home_players), ('away', 'Away', away_players)):
        for j in range(6):
            name_key = '{}Player{}'.format(prefix, j + 1)
            id_key = '{}Player{}_id'.format(prefix, j + 1)
            try:
                name = shared.fix_name(on_ice[j][0].upper())
                event_dict[name_key] = name
                event_dict[id_key] = players[venue][name]['id']
            except KeyError:
                # Name is known but there is no id on record for it
                event_dict[id_key] = 'NA'
            except IndexError:
                # This side has fewer than j + 1 players listed
                event_dict[name_key] = ''
                event_dict[id_key] = ''

    # The goalie is searched for separately because the slots above only cover the first six
    # entries. The fields start out blank and are only overwritten when a goalie is found.
    # Blanking them on every non-goalie entry would wipe a goalie that isn't listed last, and an
    # empty list would never create the keys.
    # Away goes first so the keys are inserted in the same order as before.
    for venue, on_ice in (('Away', away_players), ('Home', home_players)):
        goalie_key = '{}_Goalie'.format(venue)
        goalie_id_key = '{}_Goalie_Id'.format(venue)
        event_dict[goalie_key] = ''
        event_dict[goalie_id_key] = ''

        for x in on_ice:
            if x[2] == 'G':
                event_dict[goalie_key] = shared.fix_name(x[0].upper())
                try:
                    event_dict[goalie_id_key] = players[venue][event_dict[goalie_key]]['id']
                except KeyError:
                    event_dict[goalie_id_key] = 'NA'

    event_dict['Away_Players'] = len(away_players)
    event_dict['Home_Players'] = len(home_players)
def parse_shift(shift):
    """
    Turn one shift entry of the json into a flat dict

    :param shift: json for shift

    :return: dict with shift info -> empty dict when the entry has an eventDescription
    """
    full_name = " ".join(
        shift[part].strip(" ").upper() for part in ("firstName", "lastName")
    )
    shift_dict = {
        "Player": shared.fix_name(full_name),
        "Player_Id": shift["playerId"],
        "Period": shift["period"],
        "Team": fix_team_tricode(shift["teamAbbrev"]),
    }

    # The goal events are listed at the end of the json. They are the only entries whose
    # eventDescription isn't null -> nothing is kept for them
    if shift["eventDescription"] is not None:
        return dict()

    for field, source in (("Start", "startTime"), ("End", "endTime"), ("Duration", "duration")):
        shift_dict[field] = shared.convert_to_seconds(shift[source])

    return shift_dict
def get_players_json(game_json):
    """
    Return dict of players for that game by team

    For each venue the players are read from liveData -> boxscore -> teams -> <venue> -> players.
    The last name is taken from gameData -> players, which is indexed by the same key.

    :param game_json: json of the game (needs both the 'liveData' and 'gameData' sections)

    :return: {"home": {name: {"id": ..., "last_name": ...}}, "away": {...}}
    """
    players = {"home": {}, "away": {}}

    for venue in players:
        team_players = game_json['liveData']['boxscore']['teams'][venue]['players']

        for id_key, info in team_players.items():
            person = info['person']
            player_name = shared.fix_name(person['fullName'])
            players[venue][player_name] = {
                "id": person['id'],
                "last_name": game_json['gameData']['players'][id_key]['lastName'].upper()
            }

    return players
def get_players_json(players_json):
    """
    Build a name -> info lookup from the players section of the json

    :param players_json: players section of json

    :return: dict of players -> keys are the name (in uppercase), values hold 'id' and 'last_name'
    """
    players = {}

    for info in players_json.values():
        name = shared.fix_name(info['fullName'].upper())
        last_name = info['lastName'].upper()

        try:
            player_id = info['id']
        except KeyError:
            # No id listed -> warn and fall back to a placeholder
            shared.print_warning(
                '{name} is missing an ID number in the pbp json'.format(name=name))
            player_id = 'NA'

        players[name] = {'id': player_id, 'last_name': last_name}

    return players
def parse_shift(shift):
    """
    Turn one shift entry of the json into a flat dict

    :param shift: json for shift

    :return: dict with shift info -> empty dict when the entry has an eventDescription
    """
    # The goal events are listed at the end of the json and we don't want them.
    # They are the only entries whose eventDescription isn't null
    if shift['eventDescription'] is not None:
        return {}

    first_name = shift['firstName'].strip(' ').upper()
    last_name = shift['lastName'].strip(' ').upper()

    return {
        'Player': shared.fix_name('{} {}'.format(first_name, last_name)),
        'Player_Id': shift['playerId'],
        'Period': shift['period'],
        'Team': fix_team_tricode(shift['teamAbbrev']),
        'Start': shared.convert_to_seconds(shift['startTime']),
        'End': shared.convert_to_seconds(shift['endTime']),
        'Duration': shared.convert_to_seconds(shift['duration']),
    }
def parse_html(html, player_ids, game_id):
    """
    Parse the html

    The cells returned by get_soup are walked in order: a cell containing a comma starts a new
    player, every other cell is one component of a shift for the player we are currently up to.
    The components are then grouped five at a time and each group is handed to analyze_shifts.

    :param html: cleaned up html
    :param player_ids: dict of home and away players
    :param game_id: id for game

    :return: DataFrame with info
    """
    columns = ['Game_Id', 'Player', 'Player_Id', 'Period', 'Team', 'Start', 'End', 'Duration']

    td, teams = get_soup(html)

    team = teams[0]
    home_team = teams[1]
    players = dict()

    # The list 'td' is laid out with player name followed by every component of each shift. Each shift contains:
    # shift #, Period, begin, end, and duration. The shift event isn't included.
    for t in td:
        t = t.get_text()
        if ',' in t:
            # If it has a comma in it we know it's a player's name...so add player to dict
            # It's coded as: 'num last_name, first_name'
            parts = t.split(',')
            name = shared.fix_name(' '.join([parts[1].strip(' '), parts[0][2:].strip(' ')]))

            players[name] = dict()
            # The number is the first two characters of the raw text (the same two characters that
            # are dropped from the last name above). It has to be read from the raw text, slicing
            # the formatted name only yields the first letter of the name.
            players[name]['number'] = parts[0][:2].strip()
            players[name]['Shifts'] = []
        else:
            # Here we add all the shifts to whatever player we are up to
            players[name]['Shifts'].append(t)

    shift_rows = []
    for key, info in players.items():
        # Create a list of lists (each length 5)...corresponds to 5 columns in html shifts
        info['Shifts'] = [info['Shifts'][i:i + 5] for i in range(0, len(info['Shifts']), 5)]

        # Parse each shift
        shift_rows.extend(
            analyze_shifts(shift, key, team, home_team, player_ids) for shift in info['Shifts']
        )

    # Build the frame once from all the rows. DataFrame.append copied the whole frame for every
    # player and no longer exists as of pandas 2.0; concat is its documented replacement.
    # The empty frame goes first so the expected columns always exist and keep their order.
    df = pd.concat([pd.DataFrame(columns=columns), pd.DataFrame(shift_rows)],
                   ignore_index=True, sort=False)

    df['Game_Id'] = str(game_id)[5:]

    return df
def populate_players(event_dict, players, away_players, home_players):
    """
    Fill in the on-ice player fields of an event (and num skaters on each side).

    Fields written:
        1. homePlayer1-6 / awayPlayer1-6 -> name and id
        2. Home_Goalie / Away_Goalie -> name and id
        3. Away_Players / Home_Players -> length of each list

    :param event_dict: dict with event info
    :param players: all players in game and info
    :param away_players: players for away team
    :param home_players: players for home team

    :return: None
    """
    on_ice = {'Home': home_players, 'Away': away_players}

    for venue in ['Home', 'Away']:
        prefix = venue.lower()
        goalie_key = '{}_Goalie'.format(venue)
        goalie_id_key = '{}_Goalie_Id'.format(venue)

        for slot in range(1, 7):
            name_key = '{}Player{}'.format(prefix, slot)
            id_key = '{}Player{}_id'.format(prefix, slot)

            # Home & Away player fields for this slot
            try:
                ven_player = on_ice[venue][slot - 1]
                name = shared.fix_name(ven_player[0])
                event_dict[name_key] = name
                event_dict[id_key] = players[venue][name]['id']
            except KeyError:
                event_dict[id_key] = None
            except IndexError:
                # Nobody in this slot -> blank both fields and move on to the next slot
                event_dict[name_key] = None
                event_dict[id_key] = None
                continue

            # A goalie also fills the goalie fields for his side
            if ven_player[2] == "G":
                try:
                    event_dict[goalie_key] = name
                    event_dict[goalie_id_key] = players[venue][name]['id']
                except KeyError:
                    pass

        # Whatever wasn't filled above (e.g. no goalie present) defaults to None
        event_dict.setdefault(goalie_key, None)
        event_dict.setdefault(goalie_id_key, None)

    event_dict['Away_Players'] = len(away_players)
    event_dict['Home_Players'] = len(home_players)
def combine_players_lists(json_players, roster_players, game_id):
    """
    Merge the players from the json (which carry the id's) with the players from the roster html

    :param json_players: dict of all players with id's
    :param roster_players: dict with home and and away keys for players
    :param game_id: id of game

    :return: dict containing home and away keys -> which contains list of info on each player
    """
    players = {"Home": dict(), "Away": dict()}

    for venue, combined in players.items():
        for player in roster_players[venue]:
            try:
                name = shared.fix_name(player[2])

                # This one name is resolved through its own helper instead of the json lookup
                if name != "SEBASTIAN AHO":
                    player_id = json_players[name]["id"]
                else:
                    player_id = get_sebastian_aho(player)

                combined[name] = {
                    "id": player_id,
                    "number": player[0],
                    "last_name": json_players[name]["last_name"],
                }
            except KeyError:
                # Nothing is recorded when he was listed as a scratch or is a goalie (check_goalie
                # deals with goalies). As a whole the scratch list shouldn't be trusted but if a
                # player is missing an id and is on the scratch list I'm willing to assume that he
                # didn't play
                if player[3] or player[1] == "G":
                    continue

                player.append(game_id)
                players_missing_ids.append([player[2], player[4]])
                combined[name] = {"id": "NA", "number": player[0], "last_name": ""}

    return players
def get_players_json(players_json):
    """
    Index the players section of the json by player name

    :param players_json: players section of json

    :return: dict of players -> keys are the name (in uppercase), values hold "id" and "last_name"
    """
    players = dict()

    for entry in players_json.values():
        name = shared.fix_name(entry["fullName"].upper())
        record = {"id": " ", "last_name": entry["lastName"].upper()}

        try:
            record["id"] = entry["id"]
        except KeyError:
            # The entry has no id -> warn about it and store a placeholder instead
            shared.print_warning(
                "{name} is missing an ID number in the pbp json".format(name=name)
            )
            record["id"] = "NA"

        players[name] = record

    return players
def combine_players_lists(json_players, roster_players, game_id):
    """
    Merge the players from the json (which carry the id's) with the players from the roster html

    :param json_players: dict of all players with id's (looked up by lowercase venue, then name)
    :param roster_players: dict with home and and away keys for players
    :param game_id: id of game

    :return: dict containing home and away keys -> which contains list of info on each player
    """
    players = {'Home': dict(), 'Away': dict()}

    for venue, combined in players.items():
        json_key = venue.lower()

        for player in roster_players[venue]:
            try:
                name = shared.fix_name(player[2])
                json_info = json_players[json_key][name]
                combined[name] = {
                    'id': json_info['id'],
                    'number': player[0],
                    'last_name': json_info['last_name']
                }
            except KeyError:
                # Nothing is recorded when he was listed as a scratch or is a goalie (check_goalie
                # deals with goalies). As a whole the scratch list shouldn't be trusted but if a
                # player is missing an id and is on the scratch list I'm willing to assume that he
                # didn't play
                if player[3] or player[1] == 'G':
                    continue

                player.append(game_id)
                players_missing_ids.append([player[2], player[4]])
                combined[name] = {'id': None, 'number': player[0], 'last_name': ''}

    return players