def get_espn_game(date, home_team, away_team):
    """
    Fetch the raw ESPN play-by-play feed for a single game.

    Ex: http://www.espn.com/nhl/gamecast/data/masterFeed?lang=en&isAll=true&gameId=400885300

    The ESPN game id is looked up from the date and the upper-cased team names,
    then the feed is requested through shared.get_file.

    :param date: date of the game
    :param home_team: home team
    :param away_team: away team

    :return: raw xml

    :raises Exception: if shared.get_file returns None
    """
    game_id = get_espn_game_id(date, home_team.upper(), away_team.upper())

    feed_url = 'http://www.espn.com/nhl/gamecast/data/masterFeed?lang=en&isAll=true&gameId={}'.format(game_id)
    file_info = {
        "url": feed_url,
        "name": game_id,
        "type": "espn_pbp",
        "season": shared.get_season(date),
    }

    response = shared.get_file(file_info)
    if response is not None:
        return response

    # Nothing came back for this game
    raise Exception
def get_espn_date(date):
    """
    Fetch the ESPN scoreboard page listing every game played on a given day.

    :param date: YYYY-MM-DD

    :return: response

    :raises Exception: if shared.get_file returns a falsy value (e.g. None or an empty page)
    """
    # The scoreboard url wants the date without dashes (YYYYMMDD)
    compact_date = date.replace('-', '')
    scoreboard_url = 'http://www.espn.com/nhl/scoreboard?date={}'.format(compact_date)

    page_info = {
        "url": scoreboard_url,
        "name": date,
        "type": "espn_scoreboard",
        "season": shared.get_season(date),
    }

    response = shared.get_file(page_info)
    if response:
        return response

    # Couldn't get the page, or there was nothing there
    raise Exception
def scrape_shifts(game_id, players, date):
    """
    Scrape the Shift charts (or TOI tables) for a game.

    The json shifts are tried first when the season of the game is 2010 or later.
    If that step is skipped or returns None, the html shifts are used as a fallback.
    When both fail, a warning is printed and the game is recorded in
    broken_shifts_games as [game_id, date].

    :param game_id: json game id
    :param players: dict of players with numbers and id's (passed to the html scraper)
    :param date: date of game

    :return: DataFrame with info (with a 'Date' column set to date) or None if it fails
    """
    shifts_df = None

    # Control for fact that shift json is only available from 2010 onwards
    if shared.get_season(date) >= 2010:
        shifts_df = json_shifts.scrape_game(game_id)

    if shifts_df is None:
        shifts_df = html_shifts.scrape_game(game_id, players)

    if shifts_df is None:
        # Both failed so just return nothing.
        # str.format (instead of "+") keeps the warning from raising a TypeError
        # when game_id is not a str, and separates the id from the message text.
        shared.print_warning("Unable to scrape shifts for game {}".format(game_id))
        broken_shifts_games.append([game_id, date])
        return None

    shifts_df['Date'] = date

    return shifts_df
def get_espn_game(date, home_team, away_team, game_id=None):
    """
    Fetch the raw ESPN play-by-play feed for a single game.

    Ex: http://www.espn.com/nhl/gamecast/data/masterFeed?lang=en&isAll=true&gameId=400885300

    :param date: date of the game
    :param home_team: home team
    :param away_team: away team
    :param game_id: Game id if we already have it - for live scraping. When it is falsy
                    (e.g. None) the id is looked up from the date and upper-cased team names

    :return: raw xml

    :raises Exception: if shared.get_file returns None
    """
    # Only look the id up when the caller didn't hand us one
    if not game_id:
        game_id = get_espn_game_id(date, home_team.upper(), away_team.upper())

    feed_url = 'http://www.espn.com/nhl/gamecast/data/masterFeed?lang=en&isAll=true&gameId={}'.format(game_id)
    file_info = {
        "url": feed_url,
        "name": game_id,
        "type": "espn_pbp",
        "season": shared.get_season(date),
    }

    response = shared.get_file(file_info)
    if response is not None:
        return response

    # Nothing came back for this game
    raise Exception
def get_schedule(date_from, date_to):
    """
    Fetch the NHL schedule for a range of dates and decode it from json.

    Ex: https://statsapi.web.nhl.com/api/v1/schedule?startDate=2010-10-03&endDate=2011-06-20

    :param date_from: scrape from this date
    :param date_to: scrape until this date

    :return: schedule of the date range, as decoded by json.loads
    """
    schedule_url = 'https://statsapi.web.nhl.com/api/v1/schedule?startDate={a}&endDate={b}'.format(
        a=date_from, b=date_to
    )
    file_name = date_from + "_" + date_to

    page_info = {
        "url": schedule_url,
        "name": file_name,
        "type": "json_schedule",
        "season": shared.get_season(date_from),
    }

    raw_schedule = shared.get_file(page_info)
    return json.loads(raw_schedule)
def parse_event(event, score, teams, date, game_id, players):
    """
    Parse a single json-formatted event into a flat dictionary.

    Note that on a goal the running score dict is updated in place *after* the
    score has been recorded for this play, so the play holds the score before the goal.

    :param event: json of event
    :param score: Current score of the game (dict with 'home' and 'away'); mutated on goals
    :param teams: Teams dict ('home'/'away' -> dict with 'id' and 'name')
    :param date: date of the game
    :param game_id: game id for game
    :param players: Dict of player ids to player names

    :return: dictionary with the info
    """
    # General info about the play
    play = {
        'play_index': event['play_index'],
        'date': date,
        'game_id': game_id,
        'season': shared.get_season(date),
        'period': event['time_interval'],
    }

    # No clock string -> no elapsed time
    clock = event['clock_time_string']
    play['seconds_elapsed'] = shared.convert_to_seconds(clock) if clock else None

    play['home_score'] = score['home']
    play['away_score'] = score['away']

    # If shootout go with 'play_by_play_string' field -> more descriptive
    play_type = event['play_type']
    if play_type == "Shootout":
        play['event'] = event['play_by_play_string'].strip()
    else:
        play['event'] = play_type

    # Teams
    play['home_team'] = teams['home']['name']
    play['away_team'] = teams['away']['name']
    event_side = 'home' if event['play_summary']['off_team_id'] == teams['home']['id'] else 'away'
    play['ev_team'] = teams[event_side]['name']

    # Player Id
    play['p1_id'] = event.get('primary_player_id')

    # Goalie ids come from the first play action; a blank or missing id is looked up as 0
    first_action = event['play_actions'][0]
    play['away_goalie_id'] = first_action.get('away_team_goalie')
    play['home_goalie_id'] = first_action.get('home_team_goalie')
    for side in ('away', 'home'):
        goalie_id = play[side + '_goalie_id']
        lookup_id = 0 if goalie_id in ('', None) else int(goalie_id)
        play[side + '_goalie'] = players.get(lookup_id)

    # Event specific stuff
    if play_type == 'Faceoff':
        play['p2_id'] = event['play_summary'].get("loser_id")
    elif play_type == 'Penalty':
        # TODO: Format better?
        penalty_summary = event['play_summary']
        penalty_fields = ("infraction_type", "penalty_type", "penalty_minutes")
        play['details'] = ",".join(str(penalty_summary.get(field, " ")) for field in penalty_fields)
    elif play_type == "Goal":
        get_goal_players(play, event, players)

        # Read the summary only after the helper has run on the event
        goal_summary = event['play_summary']
        play['p2_id'] = goal_summary.get("assist_1_id")
        play['p3_id'] = goal_summary.get("assist_2_id")

        # Update Score
        if goal_summary['off_team_id'] == teams['home']['id']:
            score['home'] += 1
        else:
            score['away'] += 1

    # Player Id's --> Player Names (a missing or None id is looked up as 0)
    for slot in ('p1', 'p2', 'p3'):
        raw_id = play.get(slot + '_id', 0) or 0
        play[slot + '_name'] = players.get(int(raw_id))

    # Coords
    coords_summary = event['play_summary']
    play['xC'] = coords_summary.get('x_coord')
    play['yC'] = coords_summary.get('y_coord')

    return play
def test_get_season():
    """ Check that shared.get_season maps each sample date to the expected season """
    expected_seasons = (
        ("2017-10-01", 2017),
        ("2016-06-01", 2015),
    )

    for game_date, season in expected_seasons:
        assert shared.get_season(game_date) == season