def get_espn_game_id(date, home_team, away_team):
    """
    Scrapes the day's ESPN scoreboard and gets the id for the given game
    Ex: http://www.espn.com/nhl/scoreboard?date=20161024

    :param date: date of the game as a string; any '-' is stripped before it
                 is placed in the url (so 2016-10-24 and 20161024 both work)
    :param home_team: home team
    :param away_team: away team

    :return: the ESPN game id of the first listed game that contains either
             team, or None if no listed game matches
    :raises Exception: if the scoreboard page can't be retrieved
    """
    url = 'http://www.espn.com/nhl/scoreboard?date={}'.format(date.replace('-', ''))
    response = shared.get_url(url)

    # A missing scoreboard is treated as an error rather than a None return
    if not response:
        raise Exception("Unable to retrieve the ESPN scoreboard for date {}".format(date))

    game_ids = get_game_ids(response)
    games = get_teams(response)

    # The two lists are indexed in parallel: game_ids[i] is the id for games[i]
    for i, teams in enumerate(games):
        if home_team in teams or away_team in teams:
            return game_ids[i]

    # No game on this day involves either team
    return None
def get_shifts(game_id):
    """
    Fetches the home and away html shift reports for a game
    Ex: http://www.nhl.com/scores/htmlreports/20162017/TV020971.HTM

    The first four characters of the game id are used as the season's starting
    year; the rest of the id identifies the report within that season.

    :param game_id: the game

    :return: tuple of (home, away) -> whatever shared.get_url returns for the
             home (TH) and away (TV) report urls. Nothing is parsed here.
    """
    game_id = str(game_id)
    start_year = game_id[:4]
    end_year = int(start_year) + 1
    game_num = game_id[4:]

    home_url = 'http://www.nhl.com/scores/htmlreports/{}{}/TH{}.HTM'.format(start_year, end_year, game_num)
    away_url = 'http://www.nhl.com/scores/htmlreports/{}{}/TV{}.HTM'.format(start_year, end_year, game_num)

    # Pause for a second after each request
    home = shared.get_url(home_url)
    time.sleep(1)

    away = shared.get_url(away_url)
    time.sleep(1)

    return home, away
def get_roster(game_id):
    """
    Fetches the html roster report for a game
    Ex: http://www.nhl.com/scores/htmlreports/20162017/RO020475.HTM

    :param game_id: the game

    :return: whatever shared.get_url returns for the roster report url
    """
    game_id = str(game_id)

    # First four characters -> season's starting year; the rest -> the game within the season
    start_year, game_num = game_id[:4], game_id[4:]

    template = 'http://www.nhl.com/scores/htmlreports/{}{}/RO{}.HTM'
    return shared.get_url(template.format(start_year, int(start_year) + 1, game_num))
def get_schedule(date_from, date_to):
    """
    Fetches the schedule for a date range and parses it as json
    Ex: http://statsapi.web.nhl.com/api/v1/schedule?startDate=2010-10-03&endDate=2011-06-20

    :param date_from: scrape from this date
    :param date_to: scrape until this date

    :return: parsed json of the schedule for the date range
    """
    endpoint = 'http://statsapi.web.nhl.com/api/v1/schedule?startDate={a}&endDate={b}'
    page = shared.get_url(endpoint.format(a=date_from, b=date_to))
    time.sleep(1)

    # NOTE(review): the response is used without being checked first - confirm
    # shared.get_url can't hand back None here, otherwise `.text` raises
    return json.loads(page.text)
def get_shifts(game_id):
    """
    Fetches the json shift charts for a game and parses them
    Ex: http://www.nhl.com/stats/rest/shiftcharts?cayenneExp=gameId=2010020001

    :param game_id: the game

    :return: parsed json, or None if the response from shared.get_url is falsy
    """
    endpoint = 'http://www.nhl.com/stats/rest/shiftcharts?cayenneExp=gameId={}'
    page = shared.get_url(endpoint.format(game_id))
    time.sleep(1)

    # Only parse when something usable came back
    return json.loads(page.text) if page else None
def get_espn(date, home_team, away_team):
    """
    Gets the ESPN pbp feed
    Ex: http://www.espn.com/nhl/gamecast/data/masterFeed?lang=en&isAll=true&gameId=400885300

    :param date: date of the game
    :param home_team: home team (upper-cased before the game id lookup)
    :param away_team: away team (upper-cased before the game id lookup)

    :return: the response from shared.get_url for the feed (not parsed here)
    :raises Exception: if no ESPN game id is found for the teams on that date,
                       or if the feed can't be retrieved
    """
    game_id = get_espn_game_id(date, home_team.upper(), away_team.upper())

    # The id lookup yields None when no game on the scoreboard matches; fail
    # here instead of requesting a feed for "gameId=None"
    if game_id is None:
        raise Exception("No ESPN game id found for {} vs. {} on {}".format(home_team, away_team, date))

    url = 'http://www.espn.com/nhl/gamecast/data/masterFeed?lang=en&isAll=true&gameId={}'.format(game_id)
    response = shared.get_url(url)

    if response is None:
        raise Exception("Unable to retrieve the ESPN pbp feed for game {}".format(game_id))

    time.sleep(1)
    return response
def get_pbp(game_id):
    """
    Fetches the json play by play feed for a game and parses it
    Ex: http://statsapi.web.nhl.com/api/v1/game/2016020475/feed/live

    :param game_id: the game

    :return: parsed json of the game, or None (after printing a message) if
             the response from shared.get_url is falsy
    """
    endpoint = 'http://statsapi.web.nhl.com/api/v1/game/{}/feed/live'
    page = shared.get_url(endpoint.format(game_id))
    time.sleep(1)

    if page:
        return json.loads(page.text)

    # Couldn't get the page - tell the user and hand back nothing
    print("Json pbp for game {} is either not there or can't be obtained".format(game_id))
    return None