def get_shifts(game_id):
    """
    Fetch the html shift reports of both teams for a single game.

    Ex: http://www.nhl.com/scores/htmlreports/20162017/TV020971.HTM

    :param game_id: the game (its first four characters are used as the season)

    :return: tuple of (home, away), each being whatever shared.get_file returns
    """
    game_id = str(game_id)
    season = game_id[:4]

    # Report directory is the season's start year followed by the next year (e.g. 20162017)
    season_dir = '{}{}'.format(season, int(season) + 1)
    report_url = 'http://www.nhl.com/scores/htmlreports/' + season_dir + '/{}' + game_id[4:] + '.HTM'
    home_url = report_url.format('TH')
    away_url = report_url.format('TV')

    # Start with the descriptor for the home report
    page_info = {
        "url": home_url,
        "name": game_id,
        "type": "html_shifts_home",
        "season": season,
    }
    home = shared.get_file(page_info)

    # Re-point the same descriptor at the away report and fetch that one too
    page_info.update({"type": "html_shifts_away", "url": away_url})
    away = shared.get_file(page_info)

    return home, away
def get_espn_date(date):
    """
    Get the ESPN scoreboard page that contains all the games for that day.

    :param date: YYYY-MM-DD

    :return: response returned by shared.get_file

    :raises Exception: if the response is empty or missing
    """
    page_info = {
        # ESPN expects the date without dashes (YYYYMMDD)
        "url": 'http://www.espn.com/nhl/scoreboard?date={}'.format(date.replace('-', '')),
        "name": date,
        "type": "espn_scoreboard",
        "season": shared.get_season(date),
    }
    response = shared.get_file(page_info)

    # Nothing usable came back -> fail loudly, and say which date was requested
    if not response:
        raise Exception("Unable to get the ESPN scoreboard page for {}".format(date))

    return response
def get_espn_game(date, home_team, away_team):
    """
    Gets the ESPN pbp feed

    Ex: http://www.espn.com/nhl/gamecast/data/masterFeed?lang=en&isAll=true&gameId=400885300

    :param date: date of the game
    :param home_team: home team
    :param away_team: away team

    :return: raw xml

    :raises Exception: if shared.get_file returns None
    """
    # The ESPN game id is looked up with upper-cased team names
    game_id = get_espn_game_id(date, home_team.upper(), away_team.upper())

    file_info = {
        "url": 'http://www.espn.com/nhl/gamecast/data/masterFeed?lang=en&isAll=true&gameId={}'.format(game_id),
        "name": game_id,
        "type": "espn_pbp",
        "season": shared.get_season(date),
    }
    response = shared.get_file(file_info)

    # Same exception type as before, but now it says which game could not be obtained
    if response is None:
        raise Exception(
            "Unable to get the ESPN pbp feed for game {} ({} at {} on {})".format(
                game_id, away_team, home_team, date))

    return response
def get_espn_game(date, home_team, away_team, game_id=None):
    """
    Gets the ESPN pbp feed

    Ex: http://www.espn.com/nhl/gamecast/data/masterFeed?lang=en&isAll=true&gameId=400885300

    :param date: date of the game
    :param home_team: home team
    :param away_team: away team
    :param game_id: Game id if we already have it - for live scraping. None if not there

    :return: raw xml

    :raises Exception: if shared.get_file returns None
    """
    # Look the id up (with upper-cased team names) only when the caller didn't supply one
    if not game_id:
        game_id = get_espn_game_id(date, home_team.upper(), away_team.upper())

    file_info = {
        "url": 'http://www.espn.com/nhl/gamecast/data/masterFeed?lang=en&isAll=true&gameId={}'.format(game_id),
        "name": game_id,
        "type": "espn_pbp",
        "season": shared.get_season(date),
    }
    response = shared.get_file(file_info)

    # Same exception type as before, but now it says which game could not be obtained
    if response is None:
        raise Exception(
            "Unable to get the ESPN pbp feed for game {} ({} at {} on {})".format(
                game_id, away_team, home_team, date))

    return response
def get_shifts(game_id):
    """
    Fetch the json shift chart for a game and decode it.

    Ex: http://www.nhl.com/stats/rest/shiftcharts?cayenneExp=gameId=2010020001

    :param game_id: the game

    :return: decoded json, or an empty dict when no response was obtained
    """
    game_key = str(game_id)
    shifts_url = 'http://www.nhl.com/stats/rest/shiftcharts?cayenneExp=gameId={}'.format(game_id)

    page_info = {
        "url": shifts_url,
        "name": game_key,
        "type": "json_shifts",
        "season": game_key[:4],
    }
    raw = shared.get_file(page_info)

    # A falsy response (nothing fetched) maps to an empty dict
    return json.loads(raw) if raw else {}
def get_roster(game_id):
    """
    Fetch the html roster report for a game.

    Ex: http://www.nhl.com/scores/htmlreports/20162017/RO020475.HTM

    :param game_id: the game (its first four characters are used as the season)

    :return: whatever shared.get_file returns for the roster page
    """
    game_id = str(game_id)
    season = game_id[:4]
    game_number = game_id[4:]

    # Report directory is the season's start year followed by the next year (e.g. 20162017)
    roster_url = 'http://www.nhl.com/scores/htmlreports/{}{}/RO{}.HTM'.format(
        season, int(season) + 1, game_number)

    page_info = {
        "url": roster_url,
        "name": game_id,
        "type": "html_roster",
        "season": season,
    }
    return shared.get_file(page_info)
def get_pbp(game_id):
    """
    Given a game_id it returns the decoded json play-by-play feed

    Ex: http://statsapi.web.nhl.com/api/v1/game/2016020475/feed/live

    :param game_id: the game - a string, or anything whose str() is the game id (e.g. an int)

    :return: decoded json of game or an empty dict if couldn't get game
    """
    # Coerce up front so an int id can be sliced for the season below
    game_id = str(game_id)

    page_info = {
        "url": 'http://statsapi.web.nhl.com/api/v1/game/{}/feed/live'.format(game_id),
        "name": game_id,
        "type": "json_pbp",
        "season": game_id[:4],
    }
    response = shared.get_file(page_info)

    if not response:
        print("Json pbp for game {} is either not there or can't be obtained".format(game_id))
        return {}

    return json.loads(response)
def test_get_file(file_info):
    """ Check shared.get_file gives back a non-empty string and leaves the working directory alone """
    start_dir = os.getcwd()

    # Case 1: either no directory has been specified or it doesn't exist
    contents = shared.get_file(file_info)
    assert type(contents) == str
    assert len(contents) > 0
    assert os.getcwd() == start_dir

    # Case 2: the directory exists
    # The directory holding this test file is used to keep things simple
    this_dir = os.path.dirname(os.path.realpath(__file__))
    shared.add_dir(this_dir)
    contents = shared.get_file(file_info)
    assert type(contents) == str
    assert len(contents) > 0
    assert os.getcwd() == start_dir

    # Cleanup: delete the "docs" tree from this file's directory, then go back to where we started
    os.chdir(this_dir)
    shutil.rmtree("docs")
    os.chdir(start_dir)
def get_schedule(date_from, date_to):
    """
    Fetch the schedule of games for a date range and decode it.

    Ex: https://statsapi.web.nhl.com/api/v1/schedule?startDate=2010-10-03&endDate=2011-06-20

    Note: the response is passed straight to json.loads; a missing response is not handled here.

    :param date_from: scrape from this date
    :param date_to: scrape until this date

    :return: decoded json of schedule of date range
    """
    schedule_url = 'https://statsapi.web.nhl.com/api/v1/schedule?startDate={a}&endDate={b}'.format(
        a=date_from, b=date_to)

    page_info = {
        "url": schedule_url,
        "name": date_from + "_" + date_to,
        "type": "json_schedule",
        # The season is derived from the start of the range
        "season": shared.get_season(date_from),
    }

    raw_schedule = shared.get_file(page_info)
    return json.loads(raw_schedule)
def get_pbp(game_id):
    """
    Fetch and decode the play-by-play json for an NWHL game
    (e.g. https://www.nwhl.zone/game/get_play_by_plays?id=18507472)

    :param game_id: Given Game id (e.g. 18507472)

    :return: decoded json, or an empty dict when no response was obtained
    """
    pbp_url = 'https://www.nwhl.zone/game/get_play_by_plays?id={}'.format(game_id)

    page_info = {
        "url": pbp_url,
        "name": str(game_id),
        "type": "nwhl_json_pbp",
        "season": "nwhl",
    }
    raw = shared.get_file(page_info)

    # Happy path first: something came back, so decode it
    if raw:
        return json.loads(raw)

    print("Json pbp for game {} is either not there or can't be obtained".format(game_id))
    return {}