def get_player_info_from_url(playerid):
    """
    Fetches a player's page from the NHL API and pulls out ID, Name, Hand, Pos, DOB, Height, Weight, and Nationality.

    Every field is looked up under the first entry of 'people' in the response via helpers.try_to_access_dict, so
    each value is whatever that helper returns for the corresponding path.

    :param playerid: int, the player id

    :return: dict with player ID, name, handedness, position, etc
    """
    raw_page = helpers.try_url_n_times(get_player_url(playerid))
    parsed = json.loads(raw_page)

    # All fields live below the first entry of 'people'; only the tail of each path differs
    person_path = ('people', 0)
    field_paths = (
        ('ID', ('id',)),
        ('Name', ('fullName',)),
        ('Hand', ('shootsCatches',)),
        ('Pos', ('primaryPosition', 'code')),
        ('DOB', ('birthDate',)),
        ('Height', ('height',)),
        ('Weight', ('weight',)),
        ('Nationality', ('nationality',)),
    )
    info = {field: helpers.try_to_access_dict(parsed, *person_path, *tail) for field, tail in field_paths}

    # Strip the spaces out of the height string when a height was found
    height = info['Height']
    if height is not None:
        info['Height'] = height.replace(' ', '')
    return info
def get_game_from_url(season, game):
    """
    Downloads the NHL API page holding the information for the specified game.

    :param season: int, the season
    :param game: int, the game

    :return: the page at the game's url, as returned by helpers.try_url_n_times
    """
    game_url = get_game_url(season, game)
    return helpers.try_url_n_times(game_url)
def get_team_info_from_url(teamid):
    """
    Looks up a team's ID, abbreviation, and name in the NHL API.

    :param teamid: int (or anything int() accepts), the team ID

    :return: (id, abbrev, name), or (None, None, None) if no page could be retrieved
    """
    url = get_team_info_url(int(teamid))
    response = helpers.try_url_n_times(url)
    if response is None:
        return None, None, None

    # The team's details are in the first entry of 'teams'
    team = json.loads(response)['teams'][0]
    return team['id'], team['abbreviation'], team['name']
def scrape_game_toi(season, game, force_overwrite=False):
    """
    Scrapes the raw toi page for the given game and saves it.

    :param season: int, the season
    :param game: int, the game
    :param force_overwrite: bool. If False and the raw toi file already exists, the game is not scraped again

    :return: bool, True if the page was scraped and saved, False if scraping was skipped
    """
    target = get_game_raw_toi_filename(season, game)
    if force_overwrite or not os.path.exists(target):
        raw_page = helpers.try_url_n_times(get_shift_url(season, game))
        save_raw_toi(raw_page, season, game)
        sleep(1)  # Don't want to overload NHL servers

        # Parsing while the page is still in memory would be most efficient, but for simplicity that is done later
        return True
    return False
def generate_season_schedule_file(season, force_overwrite=True):
    """
    Downloads the season schedule from the NHL API, converts it to a dataframe, and writes it to file.

    The output contains the following columns:

    - Season: int, the season
    - Date: str, the dates
    - Game: int, the game id
    - Type: str, the game type (for preseason vs regular season, etc)
    - Status: str, e.g. Final
    - Road: int, the road team ID
    - RoadScore: int, number of road team goals
    - RoadCoach: str, 'N/A' when this function is run (edited later with road coach name)
    - Home: int, the home team ID
    - HomeScore: int, number of home team goals
    - HomeCoach: str, 'N/A' when this function is run (edited later with home coach name)
    - Venue: str, the name of the arena
    - Result: str, 'N/A' when this function is run (edited accordingly later from PoV of home team: W, OTW, SOL, etc)
    - PBPStatus: str, 'Not scraped' when this function is run (edited accordingly later)
    - TOIStatus: str, 'Not scraped' when this function is run (edited accordingly later)

    :param season: int, the season
    :param force_overwrite: bool, forwarded to write_season_schedule. Intended meaning: if True, generates entire file
        from scratch; if False, only redoes when not Final previously.

    :return: Nothing
    """
    raw_schedule = helpers.try_url_n_times(get_season_schedule_url(season))
    schedule = _create_schedule_dataframe_from_json(json.loads(raw_schedule))
    schedule.loc[:, 'Season'] = season

    # Last step: fill in the info that comes from the pbp (uses the current schedule if it already exists)
    schedule = _fill_in_schedule_from_pbp(schedule, season)
    write_season_schedule(schedule, season, force_overwrite)
def scrape_game_toi_from_html(season, game, force_overwrite=True):
    """
    Scrapes the home and road toi html shift logs for the given game and saves them.

    :param season: int, the season
    :param game: int, the game
    :param force_overwrite: bool. If True, both logs are always scraped. If False, a log whose file already exists
        is left alone and not scraped again.

    :return: nothing
    """
    filenames = (get_home_shiftlog_filename(season, game), get_road_shiftlog_filename(season, game))
    urls = (get_home_shiftlog_url(season, game), get_road_shiftlog_url(season, game))
    filetypes = ('H', 'R')  # home log, then road log

    for filename, url, filetype in zip(filenames, urls, filetypes):
        # Skip this log when it is already on disk and overwriting was not requested.
        # (This guard used to end in `pass`, which skipped nothing and re-downloaded every time.)
        if not force_overwrite and os.path.exists(filename):
            continue

        page = helpers.try_url_n_times(url)
        save_raw_toi_from_html(page, season, game, filetype)
        sleep(1)  # Don't want to overload NHL servers
        print('Scraped html toi for {0:d} {1:d}'.format(season, game))