Exemple #1
0
def get_player_info_from_url(playerid):
    """
    Fetches a player's page from the NHL API and extracts basic bio fields.

    The fields pulled are ID, Name, Hand, Pos, DOB, Height, Weight, and Nationality.

    :param playerid: int, the player id

    :return: dict keyed by the field names above. Height has its internal spaces removed.
    """
    # For each output field, the sequence of keys/indices to follow in the parsed JSON
    field_paths = {'ID': ['people', 0, 'id'],
                   'Name': ['people', 0, 'fullName'],
                   'Hand': ['people', 0, 'shootsCatches'],
                   'Pos': ['people', 0, 'primaryPosition', 'code'],
                   'DOB': ['people', 0, 'birthDate'],
                   'Height': ['people', 0, 'height'],
                   'Weight': ['people', 0, 'weight'],
                   'Nationality': ['people', 0, 'nationality']}

    raw_page = helpers.try_url_n_times(get_player_url(playerid))
    parsed = json.loads(raw_page)

    info = {field: helpers.try_to_access_dict(parsed, *path)
            for field, path in field_paths.items()}

    # Height may come back as None (presumably when the lookup fails -- confirm against
    # helpers.try_to_access_dict); otherwise strip the space in the middle of it
    height = info['Height']
    if height is not None:
        info['Height'] = height.replace(' ', '')
    return info
Exemple #2
0
def get_game_from_url(season, game):
    """
    Downloads the NHL API page for the specified game.

    :param season: int, the season
    :param game: int, the game

    :return: str, the page found at the game's url (as returned by helpers.try_url_n_times)
    """
    game_url = get_game_url(season, game)
    return helpers.try_url_n_times(game_url)
Exemple #3
0
def get_team_info_from_url(teamid):
    """
    Looks up a team's ID, abbreviation, and name via the NHL API.

    :param teamid: int (or anything int() accepts), the team ID

    :return: (id, abbrev, name), or (None, None, None) if no page could be retrieved
    """
    team_url = get_team_info_url(int(teamid))
    raw_page = helpers.try_url_n_times(team_url)
    if raw_page is None:
        return (None, None, None)

    # Only the first entry of the 'teams' list is used
    team = json.loads(raw_page)['teams'][0]
    return team['id'], team['abbreviation'], team['name']
Exemple #4
0
def scrape_game_toi(season, game, force_overwrite=False):
    """
    Downloads the shift (toi) page for the given game and saves it in raw form.

    :param season: int, the season
    :param game: int, the game
    :param force_overwrite: bool. If False and the raw toi file already exists, the game is not scraped again.

    :return: bool, True if the page was scraped and saved, False if it was skipped
    """
    raw_toi_path = get_game_raw_toi_filename(season, game)

    if force_overwrite or not os.path.exists(raw_toi_path):
        shift_page = helpers.try_url_n_times(get_shift_url(season, game))
        save_raw_toi(shift_page, season, game)
        # Pause between requests so the NHL servers aren't overloaded
        sleep(1)
        # Parsing is left for a later step rather than done here while the page is in memory
        return True

    return False
Exemple #5
0
def generate_season_schedule_file(season, force_overwrite=True):
    """
    Downloads the season schedule from the NHL API, converts it to a dataframe, and writes it to file.

    The output contains the following columns:

    - Season: int, the season
    - Date: str, the dates
    - Game: int, the game id
    - Type: str, the game type (for preseason vs regular season, etc)
    - Status: str, e.g. Final
    - Road: int, the road team ID
    - RoadScore: int, number of road team goals
    - RoadCoach: str, 'N/A' when this function is run (edited later with road coach name)
    - Home: int, the home team ID
    - HomeScore: int, number of home team goals
    - HomeCoach: str, 'N/A' when this function is run (edited later with home coach name)
    - Venue: str, the name of the arena
    - Result: str, 'N/A' when this function is run (edited accordingly later from PoV of home team: W, OTW, SOL, etc)
    - PBPStatus: str, 'Not scraped' when this function is run (edited accordingly later)
    - TOIStatus: str, 'Not scraped' when this function is run (edited accordingly later)

    :param season: int, the season

    :param force_overwrite: bool, handed on to write_season_schedule. If True, generates entire file
        from scratch. If False, only redoes when not Final previously.

    :return: Nothing
    """
    schedule_url = get_season_schedule_url(season)
    schedule_json = json.loads(helpers.try_url_n_times(schedule_url))

    schedule = _create_schedule_dataframe_from_json(schedule_json)
    schedule.loc[:, 'Season'] = season

    # Final step before writing: some columns come from the pbp / an already-existing schedule
    schedule = _fill_in_schedule_from_pbp(schedule, season)
    write_season_schedule(schedule, season, force_overwrite)
Exemple #6
0
def scrape_game_toi_from_html(season, game, force_overwrite=True):
    """
    Scrapes the home and road toi html shift logs for the given game and saves them raw.

    :param season: int, the season
    :param game: int, the game
    :param force_overwrite: bool. If False, a shift log whose file already exists is not scraped again.
        If True (the default), both logs are always scraped and saved.

    :return: nothing
    """
    filenames = (get_home_shiftlog_filename(season, game),
                 get_road_shiftlog_filename(season, game))
    urls = (get_home_shiftlog_url(season, game),
            get_road_shiftlog_url(season, game))
    filetypes = ('H', 'R')  # home log, then road log

    for filename, url, filetype in zip(filenames, urls, filetypes):
        # Skip this log when it is already on disk and overwriting was not requested.
        # (This guard previously ran `pass`, so force_overwrite=False had no effect.)
        if not force_overwrite and os.path.exists(filename):
            continue

        page = helpers.try_url_n_times(url)
        save_raw_toi_from_html(page, season, game, filetype)
        sleep(1)  # Don't want to overload NHL servers
        print('Scraped html toi for {0:d} {1:d}'.format(season, game))