예제 #1
0
def scrape_list_of_games(games, if_scrape_shifts):
    """
    Scrape every game in a list and combine the per-game results.

    Each game is handed to game_scraper.scrape_game; any pbp or shifts result
    that comes back as None is skipped when combining.

    :param games: list of [game_id, date]
    :param if_scrape_shifts: Boolean indicating whether to also scrape shifts

    :return: Tuple (pbp_df, shifts_df). pbp_df is the combined DataFrame of pbp
             info, or None if no game produced any (in which case shifts_df is
             None too). shifts_df is the combined shifts DataFrame, or None
             when shifts were not requested or no game produced any.
    """
    pbp_dfs = []
    shifts_dfs = []

    for game in games:
        pbp_df, shifts_df = game_scraper.scrape_game(str(game[0]), game[1],
                                                     if_scrape_shifts)
        if pbp_df is not None:
            pbp_dfs.append(pbp_df)
        if shifts_df is not None:
            shifts_dfs.append(shifts_df)

    # Nothing to combine if no game yielded pbp data
    if not pbp_dfs:
        return None, None

    pbp_df = pd.concat(pbp_dfs, ignore_index=True)
    # The result of apply is not used; check_goalie is simply run on each row
    # (presumably for its side effects -- confirm against game_scraper)
    pbp_df.apply(game_scraper.check_goalie, axis=1)

    # pd.concat raises ValueError on an empty list, so only combine shifts
    # when at least one game actually returned them
    if if_scrape_shifts and shifts_dfs:
        shifts_df = pd.concat(shifts_dfs, ignore_index=True)
    else:
        shifts_df = None

    return pbp_df, shifts_df
예제 #2
0
def test_scrape_game(pbp_columns, shifts_columns):
    """
    Verify game_scraper.scrape_game for a single game, first without and then
    with shifts.

    For each call the test checks, in order:
        1. The type of each returned object (shifts must be None when they
           were not requested)
        2. The number of rows returned
        3. The column names, compared against the expected lists passed in

    :param pbp_columns: expected list of pbp column names
    :param shifts_columns: expected list of shifts column names
    """

    # Shifts not requested -> only the play-by-play should come back
    plays, shift_rows = game_scraper.scrape_game(
        "2016020475", "2016-12-18", False
    )
    assert isinstance(plays, pd.DataFrame)
    assert shift_rows is None
    assert len(plays) == 326
    assert plays.columns.tolist() == pbp_columns

    # Shifts requested -> both results should be DataFrames
    plays, shift_rows = game_scraper.scrape_game(
        "2007020222", "2007-11-08", True
    )
    assert isinstance(plays, pd.DataFrame)
    assert isinstance(shift_rows, pd.DataFrame)
    assert len(plays) == 248
    assert len(shift_rows) == 726
    assert plays.columns.tolist() == pbp_columns
    assert shift_rows.columns.tolist() == shifts_columns