Python get_bref_tablesの例

プログラミング言語: Python

名前空間/パッケージ名: nba_stats.scraping.base_functions

メソッド/関数: get_bref_tables

hotexamples.comのコード掲載数: 4

Python get_bref_tables - 4件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたPythonのnba_stats.scraping.base_functions.get_bref_tablesの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

コード例 #1

ファイルを表示

def get_all_players(players_soups):
    '''Build a single df of player info from a list of bref player-page soups.

    Keyword arguments:
    players_soups -- A list of all the soups to be processed.

    Each soup's 'all_players' table is extracted with get_bref_tables and the
    tables are concatenated. The combined 'pos', 'colleges' and 'player'
    columns are expanded into pos1/pos2, college1/college2 and
    first_name/last_name, then dropped. The returned df has a fresh 0..n-1
    index.
    '''
    frames = [
        get_bref_tables(soup, ['all_players'])['all_players']
        for soup in players_soups
    ]
    players = pd.concat(frames)

    # Split the combined position / college strings into two columns each.
    for slot in (0, 1):
        suffix = str(slot + 1)
        players.loc[:, 'pos' + suffix] = players.pos.apply(
            lambda text: get_split(text, '-', slot))
        players.loc[:, 'college' + suffix] = players.colleges.apply(
            lambda text: get_split(text, ',', slot))

    def _iso_birth_date(raw):
        # Blank birth dates stay blank; everything else is re-rendered
        # from e.g. 'June 24, 1968' to '1968-06-24'.
        if raw == '':
            return ''
        parsed = dt.datetime.strptime(raw, '%B %d, %Y')
        return parsed.date().strftime('%Y-%m-%d')

    players.birth_date = players.birth_date.apply(_iso_birth_date)

    for numeric_column in ('year_max', 'year_min', 'weight'):
        players[numeric_column] = players[numeric_column].apply(to_int)

    # A final year equal to CURRENT_SEASON is replaced by NaN.
    players.year_max = players.year_max.apply(
        lambda season: np.nan if season == CURRENT_SEASON else season)

    # split_first_last returns an indexable pair; element 0 goes to
    # first_name and element 1 to last_name.
    for part, name_column in enumerate(['first_name', 'last_name']):
        players.loc[:, name_column] = players.apply(
            lambda row: split_first_last(row['player'], row['bref'])[part],
            axis=1)

    players.height = players.height.apply(convert_feet)

    obsolete = ['colleges', 'player', 'pos']
    return players.drop(obsolete, axis=1).reset_index(drop=True)

コード例 #2

ファイルを表示

def get_boxscore_htmls_month(year, month, headers=None, url_template=None):
    '''Returns a df containing info for all games in the given month.

    Keyword arguments:
    year -- the year the season ends in
    month -- the month as an integer
    headers -- override headers to use for the soup object (default None)
    url_template -- override template to use for url (default None); the
        placeholders %year% and %month% are substituted with the year and
        the lower-cased English month name

    Returns:
    A df of the games in that month that already have a home score, with
    date_game rendered as YYYY-MM-DD and the home/visitor points as int.
    None if get_soup returns a falsy value or if the 'all_schedule' table
    is missing from the page.

    Raises:
    AssertionError if year or month is not an int, if year is greater than
    CURRENT_YEAR + 1, or if month is outside 1..12.
    '''
    # Exact type checks are kept on purpose so that bools are still rejected.
    assert type(year) is int and type(
        month) is int, 'Year and month must be int'
    assert year <= CURRENT_YEAR + 1, 'Year must be before %s' % (CURRENT_YEAR +
                                                                 1)
    assert 1 <= month <= 12, 'Month must be between 1 and 12'

    if url_template is None:
        url_template = "https://www.basketball-reference.com/leagues/NBA_%year%_games-%month%.html"
    month_url = url_template.replace('%year%', str(year)).replace(
        '%month%', calendar.month_name[month].lower())
    soup = get_soup(month_url, headers)

    if not soup:
        # Nothing to parse; make the "no data" result explicit.
        return None

    try:
        boxscores_month = get_bref_tables(soup, ['all_schedule'],
                                          'box_score_text')['all_schedule']
    except KeyError:
        logger_build.info(
            "Games table does not exist. Year: %s, month: %s." %
            (year, month))
        return None

    # Only drop the columns that are actually present: the code below already
    # treats start_time as optional, so other columns may be missing as well.
    drop_columns = [
        column for column in
        ['attendance', 'box_score_text', 'game_remarks', 'overtimes']
        if column in boxscores_month.columns
    ]
    boxscores_month.drop(drop_columns, inplace=True, axis=1)
    boxscores_month.rename(columns={
        'game_start_time': 'start_time',
        'home_team_name': 'home_team',
        'visitor_team_name': 'visitor_team'
    },
                           inplace=True)
    boxscores_month.date_game = boxscores_month.date_game.apply(
        lambda x: dt.datetime.strptime(x, '%a, %b %d, %Y').date().strftime(
            '%Y-%m-%d'))
    if 'start_time' in boxscores_month.columns:
        boxscores_month.start_time = boxscores_month.start_time.apply(
            column_time)

    # keep only games that have been played; copy so that the column
    # assignments below write to an independent frame rather than to a
    # filtered view of the original (avoids chained-assignment problems)
    boxscores_month = boxscores_month[
        boxscores_month.loc[:, 'home_pts'] != ''].copy()

    for home_visitor in ['home', 'visitor']:
        pts_column = home_visitor + '_pts'
        boxscores_month[pts_column] = boxscores_month[pts_column].astype(int)

    return boxscores_month

コード例 #3

ファイルを表示

def get_teams(url=None, headings=None):
    '''Returns a df containing the abbreviation and team name of all teams from bref page.

    Keyword arguments:
    url - the url to scrape, bref team page if none given (default None)
    headings - the headings to use when scraping, if none given uses default behaviour of get_soup (default None)

    Returns:
    A df with exactly two columns, 'abbreviation' and 'team', one row per
    distinct team name found in the 'all_teams_active' and
    'all_teams_defunct' tables.

    Raises:
    IndexError if a string bref value does not contain '/teams/' followed by
    three capital letters.
    '''
    if url is None:
        url = BREF_HTML + '/teams/'
    team_soup = get_soup(url, headings)

    tables = get_bref_tables(team_soup,
                             ['all_teams_active', 'all_teams_defunct'],
                             'franch_name')

    for table in tables.values():
        table.loc[:, 'team'] = table.apply(
            lambda row: combine_columns(row['franch_name'], row['team_name']),
            axis=1)
    teams = pd.concat(tables).reset_index(drop=True)
    teams = teams.drop_duplicates('team').reset_index(drop=True)

    # Compile once instead of re-parsing the pattern for every row.
    abbreviation_re = re.compile(r'(?<=/teams/)[A-Z]{3}')
    # Non-string values (e.g. missing links) are passed through untouched;
    # isinstance also accepts str subclasses, which type(x) == str rejected.
    teams.loc[:, 'abbreviation'] = teams.bref.apply(
        lambda x: abbreviation_re.findall(x)[0] if isinstance(x, str) else x)

    return teams[['abbreviation', 'team']]

コード例 #4

ファイルを表示

def get_boxscore(boxscore_soup, advanced=False):
    '''Returns a df containing boxscore data for both teams, given the soup of the boxscore url.
    pct fields are removed as these can be inferred from data.
    Advanced box score option is in development stage. Will return df but formatting not refined.

    Keyword arguments:
    boxscore_soup -- A soup object of the boxscore url
    advanced -- If True, returns the advanced box score (Default False)

    Returns:
    A df with one row per player line (the 'Reserves' separator rows are
    removed). The 'bref' column is renamed to 'player', and every column
    other than mp/player/starter/team is passed through to_int.
    An empty df if pd.concat raises ValueError (e.g. no tables were found).

    Raises:
    IndexError if more tables are found than get_away_home_teams returns teams.
    '''
    table_kind = 'advanced' if advanced else 'basic'
    table_id_re = re.compile('all_box-[A-Z]{3}-game-' + table_kind)

    tables = get_bref_tables(boxscore_soup, [table_id_re])
    teams = get_away_home_teams(boxscore_soup)

    # Teams are assigned positionally: the i-th table gets the i-th entry
    # returned by get_away_home_teams. Indexing (rather than popping) leaves
    # the helper's list untouched.
    for position, table in enumerate(tables.values()):
        if 'reason' in table.keys():
            table.loc[:, 'starter'] = table.apply(
                lambda row: is_starter(row.name, row.reason), axis=1)
        else:
            table.loc[:, 'starter'] = table.apply(
                lambda row: is_starter(row.name), axis=1)
        table.loc[:, 'team'] = teams[position]

    try:
        boxscore = pd.concat(list(tables.values()),
                             sort=False).reset_index(drop=True)
    except ValueError:
        return pd.DataFrame()

    # Copy after filtering so the in-place drop/rename and the column
    # assignments below operate on an independent frame, not on a slice.
    boxscore = boxscore[boxscore.player != 'Reserves'].copy()

    if advanced:
        column_drops = [
            'reason', 'player', 'efg_pct', 'ts_pct', 'fg3a_per_fga_pct',
            'fta_per_fga_pct', 'starter', 'team', 'mp', 'bpm'
        ]  #bpm newly added, should add at some point
    else:
        column_drops = ['reason', 'player'] + [
            header for header in boxscore.keys() if 'pct' in header
        ]
        boxscore['mp'] = boxscore['mp'].apply(convert_mp)

    # Only drop what is actually present.
    column_drops = [x for x in column_drops if x in boxscore.keys()]
    non_number = ['mp', 'player', 'starter', 'team']
    boxscore.drop(column_drops, axis=1, inplace=True)
    # The original 'player' column was dropped above; 'bref' takes its name.
    boxscore.rename(columns={'bref': 'player'}, inplace=True)
    for column in boxscore.columns:
        if column in non_number:
            continue
        is_pct = 'pct' in column
        boxscore[column] = boxscore[column].apply(
            lambda x, is_pct=is_pct: to_int(x, is_pct))

    return boxscore