Exemple #1
0
def scrape_games(loops, count_max, action):
    """Run up to ``loops`` scraping passes over the ``games`` table.

    Each pass re-reads the ``scraping`` config section, asks
    ``get_game_soups`` for at most ``count_max`` games and hands the result
    to ``add_basic_gamestats``. The run stops early as soon as a pass yields
    nothing to process.

    Args:
        loops: Maximum number of passes to run.
        count_max: Forwarded to ``get_game_soups`` as ``limit``.
        action: Forwarded to ``add_basic_gamestats`` as ``commit_changes``.

    Returns:
        None.
    """
    db = SqlDataframes()
    # The games table is read once, before the first pass.
    games = db.read_table('games', ['game_id', 'bref'])

    for pass_no in range(1, loops + 1):
        logger.info('Current Loop: {}'.format(pass_no))
        logger.info('Running game soups...')

        # 'check_tables' is a comma-separated string in the config.
        scraping_cfg = get_dbconfig(section='scraping')
        tables_to_check = scraping_cfg['check_tables'].split(',')

        soups = get_game_soups(games, limit=count_max,
                               check_tables=tables_to_check)
        if not soups:
            # Nothing came back for this pass; stop without the summary log.
            break
        add_basic_gamestats(soups, commit_changes=action)

        if pass_no != loops:
            continue

        # Final pass completed: report the gap between the two max game ids,
        # read through a newly created database helper.
        summary_db = SqlDataframes()
        newest_game = summary_db.read_max('games', 'game_id')
        newest_box = summary_db.read_max('boxscores', 'game_id')
        logger.info('FINISHED...Games remaining to scrape: {}'.format(
            newest_game - newest_box))
Exemple #2
0
def periodic_scrape(action):
    """Run the periodic scrape step by delegating to ``add_allnbateams``.

    Args:
        action: Passed straight through to ``add_allnbateams``.

    Returns:
        None.
    """
    # The instance is not used afterwards.
    # NOTE(review): confirm whether constructing it is needed for a side effect.
    _db = SqlDataframes()

    add_allnbateams(action)
Exemple #3
0
def update_games(year):
    """Scrape and store colleges, players, games and playoff games.

    Despite the name, this also writes the ``colleges`` and ``players``
    tables before it adds the games for ``year``.

    Args:
        year: Passed to ``get_boxscore_htmls_year`` and, as the pair
            ``(year, year)``, to ``get_playoff_games``.

    Returns:
        None.
    """
    db = SqlDataframes()

    # Scrape players, then the colleges derived from them.
    player_rows = get_all_players(get_players_urls())
    college_rows = get_colleges(player_rows)
    # NOTE(review): the return value of get_teams() is unused in this function.
    _teams = get_teams()

    # Colleges are written first so players can be mapped against them.
    db.add_to_db(college_rows, 'colleges', check_column='college')
    college_columns = ['college1', 'college2']
    mapped_players = db.apply_mappings(player_rows, 'colleges',
                                       college_columns)
    db.add_to_db(mapped_players, 'players', check_column='bref')

    # Games for the requested year, with team columns mapped via 'teams'.
    boxscore_rows = get_boxscore_htmls_year(year, regular_length=False)
    team_columns = ['home_team', 'visitor_team']
    mapped_games = db.apply_mappings(boxscore_rows, 'teams', team_columns)
    db.add_to_db(mapped_games, 'games', 'bref', 'date_game')

    playoff_rows = get_playoff_games((year, year))
    if not playoff_rows.empty:
        db.add_to_db(playoff_rows, 'playoffgames', 'game_id', 'game_id')
        return
    logger.info('No playoff games added for season: {}'.format(year))
Exemple #4
0
import datetime as dt
import logging

from nba_stats.scraping.base_functions import get_soup, get_bref_soup, get_bref_tables, get_table
from nba_stats.scraping.functions import split_first_last, get_split, convert_feet, combine_columns, is_starter, to_int, convert_mp, include_comments, column_time
from nba_stats.read_write.db_insert import SqlDataframes
from nba_stats.read_write.functions import export_txt, create_schema_str

# Take one timestamp so the year and month below come from the same instant;
# two separate now() calls could straddle a month or year boundary.
_NOW = dt.datetime.now()

# Calendar year at import time.
CURRENT_YEAR = _NOW.year
# Season label: from August onward (month > 7) it is the next calendar year.
CURRENT_SEASON = CURRENT_YEAR + 1 if _NOW.month > 7 else CURRENT_YEAR
# Base URL of basketball-reference.com.
BREF_HTML = 'https://www.basketball-reference.com'
# NOTE(review): presumably seconds to wait between requests - confirm at the
# call sites, which are not visible here.
CRAWL_DELAY = 3
# Year -> team count lookups. NOTE(review): presumably each key is the first
# year in which that count applied - confirm against the code that reads them.
SEASON_TEAMS = {1977: 22, 1981: 23, 1989: 25, 1990: 27, 1996: 29, 2005: 30}
PLAYOFF_TEAMS = {1954: 6, 1967: 8, 1975: 10, 1977: 12, 1984: 16}

# Module-level database helper, created once when this module is imported.
stats_db = SqlDataframes()

# Logger named after this module. No handlers or level are set here (the
# setup below is commented out), so output depends on whatever logging
# configuration is active elsewhere.
logger_build = logging.getLogger(__name__)
# Disabled handler/formatter setup (stream handler plus a dated log file):
# handler = logging.StreamHandler()
# file_handler = logging.FileHandler("logging\\%s.log" % dt.datetime.today().strftime('%Y%m%d'))
# formatter = logging.Formatter('%(asctime)s %(name)-12s %(levelname)-10s %(message)s')
# for a_handler in [handler, file_handler]:
#     a_handler.setFormatter(formatter)
# logger_build.addHandler(handler)
# logger_build.addHandler(file_handler)
# logger_build.setLevel(logging.INFO)


def get_players_urls(players_url=None):
    '''Returns soup objects of bref player pages (a-z)