def scrape_games(loops, count_max, action):
    """Scrape game pages in batches and store their basic game stats.

    Runs at most ``loops`` batches. Every batch re-reads the ``scraping``
    config section, requests up to ``count_max`` games from
    ``get_game_soups`` and forwards the result to ``add_basic_gamestats``.
    The run stops early as soon as a batch comes back empty.

    Args:
        loops: Maximum number of batches to run.
        count_max: Forwarded to ``get_game_soups`` as ``limit``.
        action: Forwarded to ``add_basic_gamestats`` as ``commit_changes``.

    Returns:
        None. Progress is reported through ``logger``.
    """
    # NOTE(review): the nesting below was reconstructed from a source whose
    # indentation was lost -- confirm that the final summary belongs inside
    # the loop (it is only logged when the last batch actually ran).
    games = SqlDataframes().read_table('games', ['game_id', 'bref'])

    for loop_no in range(1, loops + 1):
        logger.info('Current Loop: {}'.format(loop_no))
        logger.info('Running game soups...')

        # The config is looked up again on every batch, as in the original.
        scraping_cfg = get_dbconfig(section='scraping')
        tables_to_check = scraping_cfg['check_tables'].split(',')

        soups = get_game_soups(
            games,
            limit=count_max,
            check_tables=tables_to_check,
        )
        if not soups:
            # Nothing came back for this batch: stop without a summary.
            break

        add_basic_gamestats(soups, commit_changes=action)

        if loop_no != loops:
            continue

        # Last batch finished: report the gap between the highest game_id
        # in 'games' and the highest game_id in 'boxscores'.
        summary_db = SqlDataframes()
        newest_game = summary_db.read_max('games', 'game_id')
        newest_boxscore = summary_db.read_max('boxscores', 'game_id')
        logger.info(
            'FINISHED...Games remaining to scrape: {}'.format(
                newest_game - newest_boxscore
            )
        )
import pandas as pd import numpy as np import datetime as dt import re import time import matplotlib.pyplot as plt import seaborn as sns import mysql.connector as sql from nba_stats.read_write.config import get_dbconfig cfg = get_dbconfig() HOST = cfg['host'] PORT = int(cfg['port']) USER = cfg['user'] PASSWORD = cfg['password'] DB = cfg['db'] def isfloat(number): try: float(number) return True except (ValueError, TypeError): return False def add_where(original, add): if original == '': return 'WHERE ' + add else: