def __init__(self, db=None, cache_name='nbacom-agent', cookies=None, table_names=None):
    '''
    Wires up the scraper, parser, database handle and table-name lookup.

    Args:
        db (NBAPostgres): instance, stored as-is on self.db
        cache_name (str): for scraper cache_name
        cookies: cookie jar, passed to the scraper
        table_names (dict): Database table names; a falsy value
            (None, empty dict) selects the built-in defaults below
    '''
    # short key -> database table name, used when the caller supplies none
    default_table_names = {
        'pgl': 'player_gamelogs',
        'tgl': 'team_gamelogs',
        'seas': 'season',
        'pl': 'player',
        'ps': 'playerstats_daily',
        'ts': 'teamstats_daily',
        'tod': 'team_opponent_dashboard',
        'pbs': 'player_boxscores_combined',
        'tbs': 'team_boxscores_combined',
        'box2': 'game_boxscores',
        'tm': 'team'
    }

    module_logger = logging.getLogger(__name__)
    module_logger.addHandler(logging.NullHandler())

    self.scraper = NBAComScraper(cache_name=cache_name, cookies=cookies)
    self.parser = NBAComParser()
    self.db = db
    self.table_names = table_names or default_table_names
def __init__(self, db=None):
    '''
    Creates the scraper and parser and an empty _dk_players list.

    Args:
        db: optional database object; stored on self.nbadb only when truthy,
            otherwise the nbadb attribute is not created at all
    '''
    module_logger = logging.getLogger(__name__)
    module_logger.addHandler(logging.NullHandler())

    self._dk_players = []
    self.scraper = NBAComScraper()
    self.parser = NBAComParser()

    # no database supplied: leave self.nbadb unset
    if not db:
        return
    self.nbadb = db
def __init__(self, db=None, cache_name=None, cookies=None):
    '''
    Creates the scraper and parser and records whether a database is available.

    Arguments:
        db: NBAComPg instance; kept on self.db only when truthy
        cache_name: str for scraper cache_name
        cookies: cookie jar

    Sets self.insert_db to True when a database was supplied, else False.
    '''
    self.scraper = NBAComScraper(cache_name=cache_name, cookies=cookies)
    self.parser = NBAComParser()

    # insert_db mirrors the truthiness of db; self.db exists only when it is True
    self.insert_db = bool(db)
    if self.insert_db:
        self.db = db
def __init__(self, db=None, cache_name=None, cookies=None):
    '''
    Registers a NullHandler on this module's logger, creates the scraper
    and parser, and records whether a database is available.

    Arguments:
        db: NBAComPg instance; kept on self.db only when truthy
        cache_name: str for scraper cache_name
        cookies: cookie jar

    Sets self.insert_db to True when a database was supplied, else False.
    '''
    module_logger = logging.getLogger(__name__)
    module_logger.addHandler(logging.NullHandler())

    self.scraper = NBAComScraper(cache_name=cache_name, cookies=cookies)
    self.parser = NBAComParser()

    # insert_db mirrors the truthiness of db; self.db exists only when it is True
    has_db = bool(db)
    if has_db:
        self.db = db
    self.insert_db = has_db
import sys import time from configparser import ConfigParser from nba.scrapers.nbacom import NBAComScraper from nba.db.nbacom import NBAComPg from nba.db.queries import missing_games_meta logging.basicConfig(stream=sys.stdout, level=logging.INFO) config = ConfigParser() configfn = os.path.join(os.path.expanduser('~'), '.pgcred') config.read(configfn) s = NBAComScraper(cache_name='games-meta') nbapg = NBAComPg(username=config['nbadb']['username'], password=config['nbadb']['password'], database=config['nbadb']['database']) teams = [] url = 'http://data.nba.com/data/10s/prod/v1/{game_date}/{game_id}_boxscore.json' headers = ['game_id', 'gamecode', 'game_date', 'team_code', ] for item in nbapg.select_dict(missing_games_meta()): try: content = s.get_json(url.format(game_date=item['game_date'], game_id=item['game_id'])) vteam_code = content['basicGameData']['vTeam']['triCode'] vscore = int(content['basicGameData']['vTeam']['score']) vls = [int(l['score']) for l in content['basicGameData']['vTeam']['linescore']] hteam_code = content['basicGameData']['hTeam']['triCode'] hscore = int(content['basicGameData']['hTeam']['score'])
def setUp(self):
    '''
    Test fixture: attaches a NullHandler to this module's logger and
    creates a fresh NBAComScraper on self.nbs.
    '''
    module_logger = logging.getLogger(__name__)
    module_logger.addHandler(logging.NullHandler())
    self.nbs = NBAComScraper()
def load_data(per_mode, playerpool_size, thresh_gp, thresh_min, lastn=0,
              season_code=None, sortcol=None, fn=None):
    '''
    Loads data from nba.com or csv file

    Args:
        per_mode (str): 'Totals', 'PerGame', 'Per48'
        playerpool_size (int): number of players in pool; only enforced
            when at least one threshold is given
        thresh_gp (int): minimum number of games played (falsy disables)
        thresh_min (int): minimum number of minutes played (falsy disables)
        lastn (int): last number of games, default 0
        season_code (str): '2017-18', etc., default None
        sortcol (str): sort, default None
        fn (str): filename of csv, default None

    Returns:
        DataFrame: indexed by PLAYER_ID, sorted descending by sortcol
            when sortcol is given

    Raises:
        KeyError: if the data has no MIN or PLAYER_ID column
    '''
    if fn:
        df = pd.read_csv(fn)
    else:
        scraper = NBAComScraper(cache_name='fbasim')
        parser = NBAComParser()
        if not season_code:
            season_code = current_season_code()
        content = scraper.playerstats(season_code, per_mode, lastn)
        df = pd.DataFrame(parser.playerstats(content, per_mode))

    # NOTE(review): the cast and rename are applied to both the csv and the
    # scraped data here - confirm that matches the intended layout
    df['MIN'] = df['MIN'].astype(int)
    df = df.rename(index=str, columns={"TEAM_ABBREVIATION": "TEAM"})

    # account for gp & minutes thresholds: combine whichever are active
    crit = None
    if thresh_gp:
        crit = df.GP >= thresh_gp
    if thresh_min:
        enough_minutes = df.MIN >= thresh_min
        crit = enough_minutes if crit is None else crit & enough_minutes

    if crit is not None:
        # always filter BEFORE truncating to the pool size; truncating the
        # unfiltered frame would let players below the threshold through
        df = df[crit].iloc[:playerpool_size]

    # need index for joins in the sim; set_index returns a new frame,
    # so the result has to be assigned back
    df = df.set_index('PLAYER_ID')

    if sortcol:
        return df.sort_values(sortcol, ascending=False)
    return df