def __init__(self, db=None, cache_name='nbacom-agent', cookies=None, table_names=None):
    '''
    Wires up the scraper, the parser, the database handle and the table-name map

    Args:
        db (NBAPostgres): instance
        cache_name (str): for scraper cache_name
        cookies: cookie jar
        table_names (dict): Database table names; the built-in defaults
                            below are used when this is empty or None

    '''
    module_logger = logging.getLogger(__name__)
    module_logger.addHandler(logging.NullHandler())

    self.scraper = NBAComScraper(cache_name=cache_name, cookies=cookies)
    self.parser = NBAComParser()
    self.db = db

    # short alias -> database table name
    default_table_names = {
        'pgl': 'player_gamelogs',
        'tgl': 'team_gamelogs',
        'seas': 'season',
        'pl': 'player',
        'ps': 'playerstats_daily',
        'ts': 'teamstats_daily',
        'tod': 'team_opponent_dashboard',
        'pbs': 'player_boxscores_combined',
        'tbs': 'team_boxscores_combined',
        'box2': 'game_boxscores',
        'tm': 'team'
    }
    self.table_names = table_names or default_table_names
def __init__(self, db=None):
    '''
    Creates the scraper and parser; keeps the database object if one is given

    Args:
        db: database object, stored as self.nbadb only when truthy

    '''
    null_handler = logging.NullHandler()
    logging.getLogger(__name__).addHandler(null_handler)

    self._dk_players = []
    self.scraper = NBAComScraper()
    self.parser = NBAComParser()

    if not db:
        # no database supplied: self.nbadb is left unassigned
        return
    self.nbadb = db
def __init__(self, db=None, cache_name=None, cookies=None):
    '''
    Creates the scraper and parser and records whether results get inserted

    Arguments:
        cache_name: str for scraper cache_name
        cookies: cookie jar
        db: NBAComPg instance

    '''
    self.scraper = NBAComScraper(cache_name=cache_name, cookies=cookies)
    self.parser = NBAComParser()

    # insert_db mirrors the truthiness of db; self.db only exists when it is True
    self.insert_db = bool(db)
    if self.insert_db:
        self.db = db
def __init__(self, db=None, cache_name=None, cookies=None):
    '''
    Attaches a NullHandler to the module logger, then builds scraper and parser

    Arguments:
        cache_name: str for scraper cache_name
        cookies: cookie jar
        db: NBAComPg instance; when truthy it is stored as self.db
            and self.insert_db is True, otherwise self.insert_db is False

    '''
    module_logger = logging.getLogger(__name__)
    module_logger.addHandler(logging.NullHandler())

    self.scraper = NBAComScraper(cache_name=cache_name, cookies=cookies)
    self.parser = NBAComParser()

    if not db:
        self.insert_db = False
        return
    self.db = db
    self.insert_db = True
def __init__(self, db=False):
    '''
    Creates logger, scraper and parser; optionally composes a database object

    Args:
        db (bool): when truthy, a new NBAPostgres is stored as self.nbadb

    '''
    self.logger = logging.getLogger(__name__)
    self._dk_players = []
    self.scraper = NBAComScraper()
    self.parser = NBAComParser()

    if not db:
        # no database requested: self.nbadb is left unassigned
        return
    self.nbadb = NBAPostgres()
def __init__(self, db=True, safe=True):
    '''
    Runs the NBAAgent initializer, then composes the helper objects

    Args:
        db (bool): compose NBAComPg object as self.nbadb (None when falsy)
        safe (bool): create backups of tables prior to inserts

    '''
    NBAAgent.__init__(self)

    self.logger = logging.getLogger(__name__)
    self.scraper = NBAComScraper()
    self.parser = NBAComParser()
    self.safe = safe
    self.nbas = NBASeasons()
    self.nbadb = NBAComPg() if db else None
class NBAComAgent(NBAAgent):
    '''
    Performs script-like tasks using NBA.com API
    Intended to replace standalone scripts so can use common API and tools

    Examples:
        a = NBAComAgent()
        gamelogs = a.cs_player_gamelogs('2015-16')
    '''

    def __init__(self, db=True, safe=True):
        '''
        Args:
            db (bool): compose NBAComPg object as self.nbadb (None when falsy)
            safe (bool): create backups of tables prior to inserts
        '''
        NBAAgent.__init__(self)
        self.logger = logging.getLogger(__name__)
        self.scraper = NBAComScraper()
        self.parser = NBAComParser()
        self.safe = safe
        self.nbas = NBASeasons()
        self.nbadb = NBAComPg() if db else None

    @staticmethod
    def _season_ids(season):
        '''
        Derives the values stored with each playerteams row from the season string

        Arguments:
            season (str): in YYYY-YY format

        Returns:
            tuple: (nbacom_season_id, season), e.g. '2015-16' -> (22015, 2016)

        Raises:
            ValueError: if season does not start with a 4-digit year
        '''
        start_year = int(season[:4])
        return 20000 + start_year, start_year + 1

    def combine_boxscores(self, boxes, advanced_boxes):
        '''
        Combines NBAComScraper.boxscores() and boxscores_advanced()
        Inserts the merged rows when a database is configured

        Arguments:
            boxes (dict): game_id -> 'base' boxscore
            advanced_boxes (dict): game_id -> 'advanced' boxscore

        Returns:
            merged_players (list): base and advanced combined
            merged_teams (list): base and advanced combined

        Examples:
            a = NBAComAgent()
            players, teams = a.combine_boxscores(boxes, advanced_boxes)
        '''
        merged_players = []
        merged_teams = []

        for gid, box in boxes.items():
            adv_box = advanced_boxes.get(gid)
            if adv_box is None:
                # no advanced boxscore for this game, so there is nothing to merge
                continue

            # players and teams are lists of dicts
            players, teams, starterbench = self.parser.boxscore(box)
            players_adv, teams_adv = self.parser.boxscore_advanced(adv_box)

            # key everything by id so base and advanced rows can be paired
            players_dict = {p['PLAYER_ID']: p for p in players}
            players_adv_dict = {p['PLAYER_ID']: p for p in players_adv}
            teams_dict = {t['TEAM_ID']: t for t in teams}
            teams_adv_dict = {t['TEAM_ID']: t for t in teams_adv}

            # only rows present in both base and advanced are kept
            for pid, player in players_dict.items():
                player_adv = players_adv_dict.get(pid)
                if player_adv:
                    merged_players.append(self.merge_boxes(player, player_adv))

            for tid, team in teams_dict.items():
                team_adv = teams_adv_dict.get(tid)
                if team_adv:
                    merged_teams.append(self.merge_boxes(team, team_adv))

        if self.nbadb:
            self.nbadb.insert_boxscores(merged_players, merged_teams)
        return merged_players, merged_teams

    def commonallplayers(self, season):
        '''
        Solves problem of players changing teams
        nba.com updates player teams regularly, so i look every day to make sure lists accurate

        Arguments:
            season (str): in YYYY-YY format

        Returns:
            to_insert (list): list of players that needed to be updated

        Raises:
            ValueError: if season does not start with a 4-digit year

        Examples:
            a = NBAComAgent()
            combined = a.commonallplayers('2015-16')
        '''
        game_date = dt.datetime.today()
        players = self.parser.players(self.scraper.players(season=season, cs_only='1'))

        # season-dependent columns come from the argument rather than a fixed season
        nbacom_season_id, season_year = self._season_ids(season)

        # nba.com key -> database column; keys not listed here are dropped
        convert = {
            "PERSON_ID": 'nbacom_player_id',
            "DISPLAY_FIRST_LAST": 'display_first_last',
            "ROSTERSTATUS": 'rosterstatus',
            "TEAM_ID": 'team_id',
            "TEAM_ABBREVIATION": 'team_code',
        }

        to_insert = []
        for p in players:
            pti = {'game_date': game_date, 'nbacom_season_id': nbacom_season_id, 'season': season_year}
            for k, v in p.items():
                converted = convert.get(k)
                if converted:
                    pti[converted] = v
            to_insert.append(pti)

        if self.nbadb and to_insert:
            self.nbadb.insert_dicts(to_insert, 'stats.playerteams')
        return to_insert

    def cs_player_gamelogs(self, season, date_from=None, date_to=None):
        '''
        Fetches player_gamelogs and updates cs_player_gamelogs table

        Arguments:
            season (str): in YYYY-YY format (2015-16)
            date_from: accepted for signature compatibility, currently unused
            date_to: accepted for signature compatibility, currently unused

        Returns:
            players (list): parsed gamelogs, or the value returned by
                            insert_player_gamelogs when a database is configured
        '''
        gamelogs = self.parser.season_gamelogs(self.scraper.season_gamelogs(season, 'P'), 'P')
        table_name = 'stats.cs_player_gamelogs'

        if self.nbadb:
            if self.safe:
                self.nbadb.postgres_backup_table(self.nbadb.database, table_name)
            gamelogs = self.nbadb.insert_player_gamelogs(gamelogs, table_name)
            self.nbadb.update_positions(table_name)
            self.nbadb.update_teamids(table_name)
        return gamelogs

    def cs_playerstats(self, season, date_from=None, date_to=None):
        '''
        Fetches cs_player_stats and updates database table

        Arguments:
            season (str): in YYYY-YY format (2015-16)
            date_from (str): in %Y-%m-%d format, default beginning of season
            date_to (str): in %Y-%m-%d format, default yesterday

        Returns:
            the value returned by insert_playerstats when a database is configured,
            otherwise the list of merged (base + advanced) player dictionaries

        Examples:
            a = NBAComAgent()
            ps = a.cs_playerstats('2015-16')
            ps = a.cs_playerstats(season='2015-16', date_from='2016-03-01', date_to='2016-03-08')
        '''
        # default is to get entire season through yesterday
        yesterday = dt.datetime.strftime(dt.datetime.today() - dt.timedelta(1), '%Y-%m-%d')
        if not date_from:
            date_from = self.nbas.season_start(season)
        if not date_to:
            date_to = yesterday

        ps_base = self.parser.playerstats(
            self.scraper.playerstats(season, DateFrom=date_from, DateTo=date_to))
        ps_advanced = self.parser.playerstats(
            self.scraper.playerstats(season, DateFrom=date_from, DateTo=date_to, MeasureType='Advanced'))

        # merge advanced into base, matching rows on PLAYER_ID
        merged = {p['PLAYER_ID']: p for p in ps_base}
        for ps_adv in ps_advanced:
            base = merged.get(ps_adv['PLAYER_ID'])
            if base:
                base.update(ps_adv)

        playerstats = list(merged.values())
        if not self.nbadb:
            return playerstats
        return self.nbadb.insert_playerstats(playerstats, table_name='stats.cs_playerstats', game_date=yesterday)

    def cs_team_gamelogs(self, season, date_from=None, date_to=None):
        '''
        Fetches team_gamelogs and updates cs_team_gamelogs table

        Arguments:
            season (str): in YYYY-YY format (2015-16)
            date_from: accepted for signature compatibility, currently unused
            date_to: accepted for signature compatibility, currently unused

        Returns:
            team_gl (list): parsed team gamelogs, or the value returned by
                            insert_team_gamelogs when a database is configured

        Examples:
            a = NBAComAgent()
            tgl = a.cs_team_gamelogs('2015-16')
        '''
        # use the requested season (was previously fixed to '2015-16')
        gamelogs = self.parser.season_gamelogs(
            self.scraper.season_gamelogs(season=season, player_or_team='T'), 'T')
        self.logger.debug('there are {0} team gamelogs'.format(len(gamelogs)))

        if self.nbadb:
            table_name = 'stats.cs_team_gamelogs'
            if self.safe:
                self.nbadb.postgres_backup_table(self.nbadb.database, table_name)
            gamelogs = self.nbadb.insert_team_gamelogs(gamelogs, table_name)
            self.logger.debug('there are now {0} team gamelogs'.format(len(gamelogs)))
        return gamelogs

    def cs_teamstats(self, season, date_from=None, date_to=None):
        '''
        Fetches leaguedashteamstats and updates cs_leaguedashteamstats table

        Arguments:
            season (str): in YYYY-YY format (2015-16)
            date_from (str): in %Y-%m-%d format, default beginning of season
            date_to (str): in %Y-%m-%d format, default yesterday

        Returns:
            ts_base (dict): TEAM_ID -> team dictionary of basic and advanced stats
            ts_adv (list): parsed advanced team stats

        Examples:
            a = NBAComAgent()
            ts_base, ts_adv = a.cs_teamstats('2015-16')
            ts_base, ts_adv = a.cs_teamstats(season='2015-16', date_from='2016-03-01', date_to='2016-03-08')
        '''
        # default is to get entire season through yesterday
        yesterday = dt.datetime.strftime(dt.datetime.today() - dt.timedelta(1), '%Y-%m-%d')
        if not date_from:
            date_from = self.nbas.season_start(season)
        if not date_to:
            date_to = yesterday

        ts_base = self.parser.teamstats(
            self.scraper.teamstats(season, DateFrom=date_from, DateTo=date_to))
        ts_adv = self.parser.teamstats(
            self.scraper.teamstats(season, DateFrom=date_from, DateTo=date_to, MeasureType='Advanced'))

        # merge advanced into base, matching rows on TEAM_ID
        ts_base = {t['TEAM_ID']: t for t in ts_base}
        for adv in ts_adv:  # distinct loop name so the ts_adv list is not overwritten
            base = ts_base.get(adv['TEAM_ID'])
            if base:
                base.update(adv)

        if self.nbadb:
            self.nbadb.insert_teamstats(list(ts_base.values()), table_name='stats.cs_teamstats', game_date=yesterday)
        return ts_base, ts_adv

    def merge_boxes(self, b1, b2):
        '''
        Combines base and advanced player or team boxscores from same game
        Neither argument is modified; on key conflicts the value from b2 wins

        Arguments:
            b1 (dict): base boxscore
            b2 (dict): advanced boxscore

        Returns:
            merged (dict)

        Examples:
            a = NBAComAgent()
            merged = a.merge_boxes(base_box, adv_box)
        '''
        merged = dict(b1)
        merged.update(b2)
        return merged

    def players_to_add(self):
        '''
        Compare current_season_gamelogs and players tables to see if missing players in latter
        Requires a database (self.nbadb)

        Arguments:
            None

        Returns:
            list
        '''
        sql = '''SELECT * FROM vw_add_players_table'''
        return self.nbadb.select_dict(sql)

    def scoreboards(self, season_start, season_end, pkl_fname=None):
        '''
        Downloads and parses range of scoreboards, optionally saves to pickle file

        Arguments:
            season_start (str): in %Y-%m-%d format
            season_end (str): in %Y-%m-%d format
            pkl_fname (optional [str]): example - 'scoreboards_2015-16.pkl'

        Returns:
            scoreboards (list): scoreboard dicts

        Examples:
            a = NBAComAgent()
            sb = a.scoreboards(season_start='2015-10-27', season_end='2016-04-15')
            sb = a.scoreboards('2015-10-27', '2016-04-15', pkl_fname='scoreboards_2015-16.pkl')
        '''
        scoreboards = []

        # NOTE(review): self.date_list is not defined in this class; presumably inherited from NBAAgent
        for day in reversed(self.date_list(season_end, season_start)):
            game_date = dt.datetime.strftime(day, '%Y-%m-%d')
            # fetch with the scraper and parse with the parser composed in __init__
            scoreboard_json = self.scraper.scoreboard(game_date=game_date)
            scoreboard = self.parser.scoreboard(scoreboard_json, game_date=game_date)
            scoreboards.append(scoreboard)

        if pkl_fname:
            # saving is best-effort: failure is logged and the results are still returned
            try:
                with open(pkl_fname, 'wb') as outfile:
                    pickle.dump(scoreboards, outfile)
            except Exception:
                self.logger.error('could not save scoreboards to {0}'.format(pkl_fname))

        return scoreboards

    def teamgames(self, games):
        '''
        Converts list of games into list in teamgames format, where there are 2 teamgames for every game

        Arguments:
            games(list): list of games from nba.com where two teams are in 1 row (visitor, home)

        Returns:
            teamgames(list): list of games in teamgames format, 2 teamgames per game row
                             (home team row first, then visitor team row)

        Examples:
            # is in format {'game_id', 'visitor_team_id', 'home_team_id', . . . }
            games = NBAPostgres.select_dict('SELECT * FROM games')

            # is in format {'game_id', 'team_id', 'opponent_team_id', 'is_home' . . . }
            teamgames = a.teamgames(games)
        '''
        to_drop = ('home_team_code', 'home_team_id', 'visitor_team_code', 'visitor_team_id')
        perspectives = (('home', 'visitor', True), ('visitor', 'home', False))
        teamgames = []

        for game in games:
            for team, opponent, is_home in perspectives:
                # copy every column except the home/visitor ones, then add the team/opponent view
                tg = {k: v for k, v in copy.deepcopy(game).items() if k not in to_drop}
                tg['team_code'] = game[team + '_team_code']
                tg['team_id'] = game[team + '_team_id']
                tg['opponent_team_code'] = game[opponent + '_team_code']
                tg['opponent_team_id'] = game[opponent + '_team_id']
                tg['is_home'] = is_home
                teamgames.append(tg)

        return teamgames

    def team_opponents(self, season, season_start=None, season_end=None, pkl_fname=None):
        '''
        Downloads and parses range of team_opponents, optionally saves to pickle file

        Arguments:
            season (str): in YYYY-YY format
            season_start (str): in %Y-%m-%d format, default is actual start of season
            season_end (str): in %Y-%m-%d format, used only when season_start is given
            pkl_fname (optional [str]): example - 'team_opponents_2015-16.pkl'

        Returns:
            topp (list): dicts with lowercased keys plus 'game_date'

        Examples:
            a = NBAComAgent()
            topp = a.team_opponents('2014-15')
        '''
        topp = []

        # figure out season_start, season end
        if season_start is None:
            # use the requested season (was previously fixed to '2014-15')
            days = self.nbas.season_dates(season)
            # the loop below walks the list in reverse, so the last element is the first day
            season_start = dt.datetime.strftime(days[-1], '%Y-%m-%d')
        else:
            days = date_list(season_end, season_start)

        for day in reversed(days):
            # NOTE(review): DateTo receives the date object while DateFrom is a string - confirm scraper accepts both
            content = self.scraper.team_opponent_dashboard(season, DateFrom=season_start, DateTo=day)
            teamstats_opp = self.parser.team_opponent_dashboard(content)

            for team in teamstats_opp:
                fixed_team = {k.lower(): v for k, v in team.items()}
                fixed_team['game_date'] = dt.datetime.strftime(day, '%Y-%m-%d')
                topp.append(fixed_team)

        if pkl_fname:
            # saving is best-effort: failure is logged and the results are still returned
            try:
                with open(pkl_fname, 'wb') as outfile:
                    pickle.dump(topp, outfile)
            except Exception:
                self.logger.error('could not save scoreboards to {0}'.format(pkl_fname))

        return topp
class NBAComAgent(object):
    '''
    Performs script-like tasks using NBA.com API
    '''

    def __init__(self, db=None, cache_name=None, cookies=None):
        '''
        Arguments:
            cache_name: str for scraper cache_name
            cookies: cookie jar
            db: NBAComPg instance; when omitted nothing is inserted
        '''
        logging.getLogger(__name__).addHandler(logging.NullHandler())
        self.scraper = NBAComScraper(cache_name=cache_name, cookies=cookies)
        self.parser = NBAComParser()
        # always define self.db so that `if not self.db` checks work without a database
        self.db = db
        self.insert_db = bool(db)

    @staticmethod
    def _index_by(rows, key):
        '''Returns dict of rows keyed by row[key]'''
        return {row[key]: row for row in rows}

    def _playerstats_pair(self, season, date_from, date_to):
        '''Returns (base, advanced) parsed playerstats for the date range'''
        base = self.parser.playerstats(
            self.scraper.playerstats(season, DateFrom=date_from, DateTo=date_to))
        advanced = self.parser.playerstats(
            self.scraper.playerstats(season, DateFrom=date_from, DateTo=date_to, MeasureType='Advanced'))
        return base, advanced

    def _teamstats_pair(self, season, date_from, date_to):
        '''Returns (base, advanced) parsed teamstats for the date range'''
        base = self.parser.teamstats(
            self.scraper.teamstats(season, DateFrom=date_from, DateTo=date_to))
        advanced = self.parser.teamstats(
            self.scraper.teamstats(season, DateFrom=date_from, DateTo=date_to, MeasureType='Advanced'))
        return base, advanced

    def _combined_player_boxscores(self, gid):
        '''
        Combines 5 types of boxscores from nba.com API into list of boxscores

        Arguments:
            gid: string game ID, with leading '00'

        Returns:
            list of player boxscores combined - traditional, advanced, misc, scoring, usage

        Examples:
            a = NBAComAgent()
            playerboxes = a._combined_player_boxscores('0020161001')
        '''
        traditional_players, _trad_teams, _starter_bench = self.parser.boxscore_traditional(
            self.scraper.boxscore_traditional(gid))
        adv_players, _adv_teams = self.parser.boxscore_advanced(
            self.scraper.boxscore_advanced(gid))
        misc_players, _misc_teams = self.parser.boxscore_misc(
            self.scraper.boxscore_misc(gid))
        scoring_players, _scoring_teams = self.parser.boxscore_scoring(
            self.scraper.boxscore_scoring(gid))
        usage_players = self.parser.boxscore_usage(
            self.scraper.boxscore_usage(gid))

        # combine the five player tables, matching rows on PLAYER_ID
        players = merge(
            dict(),
            [self._index_by(rows, 'PLAYER_ID')
             for rows in (traditional_players, adv_players, misc_players,
                          scoring_players, usage_players)])
        # list (not a dict view) to match combined_team_boxscores
        return list(players.values())

    def combined_team_boxscores(self, gid):
        '''
        Combines 4 types of boxscores from nba.com API into list of boxscores

        Arguments:
            gid: string game ID, with leading '00'

        Returns:
            list of team boxscores - combined traditional, advanced, misc, scoring

        Examples:
            a = NBAComAgent()
            teamboxes = a.combined_team_boxscores('0020161001')
        '''
        _trad_players, traditional_teams, _starter_bench = self.parser.boxscore_traditional(
            self.scraper.boxscore_traditional(gid))
        _adv_players, adv_teams = self.parser.boxscore_advanced(
            self.scraper.boxscore_advanced(gid))
        _misc_players, misc_teams = self.parser.boxscore_misc(
            self.scraper.boxscore_misc(gid))
        _scoring_players, scoring_teams = self.parser.boxscore_scoring(
            self.scraper.boxscore_scoring(gid))

        # combine the four team tables, matching rows on TEAM_ID
        teams = merge(
            dict(),
            [self._index_by(rows, 'TEAM_ID')
             for rows in (traditional_teams, adv_teams, misc_teams, scoring_teams)])
        return list(teams.values())

    def linescores(self):
        '''
        Fetches the v2015 boxscore for every game returned by the query below
        and inserts the visitor and home rows into boxv2015
        A failure on one game is logged and the loop moves on to the next game
        '''
        q = """SELECT '00' || game_id as gid, to_char(game_date, 'YYYYmmdd') as gd FROM games
                WHERE game_date < localdate() AND season > 2015 AND
                game_id NOT IN (SELECT DISTINCT game_id FROM boxv2015)
                ORDER BY game_date DESC;"""

        for g in self.db.select_dict(q):
            try:
                content = self.scraper.boxscore_v2015(g['gid'], g['gd'])
                v, h = self.parser.boxscore_v2015(content)
                self.db.insert_dicts([v, h], 'boxv2015')
                logging.info('finished {} - {}'.format(g['gd'], g['gid']))
            except Exception as e:
                logging.error('could not get {}'.format(g))
                logging.exception(e)

    def new_players(self, season):
        '''
        Updates players table with missing players

        Arguments:
            season (str): in YYYY-YY format

        Returns:
            list of players to add to stats.players, or None when nothing is missing

        Examples:
            a = NBAComAgent(cache_name='newplayers', cookies=httplib.CookieJar(), db=NBAComPg(...))
            np = a.new_players(season='2015-16')
        '''
        content = self.scraper.players_v2015(season)
        players = self.parser.players_v2015(content)

        currids = {int(p.get('personId', 0)) for p in players}
        logging.debug(currids)
        allids = set(self.db.select_list('SELECT nbacom_player_id from players'))
        missing = currids - allids
        if not missing:
            return None

        # same lookup as used to build currids, so a row without personId cannot raise here
        to_add = [p for p in players if int(p.get('personId', 0)) in missing]
        if self.insert_db:
            self.db.insert_players_v2015(to_add)
        return to_add

    def player_boxscores_combined(self):
        '''
        Fetches combined player boxscores for every game id returned by
        missing_player_boxscores(); inserts them when insert_db is set

        Returns:
            players (list): player boxscores, or None when there are no missing games
        '''
        pboxes = []
        gids = self.db.select_list(missing_player_boxscores())
        if not gids:
            logging.error('no missing gameids found')
            return None

        logging.info('there are {} missing game boxscores'.format(len(gids)))

        for gid in gids:
            logging.info('getting {}'.format(gid))
            box = self._combined_player_boxscores(gid)
            if not box:
                logging.error('no box for {}'.format(gid))
                continue
            if self.insert_db:
                self.db.insert_player_boxscores(box)
            pboxes.append(box)

        # flatten list of per-game lists
        return [item for sublist in pboxes for item in sublist]

    def player_gamelogs(self, season, date_from=None, date_to=None):
        '''
        Fetches player_gamelogs and updates player_gamelogs table

        Arguments:
            season: str in YYYY-YY format (2015-16)
            date_from: accepted for signature compatibility, currently unused
            date_to: accepted for signature compatibility, currently unused

        Returns:
            players (list): parsed player gamelogs
        '''
        pgl = self.parser.season_gamelogs(
            self.scraper.season_gamelogs(season, 'P'), 'P')
        if self.insert_db:
            self.db.insert_player_gamelogs(pgl)
        return pgl

    def playerstats(self, season, date_from=None, date_to=None, all_missing=False):
        '''
        Fetches playerstats (base + advanced) and optionally inserts them

        Arguments:
            season: str in YYYY-YY format (2015-16)
            date_from: str in YYYY-mm-dd format
            date_to: str in YYYY-mm-dd format
            all_missing: boolean

        Returns:
            list of merged player dictionaries when both dates are given;
            dict of date string -> list when all_missing is set

        Raises:
            ValueError: if neither both dates nor all_missing is specified

        Examples:
            a = NBAComAgent()
            np = a.playerstats(season='2015-16', date_from='2016-03-01', date_to='2016-03-08')
        '''
        if date_from and date_to:
            ps_base, ps_advanced = self._playerstats_pair(season, date_from, date_to)
            # rows are paired by position in the two result lists
            ps = [merge(dict(), [psb, psadv]) for psb, psadv in zip(ps_base, ps_advanced)]
            if self.insert_db:
                self.db.insert_playerstats(ps, as_of=date_to)
            return ps

        elif all_missing:
            pstats = {}
            start = datetostr(d=season_start(season), site='nba')
            for day in self.db.select_list(missing_playerstats()):
                daystr = datetostr(day, 'nba')
                ps_base, ps_advanced = self._playerstats_pair(season, start, daystr)
                # NOTE(review): merge order here is advanced-then-base, the reverse of the
                # dated branch above; kept as found - confirm which precedence is intended
                ps = [merge(dict(), [psadv, psb]) for psb, psadv in zip(ps_base, ps_advanced)]
                pstats[daystr] = ps
                if self.insert_db:
                    self.db.insert_playerstats(ps, as_of=daystr)
                    logging.info('completed {}'.format(daystr))
            return pstats

        else:
            raise ValueError(
                'need to specify dates or set all_missing to true')

    def scoreboards(self, season_start, season_end):
        '''
        Downloads and parses range of scoreboards

        Arguments:
            season_start (str): in %Y-%m-%d format
            season_end (str): in %Y-%m-%d format

        Returns:
            scoreboards (list): scoreboard dicts

        Examples:
            a = NBAComAgent()
            sb = a.scoreboards(season_start='2015-10-27', season_end='2016-04-15')
        '''
        scoreboards = []
        for day in reversed(date_list(season_end, season_start)):
            game_date = dt.datetime.strftime(day, '%Y-%m-%d')
            scoreboard_json = self.scraper.scoreboard(game_date=game_date)
            scoreboard = self.parser.scoreboard(scoreboard_json, game_date=game_date)
            scoreboards.append(scoreboard)
        if self.insert_db:
            self.db.insert_scoreboards(scoreboards)
        return scoreboards

    def team_boxscores_combined(self):
        '''
        Fetches combined team boxscores for every game id returned by
        missing_team_boxscores(); inserts them when insert_db is set

        Returns:
            tboxes: list of boxscores, or None when there are no missing games
        '''
        tboxes = []
        gids = self.db.select_list(missing_team_boxscores())
        if not gids:
            logging.error('no missing gameids found')
            return None

        logging.info('there are {} missing game boxscores'.format(len(gids)))

        for gid in gids:
            logging.info('getting {}'.format(gid))
            box = self.combined_team_boxscores(gid)
            if not box:
                logging.error('no box for {}'.format(gid))
                continue
            if self.insert_db:
                self.db.insert_team_boxscores(box)
            tboxes.append(box)

        # flatten list of per-game lists
        return [item for sublist in tboxes for item in sublist]

    def team_gamelogs(self, season, date_from=None, date_to=None):
        '''
        Fetches team_gamelogs and keeps those whose GAME_ID is reported missing by the database

        Arguments:
            season (str): in YYYY-YY format (2015-16)
            date_from: accepted for signature compatibility, currently unused
            date_to: accepted for signature compatibility, currently unused

        Returns:
            team_gl (list): gamelogs to insert, or None when there is nothing to insert

        Examples:
            a = NBAComAgent(db=NBAComPg(...))
            tgl = a.team_gamelogs(season='2015-16')
        '''
        content = self.scraper.season_gamelogs(season=season, player_or_team='T')
        tgl = self.parser.season_gamelogs(content, 'T')
        mtgl = self.db.missing_tgl()

        if tgl and mtgl:
            toins = [gl for gl in tgl if gl.get('GAME_ID', None) in mtgl]
            if self.insert_db:
                self.db.insert_team_gamelogs(toins)
            return toins

        logging.error('no team gamelogs to insert')
        return None

    def teamstats(self, season, date_from=None, date_to=None, all_missing=False):
        '''
        Fetches teamstats (base + advanced) and updates database table

        Arguments:
            season (str): in YYYY-YY format (2015-16)
            date_from (str): in %Y-%m-%d format
            date_to (str): in %Y-%m-%d format
            all_missing: boolean

        Returns:
            list of merged team dictionaries when both dates are given;
            dict of date string -> list when all_missing is set

        Raises:
            ValueError: if neither both dates nor all_missing is specified

        Examples:
            a = NBAComAgent()
            ps = a.teamstats(season='2015-16', date_from='2016-03-01', date_to='2016-03-08')
            ps = a.teamstats(season='2015-16', all_missing=True)
        '''
        if date_from and date_to:
            ts_base, ts_advanced = self._teamstats_pair(season, date_from, date_to)
            # rows are paired by position in the two result lists
            ts = [merge(dict(), [tsb, tsadv]) for tsb, tsadv in zip(ts_base, ts_advanced)]
            if self.insert_db:
                self.db.insert_teamstats(ts, as_of=date_to)
            return ts

        elif all_missing:
            tstats = {}
            start = datetostr(d=season_start(season), site='nba')
            for day in self.db.select_list(missing_teamstats()):
                daystr = datetostr(day, 'nba')
                ts_base, ts_advanced = self._teamstats_pair(season, start, daystr)
                ts = [merge(dict(), [tsb, tsadv]) for tsb, tsadv in zip(ts_base, ts_advanced)]
                tstats[daystr] = ts
                if self.insert_db:
                    self.db.insert_teamstats(ts, as_of=daystr)
                    logging.debug('teamstats: completed {}'.format(daystr))
                else:
                    logging.error('did not insert: {}'.format(ts))
            return tstats

        else:
            raise ValueError(
                'need to specify dates or set all_missing to true')

    def team_opponent_dashboards(self, season, date_from=None, date_to=None, all_missing=False):
        '''
        Downloads and parses range of team_opponents

        Arguments:
            season (str): in YYYY-YY format
            date_from (str): in %Y-%m-%d format
            date_to (str): in %Y-%m-%d format
            all_missing (bool): get all missing dashboards

        Returns:
            parsed dashboard when both dates are given;
            dict of date string -> parsed dashboard when all_missing is set

        Raises:
            ValueError: if neither both dates nor all_missing is specified

        Examples:
            a = NBAComAgent()
            topp = a.team_opponent_dashboards('2014-15', all_missing=True)
        '''
        if date_from and date_to:
            content = self.scraper.team_opponent_dashboard(season, DateFrom=date_from, DateTo=date_to)
            topp = self.parser.team_opponent_dashboard(content)
            if self.insert_db:
                self.db.insert_team_opponent_dashboards(topp, as_of=date_to)
            return topp

        elif all_missing:
            topps = {}
            start = datetostr(d=season_start(season), site='nba')
            for day in self.db.select_list(missing_team_opponent_dashboard()):
                daystr = datetostr(day, 'nba')
                content = self.scraper.team_opponent_dashboard(season, DateFrom=start, DateTo=daystr)
                topp = self.parser.team_opponent_dashboard(content)
                if self.insert_db:
                    self.db.insert_team_opponent_dashboards(topp, as_of=daystr)
                topps[daystr] = topp
            return topps

        else:
            raise ValueError(
                'need to specify dates or set all_missing to true')

    def update_player_positions(self):
        '''
        Trying to make sure all position data is current
        Only info in nba.com is PLAYER key, this is only Guard, etc.
        Unclear where the PG, etc. comes from
        TODO: this is not functional yet

        Raises:
            ValueError: if there is no database connection
        '''
        if not self.db:
            raise ValueError('need database connection to update players')

        q = """SELECT nbacom_player_id FROM stats.players2 WHERE nbacom_position IS NULL or nbacom_position = ''"""
        # NOTE(review): values are interpolated straight into the SQL text; POSITION comes from
        # the nba.com response, so switch to a parameterized query if self.db.update supports one
        uq = """UPDATE stats.players2 SET nbacom_position = '{}' WHERE nbacom_player_id = {}"""

        for pid in self.db.select_list(q):
            logging.debug('getting {}'.format(pid))
            # player info is always requested for the '2015-16' season
            pinfo = self.parser.player_info(
                self.scraper.player_info(pid, '2015-16'))
            if pinfo.get('POSITION'):
                self.db.update(uq.format(pinfo.get('POSITION'), pid))
                logging.debug('inserted {}'.format(
                    pinfo.get('DISPLAY_FIRST_LAST')))
class NBAPlayers(object):
    '''
    TODO: need to update / merge this with the names module
    Provides for updating nba.com players table (stats.players)
    Also permits cross-reference of player names and player ids from various sites(stats.player_xref)

    Usage:
        np = NBAPlayers(db=True)
        np.missing_players('2015-16')

    '''

    def __init__(self, db=False, polite=True):
        '''
        Args:
            db (bool): compose NBAPostgres object as self.nbadb (None when falsy)
            polite (bool): sleep 1 second between player_info requests in missing_players
        '''
        self.logger = logging.getLogger(__name__)
        self._dk_players = []
        self.scraper = NBAComScraper()
        self.parser = NBAComParser()
        self.polite = polite
        # always define nbadb so the `if not self.nbadb` checks below work without a database
        self.nbadb = NBAPostgres() if db else None

    @property
    def dk_players(self):
        '''
        Dictionary of player_name: draftkings_player_id
        Loaded from data/dk_players.json next to this module on first access;
        if the file cannot be read the problem is logged and the empty value is returned

        Returns:
            dk_players (dict): key is player name, value is dk_player_id
        '''
        if not self._dk_players:
            fn = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'data', 'dk_players.json')
            try:
                with open(fn, 'r') as infile:
                    self._dk_players = json.load(infile)
            except (IOError, OSError, ValueError):
                # missing/unreadable file or invalid JSON
                self.logger.exception('could not open dk_players json file')

        return self._dk_players

    def missing_players(self, season):
        '''
        Looks for missing players by comparing current_season_gamelogs and players tables
        Fetches player_info by id, inserts player into players table

        Arguments:
            season (str): example '2015-16' for the current season

        Returns:
            missing (list): list of dictionaries that represent row in players table

        Raises:
            ValueError: if there is no database connection
        '''
        if not self.nbadb:
            raise ValueError('missing_players requires a database connection')

        missing = []

        # get list of ids that appear in current_season_gamelogs but not players
        sql = '''
            SELECT DISTINCT player_id FROM stats.cs_player_gamelogs
            WHERE player_id NOT IN (SELECT DISTINCT nbacom_player_id FROM stats.players)
        '''

        for pid in self.nbadb.select_list(sql) or []:
            content = self.scraper.player_info(pid, season)
            pi = {k.lower(): v for k, v in self.parser.player_info(content).items()}

            # drop / rename nba.com keys to match the players table columns
            pi.pop('games_played_flag', None)
            pi['nbacom_team_id'] = pi.pop('team_id', None)
            pi['nbacom_position'] = pi.pop('position', None)
            pi['nbacom_player_id'] = pi.pop('person_id', None)

            if pi.get('height', None):
                # feet-inches string to total inches; left untouched if it is not in that form
                try:
                    feet, inches = pi['height'].split('-')
                    pi['height'] = int(feet) * 12 + int(inches)
                except (AttributeError, ValueError):
                    pass

            # have to convert empty strings to None, otherwise insert fails for integer/numeric columns
            # (every falsy value is stored as None)
            missing.append({k: (v or None) for k, v in pi.items()})

            if self.polite:
                time.sleep(1)

        if missing:
            self.nbadb.insert_dicts(missing, 'stats.players')

        return missing

    def recent_nbacom_players(self):
        '''
        Gets last couple of years of players from nba.com

        Returns:
            rows from recent_nba_players, or None when there is no database
        '''
        if not self.nbadb:
            return None

        # can also return in dict format
        # {'{0} {1}'.format(item['player_name'], item.get('team_code')):item for item in self.nbadb.select_dict(sql)}
        sql = '''SELECT * FROM recent_nba_players'''
        return self.nbadb.select_dict(sql)

    def player_xref(self, site_name):
        '''
        Obtains dictionary of site_player_id and nbacom_player_id

        Args:
            site_name (str): 'dk', 'fantasylabs', etc.

        Return:
            player_xref (dict): key is site_player_id, value is nbacom_player_id
        '''
        # NOTE(review): site_name is interpolated straight into the SQL text - pass trusted values only
        sql = '''SELECT nbacom_player_id, site_player_id FROM stats.player_xref WHERE site='{0}' ORDER by nbacom_player_id'''
        xref = self.nbadb.select_dict(sql.format(site_name))
        return {p.get('site_player_id'): p.get('nbacom_player_id') for p in xref if p.get('site_player_id')}

    def site_to_nbacom(self, site, player_name):
        '''
        Dispatches player_name to the name-lookup helper for the given site

        Arguments:
            site (str): 'dk', 'fd', 'rg', 'doug', 'espn'
            player_name (str): name of player on site

        Returns:
            result of the site-specific helper, or None when site is not in self.sites

        NOTE(review): self.sites and the _*_name helpers are not defined in this class as shown;
        presumably they are provided elsewhere - confirm before relying on this method
        '''
        site = site.lower()

        if site not in self.sites:
            # should try name matcher
            return None
        if site == 'dk':
            return self._dk_name(player_name)
        if 'doug' in site:
            return self._doug_name(player_name)
        if 'espn' in site:
            return self._espn_name(player_name)
        if site == 'fd' or 'duel' in site:
            return self._fd_name(player_name)
        if site == 'fl' or 'labs' in site:
            return self._fl_name(player_name)
        if site == 'rg' or 'guru' in site:
            return self._rg_name(player_name)
        return None
def setUp(self):
    '''Creates a fresh scraper (self.nbs) and parser (self.nbp) for the test'''
    test_logger = logging.getLogger(__name__)
    test_logger.addHandler(logging.NullHandler())
    self.nbs = NBAComScraper()
    self.nbp = NBAComParser()
class NBAPlayers(object):
    '''
    TODO: need to update / merge this with the names module

    Provides for updating nba.com players table (stats.players)
    Also permits cross-reference of player names and player ids
    from various sites (stats.player_xref)

    Usage:
        # db is a database object providing select_list, select_dict and insert_dicts
        np = NBAPlayers(db=db)
        np.missing_players('2015-16')
    '''

    # Class-level fallbacks so these attributes always exist, even on an
    # instance whose __init__ was given no db (or was bypassed by a subclass).
    nbadb = None
    polite = True

    def __init__(self, db=None, polite=True):
        '''
        Args:
            db: database object; stored as self.nbadb (None when not supplied)
            polite (bool): sleep one second after each player fetched in missing_players
        '''
        logging.getLogger(__name__).addHandler(logging.NullHandler())
        self._dk_players = []
        self.scraper = NBAComScraper()
        self.parser = NBAComParser()
        # always assign, so `if not self.nbadb` raises the intended ValueError
        # rather than an AttributeError when no db was passed
        self.nbadb = db
        self.polite = polite

    def _convert_heigt(self, h):
        '''
        Converts height from 6-11 (feet-inches) to 83 (inches)

        Args:
            h(str): e.g. 6-7, 5-11, 7-1

        Returns:
            height(int): e.g. 79, 71, 85; None if h cannot be parsed
        '''
        try:
            feet, inches = h.split('-')
            return int(feet) * 12 + int(inches)
        except (AttributeError, TypeError, ValueError):
            # not a string, wrong number of parts, or non-numeric parts
            return None

    @property
    def dk_players(self):
        '''
        Dictionary of player_name: draftkings_player_id
        Loaded on first access from data/dk_players.json next to this module

        Returns:
            dk_players (dict): key is player name, value is dk_player_id
                (stays an empty list if the file cannot be read or parsed)
        '''
        if not self._dk_players:
            fn = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                              'data', 'dk_players.json')
            try:
                with open(fn, 'r') as infile:
                    self._dk_players = json.load(infile)
            except (IOError, OSError, ValueError):
                # best effort: log the problem and fall through with the current value
                logging.exception('could not open dk_players json file')
        return self._dk_players

    def missing_players(self, season):
        '''
        Looks for missing players using the stats.players_to_add query
        Fetches player_info by id, inserts player into players table

        Arguments:
            season (str): example '2015-16' for the current season

        Returns:
            missing (list): list of dictionaries that represent row in players table

        Raises:
            ValueError: if no database object was supplied
        '''
        if not self.nbadb:
            raise ValueError('missing_players requires a database connection')

        missing = []

        # get list of ids that appear in current_season_gamelogs but not players
        sql = '''SELECT * FROM stats.players_to_add'''

        for pid in self.nbadb.select_list(sql):
            content = self.scraper.player_info(pid, season)
            pinfo = self.parser.player_info(content)
            pi = {k.lower(): v for k, v in pinfo.items()}
            pi.pop('games_played_flag', None)

            # rename the nba.com keys to the nbacom_-prefixed names
            pi['nbacom_team_id'] = pi.pop('team_id', None)
            pi['nbacom_position'] = pi.pop('position', None)
            pi['nbacom_player_id'] = pi.pop('person_id', None)

            if pi.get('height', None):
                pi['height'] = self._convert_heigt(pi['height'])

            # have to convert empty strings to None, otherwise insert fails
            # for integer/numeric columns (every falsy value becomes None)
            missing.append({k: (v if v else None) for k, v in pi.items()})

            if self.polite:
                time.sleep(1)

        if missing:
            self.nbadb.insert_dicts(missing, 'stats.players')

        return missing

    def player_xref(self, site_name):
        '''
        Obtains dictionary of site_player_id and nbacom_player_id

        Args:
            site_name (str): 'dk', 'fantasylabs', etc.

        Return:
            player_xref (dict): key is site_player_id, value is nbacom_player_id
        '''
        # NOTE(review): site_name is interpolated straight into the SQL text;
        # only pass trusted values until the db layer offers query parameters.
        sql = '''SELECT nbacom_player_id, site_player_id FROM stats.player_xref WHERE site='{0}' ORDER by nbacom_player_id'''
        xref = self.nbadb.select_dict(sql.format(site_name))
        return {
            p.get('site_player_id'): p.get('nbacom_player_id')
            for p in xref if p.get('site_player_id')
        }

    def site_to_nbacom(self, site, player_name):
        '''
        Dispatches player_name to the name-lookup helper for the given site

        Arguments:
            site (str): 'dk', 'fd', 'rg', 'doug', 'espn'
            player_name (str): name of the player on that site

        Returns:
            whatever the site-specific helper returns; None when site is not in self.sites
        '''
        # NOTE(review): self.sites and the _dk_name/_doug_name/_espn_name/_fd_name/
        # _fl_name/_rg_name helpers are not defined in this class; they have to be
        # provided elsewhere (subclass or mixin) before this method can work.
        site_key = site.lower()

        if site_key not in self.sites:
            # should try name matcher
            pass
        elif site_key == 'dk':
            return self._dk_name(player_name)
        elif 'doug' in site_key:
            return self._doug_name(player_name)
        elif 'espn' in site_key:
            return self._espn_name(player_name)
        elif site_key == 'fd' or 'duel' in site_key:
            return self._fd_name(player_name)
        elif site_key == 'fl' or 'labs' in site_key:
            return self._fl_name(player_name)
        elif site_key == 'rg' or 'guru' in site_key:
            return self._rg_name(player_name)
class NBAComScraper_test(unittest.TestCase):
    '''
    Exercises the NBAComScraper methods and checks the basic shape of what they return
    '''

    def setUp(self):
        logging.getLogger(__name__).addHandler(logging.NullHandler())
        self.nbs = NBAComScraper()

    def _assert_resultsets(self, content):
        '''
        Shared assertions: content is a dict with a non-None 'resultSets'
        whose first element has a non-None 'headers' entry
        '''
        self.assertIsInstance(content, dict)
        rs = content.get('resultSets', None)
        self.assertIsNotNone(rs)
        self.assertIsNotNone(rs[0].get('headers', None))

    def test_boxscore(self):
        box = self.nbs.boxscore('0021500001', '2015-16')
        self.assertIsInstance(box, dict)
        self.assertIsNotNone(box.get('resultSets', None))

    def test_playerstats(self):
        self._assert_resultsets(self.nbs.playerstats('2015-16'))

    def test_player_info(self):
        self._assert_resultsets(self.nbs.player_info('203083', '2015-16'))

    def test_one_player_gamelogs(self):
        self._assert_resultsets(self.nbs.one_player_gamelogs('203083', '2015-16'))

    def test_players(self):
        players = self.nbs.players(season='2015-16', IsOnlyCurrentSeason='1')
        self._assert_resultsets(players)

    def test_season_gamelogs(self):
        # player gamelogs first, then team gamelogs
        for player_or_team in ('P', 'T'):
            gamelogs = self.nbs.season_gamelogs(season='2015-16',
                                                player_or_team=player_or_team)
            self._assert_resultsets(gamelogs)

    def test_team_dashboard(self):
        team_id = '1610612765'
        season = '2015-16'
        self._assert_resultsets(self.nbs.team_dashboard(team_id, season))

    def test_team_opponent_dashboard(self):
        season = '2015-16'
        self._assert_resultsets(self.nbs.team_opponent_dashboard(season))

    def test_one_team_gamelogs(self):
        team_id = '1610612765'
        season = '2015-16'
        self._assert_resultsets(self.nbs.one_team_gamelogs(team_id, season))

    def test_teamstats(self):
        season = '2015-16'
        # call teamstats itself; team_opponent_dashboard has its own test above
        self._assert_resultsets(self.nbs.teamstats(season))

    def test_teams(self):
        t = self.nbs.teams()
        try:
            text_type = basestring  # Python 2
        except NameError:
            text_type = str  # Python 3 has no basestring
        self.assertIsInstance(t, text_type)
        # plain substring check; same result as the regex r'Pistons' on any Python version
        self.assertIn('Pistons', t)
def setUp(self):
    '''
    Builds the fixture used by each test: a scraper stored as self.nbs

    A NullHandler is attached to this module's logger before the scraper is created
    '''
    module_logger = logging.getLogger(__name__)
    module_logger.addHandler(logging.NullHandler())
    self.nbs = NBAComScraper()
class NBAComScraper_test(unittest.TestCase):
    '''
    Exercises the NBAComScraper methods and checks the basic shape of what they return
    '''

    def setUp(self):
        logging.getLogger(__name__).addHandler(logging.NullHandler())
        self.nbs = NBAComScraper()

    def _assert_resultsets(self, content):
        '''
        Shared assertions: content is a dict with a non-None 'resultSets'
        whose first element has a non-None 'headers' entry
        '''
        self.assertIsInstance(content, dict)
        rs = content.get('resultSets', None)
        self.assertIsNotNone(rs)
        self.assertIsNotNone(rs[0].get('headers', None))

    def test_boxscore(self):
        # each boxscore flavor only has to be a dict carrying 'resultSets'
        fetchers = (
            self.nbs.boxscore_traditional,
            self.nbs.boxscore_advanced,
            self.nbs.boxscore_scoring,
            self.nbs.boxscore_misc,
            self.nbs.boxscore_usage,
        )
        for fetch in fetchers:
            box = fetch('0021500001')
            self.assertIsInstance(box, dict)
            self.assertIsNotNone(box.get('resultSets', None))

    def test_playerstats(self):
        self._assert_resultsets(self.nbs.playerstats('2016-17'))

    def test_player_info(self):
        self._assert_resultsets(self.nbs.player_info('203083', '2016-17'))

    def test_one_player_gamelogs(self):
        self._assert_resultsets(self.nbs.one_player_gamelogs('203083', '2016-17'))

    def test_players(self):
        self._assert_resultsets(self.nbs.players(season='2016-17', cs_only='1'))

    def test_season_gamelogs(self):
        # player gamelogs first, then team gamelogs
        for player_or_team in ('P', 'T'):
            gamelogs = self.nbs.season_gamelogs(season='2016-17',
                                                player_or_team=player_or_team)
            self._assert_resultsets(gamelogs)

    def test_team_dashboard(self):
        team_id = '1610612765'
        season = '2016-17'
        self._assert_resultsets(self.nbs.team_dashboard(team_id, season))

    def test_team_opponent_dashboard(self):
        season = '2016-17'
        self._assert_resultsets(self.nbs.team_opponent_dashboard(season))

    def test_one_team_gamelogs(self):
        team_id = '1610612765'
        season = '2016-17'
        self._assert_resultsets(self.nbs.one_team_gamelogs(team_id, season))

    def test_teamstats(self):
        season = '2016-17'
        # call teamstats itself; team_opponent_dashboard has its own test above
        self._assert_resultsets(self.nbs.teamstats(season))

    def test_teams(self):
        t = self.nbs.teams()
        try:
            text_type = basestring  # Python 2
        except NameError:
            text_type = str  # Python 3 has no basestring
        self.assertIsInstance(t, text_type)
        # plain substring check; same result as the regex r'Pistons' on any Python version
        self.assertIn('Pistons', t)
class NBAComParser_test(unittest.TestCase):
    '''
    Tests NBAComParser methods

    Each test first looks for a saved fixture file in the working directory and
    falls back to fetching content with NBAComScraper when the file is missing or empty
    '''

    def setUp(self):
        logging.getLogger(__name__).addHandler(logging.NullHandler())
        self.nbs = NBAComScraper()
        self.nbp = NBAComParser()

    def _get_from_file(self, fn):
        '''
        Reads a fixture file

        Args:
            fn (str): path of the file

        Returns:
            str: file contents; None if fn is not a file or cannot be read
        '''
        # NOTE(review): this returns raw text; confirm the parser methods accept
        # text as well as whatever the scraper returns.
        if os.path.isfile(fn):
            try:
                with open(fn) as x:
                    return x.read()
            except (IOError, OSError, ValueError):
                # ValueError covers decoding errors while reading text
                logging.exception('could not read from file ' + fn)
        return None

    # fixture loaders: one per parser method, each with its default file name

    def _one_player_gamelogs(self, fn='one_player_gamelogs.json'):
        return self._get_from_file(fn)

    def _one_team_gamelogs(self, fn='one_team_gamelogs.json'):
        return self._get_from_file(fn)

    def _player_boxscore_traditional(self, fn='boxscore_traditional.json'):
        return self._get_from_file(fn)

    def _player_gamelogs(self, fn='player_gamelogs.json'):
        return self._get_from_file(fn)

    def _player_info(self, fn='player_info.json'):
        return self._get_from_file(fn)

    def _players(self, fn='players.json'):
        return self._get_from_file(fn)

    def _playerstats(self, fn='player_stats.json'):
        return self._get_from_file(fn)

    def _scoreboard(self, fn='scoreboard.json'):
        return self._get_from_file(fn)

    def _team_dashboard(self, fn='team_dashboard.json'):
        return self._get_from_file(fn)

    def _team_gamelogs(self, fn='team_game_logs.json'):
        return self._get_from_file(fn)

    def _team_opponent_dashboard(self, fn='team_opponent_dashboard.json'):
        return self._get_from_file(fn)

    def _teams(self, fn='teams.js'):
        return self._get_from_file(fn)

    def _teamstats(self, fn='teamstats.json'):
        return self._get_from_file(fn)

    def test_boxscore(self):
        content = self._player_boxscore_traditional()
        if not content:
            content = self.nbs.boxscore_traditional('0021500001')
        players, teams, starter_bench = self.nbp.boxscore_traditional(content)
        self.assertIsInstance(players, list)
        self.assertIsInstance(teams, list)
        self.assertIsInstance(players[0], dict)
        self.assertIn('MIN_PLAYED', players[0], "players should have min_played")
        self.assertIn('MIN_PLAYED', players[8], "players should have min_played")
        self.assertIn('MIN', teams[0], "teams should have min")
        self.assertIn('MIN', teams[1], "teams should have min")

    def test_one_player_gamelogs(self):
        content = self._one_player_gamelogs()
        if not content:
            content = self.nbs.one_player_gamelogs('203083', '2015-16')
        gls = self.nbp.one_player_gamelogs(content)
        self.assertIsInstance(gls, list)
        self.assertIsInstance(gls[0], dict)
        logging.info(gls)

    def test_one_team_gamelogs(self):
        content = self._one_team_gamelogs()
        if not content:
            team_id = '1610612765'
            season = '2015-16'
            content = self.nbs.one_team_gamelogs(team_id, season)
        gls = self.nbp.one_team_gamelogs(content)
        self.assertIsInstance(gls, list)
        self.assertIsInstance(gls[0], dict)

    def test_player_info(self):
        content = self._player_info()
        if not content:
            content = self.nbs.player_info('201939', '2015-16')
        pinfo = self.nbp.player_info(content)
        self.assertIsInstance(pinfo, dict)

    def test_players(self):
        content = self._players()
        if not content:
            content = self.nbs.players(season='2016-17', cs_only='1')
        ps = self.nbp.players(content)
        self.assertIsInstance(ps, list)
        self.assertIsNotNone(ps[0], 'ps should not be none')

    def test_playerstats(self):
        # statdate is needed on both paths (fixture file or scraper), so bind it
        # before the branch; otherwise it is unbound when the fixture file exists
        statdate = '2016-01-20'
        content = self._playerstats()
        if not content:
            content = self.nbs.playerstats('2015-16')
        ps = self.nbp.playerstats(content, statdate)
        self.assertIsInstance(ps, list)
        self.assertEqual(ps[0].get('STATDATE'), statdate)

    def test_season_gamelogs(self):
        team_content = self._team_gamelogs()
        player_content = self._player_gamelogs()
        if not team_content:
            team_content = self.nbs.season_gamelogs('2015-16', 'T')
        if not player_content:
            player_content = self.nbs.season_gamelogs('2015-16', 'P')
        team_gl = self.nbp.season_gamelogs(team_content, 'T')
        player_gl = self.nbp.season_gamelogs(player_content, 'P')
        self.assertIsInstance(player_gl, list)
        self.assertIsInstance(team_gl, list)
        self.assertIn('GAME_ID', player_gl[0], "players should have game_id")
        self.assertIn('GAME_ID', team_gl[0], "teams should have game_id")

    def test_team_dashboard(self):
        content = self._team_dashboard()
        if not content:
            team_id = '1610612765'
            season = '2015-16'
            content = self.nbs.team_dashboard(team_id, season)
        td = self.nbp.team_dashboard(content)
        self.assertIsInstance(td, dict)
        self.assertIn('overall', td, "dashboard has overall")

    def test_team_opponent_dashboard(self):
        content = self._team_opponent_dashboard()
        if not content:
            season = '2015-16'
            content = self.nbs.team_opponent_dashboard(season)
        td = self.nbp.team_opponent_dashboard(content)
        self.assertIsInstance(td, list)
        self.assertIsNotNone(td[0], 'td should not be None')

    def test_teams(self):
        content = self._teams()
        if not content:
            content = self.nbs.teams()
        tms = self.nbp.teams(content)
        self.assertIsInstance(tms, dict)
        self.assertIsNotNone(tms, 'tms should not be None')

    def test_teamstats(self):
        content = self._teamstats()
        if not content:
            season = '2015-16'
            content = self.nbs.teamstats(season)
        ts = self.nbp.teamstats(content)
        self.assertIsInstance(ts, list)
        self.assertIsNotNone(ts[0], 'ts should not be None')
class NBAComAgent(object):
    '''
    Performs script-like tasks using NBA.com API

    Composes a scraper (fetches content) and a parser (turns content into dicts/lists);
    when a db object is supplied, results are also inserted through it
    '''

    def __init__(self, db=None, cache_name=None, cookies=None):
        '''
        Arguments:
            db: NBAComPg instance; when falsy, nothing is inserted (insert_db is False)
            cache_name: str for scraper cache_name
            cookies: cookie jar
        '''
        self.scraper = NBAComScraper(cache_name=cache_name, cookies=cookies)
        self.parser = NBAComParser()
        # always define self.db so that later `if not self.db` checks work
        # instead of raising AttributeError when no db was passed
        self.db = db
        self.insert_db = bool(db)

    def _require_db(self, caller):
        '''
        Raises ValueError when no database object is available

        Args:
            caller (str): method name, used in the error message
        '''
        if not getattr(self, 'db', None):
            raise ValueError('{} requires a database connection'.format(caller))

    def _merge(self, merge_dico, dico_list):
        '''
        Recursively merges every dictionary in dico_list into merge_dico
        See http://stackoverflow.com/questions/28838291/merging-multiple-dictionaries-in-python

        Args:
            merge_dico (dict): target, modified in place
            dico_list (list): dictionaries to merge; dict values are merged key by key,
                any other value overwrites the existing one

        Returns:
            merged dictionary (the same object as merge_dico)
        '''
        for dico in dico_list:
            for key, value in dico.items():
                if isinstance(value, dict):
                    merge_dico.setdefault(key, dict())
                    self._merge(merge_dico[key], [value])
                else:
                    merge_dico[key] = value
        return merge_dico

    def _merge_pairs(self, base, advanced):
        '''
        Merges base and advanced stat rows pairwise into new dictionaries

        NOTE(review): rows are paired by position (zip), which assumes both
        lists come back in the same order - confirm against the API
        '''
        return [self._merge(dict(), [b, a]) for b, a in zip(base, advanced)]

    def combined_player_boxscores(self, gid):
        '''
        Combines 5 types of boxscores from nba.com API into list of boxscores

        Arguments:
            gid: string game ID, with leading '00'

        Returns:
            list of player boxscores combined - traditional, advanced, misc, scoring, usage

        Examples:
            a = NBAComAgent()
            playerboxes = a.combined_player_boxscores('0020161001')
        '''
        # only the player part of each parsed boxscore is needed here
        traditional_players, _, _ = self.parser.boxscore_traditional(
            self.scraper.boxscore_traditional(gid))
        adv_players, _ = self.parser.boxscore_advanced(
            self.scraper.boxscore_advanced(gid))
        misc_players, _ = self.parser.boxscore_misc(
            self.scraper.boxscore_misc(gid))
        scoring_players, _ = self.parser.boxscore_scoring(
            self.scraper.boxscore_scoring(gid))
        usage_players = self.parser.boxscore_usage(
            self.scraper.boxscore_usage(gid))

        # key every boxscore type by PLAYER_ID, then merge the per-player dicts
        by_type = [traditional_players, adv_players, misc_players,
                   scoring_players, usage_players]
        players = self._merge(
            dict(), [{p['PLAYER_ID']: p for p in boxes} for boxes in by_type])
        return list(players.values())

    def combined_team_boxscores(self, gid):
        '''
        Combines 4 types of boxscores from nba.com API into list of boxscores

        Arguments:
            gid: string game ID, with leading '00'

        Returns:
            list of team boxscores - combined traditional, advanced, misc, scoring

        Examples:
            a = NBAComAgent()
            teamboxes = a.combined_team_boxscores('0020161001')
        '''
        # only the team part of each parsed boxscore is needed here
        _, traditional_teams, _ = self.parser.boxscore_traditional(
            self.scraper.boxscore_traditional(gid))
        _, adv_teams = self.parser.boxscore_advanced(
            self.scraper.boxscore_advanced(gid))
        _, misc_teams = self.parser.boxscore_misc(
            self.scraper.boxscore_misc(gid))
        _, scoring_teams = self.parser.boxscore_scoring(
            self.scraper.boxscore_scoring(gid))

        # key every boxscore type by TEAM_ID, then merge the per-team dicts
        by_type = [traditional_teams, adv_teams, misc_teams, scoring_teams]
        teams = self._merge(
            dict(), [{t['TEAM_ID']: t for t in boxes} for boxes in by_type])
        return list(teams.values())

    def new_players(self, season):
        '''
        Updates players table with missing players

        Arguments:
            season (str): in YYYY-YY format

        Returns:
            list of players to add to stats.players, None if no player is missing

        Raises:
            ValueError: if no database object was supplied

        Examples:
            a = NBAComAgent(cache_name='newplayers', cookies=cookie_jar, db=NBAComPg(...))
            np = a.new_players(season='2015-16')
        '''
        self._require_db('new_players')
        content = self.scraper.players(season, cs_only=1)
        players = self.parser.players(content)
        currids = {int(p.get('PERSON_ID', 0)) for p in players}
        allids = set(
            self.db.select_list('SELECT nbacom_player_id from players'))
        missing = currids - allids
        if not missing:
            return None
        new_rows = [
            self.parser.player_info(self.scraper.player_info(pid, season))
            for pid in missing
        ]
        if self.insert_db:
            self.db.insert_players(new_rows)
        return new_rows

    def player_gamelogs(self, season, date_from=None, date_to=None):
        '''
        Fetches player_gamelogs and updates player_gamelogs table

        Arguments:
            season: str in YYYY-YY format (2015-16)
            date_from: str in YYYY-mm-dd format (accepted but not used)
            date_to: str in YYYY-mm-dd format (accepted but not used)

        Returns:
            players (list): gamelogs for games reported missing by the database,
                None if there are no gamelogs or no missing games

        Raises:
            ValueError: if no database object was supplied

        Examples:
            a = NBAComAgent(db=NBAComPg(...))
            np = a.player_gamelogs(season='2015-16')
        '''
        self._require_db('player_gamelogs')

        # mpgl -> list of game_ids for which there are no player gamelogs
        pgl = self.parser.season_gamelogs(
            self.scraper.season_gamelogs(season, 'P'), 'P')
        mpgl = self.db.missing_pgl()
        if not (pgl and mpgl):
            return None

        # filter gamelogs to those from missing game_ids
        pgl = [gl for gl in pgl if gl.get('GAME_ID', None) in mpgl]

        # now make sure you have no new players
        # NOTE(review): reads 'PERSON_ID' from gamelog rows; confirm gamelogs carry
        # that key (the boxscore rows in this class are keyed by 'PLAYER_ID')
        currids = {int(p.get('PERSON_ID', 0)) for p in pgl}
        allids = set(
            self.db.select_list('SELECT nbacom_player_id from players'))
        if self.insert_db:
            self.db.insert_players([
                self.parser.player_info(self.scraper.player_info(pid, season))
                for pid in currids - allids
            ])
            self.db.insert_player_gamelogs(pgl)
        return pgl

    def playerstats(self, season, date_from=None, date_to=None, all_missing=False):
        '''
        Fetches base and advanced playerstats, merges them, and inserts them if a db is set

        Arguments:
            season: str in YYYY-YY format (2015-16)
            date_from: str in YYYY-mm-dd format
            date_to: str in YYYY-mm-dd format
            all_missing: boolean; fetch stats for every date in missing_playerstats

        Returns:
            list of merged player dicts when date_from and date_to are given;
            dict of date string -> list of merged player dicts when all_missing is set

        Raises:
            ValueError: if neither both dates nor all_missing is given,
                or if all_missing is set without a database object

        Examples:
            a = NBAComAgent()
            np = a.playerstats(season='2015-16', date_from='2016-03-01', date_to='2016-03-08')
        '''
        if date_from and date_to:
            ps_base = self.parser.playerstats(
                self.scraper.playerstats(season, DateFrom=date_from, DateTo=date_to))
            ps_advanced = self.parser.playerstats(
                self.scraper.playerstats(season, DateFrom=date_from, DateTo=date_to,
                                         MeasureType='Advanced'))
            ps = self._merge_pairs(ps_base, ps_advanced)
            if self.insert_db:
                self.db.insert_playerstats(ps, as_of=date_to)
            return ps

        elif all_missing:
            self._require_db('playerstats(all_missing=True)')
            pstats = {}
            # every request runs from the season start up to the missing day
            start = datetostr(d=season_start(season), site='nba')
            for day in self.db.select_list(
                    'SELECT game_date FROM missing_playerstats'):
                daystr = datetostr(day, 'nba')
                ps_base = self.parser.playerstats(
                    self.scraper.playerstats(season, DateFrom=start, DateTo=daystr))
                ps_advanced = self.parser.playerstats(
                    self.scraper.playerstats(season, DateFrom=start, DateTo=daystr,
                                             MeasureType='Advanced'))
                ps = self._merge_pairs(ps_base, ps_advanced)
                pstats[daystr] = ps
                if self.insert_db:
                    self.db.insert_playerstats(ps, as_of=daystr)
            return pstats

        else:
            raise ValueError(
                'need to specify dates or set all_missing to true')

    def scoreboards(self, season_start, season_end):
        '''
        Downloads and parses range of scoreboards

        Arguments:
            season_start (str): in %Y-%m-%d format
            season_end (str): in %Y-%m-%d format

        Returns:
            scoreboards (list): scoreboard dicts

        Examples:
            a = NBAComAgent()
            sb = a.scoreboards(season_start='2015-10-27', season_end='2016-04-15')
        '''
        scoreboards = []
        for day in reversed(date_list(season_end, season_start)):
            game_date = dt.datetime.strftime(day, '%Y-%m-%d')
            scoreboard_json = self.scraper.scoreboard(game_date=game_date)
            scoreboards.append(
                self.parser.scoreboard(scoreboard_json, game_date=game_date))
        if self.insert_db:
            self.db.insert_scoreboards(scoreboards)
        return scoreboards

    def team_gamelogs(self, season, date_from=None, date_to=None):
        '''
        Fetches team_gamelogs and updates team gamelogs table

        Arguments:
            season (str): in YYYY-YY format (2015-16)
            date_from: accepted but not used
            date_to: accepted but not used

        Returns:
            team_gl (list): gamelogs for games reported missing by the database,
                None if there are no gamelogs or no missing games

        Raises:
            ValueError: if no database object was supplied

        Examples:
            a = NBAComAgent(db=NBAComPg(...))
            tgl = a.team_gamelogs(season='2015-16')
        '''
        self._require_db('team_gamelogs')
        content = self.scraper.season_gamelogs(season=season, player_or_team='T')
        tgl = self.parser.season_gamelogs(content, 'T')
        mtgl = self.db.missing_tgl()
        if tgl and mtgl:
            toins = [gl for gl in tgl if gl.get('GAME_ID', None) in mtgl]
            if self.insert_db:
                self.db.insert_team_gamelogs(toins)
            return toins
        else:
            logging.error('no team gamelogs to insert')
            return None

    def teamstats(self, season, date_from=None, date_to=None, all_missing=False):
        '''
        Fetches base and advanced teamstats, merges them, and inserts them if a db is set

        Arguments:
            season (str): in YYYY-YY format (2015-16)
            date_from (str): in %Y-%m-%d format
            date_to (str): in %Y-%m-%d format
            all_missing: boolean; fetch stats for every date in missing_teamstats

        Returns:
            list of team dictionary of basic and advanced stats when both dates are given;
            dict of date string -> list of team dictionaries when all_missing is set

        Raises:
            ValueError: if neither both dates nor all_missing is given,
                or if all_missing is set without a database object

        Examples:
            a = NBAComAgent()
            ps = a.teamstats(season='2015-16', date_from='2016-03-01', date_to='2016-03-08')
            ps = a.teamstats(season='2015-16', all_missing=True)
        '''
        if date_from and date_to:
            ts_base = self.parser.teamstats(
                self.scraper.teamstats(season, DateFrom=date_from, DateTo=date_to))
            ts_advanced = self.parser.teamstats(
                self.scraper.teamstats(season, DateFrom=date_from, DateTo=date_to,
                                       MeasureType='Advanced'))
            ts = self._merge_pairs(ts_base, ts_advanced)
            if self.insert_db:
                self.db.insert_teamstats(ts, as_of=date_to)
            return ts

        elif all_missing:
            self._require_db('teamstats(all_missing=True)')
            tstats = {}
            # every request runs from the season start up to the missing day
            start = datetostr(d=season_start(season), site='nba')
            for day in self.db.select_list(
                    'SELECT game_date FROM missing_teamstats'):
                daystr = datetostr(day, 'nba')
                ts_base = self.parser.teamstats(
                    self.scraper.teamstats(season, DateFrom=start, DateTo=daystr))
                ts_advanced = self.parser.teamstats(
                    self.scraper.teamstats(season, DateFrom=start, DateTo=daystr,
                                           MeasureType='Advanced'))
                ts = self._merge_pairs(ts_base, ts_advanced)
                tstats[daystr] = ts
                if self.insert_db:
                    self.db.insert_teamstats(ts, as_of=daystr)
                    logging.info('teamstats: completed {}'.format(daystr))
                else:
                    logging.error('did not insert: {}'.format(ts))
            return tstats

        else:
            raise ValueError(
                'need to specify dates or set all_missing to true')

    def team_opponent_dashboards(self, season, date_from=None, date_to=None,
                                 all_missing=False):
        '''
        Downloads and parses range of team_opponents

        Arguments:
            season (str): in YYYY-YY format
            date_from (str): in %Y-%m-%d format
            date_to (str): in %Y-%m-%d format
            all_missing (bool): get all missing dashboards

        Returns:
            topp (list): dicts, when both dates are given;
            dict of date string -> list of dicts when all_missing is set

        Raises:
            ValueError: if neither both dates nor all_missing is given,
                or if all_missing is set without a database object

        Examples:
            a = NBAComAgent()
            topp = a.team_opponent_dashboards('2014-15', date_from='2014-10-28',
                                              date_to='2015-04-15')
        '''
        if date_from and date_to:
            content = self.scraper.team_opponent_dashboard(
                season, DateFrom=date_from, DateTo=date_to)
            topp = self.parser.team_opponent_dashboard(content)
            if self.insert_db:
                self.db.insert_team_opponent_dashboards(topp, as_of=date_to)
            return topp

        elif all_missing:
            self._require_db('team_opponent_dashboards(all_missing=True)')
            topps = {}
            # every request runs from the season start up to the missing day
            start = datetostr(d=season_start(season), site='nba')
            for day in self.db.select_list(
                    'SELECT * FROM missing_team_opponent_dashboard'):
                daystr = datetostr(day, 'nba')
                content = self.scraper.team_opponent_dashboard(
                    season, DateFrom=start, DateTo=daystr)
                topp = self.parser.team_opponent_dashboard(content)
                if self.insert_db:
                    self.db.insert_team_opponent_dashboards(topp, as_of=daystr)
                topps[daystr] = topp
            return topps

        else:
            raise ValueError(
                'need to specify dates or set all_missing to true')

    def update_player_positions(self):
        '''
        Trying to make sure all position data is current
        Only info in nba.com is PLAYER key, this is only Guard, etc.
        Unclear where the PG, etc. comes from

        TODO: this is not functional yet

        Raises:
            ValueError: if no database object was supplied
        '''
        if not getattr(self, 'db', None):
            raise ValueError('need database connection to update players')

        q = """SELECT nbacom_player_id FROM stats.players2 WHERE nbacom_position IS NULL or nbacom_position = ''"""
        # NOTE(review): values are interpolated straight into the SQL text;
        # switch to query parameters if the db layer supports them.
        uq = """UPDATE stats.players2 SET nbacom_position = '{}' WHERE nbacom_player_id = {}"""

        for pid in self.db.select_list(q):
            logging.info('getting {}'.format(pid))
            pinfo = self.parser.player_info(
                self.scraper.player_info(pid, '2015-16'))
            if pinfo.get('POSITION'):
                self.db.update(uq.format(pinfo.get('POSITION'), pid))
                logging.info('inserted {}'.format(
                    pinfo.get('DISPLAY_FIRST_LAST')))
import sys import time from configparser import ConfigParser from nba.scrapers.nbacom import NBAComScraper from nba.db.nbacom import NBAComPg from nba.db.queries import missing_games_meta logging.basicConfig(stream=sys.stdout, level=logging.INFO) config = ConfigParser() configfn = os.path.join(os.path.expanduser('~'), '.pgcred') config.read(configfn) s = NBAComScraper(cache_name='games-meta') nbapg = NBAComPg(username=config['nbadb']['username'], password=config['nbadb']['password'], database=config['nbadb']['database']) teams = [] url = 'http://data.nba.com/data/10s/prod/v1/{game_date}/{game_id}_boxscore.json' headers = ['game_id', 'gamecode', 'game_date', 'team_code', ] for item in nbapg.select_dict(missing_games_meta()): try: content = s.get_json(url.format(game_date=item['game_date'], game_id=item['game_id'])) vteam_code = content['basicGameData']['vTeam']['triCode'] vscore = int(content['basicGameData']['vTeam']['score']) vls = [int(l['score']) for l in content['basicGameData']['vTeam']['linescore']] hteam_code = content['basicGameData']['hTeam']['triCode'] hscore = int(content['basicGameData']['hTeam']['score'])
class NBAComAgent(object):
    '''
    Performs script-like tasks using NBA.com API

    Composes a scraper (fetches content), a parser (turns content into
    lists of dict) and a database object (selects missing items, inserts
    parsed rows). Methods that read or write tables call self.db directly,
    so they need a db object to have been passed to the constructor.
    '''

    def __init__(self, db=None, cache_name='nbacom-agent', cookies=None, table_names=None):
        '''
        Args:
            db (NBAPostgres): instance
            cache_name (str): for scraper cache_name
            cookies: cookie jar
            table_names (dict): Database table names, keyed by the short
                codes used in this class ('pgl', 'tgl', 'ps', 'ts', 'tod',
                'pbs', 'tbs', 'box2', 'pl', ...). Defaults are used when
                this is empty or None.
        '''
        logging.getLogger(__name__).addHandler(logging.NullHandler())
        self.scraper = NBAComScraper(cache_name=cache_name, cookies=cookies)
        self.parser = NBAComParser()
        self.db = db
        if table_names:
            self.table_names = table_names
        else:
            self.table_names = {
                'pgl': 'player_gamelogs',
                'tgl': 'team_gamelogs',
                'seas': 'season',
                'pl': 'player',
                'ps': 'playerstats_daily',
                'ts': 'teamstats_daily',
                'tod': 'team_opponent_dashboard',
                'pbs': 'player_boxscores_combined',
                'tbs': 'team_boxscores_combined',
                'box2': 'game_boxscores',
                'tm': 'team'
            }

    def combined_boxscores(self, game_ids=None):
        '''
        Combines 5 types of boxscores (traditional, advanced, misc, scoring, usage)
        into one player boxscore and one team boxscore per game, and inserts them.

        Arguments:
            game_ids (iterable): game IDs; when empty or None, the union of
                game IDs missing player boxscores and missing team boxscores
                is selected from the database

        Returns:
            tuple: list of combined player boxscores, list of combined team
                boxscores, accumulated over every game processed
        '''
        player_boxscores = []
        team_boxscores = []

        if not game_ids:
            # need union of missing player and team
            game_ids = set(self.db.select_list(missing_player_boxscores())) | \
                set(self.db.select_list(missing_team_boxscores()))

        for game_id in game_ids:
            # make sure game_id in correct format when requesting boxscores
            gid = nbacom_idstr(game_id)
            logging.info(
                'agent.nbacom.combined_boxscores: getting {}'.format(gid))

            # get 5 types of boxscores
            # (the starter/bench split from the traditional boxscore is not used)
            traditional_players, traditional_teams, traditional_starter_bench = \
                self.parser.boxscore_traditional(self.scraper.boxscore_traditional(gid))
            adv_players, adv_teams = self.parser.boxscore_advanced(
                self.scraper.boxscore_advanced(gid))
            misc_players, misc_teams = self.parser.boxscore_misc(
                self.scraper.boxscore_misc(gid))
            scoring_players, scoring_teams = self.parser.boxscore_scoring(
                self.scraper.boxscore_scoring(gid))
            usage_players = self.parser.boxscore_usage(
                self.scraper.boxscore_usage(gid))

            # combine player and team boxscores, respectively:
            # key each boxscore type by id, then merge the per-id dicts
            player_parts = (traditional_players, adv_players, misc_players,
                            scoring_players, usage_players)
            players_combined = list(
                merge_many(dict(), [{p['PLAYER_ID']: p for p in part}
                                    for part in player_parts]).values())

            # there is no team-level usage boxscore, so only 4 types here
            team_parts = (traditional_teams, adv_teams, misc_teams,
                          scoring_teams)
            teams_combined = list(
                merge_many(dict(), [{t['TEAM_ID']: t for t in part}
                                    for part in team_parts]).values())

            # now add to the database
            self.db.safe_insert_dicts(player_boxscores_table(players_combined),
                                      self.table_names['pbs'])
            self.db.safe_insert_dicts(team_boxscores_table(teams_combined),
                                      self.table_names['tbs'])

            # accumulate so the caller actually receives what was combined
            # (previously the returned lists were never filled)
            player_boxscores.extend(players_combined)
            team_boxscores.extend(teams_combined)

        return (player_boxscores, team_boxscores)

    def game_boxscores(self, game_ids=None):
        '''
        Updates table with game_information

        Args:
            game_ids (iterable): dicts with keys 'gid' (game ID) and 'gd'
                (game date); when empty or None, missing games are selected
                from the database

        Returns:
            list: flat list of inserted boxscores, [visitor, home, visitor,
                home, ...] in processing order; None if nothing was inserted.
                For a single game this is [visitor, home].
        '''
        if not game_ids:
            game_ids = self.db.select_dict(missing_game_boxscores())

        inserted = []
        for g in game_ids:
            # one bad game should not stop the remaining games, so failures
            # are logged and the loop continues
            try:
                content = self.scraper.boxscore_v2015(g['gid'], g['gd'])
                v, h = self.parser.boxscore_v2015(content)
                self.db._insert_dict(v, self.table_names['box2'])
                self.db._insert_dict(h, self.table_names['box2'])
                logging.info('finished {} - {}'.format(g['gd'], g['gid']))
                inserted.extend([v, h])
            except Exception as e:
                logging.error('could not get {}'.format(g))
                logging.exception(e)

        return inserted or None

    def gleague_players(self, year):
        '''
        Updates player table with g-league players

        Args:
            year (int): 2017, etc.

        Returns:
            the parsed g-league players, as returned by the parser
        '''
        content = self.scraper.gleague_players(year)
        players = self.parser.gleague_players(content)
        for glp in gleague_player_table(players):
            self.db._insert_dict(glp, self.table_names['pl'])
        return players

    def player_gamelogs(self, season_code, date_from=None, date_to=None):
        '''
        Fetches player_gamelogs and updates player_gamelogs table

        Args:
            season_code (str): in YYYY-YY format (2017-18)
            date_from (str): in YYYY-mm-dd format, default None
            date_to (str): in YYYY-mm-dd format, default None

        Returns:
            list: of player dict (all fetched gamelogs, not only the inserted ones)
        '''
        # get all player gamelogs from nba.com
        content = self.scraper.season_gamelogs(season_code, 'P',
                                               date_from=date_from,
                                               date_to=date_to)
        pgl = self.parser.season_gamelogs(content, 'P')
        pgl_s = {'{}-{}'.format(gl['GAME_ID'], gl['PLAYER_ID']) for gl in pgl}

        # compare to gamelogs in database
        # the '00' prefix is added so database game ids line up with GAME_ID
        dbpgl = self.db.select_dict(
            'SELECT nbacom_game_id, nbacom_player_id FROM cs_player_gamelogs')
        dbpgl_s = {
            '00{}-{}'.format(gl['nbacom_game_id'], gl['nbacom_player_id'])
            for gl in dbpgl
        }

        # only try to insert missing gamelogs
        missing = pgl_s - dbpgl_s
        to_ins = [
            gl for gl in pgl
            if '{}-{}'.format(gl['GAME_ID'], gl['PLAYER_ID']) in missing
        ]

        # honor a configured table name; fall back to the historical literal
        # so custom table_names dicts without 'pgl' keep working
        self.db.insert_dicts(player_gamelogs_table(to_ins),
                             self.table_names.get('pgl', 'player_gamelogs'))
        return pgl

    def playerstats(self, season_code, per_mode='Totals', date_from=None,
                    date_to=None, all_missing=False):
        '''
        Fetches playerstats and updates playerstats table

        Args:
            season_code (str): in YYYY-YY format, e.g. 2017-18
            per_mode (str): 'Totals', 'PerGame', etc.
            date_from (str): in YYYY-mm-dd format, default None
            date_to (str): in YYYY-mm-dd format, default None
            all_missing (bool): default False

        Returns:
            None

        Raises:
            ValueError: if neither both dates nor all_missing is specified
        '''
        if date_from and date_to:
            ps_base = self.parser.playerstats(
                self.scraper.playerstats(season_code,
                                         per_mode=per_mode,
                                         DateFrom=date_from,
                                         DateTo=date_to))
            ps_advanced = self.parser.playerstats(
                self.scraper.playerstats(season_code,
                                         per_mode=per_mode,
                                         DateFrom=date_from,
                                         DateTo=date_to,
                                         MeasureType='Advanced'))
            # base and advanced rows are paired positionally
            ps = [
                merge_two(psb, psadv)
                for psb, psadv in zip(ps_base, ps_advanced)
            ]
            vals = playerstats_table(ps, date_to, per_mode)
            logging.info(vals)
            self.db.insert_dicts(vals, self.table_names['ps'])

        elif all_missing:
            # every missing day is fetched as season start -> that day
            start = datetostr(d=season_start(season_code=season_code),
                              fmt='nba')
            for day in self.db.select_list(missing_playerstats(per_mode)):
                daystr = datetostr(day, 'nba')
                base_content = self.scraper.playerstats(season_code,
                                                        per_mode=per_mode,
                                                        DateFrom=start,
                                                        DateTo=daystr)
                ps_base = self.parser.playerstats(base_content,
                                                  per_mode=per_mode)
                adv_content = self.scraper.playerstats(season_code,
                                                       per_mode=per_mode,
                                                       DateFrom=start,
                                                       DateTo=daystr,
                                                       MeasureType='Advanced')
                ps_advanced = self.parser.playerstats(adv_content, per_mode)
                # NOTE(review): merge order (advanced, base) and passing
                # per_mode to the parser both differ from the date-range
                # branch above; kept as-is - confirm which is intended
                ps = [
                    merge_two(psadv, psb)
                    for psb, psadv in zip(ps_base, ps_advanced)
                ]
                self.db.insert_dicts(playerstats_table(ps, daystr, per_mode),
                                     self.table_names['ps'])
                logging.info('completed {}'.format(daystr))

        else:
            raise ValueError(
                'agent.nbacom.playerstats: need to specify dates or set all_missing to true'
            )

    def refresh_materialized(self):
        '''
        Calls postgres function to refresh all materialized views.

        Failures are logged and not re-raised.
        '''
        refreshq = """SELECT RefreshAllMaterializedViews('*', true);"""
        try:
            self.db.execute(refreshq)
        except Exception as e:
            logging.exception(
                'could not refresh materialized views: {}'.format(e))

    def team_gamelogs(self, season_code, date_from=None, date_to=None):
        '''
        Fetches team_gamelogs and updates team_gamelogs table

        Args:
            season_code (str): in YYYY-YY format (2017-18)
            date_from (str): in YYYY-mm-dd format, default None
            date_to (str): in YYYY-mm-dd format, default None

        Returns:
            list: of dict (all fetched gamelogs, not only the inserted ones)
        '''
        content = self.scraper.season_gamelogs(season_code, 'T', date_from,
                                               date_to)
        tgl = self.parser.season_gamelogs(content, 'T')
        tgl_s = {'{}-{}'.format(gl['GAME_ID'], gl['TEAM_ID']) for gl in tgl}

        # compare team gamelogs to those already in database
        # NOTE(review): unlike player_gamelogs, no '00' prefix is added to the
        # database game id here - confirm the two id formats really match
        dbtgl_s = set(
            self.db.select_list(
                """SELECT CONCAT(nbacom_game_id, '-', nbacom_team_id) FROM cs_team_gamelogs"""
            ))

        # only try to insert missing gamelogs
        missing = tgl_s - dbtgl_s
        to_ins = [
            gl for gl in tgl
            if '{}-{}'.format(gl['GAME_ID'], gl['TEAM_ID']) in missing
        ]
        for item in team_gamelogs_table(to_ins):
            self.db._insert_dict(item, self.table_names['tgl'])
        return tgl

    def team_opponent_dashboards(self, season_code, per_mode, date_from=None,
                                 date_to=None, all_missing=False):
        '''
        Downloads and parses range of team_opponents

        Arguments:
            season_code (str): in YYYY-YY format
            per_mode (str): 'Totals', 'PerGame', etc.
            date_from (str): in %Y-%m-%d format, default is actual start of season
            date_to (str): in %Y-%m-%d format, default is actual end of season
            all_missing (bool): get all missing dashboards

        Returns:
            None when a date range is given;
            dict of day string -> parsed dashboards when all_missing is used

        Raises:
            ValueError: if neither both dates nor all_missing is specified
        '''
        if date_from and date_to:
            content = self.scraper.team_opponent_dashboard(season_code,
                                                           per_mode=per_mode,
                                                           date_from=date_from,
                                                           date_to=date_to)
            # NOTE(review): per_mode is not passed to the parser or the table
            # helper here, unlike the all_missing branch - confirm intended
            topp = self.parser.team_opponent_dashboard(content)
            self.db.insert_dicts(team_opponent_dashboards_table(topp, date_to),
                                 self.table_names.get('tod'))

        elif all_missing:
            topps = {}
            start = datetostr(d=season_start(season_code=season_code),
                              fmt='nba')
            for day in self.db.select_list(
                    missing_team_opponent_dashboard(per_mode)):
                daystr = datetostr(day, 'nba')
                logging.info('starting dashboards for {}'.format(daystr))
                # fetch season start -> this missing day, in the requested
                # per_mode (date_to is not set in this branch, so the day
                # being filled must be used as the end date)
                content = self.scraper.team_opponent_dashboard(
                    season_code,
                    per_mode=per_mode,
                    date_from=start,
                    date_to=daystr)
                topp = self.parser.team_opponent_dashboard(content, per_mode)
                self.db.insert_dicts(
                    team_opponent_dashboards_table(topp, daystr, per_mode),
                    self.table_names.get('tod'))
                topps[daystr] = topp
            return topps

        else:
            raise ValueError(
                'need to specify dates or set all_missing to true')

    def _merged_teamstats(self, season_code, per_mode, date_from, date_to):
        '''
        Fetches base and advanced teamstats for one date range and merges them

        Args:
            season_code (str): in YYYY-YY format (2015-16)
            per_mode (str): 'Totals', 'PerGame', etc.
            date_from (str): start of range
            date_to (str): end of range

        Returns:
            list: merge_two(base, advanced) for rows paired positionally
        '''
        content_base = self.scraper.teamstats(season_code,
                                              per_mode=per_mode,
                                              date_from=date_from,
                                              date_to=date_to)
        ts_base = self.parser.teamstats(content_base, per_mode)
        content_adv = self.scraper.teamstats(season_code,
                                             per_mode=per_mode,
                                             date_from=date_from,
                                             date_to=date_to,
                                             MeasureType='Advanced')
        ts_advanced = self.parser.teamstats(content_adv, per_mode)
        return [
            merge_two(tsb, tsadv)
            for tsb, tsadv in zip(ts_base, ts_advanced)
        ]

    def teamstats(self, season_code, per_mode, date_from=None, date_to=None,
                  all_missing=False):
        '''
        Fetches teamstats and updates database table

        Args:
            season_code (str): in YYYY-YY format (2015-16)
            per_mode (str): 'Totals', 'PerGame', etc.
            date_from (str): in %Y-%m-%d format
            date_to (str): in %Y-%m-%d format
            all_missing (bool): looks for all missing teamstats from season

        Returns:
            None (merged basic and advanced stats are inserted, not returned)

        Raises:
            ValueError: if neither both dates nor all_missing is specified
        '''
        if date_from and date_to:
            ts_merged = self._merged_teamstats(season_code, per_mode,
                                               date_from, date_to)
            self.db.insert_dicts(teamstats_table(ts_merged, date_to, per_mode),
                                 self.table_names['ts'])

        elif all_missing:
            # every missing day is fetched as season start -> that day
            start = datetostr(d=season_start(season_code=season_code),
                              fmt='nba')
            for day in self.db.select_list(missing_teamstats(per_mode)):
                logging.info('teamstats: getting {}'.format(day))
                daystr = datetostr(day, 'nba')
                ts = self._merged_teamstats(season_code, per_mode, start,
                                            daystr)
                self.db.insert_dicts(teamstats_table(ts, daystr, per_mode),
                                     self.table_names['ts'])
                logging.debug('teamstats: completed {}'.format(daystr))

        else:
            raise ValueError(
                'need to specify dates or set all_missing to true')
class NBAComParser_test(unittest.TestCase):
    '''
    Tests NBAComParser against content fetched by NBAComScraper

    Each test fetches content with the scraper and checks the type / keys of
    what the parser returns. The _get_from_file helpers allow reading content
    from a local file instead of using the scraper.
    '''

    def setUp(self):
        logging.getLogger(__name__).addHandler(logging.NullHandler())
        self.nbs = NBAComScraper()
        self.nbp = NBAComParser()

    def _get_from_file(self, fn):
        '''
        Reads a file into a string

        Args:
            fn (str): filename

        Returns:
            str: file content, or None if the file does not exist or
                could not be read
        '''
        # content is none if file does not exist
        content = None

        # test if file exists, if so, slurp it into content
        if os.path.isfile(fn):
            try:
                with open(fn) as x:
                    content = x.read()
            # best-effort read: log and return None, but do not swallow
            # KeyboardInterrupt / SystemExit the way a bare except would
            except Exception:
                logging.exception('could not read from file ' + fn)

        return content

    # Can try to access from file instead of using scraper
    def _player_gamelogs(self, fn='player_gamelogs.json'):
        return self._get_from_file(fn)

    def _player_info(self, fn='player_info.json'):
        return self._get_from_file(fn)

    def _players(self, fn='players.json'):
        return self._get_from_file(fn)

    def _playerstats(self, fn='player_stats.json'):
        return self._get_from_file(fn)

    def _scoreboard(self, fn='scoreboard.json'):
        return self._get_from_file(fn)

    def _team_gamelogs(self, fn='team_game_logs.json'):
        return self._get_from_file(fn)

    # START tests
    def test_boxscore(self):
        content = self.nbs.boxscore('0021500001', '2015-16')
        players, teams, starter_bench = self.nbp.boxscore(content)
        self.assertIsInstance(players, list)
        self.assertIsInstance(teams, list)
        self.assertIsInstance(players[0], dict)
        self.assertIn('MIN_PLAYED', players[0], "players should have min_played")
        self.assertIn('MIN_PLAYED', players[8], "players should have min_played")
        self.assertIn('MIN', teams[0], "teams should have min")
        self.assertIn('MIN', teams[1], "teams should have min")

    def test_one_player_gamelogs(self):
        content = self.nbs.one_player_gamelogs('203083', '2015-16')
        gls = self.nbp.one_player_gamelogs(content)
        self.assertIsInstance(gls, list)
        self.assertIsInstance(gls[0], dict)

    def test_one_team_gamelogs(self):
        team_id = '1610612765'
        season = '2015-16'
        content = self.nbs.one_team_gamelogs(team_id, season)
        gls = self.nbp.one_team_gamelogs(content)
        self.assertIsInstance(gls, list)
        self.assertIsInstance(gls[0], dict)

    def test_player_info(self):
        content = self.nbs.player_info('201939', '2015-16')
        pinfo = self.nbp.player_info(content)
        self.assertIsInstance(pinfo, dict)

    def test_players(self):
        content = self.nbs.players(season='2015-16', IsOnlyCurrentSeason='1')
        ps = self.nbp.players(content)
        self.assertIsInstance(ps, list)
        self.assertIsNotNone(ps[0], 'ps should not be none')

    def test_playerstats(self):
        statdate = '2016-01-20'
        content = self.nbs.playerstats('2015-16')
        ps = self.nbp.playerstats(content, statdate)
        self.assertIsInstance(ps, list)
        self.assertEqual(ps[0].get('STATDATE'), statdate)

    def test_season_gamelogs(self):
        team_content = self.nbs.season_gamelogs('2015-16', 'T')
        player_content = self.nbs.season_gamelogs('2015-16', 'P')
        team_gl = self.nbp.season_gamelogs(team_content, 'T')
        player_gl = self.nbp.season_gamelogs(player_content, 'P')
        self.assertIsInstance(player_gl, list)
        self.assertIsInstance(team_gl, list)
        self.assertIn('GAME_ID', player_gl[0], "players should have game_id")
        self.assertIn('GAME_ID', team_gl[0], "teams should have game_id")

    def test_team_dashboard(self):
        # dashboard = {'parameters': [], 'overall': [], 'location': [], 'days_rest': [], 'wins_losses': []}
        team_id = '1610612765'
        season = '2015-16'
        content = self.nbs.team_dashboard(team_id, season)
        td = self.nbp.team_dashboard(content)
        self.assertIsInstance(td, dict)
        self.assertIn('overall', td, "dashboard has overall")

    def test_team_opponent_dashboard(self):
        season = '2015-16'
        content = self.nbs.team_opponent_dashboard(season)
        td = self.nbp.team_opponent_dashboard(content)
        self.assertIsInstance(td, list)
        self.assertIsNotNone(td[0], 'td should not be None')

    def test_teams(self):
        content = self.nbs.teams()
        tms = self.nbp.teams(content)
        self.assertIsInstance(tms, dict)
        self.assertIsNotNone(tms, 'tms should not be None')

    def test_teamstats(self):
        season = '2015-16'
        content = self.nbs.teamstats(season)
        ts = self.nbp.teamstats(content)
        self.assertIsInstance(ts, list)
        self.assertIsNotNone(ts[0], 'ts should not be None')
def load_data(per_mode, playerpool_size, thresh_gp, thresh_min, lastn=0,
              season_code=None, sortcol=None, fn=None):
    '''
    Loads data from nba.com or csv file

    Players are filtered by the games-played and/or minutes thresholds that
    are set (truthy), and the filtered pool is capped at playerpool_size
    rows, in their existing order. With no threshold set, the data is
    returned unfiltered and uncapped.

    Args:
        per_mode (str): 'Totals', 'PerGame', 'Per48'
        playerpool_size (int): number of players in pool
        thresh_gp (int): minimum number of games played
        thresh_min (int): minimum number of minutes played
        lastn (int): last number of games, default 0
        season_code (str): '2017-18', etc., default None
        sortcol (str): sort, default None
        fn (str): filename of csv, default None

    Returns:
        DataFrame
    '''
    if fn:
        df = pd.read_csv(fn)
    else:
        scraper = NBAComScraper(cache_name='fbasim')
        parser = NBAComParser()
        if not season_code:
            season_code = current_season_code()
        content = scraper.playerstats(season_code, per_mode, lastn)
        df = pd.DataFrame(parser.playerstats(content, per_mode))

    # NOTE(review): this normalisation is applied to csv and scraped data
    # alike - confirm that is the intent
    df['MIN'] = df['MIN'].astype(int)
    df = df.rename(index=str, columns={"TEAM_ABBREVIATION": "TEAM"})

    # account for gp & minutes thresholds
    crit = None
    if thresh_gp:
        crit = df.GP >= thresh_gp
    if thresh_min:
        min_crit = df.MIN >= thresh_min
        crit = min_crit if crit is None else crit & min_crit

    if crit is not None:
        # always filter first, then cap the pool; previously the
        # single-threshold cases capped the *unfiltered* frame, letting
        # players below the threshold into the pool
        df = df[crit].iloc[:playerpool_size]

    # need index for joins in the sim
    # NOTE(review): set_index returns a new frame and the result is discarded
    # here, so the returned frame is NOT indexed by PLAYER_ID. Left unchanged
    # because assigning it would move PLAYER_ID out of the columns for callers.
    df.set_index('PLAYER_ID')

    if sortcol:
        return df.sort_values(sortcol, ascending=False)
    else:
        return df