def salaries(self, day=None, all_missing=False):
    '''
    Gets parsed salaries for one day, or for every day missing from the database

    Args:
        day(str): in mm_dd_YYYY format; when given, all_missing is ignored
        all_missing(bool): process every day returned by the missing_salaries() query

    Returns:
        players(list): of player dict, when day is given
        salaries(dict): 'nba'-formatted date string -> parsed salaries, when all_missing is set

    Raises:
        ValueError: if day is empty and all_missing is False
    '''
    if not (day or all_missing):
        raise ValueError('must provide day or set all_missing to True')

    if day:
        parsed = self.parser.dk_salaries(self.scraper.model(day), day)
        if self.insert_db and parsed:
            self.db.insert_salaries(parsed, game_date=convert_format(day, 'std'))
        return parsed

    by_date = {}
    for missing_day in self.db.select_list(missing_salaries()):
        fl_day = datetostr(missing_day, 'fl')
        parsed = self.parser.dk_salaries(self.scraper.model(fl_day), fl_day)
        by_date[datetostr(missing_day, 'nba')] = parsed
        logging.debug('got salaries for {}'.format(fl_day))
        # pause between requests
        time.sleep(1)

    # a single bulk insert once every missing day has been collected
    if self.insert_db and by_date:
        self.db.insert_salaries_dict(by_date)
    return by_date
def many_models(self, model_name='default', range_start=None, range_end=None, all_missing=False):
    '''
    Gets player models for a list of days

    The days come from the missing_models() query when all_missing is set,
    otherwise from date_list(range_end, range_start)

    Args:
        model_name (str): default, cash, etc.
        range_start (str): in %Y-%m-%d format
        range_end (str): in %Y-%m-%d format
        all_missing (bool): use the days from the missing_models() query

    Returns:
        models (list): one dict per day with keys game_date, data, model_name

    Examples:
        a = FantasyLabsNBAAgent()
        models = a.many_models(range_start='2016-03-01', range_end='2016-03-07')
        models = a.many_models(all_missing=True)
    '''
    # pick the source of days first; both sources are processed the same way
    if all_missing:
        days = self.db.select_list(missing_models())
    else:
        days = date_list(range_end, range_start)

    collected = []
    for model_date in days:
        fl_day = datetostr(model_date, 'fl')
        collected.append({
            'game_date': fl_day,
            'data': self.one_model(model_day=fl_day, model_name=model_name),
            'model_name': model_name
        })

    if self.insert_db:
        self.db.insert_models(collected)
    return collected
def team_opponent_dashboards(self, season, date_from=None, date_to=None, all_missing=False):
    '''
    Downloads and parses team opponent dashboards for a date range or for all missing days

    Arguments:
        season (str): in YYYY-YY format
        date_from (str): in %Y-%m-%d format
        date_to (str): in %Y-%m-%d format
        all_missing (bool): get all missing dashboards; only used when the
            date range is incomplete

    Returns:
        topp (list): dicts, when both date_from and date_to are given
        topps (dict): 'nba'-formatted date string -> parsed dashboard, when all_missing is set

    Raises:
        ValueError: if the date range is incomplete and all_missing is False

    Examples:
        a = NBAComAgent()
        topp = a.team_opponent_dashboards('2014-15', all_missing=True)
    '''
    if date_from and date_to:
        parsed = self.parser.team_opponent_dashboard(
            self.scraper.team_opponent_dashboard(
                season, DateFrom=date_from, DateTo=date_to))
        if self.insert_db:
            self.db.insert_team_opponent_dashboards(parsed, as_of=date_to)
        return parsed

    if not all_missing:
        raise ValueError(
            'need to specify dates or set all_missing to true')

    dashboards = {}
    # every missing day is requested as a window that opens on the season start
    season_first_day = datetostr(d=season_start(season), site='nba')
    missing_days = self.db.select_list(
        'SELECT * FROM missing_team_opponent_dashboard')
    for missing_day in missing_days:
        as_of = datetostr(missing_day, 'nba')
        raw = self.scraper.team_opponent_dashboard(
            season, DateFrom=season_first_day, DateTo=as_of)
        parsed = self.parser.team_opponent_dashboard(raw)
        if self.insert_db:
            self.db.insert_team_opponent_dashboards(parsed, as_of=as_of)
        dashboards[as_of] = parsed
    return dashboards
def missing_gamelog_players(season_code, agent=None):
    '''
    Gets list of players from gamelogs that are not in player table

    Args:
        season_code (str): season passed to season_start() and to the gamelog scraper
        agent (NBAComAgent): object instance providing scraper, parser and db.
            Optional for backward compatibility: when omitted, the
            module-level name `a` is used, as the function did before.

    Returns:
        list: of gamelog dicts, trimmed to the wanted keys, whose PLAYER_ID
            is not returned by the player table query
    '''
    if agent is None:
        # earlier versions read a global named `a`; keep existing callers working
        agent = a

    wanted = {"PLAYER_ID", "PLAYER_NAME", "TEAM_ID", "TEAM_ABBREVIATION",
              "GAME_ID", "GAME_DATE", "MATCHUP"}

    # gamelogs from the season start through today
    start = datetostr(season_start(season_code=season_code), fmt='nba')
    content = agent.scraper.season_gamelogs(
        season_code, 'P', date_from=start, date_to=today(fmt='nba'))
    gamelogs = agent.parser.season_gamelogs(content, 'P')

    players = [{k: v for k, v in p.items() if k in wanted} for p in gamelogs]
    currids = {p['PLAYER_ID'] for p in players}
    allids = set(agent.db.select_list('SELECT nbacom_player_id from player'))
    missing = currids - allids
    logging.info(
        'there are {} missing players from gamelogs'.format(len(missing)))
    return [p for p in players if p.get('PLAYER_ID') in missing]
def ownership(self, day=None, all_missing=False):
    '''
    Gets player ownership for a single day or for all missing days

    Args:
        day(str): in mm_dd_YYYY format; when given, all_missing is ignored
        all_missing(bool): single day or all missing days in season?

    Returns:
        players(list): of player ownership dict, when day is given
        owns(dict): 'fl'-formatted date string -> ownership, when all_missing is set

    Raises:
        ValueError: if day is empty and all_missing is False
    '''
    if day:
        fl_day = convert_format(day, 'fl')
        own = self.scraper.ownership(fl_day)
        if self.insert_db:
            self.db.insert_ownership(own, convert_format(fl_day, 'std'))
        return own

    if all_missing:
        owns = {}
        for missing_day in self.db.select_list(
                'SELECT game_date FROM missing_ownership'):
            daystr = datetostr(missing_day, 'fl')
            own = self.scraper.ownership(daystr)
            # honor insert_db here as well; this branch used to write to the
            # database unconditionally, unlike the single-day branch
            if self.insert_db:
                self.db.insert_ownership(own, convert_format(daystr, 'std'))
            owns[daystr] = own
        return owns

    raise ValueError('must provide day or set all_missing to True')
def team_opponent_dashboards(self, season_code, per_mode, date_from=None, date_to=None, all_missing=False):
    '''
    Downloads and parses range of team_opponents, then inserts them into the 'tod' table

    Arguments:
        season_code (str): in YYYY-YY format
        per_mode (str): 'Totals', 'PerGame', etc.
        date_from (str): in %Y-%m-%d format
        date_to (str): in %Y-%m-%d format
        all_missing (bool): get all missing dashboards; only used when the
            date range is incomplete

    Returns:
        None when a date range is given
        dict: 'nba'-formatted date string -> parsed dashboard, when all_missing is set

    Raises:
        ValueError: if the date range is incomplete and all_missing is False
    '''
    if date_from and date_to:
        content = self.scraper.team_opponent_dashboard(
            season_code, per_mode=per_mode,
            date_from=date_from, date_to=date_to)
        # pass per_mode to the parser and table builder, as the all_missing branch does
        topp = self.parser.team_opponent_dashboard(content, per_mode)
        self.db.insert_dicts(
            team_opponent_dashboards_table(topp, date_to, per_mode),
            self.table_names.get('tod'))
    elif all_missing:
        topps = {}
        start = datetostr(d=season_start(season_code=season_code), fmt='nba')
        for day in self.db.select_list(
                missing_team_opponent_dashboard(per_mode)):
            daystr = datetostr(day, 'nba')
            logging.info('starting dashboards for {}'.format(daystr))
            # the window must end on the missing day itself; the date_to
            # argument is empty in this branch, and per_mode was being dropped
            content = self.scraper.team_opponent_dashboard(
                season_code, per_mode=per_mode,
                date_from=start, date_to=daystr)
            topp = self.parser.team_opponent_dashboard(content, per_mode)
            self.db.insert_dicts(
                team_opponent_dashboards_table(topp, daystr, per_mode),
                self.table_names.get('tod'))
            # previously the returned dict was never filled
            topps[daystr] = topp
        return topps
    else:
        raise ValueError(
            'need to specify dates or set all_missing to true')
def rg():
    '''
    Optimizes lineups for each date of the 2015-16 season and stores them

    Rows are read from /home/sansbacon/data.csv, filtered by date, converted
    with rotoguru_to_pydfs, and run through LineupOptimizer (n=100). One row
    per player per lineup is inserted into the optimal_lineups table.
    Database credentials come from the [nbadb] section of ~/.pgcred.

    Args:
        None

    Returns:
        None
    '''
    logging.basicConfig(stream=sys.stdout, level=logging.INFO)

    config = ConfigParser()
    configfn = os.path.join(os.path.expanduser('~'), '.pgcred')
    config.read(configfn)
    nbapg = NBAComPg(username=config['nbadb']['username'],
                     password=config['nbadb']['password'],
                     database=config['nbadb']['database'])

    with open('/home/sansbacon/data.csv', 'r') as infile:
        reader = csv.DictReader(infile, skipinitialspace=True, delimiter=',')
        data = [dict(row) for row in reader]

    # the first two season dates are skipped
    for d in season_dates('2015-16')[2:]:
        rows_for_day = (row for row in data
                        if row['date'] == datetostr(d, 'db'))
        players = rotoguru_to_pydfs(rows_for_day)
        if not players:
            continue

        items = []
        optimizer = LineupOptimizer(settings.DraftKingsBasketballSettings)
        optimizer._players = players
        try:
            for rank, lineup in enumerate(optimizer.optimize(n=100), 1):
                for p in lineup.players:
                    items.append({
                        'game_date': d,
                        'lineup_rank': rank,
                        'nbacom_player_id': p.id,
                        'team_code': p.team,
                        'name': '{} {}'.format(p.first_name, p.last_name),
                        'positions': p.positions,
                        'dkpts': p.fppg,
                        'salary': p.salary
                    })
        except Exception as e:
            logging.exception(e)
        finally:
            # whatever was collected before a failure is still inserted
            nbapg.insert_dicts(items, 'optimal_lineups')
            logging.info('finished {}'.format(d))
def rotoworld(scraper, season, db):
    '''
    Gets archived rotoworld depth charts across a season and inserts them

    Steps through date_list() from season_start(season) to season_end(season)
    with delta=7, asks scraper.get_wayback for the depth-chart page nearest
    each date (max_delta=10), parses it, and inserts every row into the
    depth_charts table.

    Args:
        scraper: object providing get_wayback(url, d=..., max_delta=...)
        season: season identifier accepted by season_start() / season_end()
        db: object providing _insert_dict(row, table_name)

    Returns:
        list: every depth-chart row that was inserted (previously the list
            was built but never returned)
    '''
    results = []
    p = RotoworldNBAParser()
    url = 'http://www.rotoworld.com/teams/depth-charts/nba.aspx'
    for d in date_list(d2=season_start(season), d1=season_end(season), delta=7):
        dstr = datetostr(d, 'db')
        # best effort: a failure on one date is logged and the loop continues
        try:
            content, content_date = scraper.get_wayback(url, d=dstr, max_delta=10)
            if content and content_date:
                for r in p.depth_charts(content, dstr):
                    # the 'pf' key is dropped before insert
                    r.pop('pf', None)
                    db._insert_dict(r, 'depth_charts')
                    results.append(r)
                logging.info('completed {}'.format(dstr))
            else:
                logging.error('could not get {}'.format(dstr))
        except Exception as e:
            logging.exception('could not get {}: {}'.format(dstr, e))
    return results
def fl():
    '''
    Runs the lineup optimizer on past_dfs rows for each queried game date

    For every date returned by the date query, players are loaded from
    past_dfs, converted with nba_to_pydfs, and optimized (n=10); each lineup
    is printed. Database credentials come from the [nbadb] section of ~/.pgcred.

    NOTE(review): the rows collected in items are never inserted or returned.

    Args:
        None

    Returns:
        None
    '''
    logging.basicConfig(stream=sys.stdout, level=logging.INFO)

    config = ConfigParser()
    config.read(os.path.join(os.path.expanduser('~'), '.pgcred'))
    creds = config['nbadb']
    nbapg = NBAComPg(username=creds['username'],
                     password=creds['password'],
                     database=creds['database'])

    # the date query is currently pinned to one game date; the all-dates variant was:
    #dq = """SELECT DISTINCT game_date FROM cs_games WHERE game_date < now()::date ORDER BY game_date"""
    dq = """SELECT DISTINCT game_date FROM games WHERE game_date = '2015-11-02'"""
    q = """SELECT * FROM past_dfs WHERE game_date = '{}' ORDER BY dk_points DESC"""

    for d in nbapg.select_list(dq):
        gd = datetostr(d, 'nba')
        logging.info('starting {}'.format(gd))
        optimizer = LineupOptimizer(settings.DraftKingsBasketballSettings)
        pls = nbapg.select_dict(q.format(gd))
        logging.info(pls[:3])
        optimizer._players = nba_to_pydfs(pls)

        items = []
        try:
            for idx, lineup in enumerate(optimizer.optimize(n=10)):
                for p in lineup.players:
                    items.append({
                        'game_date': gd,
                        'lineup_rank': idx + 1,
                        'nbacom_player_id': p.id,
                        'positions': p.positions,
                        'dkpts': p.fppg,
                        'salary': p.salary
                    })
                print(lineup)
                logging.info('finished lineup {}'.format(idx))
        except Exception as e:
            # log the failure and move on to the next date
            logging.exception(e)
from nba.agents.fantasylabs import FantasyLabsNBAAgent
from nba.dates import datetostr
from nba.db.fantasylabs import FantasyLabsNBAPg

# Script: calls fla.salaries() once for every 2015 regular-season game date,
# newest date first.

# dedicated logger writing timestamped records to stdout; propagate is turned
# off so records are not passed on to ancestor loggers
logger = logging.getLogger('nbadb-update')
hdlr = logging.StreamHandler(sys.stdout)
formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
hdlr.setFormatter(formatter)
logger.addHandler(hdlr)
logger.setLevel(logging.INFO)
logger.propagate = False

# database credentials are read from the [nbadb] section of ~/.nbadb
config = ConfigParser()
configfn = os.path.join(os.path.expanduser('~'), '.nbadb')
config.read(configfn)

flpg = FantasyLabsNBAPg(username=config['nbadb']['username'],
                        password=config['nbadb']['password'],
                        database=config['nbadb']['database'])
# NOTE(review): browsercookie.firefox() presumably supplies the local Firefox
# session cookies for the site - confirm
fla = FantasyLabsNBAAgent(db=flpg, cache_name='flabs-nba', cookies=browsercookie.firefox())
fla.update_player_xref()

q = """select distinct game_date from games where season = 2015 AND season_type = 'regular' order by game_date DESC"""

for d in flpg.select_list(q):
    # best effort per date: a failure is logged with its traceback and the loop continues
    try:
        fla.salaries(day=datetostr(d, site='fl'))
        logger.info('completed {}'.format(d))
    except Exception as e:
        logger.exception('{} failed: {}'.format(d, e))
    finally:
        # pause after every date, whether it succeeded or failed
        time.sleep(1.5)
def run():
    '''
    Updates nba.com statistics

    Steps run in this order: players, player gamelogs, daily playerstats,
    combined boxscores, team gamelogs, daily teamstats, team opponent
    dashboards, game boxscores, materialized view refresh. The daily stats
    and dashboards are fetched once per mode ('Totals', then 'PerGame').

    Args:
        None

    Returns:
        None
    '''
    logging.basicConfig(stream=sys.stdout, level=logging.INFO)
    db = getdb()
    agent = NBAComAgent(cache_name='nba-agent-{}'.format(today()), db=db)
    agent.scraper.delay = 1
    season_code = '2017-18'
    per_modes = ('Totals', 'PerGame')

    # ensures players table is up-to-date before inserting gamelogs, etc.
    # players uses 2017 as season_year if season_code is 2017-18
    # whereas nbadb calls that season_year 2018
    logging.info('starting update nba.com players')
    nbadb_player_update.run()
    logging.info('finished update nba.com players')

    # player_gamelogs: season start through yesterday
    logging.info('starting nba.com player gamelogs')
    season_first_day = datetostr(
        season_start(season_code=season_code), fmt='nba')
    agent.player_gamelogs(season_code, date_from=season_first_day,
                          date_to=yesterday(fmt='nba'))
    logging.info('finished nba.com player gamelogs')

    # playerstats_daily
    logging.info('starting playerstats daily')
    for mode in per_modes:
        agent.playerstats(season_code, all_missing=True, per_mode=mode)
    logging.info('finished playerstats daily')

    # player and team boxscores combined (return values are not used here)
    logging.info('starting player_boxscores_combined')
    _player_box, _team_box = agent.combined_boxscores()
    logging.info('finished player_boxscores_combined')

    # update team_gamelogs
    logging.info('starting team gamelogs')
    agent.team_gamelogs(season_code)
    logging.info('finished team gamelogs')

    # teamstats_daily
    logging.info('starting teamstats daily')
    for mode in per_modes:
        agent.teamstats(season_code, all_missing=True, per_mode=mode)
    logging.info('finished teamstats daily')

    # team_opponent_dashboards
    logging.info('start team_opponent_dashboards')
    for mode in per_modes:
        agent.team_opponent_dashboards(season_code, all_missing=True,
                                       per_mode=mode)
    logging.info('finished team_opponent_dashboards')

    # game boxscores - linescores, refs, etc.
    logging.info('start game_boxscores')
    agent.game_boxscores()
    logging.info('finished game_boxscores')

    # refresh all materialized views
    logging.info('start refresh materialized views')
    agent.refresh_materialized()
    logging.info('refreshed materialized views')
def teamstats(self, season, date_from=None, date_to=None, all_missing=False):
    '''
    Fetches base and advanced teamstats, merges them, and updates database table

    Arguments:
        season (str): in YYYY-YY format (2015-16)
        date_from (str): in %Y-%m-%d format
        date_to (str): in %Y-%m-%d format
        all_missing: boolean; only used when the date range is incomplete

    Returns:
        list of merged team dictionaries, when both dates are given
        dict of 'nba'-formatted date string -> such a list, when all_missing is set

    Raises:
        ValueError: if the date range is incomplete and all_missing is False

    Examples:
        a = NBAComAgent()
        ps = a.teamstats(season='2015-16', date_from='2016-03-01', date_to='2016-03-08')
        ps = a.teamstats(season='2015-16', all_missing=True)
    '''
    def merged_stats(first_day, last_day):
        # base request first, then advanced; rows are paired by position
        base = self.parser.teamstats(
            self.scraper.teamstats(season, DateFrom=first_day,
                                   DateTo=last_day))
        advanced = self.parser.teamstats(
            self.scraper.teamstats(season, DateFrom=first_day,
                                   DateTo=last_day, MeasureType='Advanced'))
        return [merge(dict(), [b, adv]) for b, adv in zip(base, advanced)]

    if date_from and date_to:
        stats = merged_stats(date_from, date_to)
        if self.insert_db:
            self.db.insert_teamstats(stats, as_of=date_to)
        return stats

    if not all_missing:
        raise ValueError(
            'need to specify dates or set all_missing to true')

    by_day = {}
    season_first_day = datetostr(d=season_start(season), site='nba')
    for missing_day in self.db.select_list(missing_teamstats()):
        as_of = datetostr(missing_day, 'nba')
        stats = merged_stats(season_first_day, as_of)
        by_day[as_of] = stats
        if self.insert_db:
            self.db.insert_teamstats(stats, as_of=as_of)
            logging.debug('teamstats: completed {}'.format(as_of))
        else:
            logging.error('did not insert: {}'.format(stats))
    return by_day
def playerstats(self, season, date_from=None, date_to=None, all_missing=False):
    '''
    Fetches base and advanced playerstats, merges them, and updates the database

    Arguments:
        season: str in YYYY-YY format (2015-16)
        date_from: str in YYYY-mm-dd format
        date_to: str in YYYY-mm-dd format
        all_missing: boolean; only used when the date range is incomplete

    Returns:
        players (list): merged player dictionaries, when both dates are given
        pstats (dict): 'nba'-formatted date string -> such a list, when all_missing is set

    Raises:
        ValueError: if the date range is incomplete and all_missing is False

    Examples:
        a = NBAComAgent()
        np = a.playerstats(season='2015-16', date_from='2016-03-01', date_to='2016-03-08')
    '''
    def fetch_pair(first_day, last_day):
        # base request first, then advanced
        base = self.parser.playerstats(
            self.scraper.playerstats(season, DateFrom=first_day,
                                     DateTo=last_day))
        advanced = self.parser.playerstats(
            self.scraper.playerstats(season, DateFrom=first_day,
                                     DateTo=last_day, MeasureType='Advanced'))
        return base, advanced

    if date_from and date_to:
        base, advanced = fetch_pair(date_from, date_to)
        stats = [merge(dict(), [b, adv]) for b, adv in zip(base, advanced)]
        if self.insert_db:
            self.db.insert_playerstats(stats, as_of=date_to)
        return stats

    if not all_missing:
        raise ValueError(
            'need to specify dates or set all_missing to true')

    by_day = {}
    season_first_day = datetostr(d=season_start(season), site='nba')
    for missing_day in self.db.select_list(missing_playerstats()):
        as_of = datetostr(missing_day, 'nba')
        base, advanced = fetch_pair(season_first_day, as_of)
        # NOTE(review): merge order here is advanced-then-base, the reverse of
        # the date-range branch above - confirm which precedence is intended
        stats = [merge(dict(), [adv, b]) for b, adv in zip(base, advanced)]
        by_day[as_of] = stats
        if self.insert_db:
            self.db.insert_playerstats(stats, as_of=as_of)
            logging.info('completed {}'.format(as_of))
    return by_day
def teamstats(self, season_code, per_mode, date_from=None, date_to=None, all_missing=False):
    '''
    Fetches base and advanced teamstats, merges them, and inserts them into the 'ts' table

    Args:
        season_code (str): in YYYY-YY format (2015-16)
        per_mode (str): 'Totals', 'PerGame', etc.
        date_from (str): in %Y-%m-%d format
        date_to (str): in %Y-%m-%d format
        all_missing (bool): looks for all missing teamstats from season;
            only used when the date range is incomplete

    Returns:
        None

    Raises:
        ValueError: if the date range is incomplete and all_missing is False
    '''
    def merged_stats(first_day, last_day):
        # scrape + parse base, then scrape + parse advanced, then pair by position
        base_content = self.scraper.teamstats(
            season_code, per_mode=per_mode,
            date_from=first_day, date_to=last_day)
        base = self.parser.teamstats(base_content, per_mode)
        adv_content = self.scraper.teamstats(
            season_code, per_mode=per_mode,
            date_from=first_day, date_to=last_day, MeasureType='Advanced')
        advanced = self.parser.teamstats(adv_content, per_mode)
        return [merge_two(b, adv) for b, adv in zip(base, advanced)]

    if date_from and date_to:
        merged = merged_stats(date_from, date_to)
        self.db.insert_dicts(teamstats_table(merged, date_to, per_mode),
                             self.table_names['ts'])
        return

    if not all_missing:
        raise ValueError(
            'need to specify dates or set all_missing to true')

    season_first_day = datetostr(d=season_start(season_code=season_code),
                                 fmt='nba')
    for missing_day in self.db.select_list(missing_teamstats(per_mode)):
        logging.info('teamstats: getting {}'.format(missing_day))
        as_of = datetostr(missing_day, 'nba')
        merged = merged_stats(season_first_day, as_of)
        self.db.insert_dicts(teamstats_table(merged, as_of, per_mode),
                             self.table_names['ts'])
        logging.debug('teamstats: completed {}'.format(as_of))
def playerstats(self, season_code, per_mode='Totals', date_from=None, date_to=None, all_missing=False):
    '''
    Fetches base and advanced playerstats, merges them, and updates playerstats table

    Args:
        season_code (str): in YYYY-YY format, e.g. 2017-18
        per_mode (str): 'Totals', 'PerGame', etc.
        date_from (str): in YYYY-mm-dd format, default None
        date_to (str): in YYYY-mm-dd format, default None
        all_missing (bool): default False; only used when the date range is incomplete

    Returns:
        None

    Raises:
        ValueError: if the date range is incomplete and all_missing is False
    '''
    def fetch_pair(first_day, last_day):
        # per_mode goes to the parser as well as the scraper in both branches;
        # the date-range branch used to call the parser without it
        base_content = self.scraper.playerstats(
            season_code, per_mode=per_mode,
            DateFrom=first_day, DateTo=last_day)
        base = self.parser.playerstats(base_content, per_mode=per_mode)
        adv_content = self.scraper.playerstats(
            season_code, per_mode=per_mode,
            DateFrom=first_day, DateTo=last_day, MeasureType='Advanced')
        advanced = self.parser.playerstats(adv_content, per_mode=per_mode)
        return base, advanced

    if date_from and date_to:
        ps_base, ps_advanced = fetch_pair(date_from, date_to)
        ps = [merge_two(psb, psadv)
              for psb, psadv in zip(ps_base, ps_advanced)]
        vals = playerstats_table(ps, date_to, per_mode)
        logging.info(vals)
        self.db.insert_dicts(vals, self.table_names['ps'])
    elif all_missing:
        start = datetostr(d=season_start(season_code=season_code), fmt='nba')
        for day in self.db.select_list(missing_playerstats(per_mode)):
            daystr = datetostr(day, 'nba')
            ps_base, ps_advanced = fetch_pair(start, daystr)
            # NOTE(review): argument order is advanced-then-base here but
            # base-then-advanced in the date-range branch; left as found -
            # confirm which precedence is intended
            ps = [merge_two(psadv, psb)
                  for psb, psadv in zip(ps_base, ps_advanced)]
            self.db.insert_dicts(playerstats_table(ps, daystr, per_mode),
                                 self.table_names['ps'])
            logging.info('completed {}'.format(daystr))
    else:
        raise ValueError(
            'agent.nbacom.playerstats: need to specify dates or set all_missing to true'
        )
def match_nbacom_player(self, nbacom_player):
    '''
    Matches nbacom player with bbref player

    Candidates are the bbref players on the index page for the first letter
    of the last name whose name equals display_first_last:
        * no candidate: try the player page for the id built by bbref_player_id
        * one candidate: fetch that candidate's player page
        * several candidates: return the candidate whose date of birth matches

    Args:
        nbacom_player (dict): reads last_name, first_name,
            display_first_last and birthdate

    Returns:
        dict: the parsed player page (zero or one candidate) or the matching
            index entry (several candidates); None when nothing matches
    '''
    # nbacom player
    # {'birthdate': datetime.datetime(1993, 8, 1, 0, 0), 'country': 'Spain',
    #  'display_first_last': 'Alex Abrines', 'draft_number': 32, 'draft_round': 2, 'draft_year': 2013,
    #  'first_name': 'Alex', 'from_year': 2016, 'height': 42, 'jersey': 8,
    #  'last_affiliation': 'Spain/Spain', 'last_name': 'Abrines', 'nbacom_player_id': 203518,
    #  'nbacom_position': 'G', 'school': '', 'weight': 190}
    #
    # bbref player
    # {'source': 'bref', 'source_player_dob': '1992-03-23', 'source_player_id': 'irvinky01',
    #  'source_player_name': 'Kyrie Irving', 'source_player_position': 'Point Guard',
    #  'source_player_team_code': 'BOS', 'source_player_team_name': 'Boston Celtics'}
    display_name = nbacom_player.get('display_first_last')

    # bbref_players caches pages for each letter
    # helpful if doing more than a few players
    letter = nbacom_player['last_name'][0].lower()
    if not self.bbref_players.get(letter):
        content = self.scraper.players(letter)
        self.bbref_players[letter] = self.parser.players(content)

    # step one: find all players with the same name
    # (`or []` keeps a None cache entry from raising during iteration)
    candidates = self.bbref_players.get(letter) or []
    matches = [p for p in candidates
               if p['source_player_name'] == display_name]

    if len(matches) > 1:
        # more than 1 match is very unlikely; disambiguate on date of birth
        logging.info('add_gamelog_player: found >1 match')
        birthdate = datetostr(nbacom_player.get('birthdate'), fmt='nba')
        for match in matches:
            if match['source_player_dob'] == birthdate:
                return match
    else:
        if matches:
            # if there is only 1 match, then assume it is the right player
            # need to get the player page, which has the full position
            logging.info('add_gamelog_player: found 1 match')
            pid = matches[0].get('source_player_id')
        else:
            # if no matches, then look for individual player page on bbref
            # newer players may not have been added to the letter index page ('a', 'b', 'c')
            pid = bbref_player_id(nbacom_player['first_name'],
                                  nbacom_player['last_name'])
            logging.info('trying player page for {}'.format(pid))
        content = self.scraper.player_page(pid)
        bbref_player = self.parser.player_page(content, pid)
        if bbref_player:
            return bbref_player

    # every path that found nothing ends here; this log line used to sit in
    # an else branch that could never run
    logging.info('no match for {}'.format(display_name))
    return None