Beispiel #1
0
class NBAComParser_test(unittest.TestCase):
    '''
    Tests NBAComParser against content fetched with NBAComScraper

    Every test_* method requests content through self.nbs and checks the
    type (and a few keys) of what self.nbp parses out of it.
    '''

    def setUp(self):
        '''Creates a new scraper (self.nbs) and parser (self.nbp) for each test'''
        logging.getLogger(__name__).addHandler(logging.NullHandler())
        self.nbs = NBAComScraper()
        self.nbp = NBAComParser()

    def _get_from_file(self, fn):
        '''
        Reads a saved response from disk

        Arguments:
            fn (str): path of the file to read

        Returns:
            content (str): file contents; None if fn is not a file or cannot be read
        '''
        if not os.path.isfile(fn):
            return None

        try:
            with open(fn) as infile:
                return infile.read()
        except (IOError, OSError, UnicodeError):
            # best effort: log the traceback and report "no content"
            # (a bare except would also swallow KeyboardInterrupt/SystemExit)
            logging.exception('could not read from file %s', fn)
            return None

    # Saved responses can be read from file instead of using the scraper

    def _player_gamelogs(self, fn='player_gamelogs.json'):
        '''Returns the contents of the player gamelogs file, or None'''
        return self._get_from_file(fn)

    def _player_info(self, fn='player_info.json'):
        '''Returns the contents of the player info file, or None'''
        return self._get_from_file(fn)

    def _players(self, fn='players.json'):
        '''Returns the contents of the players file, or None'''
        return self._get_from_file(fn)

    def _playerstats(self, fn='player_stats.json'):
        '''Returns the contents of the player stats file, or None'''
        return self._get_from_file(fn)

    def _scoreboard(self, fn='scoreboard.json'):
        '''Returns the contents of the scoreboard file, or None'''
        return self._get_from_file(fn)

    def _team_gamelogs(self, fn='team_game_logs.json'):
        '''Returns the contents of the team gamelogs file, or None'''
        return self._get_from_file(fn)

    # START tests

    def test_boxscore(self):
        '''boxscore() yields player and team lists of dicts with minutes keys'''
        content = self.nbs.boxscore('0021500001', '2015-16')
        players, teams, _ = self.nbp.boxscore(content)
        self.assertIsInstance(players, list)
        self.assertIsInstance(teams, list)
        self.assertIsInstance(players[0], dict)
        self.assertIn('MIN_PLAYED', players[0], "players should have min_played")
        self.assertIn('MIN_PLAYED', players[8], "players should have min_played")
        self.assertIn('MIN', teams[0], "teams should have min")
        self.assertIn('MIN', teams[1], "teams should have min")

    def test_one_player_gamelogs(self):
        '''one_player_gamelogs() yields a list of dicts'''
        content = self.nbs.one_player_gamelogs('203083', '2015-16')
        gls = self.nbp.one_player_gamelogs(content)
        self.assertIsInstance(gls, list)
        self.assertIsInstance(gls[0], dict)

    def test_one_team_gamelogs(self):
        '''one_team_gamelogs() yields a list of dicts'''
        team_id = '1610612765'
        season = '2015-16'
        content = self.nbs.one_team_gamelogs(team_id, season)
        gls = self.nbp.one_team_gamelogs(content)
        self.assertIsInstance(gls, list)
        self.assertIsInstance(gls[0], dict)

    def test_player_info(self):
        '''player_info() yields a dict'''
        content = self.nbs.player_info('201939', '2015-16')
        pinfo = self.nbp.player_info(content)
        self.assertIsInstance(pinfo, dict)

    def test_players(self):
        '''players() yields a list whose first item is not None'''
        content = self.nbs.players(season='2015-16', IsOnlyCurrentSeason='1')
        ps = self.nbp.players(content)
        self.assertIsInstance(ps, list)
        self.assertIsNotNone(ps[0], 'ps should not be none')

    def test_playerstats(self):
        '''playerstats() yields a list stamped with the requested STATDATE'''
        statdate = '2016-01-20'
        content = self.nbs.playerstats('2015-16')
        ps = self.nbp.playerstats(content, statdate)
        self.assertIsInstance(ps, list)
        self.assertEqual(ps[0].get('STATDATE'), statdate)

    def test_season_gamelogs(self):
        '''season_gamelogs() yields lists with GAME_ID for teams and players'''
        team_content = self.nbs.season_gamelogs('2015-16', 'T')
        player_content = self.nbs.season_gamelogs('2015-16', 'P')
        team_gl = self.nbp.season_gamelogs(team_content, 'T')
        player_gl = self.nbp.season_gamelogs(player_content, 'P')
        self.assertIsInstance(player_gl, list)
        self.assertIsInstance(team_gl, list)
        self.assertIn('GAME_ID', player_gl[0], "players should have game_id")
        self.assertIn('GAME_ID', team_gl[0], "teams should have game_id")

    def test_team_dashboard(self):
        '''team_dashboard() yields a dict with an 'overall' key'''
        team_id = '1610612765'
        season = '2015-16'
        content = self.nbs.team_dashboard(team_id, season)
        td = self.nbp.team_dashboard(content)
        self.assertIsInstance(td, dict)
        self.assertIn('overall', td, "dashboard has overall")

    def test_team_opponent_dashboard(self):
        '''team_opponent_dashboard() yields a list whose first item is not None'''
        season = '2015-16'
        content = self.nbs.team_opponent_dashboard(season)
        td = self.nbp.team_opponent_dashboard(content)
        self.assertIsInstance(td, list)
        self.assertIsNotNone(td[0], 'td should not be None')

    def test_teams(self):
        '''teams() yields a dict'''
        content = self.nbs.teams()
        tms = self.nbp.teams(content)
        self.assertIsInstance(tms, dict)
        self.assertIsNotNone(tms, 'tms should not be None')

    def test_teamstats(self):
        '''teamstats() yields a list whose first item is not None'''
        season = '2015-16'
        content = self.nbs.teamstats(season)
        ts = self.nbp.teamstats(content)
        self.assertIsInstance(ts, list)
        self.assertIsNotNone(ts[0], 'ts should not be None')
Beispiel #2
0
class NBAComAgent(NBAAgent):
    '''
    Performs script-like tasks using NBA.com API
    Intended to replace standalone scripts so can use common API and tools

    Examples:
        a = NBAComAgent()
        gamelogs = a.cs_player_gamelogs('2015-16')

    '''

    def __init__(self, db=True, safe=True):
        '''
        Composes the logger, scraper, parser, seasons helper and optional database object

        Args:
            db (bool): when true, an NBAComPg object is created as self.nbadb;
                       otherwise self.nbadb is None
            safe (bool): stored as self.safe; methods back up tables prior to inserts when true

        '''

        NBAAgent.__init__(self)
        self.safe = safe
        self.logger = logging.getLogger(__name__)
        self.scraper = NBAComScraper()
        self.parser = NBAComParser()
        self.nbas = NBASeasons()
        self.nbadb = NBAComPg() if db else None

    def combine_boxscores(self, boxes, advanced_boxes):
        '''
        Combines NBAComScraper.boxscores() and boxscores_advanced()

        Arguments:
            boxscores(list): list of 'base' boxscores
            boxscores(list): list of 'advanced' boxscores

        Returns:
            merged_players (list): base and advanced combined
            merged_teams (list): base and advanced combined

        Examples:
            a = NBAComAgent()
            combined = a.combine_boxscores(boxes, advanced_boxes)

        '''

        merged_players = []
        merged_teams = []

        for gid, box in boxes.iteritems():

            # players and teams are lists of dicts
            players, teams, starterbench = self.parser.boxscore(box)

            # players_adv and teams_adv are lists of dicts
            adv_box = advanced_boxes.get(gid)
            players_adv, teams_adv = self.parser.boxscore_advanced(adv_box)

            # need to transform into dicts
            players_dict = {p['PLAYER_ID']: p for p in players}
            players_adv_dict = {p['PLAYER_ID']: p for p in players_adv}
            teams_dict = {t['TEAM_ID']: t for t in teams}
            teams_adv_dict = {t['TEAM_ID']: t for t in teams_adv}

            # now loop through players
            for pid, player in players_dict.iteritems():
                player_adv = players_adv_dict.get(pid)

                if player_adv:
                    merged_players.append(self.merge_boxes(player, player_adv))

            # now loop through teams
            for tid, team in teams_dict.iteritems():
                team_adv = teams_adv_dict.get(tid)

                if team_adv:
                    merged_teams.append(self.merge_boxes(team, team_adv))

        self.nbadb.insert_boxscores(merged_players, merged_teams)


    def commonallplayers(self, season):
        '''
        Solves problem of players changing teams
        nba.com updates player teams regularly, so i look every day to make sure lists accurate

        Arguments:
            season (str): in YYYY-YY format

        Returns:
            to_insert (list): list of players that needed to be updated

        Examples:
            a = NBAComAgent()
            combined = a.commonallplayers('2015-16')

        '''

        game_date = dt.datetime.today()
        players = self.parser.players(self.scraper.players(season=season, cs_only='1'))

        to_insert = []

        convert = {
            "PERSON_ID": 'nbacom_player_id',
            "DISPLAY_LAST_COMMA_FIRST": '',
            "DISPLAY_FIRST_LAST": 'display_first_last',
            "ROSTERSTATUS": 'rosterstatus',
            "FROM_YEAR": '',
            "TO_YEAR": '',
            "PLAYERCODE": '',
            "TEAM_ID": 'team_id',
            "TEAM_CITY": '',
            "TEAM_NAME": '',
            "TEAM_ABBREVIATION": 'team_code',
            "TEAM_CODE": '',
            "GAMES_PLAYED_FLAG": ''
        }

        for p in players:
            pti = {'game_date': game_date, 'nbacom_season_id': 22015, 'season': 2016}

            for k,v in p.iteritems():
                converted = convert.get(k)
                if converted:
                    pti[converted] = v

            to_insert.append(pti)

        if self.nbadb:
            if to_insert:
                self.nbadb.insert_dicts(to_insert, 'stats.playerteams')

        return to_insert

    def cs_player_gamelogs(self, season, date_from=None, date_to=None):
        '''
        Fetches player_gamelogs and updates cs_player_gamelogs table

        Arguments:
             season (str): in YYYY-YY format (2015-16)

        Returns:
             players (list): player dictionary of stats + dfs points
        '''

        gamelogs = self.parser.season_gamelogs(self.scraper.season_gamelogs(season, 'P'), 'P')

        table_name = 'stats.cs_player_gamelogs'

        if self.nbadb:
            if self.safe:
                self.nbadb.postgres_backup_table(self.nbadb.database, table_name)
    
            gamelogs = self.nbadb.insert_player_gamelogs(gamelogs, table_name)
            self.nbadb.update_positions(table_name)
            self.nbadb.update_teamids(table_name)

        return gamelogs

    def cs_playerstats(self, season, date_from=None, date_to=None):
        '''
        Fetches cs_player_stats and updates database table

        Arguments:
             season (str): in YYYY-YY format (2015-16)
             date_from (str): in %Y-%m-%d format, default beginning of season
             date_from (str): in %Y-%m-%d format, default yesterday

        Returns:
             player_stats (list): player dictionary of basic and advanced stats

        Examples:
            a = NBAComAgent()
            ps = a.cs_playerstats('2015-16')
            ps = a.cs_playerstats(season='2015-16', date_from='2016-03-01', date_to='2016-03-08')

        '''

        # default is to get entire season through yesterday
        yesterday = dt.datetime.strftime(dt.datetime.today() - dt.timedelta(1), '%Y-%m-%d')

        if not date_from:
            date_from = self.nbas.season_start(season)

        if not date_to:
            date_to = yesterday

        ps_base = self.parser.playerstats(self.scraper.playerstats(season, DateFrom=date_from, DateTo=date_to))
        ps_advanced = self.parser.playerstats(self.scraper.playerstats(season, DateFrom=date_from, DateTo=date_to, MeasureType='Advanced'))

        # now need to merge base and advanced
        ps_base = {p['PLAYER_ID']: p for p in ps_base}

        for ps_adv in ps_advanced:
            pid = ps_adv['PLAYER_ID']
            base = ps_base.get(pid)

            if base:
                base.update(ps_adv)
                ps_base[pid] = base       

        return self.nbadb.insert_playerstats(ps_base.values(), table_name='stats.cs_playerstats', game_date=yesterday)

    def cs_team_gamelogs(self, season, date_from=None, date_to=None):
        '''
        Fetches team_gamelogs and updates cs_team_gamelogs table

        Arguments:
             season (str): in YYYY-YY format (2015-16)

        Returns:
             team_gl (list): player dictionary of stats

        Examples:
            a = NBAComAgent()
            tgl = a.cs_team_gamelogs('2015-16')
            tgl = a.cs_playerstats(season='2015-16', date_from='2016-03-01', date_to='2016-03-08')

        '''

        gamelogs = self.parser.season_gamelogs(self.scraper.season_gamelogs(season='2015-16', player_or_team='T'), 'T')
        self.logger.debug('there are {0} team gamelogs'.format(len(gamelogs)))

        if self.nbadb:

            table_name = 'stats.cs_team_gamelogs'

            if self.safe:
                self.nbadb.postgres_backup_table(self.nbadb.database, table_name)

            gamelogs = self.nbadb.insert_team_gamelogs(gamelogs, table_name)
            self.logger.debug('there are now {0} team gamelogs'.format(len(gamelogs)))

        return gamelogs

    def cs_teamstats(self, season, date_from=None, date_to=None):
        '''
        Fetches leaguedashteamstats and updates cs_leaguedashteamstats table

        Arguments:
             season (str): in YYYY-YY format (2015-16)
             date_from (str): in %Y-%m-%d format, default beginning of season
             date_from (str): in %Y-%m-%d format, default yesterday

        Returns:
             teamstats (list): team dictionary of basic and advanced stats
             
         Examples:
            a = NBAComAgent()
            ps = a.cs_teamstats('2015-16')
            ps = a.cs_teamstats(season='2015-16', date_from='2016-03-01', date_to='2016-03-08')

        '''

        # default is to get entire season through yesterday
        yesterday = dt.datetime.strftime(dt.datetime.today() - dt.timedelta(1), '%Y-%m-%d')

        if not date_from:
            date_from = self.nbas.season_start(season)

        if not date_to:
            date_to = yesterday

        ts_base = self.parser.teamstats(self.scraper.teamstats(season, DateFrom=date_from, DateTo=date_to))
        ts_adv = self.parser.teamstats(self.scraper.teamstats(season, DateFrom=date_from, DateTo=date_to, MeasureType='Advanced'))

        # now need to merge base and advanced
        ts_base = {t['TEAM_ID']: t for t in ts_base}

        for ts_adv in ts_adv:
            tid = ts_adv['TEAM_ID']
            base = ts_base.get(tid)

            if base:
                base.update(ts_adv)
                ts_base[tid] = base

        self.nbadb.insert_teamstats(ts_base.values(), table_name='stats.cs_teamstats', game_date=yesterday)

        return ts_base, ts_adv

    def merge_boxes(self, b1, b2):
        '''
        Combines base and advanced player or team boxscores from same game

        Neither argument is modified; on duplicate keys the value from b2 wins.

        Arguments:
            b1 (dict): base boxscore
            b2 (dict): advanced boxscore

        Returns:
            merged (dict): new dict with the keys of both boxscores

        Examples:
            a = NBAComAgent()
            merged = a.merge_boxes(base_box, adv_box)

        '''

        # copy first: plain assignment would alias b1 and mutate the caller's dict
        merged = dict(b1)
        merged.update(b2)
        return merged

    def players_to_add(self):
        '''
        Compare current_season_gamelogs and players tables to see if missing players in latter

        Arguments:
            None

        Returns:
            list
        '''

        sql = '''SELECT * FROM vw_add_players_table'''
        return self.nbadb.select_dict(sql)

    def scoreboards(self, season_start, season_end, pkl_fname=None):
        '''
        Downloads and parses range of scoreboards, optionally saves to pickle file

        Arguments:
            season_start (str): in %Y-%m-%d format
            season_end (str): in %Y-%m-%d format
            pkl_fname (optional [str]): example - 'scoreboards_2015-16.pkl'

        Returns:
             scoreboards (list): scoreboard dicts

         Examples:
            a = NBAComAgent()
            sb = a.scoreboards()
            sb = a.scoreboards(pkl_fname = 'scoreboards_2015-16.pkl')
            sb = a.scoreboards(season_start='2015-10-27', season_end='2016-04-15')
        '''

        scoreboards = []

        for day in reversed(self.date_list(season_end, season_start)):
            game_date = dt.datetime.strftime(day, '%Y-%m-%d')
            scoreboard_json = self.nbas.scoreboard(game_date=game_date)
            scoreboard = self.nbap.scoreboard(scoreboard_json, game_date=game_date)
            scoreboards.append(scoreboard)       

        if pkl_fname:
            try:
                with open('/home/sansbacon/scoreboards_20160108.pkl', 'wb') as outfile:
                    pickle.dump(scoreboards, outfile)

            except:
                logging.error('could not save scoreboards to {0}'.format(pkl_fname))

        return scoreboards
        
    def teamgames(self, games):
        '''
        Converts list of games into list in teamgames format, where there are 2 teamgames for every game

        Arguments:
            games(list): list of games from nba.com where two teams are in 1 row (visitor, home)

        Returns:
            teamgames(list): list of games in teamgames format, 2 teamgames per game row
                             (home team first, then visitor)

        Examples:
            # is in format {'game_id', 'visitor_team_id', 'home_team_id', . . . }
            games = NBAPostgres.select_dict('SELECT * FROM games')

            # is in format {'game_id', 'team_id', 'opponent_team_id', 'is_home' . . . }
            teamgames = NBAComAgent.teamgames(games)

        '''

        teamgames = []
        to_drop = ('home_team_code', 'home_team_id', 'visitor_team_code', 'visitor_team_id')

        # (team prefix, opponent prefix, is_home) for the two rows built per game
        sides = (('home', 'visitor', True), ('visitor', 'home', False))

        for game in games:
            for team, opponent, is_home in sides:
                tg = copy.deepcopy(game)
                tg['team_code'] = game[team + '_team_code']
                tg['team_id'] = game[team + '_team_id']
                tg['opponent_team_code'] = game[opponent + '_team_code']
                tg['opponent_team_id'] = game[opponent + '_team_id']
                tg['is_home'] = is_home

                # .items() works on both Python 2 and 3 (iteritems is Python 2 only)
                teamgames.append({k: v for k, v in tg.items() if k not in to_drop})

        return teamgames

    def team_opponents(self, season, season_start=None, season_end=None, pkl_fname=None):
        '''
        Downloads and parses range of team_opponents, optionally saves to pickle file

        Arguments:
            season (str): in YYYY-YY format
            season_start (str): in %Y-%m-%d format, default is actual start of season
            season_end (str): in %Y-%m-%d format, default is actual end of season
            pkl_fname (optional [str]): example - 'scoreboards_2015-16.pkl'

        Returns:
             topp (list): dicts

         Examples:
            a = NBAComAgent()
            topp = a.team_opponents('2014-15')

        '''

        topp = []

        # figure out season_start, season end
        if season_start is None:
            days = self.nbas.season_dates('2014-15')
            season_start = dt.datetime.strftime(days[-1], '%Y-%m-%d')
        else:
            days = date_list(season_end, season_start)

        for day in reversed(days):
            content = self.scraper.team_opponent_dashboard(season, DateFrom=season_start, DateTo=day)
            teamstats_opp = self.parser.team_opponent_dashboard(content)

            for team in teamstats_opp:
                fixed_team = {k.lower():v for k,v in team.iteritems()}
                fixed_team['game_date'] = dt.datetime.strftime(day, '%Y-%m-%d')
                topp.append(fixed_team)

        if pkl_fname:
            try:
                with open(pkl_fname, 'wb') as outfile:
                    pickle.dump(topp, outfile)

            except:
                self.logger.error('could not save scoreboards to {0}'.format(pkl_fname))

        return topp
Beispiel #3
0
class NBAComAgent(object):
    '''
    Performs script-like tasks using NBA.com API
    '''
    def __init__(self, db=None, cache_name=None, cookies=None):
        '''
        Arguments:
            cache_name: str for scraper cache_name
            cookies: cookie jar
            db: NBAComPg instance; when omitted, self.db is None and
                self.insert_db is False
        '''
        logging.getLogger(__name__).addHandler(logging.NullHandler())
        self.scraper = NBAComScraper(cache_name=cache_name, cookies=cookies)
        self.parser = NBAComParser()

        # always define self.db so the attribute exists on every instance
        self.db = db if db else None
        self.insert_db = bool(db)

    def _combined_player_boxscores(self, gid):
        '''
        Combines 5 types of boxscores from nba.com API into list of boxscores
        Arguments:
            gid: string game ID, with leading '00'
        Returns:
            list of player boxscores combined - traditional, advanced, misc, scoring, usage
        Examples:
            a = NBAComAgent()
            playerboxes = a._combined_player_boxscores('0020161001')
        '''

        # only the player rows of each boxscore type are needed here
        traditional_players, _, _ = self.parser.boxscore_traditional(
            self.scraper.boxscore_traditional(gid))
        adv_players, _ = self.parser.boxscore_advanced(
            self.scraper.boxscore_advanced(gid))
        misc_players, _ = self.parser.boxscore_misc(
            self.scraper.boxscore_misc(gid))
        scoring_players, _ = self.parser.boxscore_scoring(
            self.scraper.boxscore_scoring(gid))
        usage_players = self.parser.boxscore_usage(
            self.scraper.boxscore_usage(gid))

        # index every boxscore type by player id, then merge them into one dict
        player_lists = (traditional_players, adv_players, misc_players,
                        scoring_players, usage_players)
        by_player_id = [{p['PLAYER_ID']: p for p in plist} for plist in player_lists]
        players = merge(dict(), by_player_id)

        # return a real list, as combined_team_boxscores does; on Python 3
        # .values() is only a view
        return list(players.values())

    def combined_team_boxscores(self, gid):
        '''
        Combines 4 types of team boxscores from nba.com API into list of boxscores
        Arguments:
            gid: string game ID, with leading '00'
        Returns:
            list of team boxscores - combined traditional, advanced, misc, scoring
        Examples:
            a = NBAComAgent()
            teamboxes = a.combined_team_boxscores('0020161001')
        '''
        # fetch + parse each boxscore type; only the team rows are used below
        _, traditional_teams, _ = self.parser.boxscore_traditional(
            self.scraper.boxscore_traditional(gid))
        _, adv_teams = self.parser.boxscore_advanced(
            self.scraper.boxscore_advanced(gid))
        _, misc_teams = self.parser.boxscore_misc(
            self.scraper.boxscore_misc(gid))
        _, scoring_teams = self.parser.boxscore_scoring(
            self.scraper.boxscore_scoring(gid))

        # index each type by team id, then merge them into one dict
        team_lists = (traditional_teams, adv_teams, misc_teams, scoring_teams)
        by_team_id = [{t['TEAM_ID']: t for t in tlist} for tlist in team_lists]
        combined = merge(dict(), by_team_id)
        return list(combined.values())

    def linescores(self):
        '''
        Updates gamesmeta table with game_information
        '''
        #q = """SELECT '00' || game_id, to_char(game_date, 'YYYYmmdd') FROM gamesmeta
        #    WHERE season = (select max(season) from seasons) AND game_date < now()::date AND  q1 IS NULL
        #    ORDER BY game_date DESC;"""
        #q = """SELECT '00' || game_id as gid, to_char(game_date, 'YYYYmmdd') as gd FROM cs_games
        #    WHERE game_date < (CURRENT_TIMESTAMP AT TIME ZONE 'CST')::date AND
        #    game_id NOT IN (SELECT DISTINCT game_id FROM boxv2015)
        #    ORDER BY game_date DESC;"""
        q = """SELECT '00' || game_id as gid, to_char(game_date, 'YYYYmmdd') as gd FROM games
            WHERE game_date < localdate() AND
            season > 2015 AND game_id NOT IN (SELECT DISTINCT game_id FROM boxv2015)
            ORDER BY game_date DESC;"""

        for g in self.db.select_dict(q):
            try:
                content = self.scraper.boxscore_v2015(g['gid'], g['gd'])
                v, h = self.parser.boxscore_v2015(content)
                self.db.insert_dicts([v, h], 'boxv2015')
                logging.info('finished {} - {}'.format(g['gd'], g['gid']))
            except Exception as e:
                logging.error('could not get {}'.format(g))
                logging.exception(e)

    def new_players(self, season):
        '''
        Updates players table with missing players

        Arguments:
            season (str): in YYYY-YY format

        Returns:
            list of players to add to stats.players

        Examples:
            a = NBAComAgent(cache_name='newplayers', cookies=httplib.CookieJar(), db=NBAComPg(...))
            np = a.new_players(season='2015-16')
        '''
        content = self.scraper.players_v2015(season)
        players = self.parser.players_v2015(content)
        currids = set([int(p.get('personId', 0)) for p in players])
        logging.debug(currids)
        allids = set(
            self.db.select_list('SELECT nbacom_player_id from players'))
        missing = currids - allids
        if missing:
            np = [p for p in players if int(p['personId']) in missing]
            if self.insert_db:
                self.db.insert_players_v2015(np)
            return np
        else:
            return None

    def player_boxscores_combined(self):
        '''
        Fetches combined player boxscores for the game ids selected by missing_player_boxscores()

        Each game's boxscores are inserted when self.insert_db is set.

        Returns:
             players (list): player boxscores of all fetched games in one flat list,
                             or None if no game ids were found
        '''
        gids = self.db.select_list(missing_player_boxscores())
        if not gids:
            logging.error('no missing gameids found')
            return None
        logging.info('there are {} missing game boxscores'.format(len(gids)))

        pboxes = []
        for gid in gids:
            logging.info('getting {}'.format(gid))
            box = self._combined_player_boxscores(gid)
            if not box:
                logging.error('no box for {}'.format(gid))
                continue
            if self.insert_db:
                self.db.insert_player_boxscores(box)
            # collect what was fetched whether or not it was inserted
            pboxes.extend(box)
        return pboxes

    def player_gamelogs(self, season, date_from=None, date_to=None):
        '''
        Fetches player_gamelogs and updates player_gamelogs table

        Arguments:
            season: str in YYYY-YY format (2015-16)
            date_from: str in YYYY-mm-dd format
            date_to: str in YYYY-mm-dd format

        Returns:
             players (list): player dictionary of stats + dfs points
        '''
        pgl = self.parser.season_gamelogs(
            self.scraper.season_gamelogs(season, 'P'), 'P')
        if self.insert_db:
            self.db.insert_player_gamelogs(pgl)
        return pgl

    def playerstats(self,
                    season,
                    date_from=None,
                    date_to=None,
                    all_missing=False):
        '''
        Fetches base and advanced playerstats, merges them and optionally inserts them

        With both date_from and date_to, one merged list is built for that range.
        Otherwise, with all_missing, one merged list is built per day returned by
        the missing_playerstats() query, each covering season start through that day.

        Arguments:
            season: str in YYYY-YY format (2015-16)
            date_from: str in YYYY-mm-dd format
            date_to: str in YYYY-mm-dd format
            all_missing: boolean

        Returns:
             list of merged player dicts (date range), or
             dict of day string -> list of merged player dicts (all_missing)

        Raises:
            ValueError: if neither both dates nor all_missing is given

        Examples:
            a = NBAComAgent()
            np = a.playerstats(season='2015-16', date_from='2016-03-01', date_to='2016-03-08')
        '''
        have_range = bool(date_from and date_to)
        if not have_range and not all_missing:
            raise ValueError(
                'need to specify dates or set all_missing to true')

        if have_range:
            base_content = self.scraper.playerstats(
                season, DateFrom=date_from, DateTo=date_to)
            ps_base = self.parser.playerstats(base_content)
            adv_content = self.scraper.playerstats(
                season, DateFrom=date_from, DateTo=date_to,
                MeasureType='Advanced')
            ps_advanced = self.parser.playerstats(adv_content)

            # rows are paired by position; base is listed first in the merge
            ps = []
            for psb, psadv in zip(ps_base, ps_advanced):
                ps.append(merge(dict(), [psb, psadv]))

            if self.insert_db:
                self.db.insert_playerstats(ps, as_of=date_to)
            return ps

        pstats = {}
        start = datetostr(d=season_start(season), site='nba')
        for day in self.db.select_list(missing_playerstats()):
            daystr = datetostr(day, 'nba')
            base_content = self.scraper.playerstats(
                season, DateFrom=start, DateTo=daystr)
            ps_base = self.parser.playerstats(base_content)
            adv_content = self.scraper.playerstats(
                season, DateFrom=start, DateTo=daystr,
                MeasureType='Advanced')
            ps_advanced = self.parser.playerstats(adv_content)

            # rows are paired by position; advanced is listed first in the merge
            ps = []
            for psb, psadv in zip(ps_base, ps_advanced):
                ps.append(merge(dict(), [psadv, psb]))

            pstats[daystr] = ps
            if self.insert_db:
                self.db.insert_playerstats(ps, as_of=daystr)
                logging.info('completed {}'.format(daystr))
        return pstats

    def scoreboards(self, season_start, season_end):
        '''
        Downloads and parses range of scoreboards

        Arguments:
            season_start (str): in %Y-%m-%d format
            season_end (str): in %Y-%m-%d format

        Returns:
             scoreboards (list): one parsed scoreboard per day, in the reverse of
                                 the order date_list returns the days

         Examples:
            a = NBAComAgent()
            sb = a.scoreboards(season_start='2015-10-27', season_end='2016-04-15')
        '''
        def _fetch(day):
            # download and parse the scoreboard of a single day
            game_date = dt.datetime.strftime(day, '%Y-%m-%d')
            raw = self.scraper.scoreboard(game_date=game_date)
            return self.parser.scoreboard(raw, game_date=game_date)

        days = reversed(date_list(season_end, season_start))
        scoreboards = [_fetch(day) for day in days]

        if self.insert_db:
            self.db.insert_scoreboards(scoreboards)
        return scoreboards

    def team_boxscores_combined(self):
        '''
        Fetches combined team boxscores for the game ids selected by missing_team_boxscores()

        Each game's boxscores are inserted when self.insert_db is set.

        Returns:
             tboxes: team boxscores of all fetched games in one flat list,
                     or None if no game ids were found
        '''
        gids = self.db.select_list(missing_team_boxscores())
        if not gids:
            logging.error('no missing gameids found')
            return None
        logging.info('there are {} missing game boxscores'.format(len(gids)))

        tboxes = []
        for gid in gids:
            logging.info('getting {}'.format(gid))
            box = self.combined_team_boxscores(gid)
            if not box:
                logging.error('no box for {}'.format(gid))
                continue
            if self.insert_db:
                self.db.insert_team_boxscores(box)
            # collect what was fetched whether or not it was inserted
            tboxes.extend(box)
        return tboxes

    def team_gamelogs(self, season, date_from=None, date_to=None):
        '''
        Fetches the season's team gamelogs and keeps those missing from the database

        Arguments:
             season (str): in YYYY-YY format (2015-16)
             date_from: accepted but not used by this method
             date_to: accepted but not used by this method

        Returns:
             list: gamelog dicts whose GAME_ID is among self.db.missing_tgl();
                 None when either the parsed gamelogs or the missing ids are empty

        Examples:
            a = NBAComAgent()
            tgl = a.team_gamelogs(season='2015-16')

        '''
        raw = self.scraper.season_gamelogs(season=season,
                                           player_or_team='T')
        parsed = self.parser.season_gamelogs(raw, 'T')
        missing_ids = self.db.missing_tgl()

        # nothing to do unless there are both gamelogs and missing ids
        if not (parsed and missing_ids):
            logging.error('no team gamelogs to insert')
            return None

        wanted = []
        for gamelog in parsed:
            if gamelog.get('GAME_ID') in missing_ids:
                wanted.append(gamelog)

        if self.insert_db:
            self.db.insert_team_gamelogs(wanted)
        return wanted

    def teamstats(self,
                  season,
                  date_from=None,
                  date_to=None,
                  all_missing=False):
        '''
        Fetches base and advanced teamstats, merges them and updates the database

        Arguments:
             season (str): in YYYY-YY format (2015-16)
             date_from (str): in %Y-%m-%d format
             date_to (str): in %Y-%m-%d format
             all_missing (bool): fetch one snapshot per day returned by the
                 missing_teamstats() query, each starting at the season start

        Returns:
             list of merged team dicts when date_from and date_to are given;
             dict mapping day string to such a list when all_missing is set

        Raises:
             ValueError: if neither both dates nor all_missing are given

         Examples:
            a = NBAComAgent()
            ps = a.teamstats(season='2015-16', date_from='2016-03-01', date_to='2016-03-08')
            ps = a.teamstats(season='2015-16', all_missing=True)
        '''
        def merged_stats(first_day, last_day):
            # base request first, then the advanced one, then pair rows by position
            base_rows = self.parser.teamstats(
                self.scraper.teamstats(season,
                                       DateFrom=first_day,
                                       DateTo=last_day))
            advanced_rows = self.parser.teamstats(
                self.scraper.teamstats(season,
                                       DateFrom=first_day,
                                       DateTo=last_day,
                                       MeasureType='Advanced'))
            return [
                merge(dict(), [base, advanced])
                for base, advanced in zip(base_rows, advanced_rows)
            ]

        if date_from and date_to:
            merged = merged_stats(date_from, date_to)
            if self.insert_db:
                self.db.insert_teamstats(merged, as_of=date_to)
            return merged

        if not all_missing:
            raise ValueError(
                'need to specify dates or set all_missing to true')

        by_day = {}
        first_day = datetostr(d=season_start(season), site='nba')
        for day in self.db.select_list(missing_teamstats()):
            day_key = datetostr(day, 'nba')
            merged = merged_stats(first_day, day_key)
            by_day[day_key] = merged
            if not self.insert_db:
                logging.error('did not insert: {}'.format(merged))
                continue
            self.db.insert_teamstats(merged, as_of=day_key)
            logging.debug('teamstats: completed {}'.format(day_key))
        return by_day

    def team_opponent_dashboards(self,
                                 season,
                                 date_from=None,
                                 date_to=None,
                                 all_missing=False):
        '''
        Downloads and parses team opponent dashboards

        Arguments:
            season (str): in YYYY-YY format
            date_from (str): in %Y-%m-%d format
            date_to (str): in %Y-%m-%d format
            all_missing (bool): fetch one dashboard per day returned by the
                missing_team_opponent_dashboard() query, each starting at
                the season start

        Returns:
            the parsed dashboard when date_from and date_to are given;
            dict mapping day string to parsed dashboard when all_missing is set

        Raises:
            ValueError: if neither both dates nor all_missing are given

         Examples:
            a = NBAComAgent()
            topp = a.team_opponent_dashboards('2014-15', all_missing=True)
        '''
        has_range = date_from and date_to
        if not has_range and not all_missing:
            raise ValueError(
                'need to specify dates or set all_missing to true')

        if has_range:
            raw = self.scraper.team_opponent_dashboard(season,
                                                       DateFrom=date_from,
                                                       DateTo=date_to)
            dashboard = self.parser.team_opponent_dashboard(raw)
            if self.insert_db:
                self.db.insert_team_opponent_dashboards(dashboard,
                                                        as_of=date_to)
            return dashboard

        # all_missing: one request per missing day, always from season start
        by_day = {}
        first_day = datetostr(d=season_start(season), site='nba')
        for day in self.db.select_list(missing_team_opponent_dashboard()):
            day_key = datetostr(day, 'nba')
            raw = self.scraper.team_opponent_dashboard(season,
                                                       DateFrom=first_day,
                                                       DateTo=day_key)
            dashboard = self.parser.team_opponent_dashboard(raw)
            if self.insert_db:
                self.db.insert_team_opponent_dashboards(dashboard,
                                                        as_of=day_key)
            by_day[day_key] = dashboard
        return by_day

    def update_player_positions(self):
        '''
        Fills in nbacom_position for players whose position is NULL or empty

        Trying to make sure all position data is current
        Only info in nba.com is PLAYER key, this is only Guard, etc.
        Unclear where the PG, etc. comes from
        TODO: this is not functional yet

        For every player id selected from stats.players2, player info is
        scraped (season hard-coded to '2015-16') and, when it carries a
        POSITION, written back with an UPDATE statement.

        Raises:
            ValueError: if the agent has no database connection
        '''
        # self.db may never have been assigned; treat that like "no database"
        # instead of failing with AttributeError
        db = getattr(self, 'db', None)
        if not db:
            raise ValueError('need database connection to update players')
        q = """SELECT nbacom_player_id FROM stats.players2 WHERE nbacom_position IS NULL or nbacom_position = ''"""
        uq = """UPDATE stats.players2 SET nbacom_position = '{}' WHERE nbacom_player_id = {}"""

        for pid in db.select_list(q):
            logging.debug('getting {}'.format(pid))
            pinfo = self.parser.player_info(
                self.scraper.player_info(pid, '2015-16'))
            if not pinfo:
                # nothing parsed for this player: skip rather than crash on .get
                logging.debug('no player info for {}'.format(pid))
                continue
            position = pinfo.get('POSITION')
            if position:
                # NOTE(review): the statement is built with str.format from
                # scraped data; switch to a parameterized query if
                # self.db.update supports one
                db.update(uq.format(position, pid))
                logging.debug('inserted {}'.format(
                    pinfo.get('DISPLAY_FIRST_LAST')))
Beispiel #4
0
class NBAComAgent(object):
    '''
    Performs script-like tasks using NBA.com API

    Every task follows the same pattern: download with self.scraper, parse
    with self.parser, then write the rows to the database through self.db.
    '''
    def __init__(self,
                 db=None,
                 cache_name='nbacom-agent',
                 cookies=None,
                 table_names=None):
        '''
        Args:
            db (NBAPostgres): instance
            cache_name (str): for scraper cache_name
            cookies: cookie jar
            table_names (dict): Database table names keyed by short code
                ('pgl', 'tgl', ...); codes left out keep their default name

        '''
        logging.getLogger(__name__).addHandler(logging.NullHandler())
        self.scraper = NBAComScraper(cache_name=cache_name, cookies=cookies)
        self.parser = NBAComParser()
        self.db = db
        self.table_names = {
            'pgl': 'player_gamelogs',
            'tgl': 'team_gamelogs',
            'seas': 'season',
            'pl': 'player',
            'ps': 'playerstats_daily',
            'ts': 'teamstats_daily',
            'tod': 'team_opponent_dashboard',
            'pbs': 'player_boxscores_combined',
            'tbs': 'team_boxscores_combined',
            'box2': 'game_boxscores',
            'tm': 'team',
        }
        if table_names:
            # overlay caller names on the defaults so a partial dict does
            # not cause KeyError in the methods below
            self.table_names.update(table_names)

    def combined_boxscores(self, game_ids=None):
        '''
        Combines 5 types of boxscores (traditional, advanced, misc, scoring,
        usage) per game and inserts the combined rows into the database

        Args:
            game_ids (iterable): game IDs to fetch; when empty or None, the
                union of the ids returned by the missing_player_boxscores()
                and missing_team_boxscores() queries is used

        Returns:
            tuple: list of combined player boxscores, list of combined team
                boxscores, accumulated over all processed games

        '''
        player_boxscores = []
        team_boxscores = []
        if not game_ids:
            # need union of missing player and team
            game_ids = set(self.db.select_list(missing_player_boxscores())) | \
                   set(self.db.select_list(missing_team_boxscores()))

        for game_id in game_ids:
            # make sure game_id in correct format when requesting boxscores
            gid = nbacom_idstr(game_id)
            logging.info(
                'agent.nbacom.combined_boxscores: getting {}'.format(gid))

            # get 5 types of boxscores
            traditional_players, traditional_teams, _starter_bench = \
                self.parser.boxscore_traditional(self.scraper.boxscore_traditional(gid))
            adv_players, adv_teams = self.parser.boxscore_advanced(
                self.scraper.boxscore_advanced(gid))
            misc_players, misc_teams = self.parser.boxscore_misc(
                self.scraper.boxscore_misc(gid))
            scoring_players, scoring_teams = self.parser.boxscore_scoring(
                self.scraper.boxscore_scoring(gid))
            usage_players = self.parser.boxscore_usage(
                self.scraper.boxscore_usage(gid))

            # combine player and team boxscores, respectively: index each
            # boxscore type by id, then merge the per-id dicts
            players_combined = list(
                merge_many(dict(), [
                    {row['PLAYER_ID']: row for row in rows}
                    for rows in (traditional_players, adv_players,
                                 misc_players, scoring_players, usage_players)
                ]).values())
            teams_combined = list(
                merge_many(dict(), [
                    {row['TEAM_ID']: row for row in rows}
                    for rows in (traditional_teams, adv_teams, misc_teams,
                                 scoring_teams)
                ]).values())

            # now add to the database
            self.db.safe_insert_dicts(player_boxscores_table(players_combined),
                                      self.table_names['pbs'])
            self.db.safe_insert_dicts(team_boxscores_table(teams_combined),
                                      self.table_names['tbs'])

            # keep the combined rows; previously the returned lists were
            # never filled and the method always returned ([], [])
            player_boxscores.extend(players_combined)
            team_boxscores.extend(teams_combined)
        return (player_boxscores, team_boxscores)

    def game_boxscores(self, game_ids=None):
        '''
        Updates table with game_information

        Args:
            game_ids (list): dicts with keys 'gid' and 'gd'; when empty or
                None, the rows of the missing_game_boxscores() query are used

        Returns:
            list: the two parsed boxscore dicts of every game that was
                fetched and inserted, in game order; None if there are none
        '''
        if not game_ids:
            game_ids = self.db.select_dict(missing_game_boxscores())

        boxscores = []
        for g in game_ids or []:
            try:
                content = self.scraper.boxscore_v2015(g['gid'], g['gd'])
                v, h = self.parser.boxscore_v2015(content)
                self.db._insert_dict(v, self.table_names['box2'])
                self.db._insert_dict(h, self.table_names['box2'])
            except Exception as e:
                # best effort: log the failure and move on to the next game
                # instead of abandoning the remaining games
                logging.error('could not get {}'.format(g))
                logging.exception(e)
                continue
            logging.info('finished {} - {}'.format(g['gd'], g['gid']))
            boxscores.extend([v, h])
        # None (not an empty list) keeps the old "nothing retrieved" result
        return boxscores or None

    def gleague_players(self, year):
        '''
        Updates player table with g-league players

        Args:
            year (int): 2017, etc.

        Returns:
            the parsed players as returned by self.parser.gleague_players

        '''
        content = self.scraper.gleague_players(year)
        players = self.parser.gleague_players(content)
        for glp in gleague_player_table(players):
            self.db._insert_dict(glp, self.table_names['pl'])
        return players

    def player_gamelogs(self, season_code, date_from=None, date_to=None):
        '''
        Fetches player_gamelogs and updates player_gamelogs table

        Args:
            season_code (str): in YYYY-YY format (2017-18)
            date_from (str): in YYYY-mm-dd format, default None
            date_to (str): in YYYY-mm-dd format, default None

        Returns:
             list: of player dict (all parsed gamelogs, not only the
                 newly inserted ones)

        '''
        # get all player gamelogs from nba.com
        content = self.scraper.season_gamelogs(season_code,
                                               'P',
                                               date_from=date_from,
                                               date_to=date_to)
        pgl = self.parser.season_gamelogs(content, 'P')

        # compare to gamelogs in database; the database game id is
        # prefixed with '00' to build keys comparable to the nba.com ones
        dbpgl = self.db.select_dict(
            'SELECT nbacom_game_id, nbacom_player_id FROM cs_player_gamelogs')
        dbpgl_s = {
            '00{}-{}'.format(gl['nbacom_game_id'], gl['nbacom_player_id'])
            for gl in dbpgl
        }

        # only try to insert missing gamelogs
        to_ins = [
            gl for gl in pgl
            if '{}-{}'.format(gl['GAME_ID'], gl['PLAYER_ID']) not in dbpgl_s
        ]
        # honour a configured table name, as the other methods do
        self.db.insert_dicts(player_gamelogs_table(to_ins),
                             self.table_names.get('pgl', 'player_gamelogs'))
        return pgl

    def playerstats(self,
                    season_code,
                    per_mode='Totals',
                    date_from=None,
                    date_to=None,
                    all_missing=False):
        '''
        Fetches playerstats and updates playerstats table

        Args:
            season_code (str): in YYYY-YY format, e.g. 2017-18
            per_mode (str): 'Totals', 'PerGame', etc.
            date_from (str): in YYYY-mm-dd format, default None
            date_to (str): in YYYY-mm-dd format, default None
            all_missing (bool): default False; fetch one snapshot per day
                returned by the missing_playerstats(per_mode) query

        Returns:
             None

        Raises:
             ValueError: if neither both dates nor all_missing are given

        '''
        if date_from and date_to:
            # per_mode is forwarded to the parser here as well, matching
            # the all_missing branch
            ps_base = self.parser.playerstats(
                self.scraper.playerstats(season_code,
                                         per_mode=per_mode,
                                         DateFrom=date_from,
                                         DateTo=date_to),
                per_mode=per_mode)
            ps_advanced = self.parser.playerstats(
                self.scraper.playerstats(season_code,
                                         per_mode=per_mode,
                                         DateFrom=date_from,
                                         DateTo=date_to,
                                         MeasureType='Advanced'),
                per_mode)
            ps = [
                merge_two(psb, psadv)
                for psb, psadv in zip(ps_base, ps_advanced)
            ]
            vals = playerstats_table(ps, date_to, per_mode)
            logging.info(vals)
            self.db.insert_dicts(vals, self.table_names['ps'])
        elif all_missing:
            start = datetostr(d=season_start(season_code=season_code),
                              fmt='nba')
            for day in self.db.select_list(missing_playerstats(per_mode)):
                daystr = datetostr(day, 'nba')
                base_content = self.scraper.playerstats(season_code,
                                                        per_mode=per_mode,
                                                        DateFrom=start,
                                                        DateTo=daystr)
                ps_base = self.parser.playerstats(base_content,
                                                  per_mode=per_mode)
                adv_content = self.scraper.playerstats(season_code,
                                                       per_mode=per_mode,
                                                       DateFrom=start,
                                                       DateTo=daystr,
                                                       MeasureType='Advanced')
                ps_advanced = self.parser.playerstats(adv_content, per_mode)
                # NOTE(review): argument order is (advanced, base) here but
                # (base, advanced) in the date-range branch; confirm which
                # one merge_two expects
                ps = [
                    merge_two(psadv, psb)
                    for psb, psadv in zip(ps_base, ps_advanced)
                ]
                self.db.insert_dicts(playerstats_table(ps, daystr, per_mode),
                                     self.table_names['ps'])
                logging.info('completed {}'.format(daystr))
        else:
            raise ValueError(
                'agent.nbacom.playerstats: need to specify dates or set all_missing to true'
            )

    def refresh_materialized(self):
        '''
        Calls postgres function to refresh all materialized views.

        Failures are logged, not raised.

        '''
        refreshq = """SELECT RefreshAllMaterializedViews('*', true);"""
        try:
            self.db.execute(refreshq)
        except Exception as e:
            logging.exception(
                'could not refresh materialized views: {}'.format(e))

    def team_gamelogs(self, season_code, date_from=None, date_to=None):
        '''
        Fetches team_gamelogs and updates team_gamelogs table

        Args:
            season_code (str): in YYYY-YY format (2017-18)
            date_from (str): in YYYY-mm-dd format, default None
            date_to (str): in YYYY-mm-dd format, default None

        Returns:
            list: of dict (all parsed gamelogs, not only the newly
                inserted ones)

        '''
        content = self.scraper.season_gamelogs(season_code, 'T', date_from,
                                               date_to)
        tgl = self.parser.season_gamelogs(content, 'T')

        # compare team gamelogs to those already in database
        # NOTE(review): player_gamelogs prefixes the database game id with
        # '00' before comparing; this query does not - confirm the keys
        # really line up with the nba.com GAME_ID format
        dbtgl_s = set(
            self.db.select_list(
                """SELECT CONCAT(nbacom_game_id, '-', nbacom_team_id) FROM cs_team_gamelogs"""
            ))

        # only try to insert missing gamelogs
        to_ins = [
            gl for gl in tgl
            if '{}-{}'.format(gl['GAME_ID'], gl['TEAM_ID']) not in dbtgl_s
        ]
        for item in team_gamelogs_table(to_ins):
            self.db._insert_dict(item, self.table_names['tgl'])
        return tgl

    def team_opponent_dashboards(self,
                                 season_code,
                                 per_mode,
                                 date_from=None,
                                 date_to=None,
                                 all_missing=False):
        '''
        Downloads and parses range of team_opponents

        Arguments:
            season_code (str): in YYYY-YY format
            per_mode (str): 'Totals', 'PerGame', etc.
            date_from (str): in %Y-%m-%d format
            date_to (str): in %Y-%m-%d format
            all_missing (bool): get all missing dashboards, one per day
                returned by the missing_team_opponent_dashboard(per_mode)
                query, each starting at the season start

        Returns:
            None when date_from and date_to are given;
            dict mapping day string to parsed dashboard when all_missing is set

        Raises:
            ValueError: if neither both dates nor all_missing are given

        '''
        if date_from and date_to:
            content = self.scraper.team_opponent_dashboard(season_code,
                                                           per_mode=per_mode,
                                                           date_from=date_from,
                                                           date_to=date_to)
            # per_mode is forwarded to the parser and table builder, as in
            # the all_missing branch, so a non-default mode is not dropped
            topp = self.parser.team_opponent_dashboard(content, per_mode)
            self.db.insert_dicts(
                team_opponent_dashboards_table(topp, date_to, per_mode),
                self.table_names.get('tod'))
        elif all_missing:
            topps = {}
            start = datetostr(d=season_start(season_code=season_code),
                              fmt='nba')
            for day in self.db.select_list(
                    missing_team_opponent_dashboard(per_mode)):
                daystr = datetostr(day, 'nba')
                logging.info('starting dashboards for {}'.format(daystr))
                # the range must end at the day being filled; date_to is
                # not set in this branch
                content = self.scraper.team_opponent_dashboard(season_code,
                                                               per_mode=per_mode,
                                                               date_from=start,
                                                               date_to=daystr)
                topp = self.parser.team_opponent_dashboard(content, per_mode)
                self.db.insert_dicts(
                    team_opponent_dashboards_table(topp, daystr, per_mode),
                    self.table_names.get('tod'))
                topps[daystr] = topp
            return topps
        else:
            raise ValueError(
                'need to specify dates or set all_missing to true')

    def _merged_teamstats(self, season_code, per_mode, date_from, date_to):
        '''
        Scrapes and parses base and advanced teamstats for one date range
        and merges them row by row (helper for teamstats)
        '''
        content_base = self.scraper.teamstats(season_code,
                                              per_mode=per_mode,
                                              date_from=date_from,
                                              date_to=date_to)
        ts_base = self.parser.teamstats(content_base, per_mode)
        content_adv = self.scraper.teamstats(season_code,
                                             per_mode=per_mode,
                                             date_from=date_from,
                                             date_to=date_to,
                                             MeasureType='Advanced')
        ts_advanced = self.parser.teamstats(content_adv, per_mode)
        return [
            merge_two(tsb, tsadv)
            for tsb, tsadv in zip(ts_base, ts_advanced)
        ]

    def teamstats(self,
                  season_code,
                  per_mode,
                  date_from=None,
                  date_to=None,
                  all_missing=False):
        '''
        Fetches teamstats and updates database table

        Args:
             season_code (str): in YYYY-YY format (2015-16)
             per_mode (str): 'Totals', 'PerGame', etc.
             date_from (str): in %Y-%m-%d format
             date_to (str): in %Y-%m-%d format
             all_missing (bool): looks for all missing teamstats from season

        Returns:
             None; the merged basic and advanced stats are written to the
             database only

        Raises:
             ValueError: if neither both dates nor all_missing are given

        '''
        if date_from and date_to:
            ts_merged = self._merged_teamstats(season_code, per_mode,
                                               date_from, date_to)
            self.db.insert_dicts(teamstats_table(ts_merged, date_to, per_mode),
                                 self.table_names['ts'])
        elif all_missing:
            start = datetostr(d=season_start(season_code=season_code),
                              fmt='nba')
            for day in self.db.select_list(missing_teamstats(per_mode)):
                logging.info('teamstats: getting {}'.format(day))
                daystr = datetostr(day, 'nba')
                ts = self._merged_teamstats(season_code, per_mode, start,
                                            daystr)
                self.db.insert_dicts(teamstats_table(ts, daystr, per_mode),
                                     self.table_names['ts'])
                logging.debug('teamstats: completed {}'.format(daystr))
        else:
            raise ValueError(
                'need to specify dates or set all_missing to true')
Beispiel #5
0
class NBAComParser_test(unittest.TestCase):
    '''
    Tests NBAComParser against saved JSON fixtures

    Each test first looks for a fixture file in the working directory and
    only falls back to a live NBAComScraper request when the file is
    missing or empty.
    '''
    def setUp(self):
        logging.getLogger(__name__).addHandler(logging.NullHandler())
        self.nbs = NBAComScraper()
        self.nbp = NBAComParser()

    def _get_from_file(self, fn):
        '''Returns the content of file fn, or None if it is missing or unreadable'''
        if os.path.isfile(fn):
            try:
                with open(fn) as x:
                    return x.read()
            except Exception:
                # best effort: a broken fixture just means "use the scraper"
                logging.exception('could not read from file ' + fn)
        return None

    # fixture accessors: one per saved response, file name overridable

    def _one_player_gamelogs(self, fn='one_player_gamelogs.json'):
        return self._get_from_file(fn)

    def _one_team_gamelogs(self, fn='one_team_gamelogs.json'):
        return self._get_from_file(fn)

    def _player_boxscore_traditional(self, fn='boxscore_traditional.json'):
        return self._get_from_file(fn)

    def _player_gamelogs(self, fn='player_gamelogs.json'):
        return self._get_from_file(fn)

    def _player_info(self, fn='player_info.json'):
        return self._get_from_file(fn)

    def _players(self, fn='players.json'):
        return self._get_from_file(fn)

    def _playerstats(self, fn='player_stats.json'):
        return self._get_from_file(fn)

    def _scoreboard(self, fn='scoreboard.json'):
        return self._get_from_file(fn)

    def _team_dashboard(self, fn='team_dashboard.json'):
        return self._get_from_file(fn)

    def _team_gamelogs(self, fn='team_game_logs.json'):
        return self._get_from_file(fn)

    def _team_opponent_dashboard(self, fn='team_opponent_dashboard.json'):
        return self._get_from_file(fn)

    def _teams(self, fn='teams.js'):
        return self._get_from_file(fn)

    def _teamstats(self, fn='teamstats.json'):
        return self._get_from_file(fn)

    # START tests

    def test_boxscore(self):
        content = self._player_boxscore_traditional()
        if not content:
            content = self.nbs.boxscore_traditional('0021500001')
        players, teams, starter_bench = self.nbp.boxscore_traditional(content)
        self.assertIsInstance(players, list)
        self.assertIsInstance(teams, list)
        self.assertIsInstance(players[0], dict)
        self.assertIn('MIN_PLAYED', players[0],
                      "players should have min_played")
        self.assertIn('MIN_PLAYED', players[8],
                      "players should have min_played")
        self.assertIn('MIN', teams[0], "teams should have min")
        self.assertIn('MIN', teams[1], "teams should have min")

    def test_one_player_gamelogs(self):
        content = self._one_player_gamelogs()
        if not content:
            content = self.nbs.one_player_gamelogs('203083', '2015-16')
        gls = self.nbp.one_player_gamelogs(content)
        self.assertIsInstance(gls, list)
        self.assertIsInstance(gls[0], dict)
        logging.info(gls)

    def test_one_team_gamelogs(self):
        content = self._one_team_gamelogs()
        if not content:
            team_id = '1610612765'
            season = '2015-16'
            content = self.nbs.one_team_gamelogs(team_id, season)
        gls = self.nbp.one_team_gamelogs(content)
        self.assertIsInstance(gls, list)
        self.assertIsInstance(gls[0], dict)

    def test_player_info(self):
        content = self._player_info()
        if not content:
            content = self.nbs.player_info('201939', '2015-16')
        pinfo = self.nbp.player_info(content)
        self.assertIsInstance(pinfo, dict)

    def test_players(self):
        content = self._players()
        if not content:
            content = self.nbs.players(season='2016-17', cs_only='1')
        ps = self.nbp.players(content)
        self.assertIsInstance(ps, list)
        self.assertIsNotNone(ps[0], 'ps should not be none')

    def test_playerstats(self):
        # statdate and the parse step sit outside the fallback branch so
        # the assertions also work when the fixture file exists
        statdate = '2016-01-20'
        content = self._playerstats()
        if not content:
            content = self.nbs.playerstats('2015-16')
        ps = self.nbp.playerstats(content, statdate)
        self.assertIsInstance(ps, list)
        self.assertEqual(ps[0].get('STATDATE'), statdate)

    def test_season_gamelogs(self):
        team_content = self._team_gamelogs()
        player_content = self._player_gamelogs()
        if not team_content:
            team_content = self.nbs.season_gamelogs('2015-16', 'T')
        if not player_content:
            player_content = self.nbs.season_gamelogs('2015-16', 'P')
        team_gl = self.nbp.season_gamelogs(team_content, 'T')
        player_gl = self.nbp.season_gamelogs(player_content, 'P')
        self.assertIsInstance(player_gl, list)
        self.assertIsInstance(team_gl, list)
        self.assertIn('GAME_ID', player_gl[0], "players should have game_id")
        self.assertIn('GAME_ID', team_gl[0], "teams should have game_id")

    def test_team_dashboard(self):
        content = self._team_dashboard()
        if not content:
            team_id = '1610612765'
            season = '2015-16'
            content = self.nbs.team_dashboard(team_id, season)
        td = self.nbp.team_dashboard(content)
        self.assertIsInstance(td, dict)
        self.assertIn('overall', td, "dashboard has overall")

    def test_team_opponent_dashboard(self):
        content = self._team_opponent_dashboard()
        if not content:
            season = '2015-16'
            content = self.nbs.team_opponent_dashboard(season)
        td = self.nbp.team_opponent_dashboard(content)
        self.assertIsInstance(td, list)
        self.assertIsNotNone(td[0], 'td should not be None')

    def test_teams(self):
        content = self._teams()
        if not content:
            content = self.nbs.teams()
        tms = self.nbp.teams(content)
        self.assertIsInstance(tms, dict)
        self.assertIsNotNone(tms, 'tms should not be None')

    def test_teamstats(self):
        content = self._teamstats()
        if not content:
            season = '2015-16'
            content = self.nbs.teamstats(season)
        ts = self.nbp.teamstats(content)
        self.assertIsInstance(ts, list)
        self.assertIsNotNone(ts[0], 'ts should not be None')
Beispiel #6
0
class NBAComAgent(object):
    '''
    Performs script-like tasks using NBA.com API
    '''
    def __init__(self, db=None, cache_name=None, cookies=None):
        '''
        Creates the scraper and parser and records the optional database handle.

        Arguments:
            db: NBAComPg instance (optional); when truthy, insert_db is set
                so that agent methods also write their results to it
            cache_name: str for scraper cache_name
            cookies: cookie jar
        '''
        self.scraper = NBAComScraper(cache_name=cache_name, cookies=cookies)
        self.parser = NBAComParser()
        # Bind self.db unconditionally: previously the attribute did not exist
        # at all when no database was supplied, so any method reading self.db
        # failed with AttributeError instead of seeing a falsy value.
        self.db = db
        self.insert_db = bool(db)

    def _merge(self, merge_dico, dico_list):
        '''
        See http://stackoverflow.com/questions/28838291/merging-multiple-dictionaries-in-python
        Args:
            merge_dico:
            dico_list:
        Returns:
            merged dictionary
        '''
        for dico in dico_list:
            for key, value in dico.items():
                if type(value) == type(dict()):
                    merge_dico.setdefault(key, dict())
                    self._merge(merge_dico[key], [value])
                else:
                    merge_dico[key] = value
        return merge_dico

    def combined_player_boxscores(self, gid):
        '''
        Combines 5 types of boxscores from nba.com API into list of boxscores
        Arguments:
            gid: string game ID, with leading '00'
        Returns:
            list of player boxscores combined - traditional, advanced, misc, scoring, usage
        Examples:
            a = NBAComAgent()
            playerboxes = a.combined_player_boxscores('0020161001')
        '''
        traditional_players, traditional_teams, traditional_starter_bench = self.parser.boxscore_traditional(
            self.scraper.boxscore_traditional(gid))
        adv_players, adv_team = self.parser.boxscore_advanced(
            self.scraper.boxscore_advanced(gid))
        misc_players, misc_team = self.parser.boxscore_misc(
            self.scraper.boxscore_misc(gid))
        scoring_players, scoring_team = self.parser.boxscore_scoring(
            self.scraper.boxscore_scoring(gid))
        usage_players = self.parser.boxscore_usage(
            self.scraper.boxscore_usage(gid))

        # now need to combine player and team boxscores
        players = self._merge(
            dict(),
            [{t['PLAYER_ID']: t
              for t in traditional_players},
             {t['PLAYER_ID']: t
              for t in adv_players}, {t['PLAYER_ID']: t
                                      for t in misc_players},
             {t['PLAYER_ID']: t
              for t in scoring_players},
             {t['PLAYER_ID']: t
              for t in usage_players}])
        return list(players.values())

    def combined_team_boxscores(self, gid):
        '''
        Combines 5 types of boxscores from nba.com API into list of boxscores
        Arguments:
            gid: string game ID, with leading '00'
        Returns:
            list of team boxscores - combined traditional, advanced, misc, scoring
        Examples:
            a = NBAComAgent()
            teamboxes = a.combined_team_boxscores('0020161001')
        '''
        traditional_players, traditional_teams, traditional_starter_bench = self.parser.boxscore_traditional(
            self.scraper.boxscore_traditional(gid))
        adv_players, adv_teams = self.parser.boxscore_advanced(
            self.scraper.boxscore_advanced(gid))
        misc_players, misc_teams = self.parser.boxscore_misc(
            self.scraper.boxscore_misc(gid))
        scoring_players, scoring_teams = self.parser.boxscore_scoring(
            self.scraper.boxscore_scoring(gid))

        # now need to combine player and team boxscores
        teams = self._merge(
            dict(),
            [{t['TEAM_ID']: t
              for t in traditional_teams},
             {t['TEAM_ID']: t
              for t in adv_teams}, {t['TEAM_ID']: t
                                    for t in misc_teams},
             {t['TEAM_ID']: t
              for t in scoring_teams}])
        return list(teams.values())

    def new_players(self, season):
        '''
        Updates players table with missing players

        Arguments:
            season (str): in YYYY-YY format
        Returns:
            list of players to add to stats.players
        Examples:
            a = NBAComAgent(cache_name='newplayers', cookies=httplib.CookieJar(), db=NBAComPg(...))
            np = a.new_players(season='2015-16')
        '''
        content = self.scraper.players(season, cs_only=1)
        players = self.parser.players(content)
        currids = set([int(p.get('PERSON_ID', 0)) for p in players])
        allids = set(
            self.db.select_list('SELECT nbacom_player_id from players'))
        missing = currids - allids
        if missing:
            np = [
                self.parser.player_info(self.scraper.player_info(pid, season))
                for pid in missing
            ]
            if self.insert_db:
                self.db.insert_players(np)
            return np
        else:
            return None

    def player_gamelogs(self, season, date_from=None, date_to=None):
        '''
        Fetches player_gamelogs and updates player_gamelogs table

        Arguments:
            season: str in YYYY-YY format (2015-16)
            date_from: str in YYYY-mm-dd format
            date_to: str in YYYY-mm-dd format
            insert_db: add list to database

        Returns:
             players (list): player dictionary of stats + dfs points

        Examples:
            a = NBAComAgent()
            np = a.player_gamelogs(season='2015-16', date_from='2016-03-01', date_to='2016-03-08', insert_db=True)
        '''
        # mpgl -> list of game_ids for which there are no player gamelogs
        # filter gamelogs to those from missing game_ids
        pgl = self.parser.season_gamelogs(
            self.scraper.season_gamelogs(season, 'P'), 'P')
        mpgl = self.db.missing_pgl()
        if pgl and mpgl:
            pgl = [gl for gl in pgl if gl.get('GAME_ID', None) in mpgl]
            # now make sure you have no new players
            currids = set([int(p.get('PERSON_ID', 0)) for p in pgl])
            allids = set(
                self.db.select_list('SELECT nbacom_player_id from players'))
            if self.insert_db:
                self.db.insert_players([
                    self.parser.player_info(
                        self.scraper.player_info(pid, season))
                    for pid in currids - allids
                ])
                self.db.insert_player_gamelogs(pgl)
            return pgl
        else:
            return None

    def playerstats(self,
                    season,
                    date_from=None,
                    date_to=None,
                    all_missing=False):
        '''
        Fetches playerstats and updates player_gamelogs table

        Arguments:
            season: str in YYYY-YY format (2015-16)
            date_from: str in YYYY-mm-dd format
            date_to: str in YYYY-mm-dd format
            all_missing: boolean

        Returns:
             players (list): player dictionary of stats + dfs points

        Examples:
            a = NBAComAgent()
            np = a.playerstats(season='2015-16', date_from='2016-03-01', date_to='2016-03-08')
        '''
        if date_from and date_to:
            ps_base = self.parser.playerstats(
                self.scraper.playerstats(season,
                                         DateFrom=date_from,
                                         DateTo=date_to))
            ps_advanced = self.parser.playerstats(
                self.scraper.playerstats(season,
                                         DateFrom=date_from,
                                         DateTo=date_to,
                                         MeasureType='Advanced'))
            ps = [
                self._merge(dict(), [psb, psadv])
                for psb, psadv in zip(ps_base, ps_advanced)
            ]
            if self.insert_db:
                self.db.insert_playerstats(ps, as_of=date_to)
            return ps
        elif all_missing:
            pstats = {}
            start = datetostr(d=season_start(season), site='nba')
            for day in self.db.select_list(
                    'SELECT game_date FROM missing_playerstats'):
                daystr = datetostr(day, 'nba')
                ps_base = self.parser.playerstats(
                    self.scraper.playerstats(season,
                                             DateFrom=start,
                                             DateTo=daystr))
                ps_advanced = self.parser.playerstats(
                    self.scraper.playerstats(season,
                                             DateFrom=start,
                                             DateTo=daystr,
                                             MeasureType='Advanced'))
                ps = [
                    self._merge(dict(), [psb, psadv])
                    for psb, psadv in zip(ps_base, ps_advanced)
                ]
                pstats[daystr] = ps
                if self.insert_db:
                    self.db.insert_playerstats(ps, as_of=daystr)
            return pstats
        else:
            raise ValueError(
                'need to specify dates or set all_missing to true')

    def scoreboards(self, season_start, season_end):
        '''
        Downloads and parses the scoreboard for every day of a date range
        and, when insert_db is set, stores them via db.insert_scoreboards.

        Arguments:
            season_start (str): in %Y-%m-%d format
            season_end (str): in %Y-%m-%d format

        Returns:
             scoreboards (list): one parsed scoreboard per day

         Examples:
            a = NBAComAgent()
            sb = a.scoreboards(season_start='2015-10-27', season_end='2016-04-15')
        '''
        # days come from date_list(season_end, season_start), walked in
        # reverse; each is formatted lazily right before it is scraped
        day_strings = (
            dt.datetime.strftime(day, '%Y-%m-%d')
            for day in reversed(date_list(season_end, season_start))
        )
        parsed = [
            self.parser.scoreboard(
                self.scraper.scoreboard(game_date=day_string),
                game_date=day_string)
            for day_string in day_strings
        ]

        if self.insert_db:
            self.db.insert_scoreboards(parsed)
        return parsed

    def team_gamelogs(self, season, date_from=None, date_to=None):
        '''
        Fetches team_gamelogs and updates cs_team_gamelogs table

        Arguments:
             season (str): in YYYY-YY format (2015-16)

        Returns:
             team_gl (list): player dictionary of stats

        Examples:
            a = NBAComAgent()
            tgl = a.team_gamelogs(season='2015-16', date_from='2016-03-01', date_to='2016-03-08', insert_db=True)

        '''
        content = self.scraper.season_gamelogs(season=season,
                                               player_or_team='T')
        tgl = self.parser.season_gamelogs(content, 'T')
        mtgl = self.db.missing_tgl()
        if tgl and mtgl:
            toins = [gl for gl in tgl if gl.get('GAME_ID', None) in mtgl]
            if self.insert_db:
                self.db.insert_team_gamelogs(toins)
            return toins
        else:
            logging.error('no team gamelogs to insert')

    def teamstats(self,
                  season,
                  date_from=None,
                  date_to=None,
                  all_missing=False):
        '''
        Fetches teamstats and updates database table

        Arguments:
             season (str): in YYYY-YY format (2015-16)
             date_from (str): in %Y-%m-%d format, default beginning of season
             date_from (str): in %Y-%m-%d format, default yesterday
             all_missing: boolean

        Returns:
             list of team dictionary of basic and advanced stats

         Examples:
            a = NBAComAgent()
            ps = a.teamstats(season='2015-16', date_from='2016-03-01', date_to='2016-03-08')
            ps = a.teamstats(season='2015-16', all_missing=True)
        '''
        if date_from and date_to:
            ts_base = self.parser.teamstats(
                self.scraper.teamstats(season,
                                       DateFrom=date_from,
                                       DateTo=date_to))
            ts_advanced = self.parser.teamstats(
                self.scraper.teamstats(season,
                                       DateFrom=date_from,
                                       DateTo=date_to,
                                       MeasureType='Advanced'))
            ts = [
                self._merge(dict(), [psb, psadv])
                for psb, psadv in zip(ts_base, ts_advanced)
            ]
            if self.insert_db:
                self.db.insert_teamstats(ts, as_of=date_to)
            return ts
        elif all_missing:
            tstats = {}
            start = datetostr(d=season_start(season), site='nba')
            for day in self.db.select_list(
                    'SELECT game_date FROM missing_teamstats'):
                daystr = datetostr(day, 'nba')
                ts_base = self.parser.teamstats(
                    self.scraper.teamstats(season,
                                           DateFrom=start,
                                           DateTo=daystr))
                ts_advanced = self.parser.teamstats(
                    self.scraper.teamstats(season,
                                           DateFrom=start,
                                           DateTo=daystr,
                                           MeasureType='Advanced'))
                ts = [
                    self._merge(dict(), [psb, psadv])
                    for psb, psadv in zip(ts_base, ts_advanced)
                ]
                tstats[daystr] = ts
                if self.insert_db:
                    self.db.insert_teamstats(ts, as_of=daystr)
                    logging.info('teamstats: completed {}'.format(daystr))
                else:
                    logging.error('did not insert: {}'.format(ts))
            return tstats
        else:
            raise ValueError(
                'need to specify dates or set all_missing to true')

    def team_opponent_dashboards(self,
                                 season,
                                 date_from=None,
                                 date_to=None,
                                 all_missing=False):
        '''
        Downloads and parses range of team_opponents

        Arguments:
            season (str): in YYYY-YY format
            date_from (str): in %Y-%m-%d format, default is actual start of season
            date_to (str): in %Y-%m-%d format, default is actual end of season
            all_missing (bool): get all missing dashboards

        Returns:
             topp (list): dicts

         Examples:
            a = NBAComAgent()
            topp = a.team_opponent_dashboards('2014-15')
        '''
        if date_from and date_to:
            content = self.scraper.team_opponent_dashboard(season,
                                                           DateFrom=date_from,
                                                           DateTo=date_to)
            topp = self.parser.team_opponent_dashboard(content)
            if self.insert_db:
                self.db.insert_team_opponent_dashboards(topp, as_of=date_to)
            return topp

        elif all_missing:
            topps = {}
            start = datetostr(d=season_start(season), site='nba')
            for day in self.db.select_list(
                    'SELECT * FROM missing_team_opponent_dashboard'):
                daystr = datetostr(day, 'nba')
                content = self.scraper.team_opponent_dashboard(season,
                                                               DateFrom=start,
                                                               DateTo=daystr)
                topp = self.parser.team_opponent_dashboard(content)
                if self.insert_db:
                    self.db.insert_team_opponent_dashboards(topp, as_of=daystr)
                topps[daystr] = topp
            return topps

        else:
            raise ValueError(
                'need to specify dates or set all_missing to true')

    def update_player_positions(self):
        '''
        Trying to make sure all position data is current
        Only info in nba.com is PLAYER key, this is only Guard, etc.
        Unclear where the PG, etc. comes from
        TODO: this is not functional yet
        '''
        if not self.db:
            raise ValueError('need database connection to update players')
        q = """SELECT nbacom_player_id FROM stats.players2 WHERE nbacom_position IS NULL or nbacom_position = ''"""
        uq = """UPDATE stats.players2 SET nbacom_position = '{}' WHERE nbacom_player_id = {}"""

        for pid in self.db.select_list(q):
            logging.info('getting {}'.format(pid))
            pinfo = self.parser.player_info(
                self.scraper.player_info(pid, '2015-16'))
            if pinfo.get('POSITION'):
                self.db.update(uq.format(pinfo.get('POSITION'), pid))
                logging.info('inserted {}'.format(
                    pinfo.get('DISPLAY_FIRST_LAST')))