def update_simulate(db):
    """
    Runs one pass of the simulation over the games still being simulated.

    Inside a single transaction, every id currently listed in
    `_simulate['gsis_ids']` is turned into a game object with
    `game_from_id_simulate`, logged and saved.

    Returns `True` when `_simulate['gsis_ids']` is empty once the pass has
    finished. Otherwise `_simulate['drives']` is incremented by one and
    `False` is returned.
    """
    with nfldb.Tx(db) as cursor:
        log('Simulating %d games...' % len(_simulate['gsis_ids']))
        for gsis_id in _simulate['gsis_ids']:
            game = game_from_id_simulate(cursor, gsis_id)
            log('\t%s' % game)
            game._save(cursor)
        log('done.')

    # The list is re-read here rather than cached before the loop.
    # NOTE(review): presumably `game_from_id_simulate` drops ids from it as
    # games finish -- confirm against its definition.
    if _simulate['gsis_ids']:
        _simulate['drives'] += 1
        return False
    return True
def update_game_schedules(db):
    """
    Upserts the rows of every game listed in `nflgame.sched.games`.

    The nflgame schedule is refreshed first. Then, in one transaction with
    the tables locked, each scheduled game is loaded with `game_from_id`
    and all of its rows are upserted.
    """
    update_nflgame_schedules()

    log('Updating all game schedules... ', end='')
    with nfldb.Tx(db) as cursor:
        lock_tables(cursor)
        for gsis_id in nflgame.sched.games:
            rows = game_from_id(cursor, gsis_id)._rows
            for table, prim, vals in rows:
                nfldb.db._upsert(cursor, table, vals, prim)
    log('done.')
def update_current_week_schedule(db):
    """
    Upserts the rows of the games scheduled for the current week.

    The nflgame schedule is refreshed, the current (phase, year, week) is
    read with `nfldb.current`, and every scheduled game matching all three
    values is loaded with `game_from_id` and upserted in one transaction.
    """
    update_nflgame_schedules()

    phase_map = nfldb.types.Enums._nflgame_season_phase
    phase, year, week = nfldb.current(db)
    log('Updating schedule for (%s, %d, %d)' % (phase, year, week))

    with nfldb.Tx(db) as cursor:
        # Iterate over a copy of the schedule mapping, not the live object.
        schedule = dict(nflgame.sched.games)
        for gsis_id, info in schedule.items():
            # Year and week are checked before the phase lookup so that
            # `phase_map` is only consulted for games in the current week.
            if year == info['year'] and week == info['week']:
                if phase == phase_map[info['season_type']]:
                    game = game_from_id(cursor, gsis_id)
                    for table, prim, vals in game._rows:
                        nfldb.db._upsert(cursor, table, vals, prim)
    log('done.')
def doit():
    """
    Runs a single update pass on a freshly opened database connection.

    This is a closure: `update_turnovers`, `update_schedules`, `simulate`,
    `player_interval` and `batch_size` are read from the enclosing scope.
    Exactly one of the following happens, in priority order:

    * `update_turnovers` is truthy: turnover data is refreshed.
    * `update_schedules` is truthy: all game schedules are refreshed.
    * `simulate` is not `None`: one simulation pass is run.
    * otherwise: players are updated, then games.

    The connection is always closed before returning, including when the
    simulation finishes early or when an update step raises.

    Returns `True` when a simulation pass reports completion; otherwise
    `None`.
    """
    log('-' * 79)
    log('STARTING NFLDB UPDATE AT %s' % now())

    log('Connecting to nfldb... ', end='')
    db = nfldb.connect()
    log('done.')

    try:
        # We always insert dates and times as UTC.
        log('Setting timezone to UTC... ', end='')
        nfldb.set_timezone(db, 'UTC')
        log('done.')

        if update_turnovers:
            update_game_turnovers(db, update_turnovers)
        elif update_schedules:
            update_game_schedules(db)
        elif simulate is not None:
            done = update_simulate(db)
            if done:
                log('Simulation complete.')
                # The `finally` clause below closes the connection.
                return True
        else:
            with nfldb.Tx(db) as cursor:
                # Update players first. This is important because if an
                # unknown player is discovered in the game data, the player
                # will be upserted. We'd like to avoid that because it's
                # slow.
                update_players(cursor, player_interval)

            # Now update games.
            update_games(db, batch_size=batch_size)

        log('Closing database connection... ', end='')
    finally:
        # Close on every exit path so a finished simulation or a failing
        # update step does not leave the connection open.
        db.close()
    log('done.')

    log('FINISHED NFLDB UPDATE AT %s' % now())
    log('-' * 79)
def _team_turnovers(game, side):
    """
    Returns the `trnovr` team stat of `side` ('home' or 'away') as an int.

    0 is returned when any key on the path into `game.data` is missing.
    """
    try:
        return int(game.data[side]['stats']['team']['trnovr'])
    except KeyError:
        return 0


def update_game_turnovers(db, since):
    """
    Updates the turnover data of every game in the database.

    The range of season years present in the `game` table is queried, and
    for every `nflgame` game in those years the home/away turnover counts
    are compared with the stored game. Games whose counts differ are
    updated and saved.

    `since` is accepted for interface compatibility but is not used by
    this function.
    """
    log('Updating all game turnovers... ', end='')
    with nfldb.Tx(db) as cursor:
        cursor.execute(
            'SELECT MAX(season_year) as max, MIN(season_year) as min from game'
        )
        start_year = None
        stop_year = None
        for row in cursor.fetchall():
            start_year = row['min']
            stop_year = row['max']
        if not start_year:
            # No usable season range, so there is nothing to update. Still
            # terminate the progress line opened above.
            log('done.')
            return

        lock_tables(cursor)
        cursor.execute("SET TIME ZONE 'UTC'")
        for year in range(start_year, stop_year + 1):
            for game in nflgame.games(year):
                dbg = nfldb.Game.from_id(db, game.eid)
                home = _team_turnovers(game, 'home')
                away = _team_turnovers(game, 'away')
                # Compare against the stored game (`dbg`), not the
                # connection (`db`), which has no turnover attributes.
                if home != dbg.home_turnovers or away != dbg.away_turnovers:
                    dbg.home_turnovers = home
                    dbg.away_turnovers = away
                    dbg._save(cursor)
    log('done.')
def run(player_interval=43200, interval=None, update_schedules=False,
        batch_size=5, simulate=None):
    """
    Entry point of the updater: runs one update, or repeats it forever.

    `player_interval` is forwarded to `update_players`.

    `interval` is the number of seconds to sleep between update passes.
    When it is `None` a single pass is run, except in simulation mode,
    where it defaults to 10 seconds.

    `update_schedules`, when true, makes each pass refresh all game
    schedules instead of doing a regular update. It cannot be combined
    with `simulate`.

    `batch_size` is forwarded to `update_games`.

    `simulate`, when not `None`, is matched against `gsis_id` to select
    the games to simulate. After an interactive confirmation those games
    are deleted from the database and re-added pass by pass. The process
    exits with status 0 when the user declines or the simulation
    completes, and with status 1 when no game matches.
    """
    global _simulate

    if simulate is not None:
        assert not update_schedules, \
            "update_schedules is incompatible with simulate"

        db = nfldb.connect()

        # Resolve `simulate` to the gsis ids of games that actually exist.
        # Earlier prefix-expansion variant, kept for reference:
        # lt = [gid + ('\x79' * (10 - len(gid))) for gid in simulate]
        # q = nfldb.Query(db).game(gsis_id__ge=simulate, gsis_id__le=lt)
        q = nfldb.Query(db).game(gsis_id__eq=simulate)
        games = sorted(q.as_games(), key=lambda g: g.gsis_id)
        # Disabled check, kept for reference:
        # for g in games:
        #     if not g.finished:
        #         log('Game "%s" has not finished yet and therefore cannot '
        #             'be simulated.' % g.gsis_id)
        #         sys.exit(1)
        # simulate = [q.gsis_id]
        simulate = [g.gsis_id for g in games]

        if not simulate:
            # An empty tuple would render as `IN ()` in the DELETE below,
            # which is not valid SQL, so stop before prompting.
            log('No matching games found to simulate.')
            sys.exit(1)

        yesno = input(
            '*** PLEASE READ! ***\n\n'
            'Simulation mode will simulate games being played by deleting\n'
            'games from the database and slowly re-adding drives in the game\n'
            'one-by-one at a constant rate indicated by --interval.\n'
            'You may cancel the simulation at any time and run \n'
            '`nfldb-update` to bring the database back up to date.\n\n'
            'Please make sure that no other `nfldb-update` processes are\n'
            'running during a simulation.\n\n'
            ' %s\n\n'
            'Are you sure you want to simulate these games? [y/n] '
            % '\n '.join(simulate))
        # `startswith` treats an empty answer (just Enter) as "no" instead
        # of raising IndexError on `[0]`.
        if not yesno.strip().lower().startswith('y'):
            sys.exit(0)

        _simulate = {
            'gsis_ids': simulate,
            'drives': 0,
        }

        log('Running simulation... Deleting games: %s' % ', '.join(simulate))
        with nfldb.Tx(db) as cursor:
            cursor.execute('DELETE FROM game WHERE gsis_id IN %s',
                           (tuple(simulate),))

        if interval is None:
            # Simulation implies a repeated update at some interval.
            interval = 10
            log('--interval not set, so using default simulation '
                'interval of %d seconds.' % interval)

    def doit():
        """
        Runs one update pass on its own connection.

        Returns `True` when a simulation pass reports completion;
        otherwise `None`.
        """
        log('-' * 79)
        log('STARTING NFLDB UPDATE AT %s' % now())

        log('Connecting to nfldb... ', end='')
        db = nfldb.connect()
        log('done.')

        try:
            # We always insert dates and times as UTC.
            log('Setting timezone to UTC... ', end='')
            nfldb.set_timezone(db, 'UTC')
            log('done.')

            if update_schedules:
                update_game_schedules(db)
            elif simulate is not None:
                done = update_simulate(db)
                if done:
                    log('Simulation complete.')
                    return True
            else:
                with nfldb.Tx(db) as cursor:
                    # Update players first. This is important because if an
                    # unknown player is discovered in the game data, the
                    # player will be upserted. We'd like to avoid that
                    # because it's slow.
                    update_players(cursor, player_interval)

                # Now update games.
                update_games(db, batch_size=batch_size)

            log('Closing database connection... ', end='')
        finally:
            # Close on every exit path, including the early return above
            # and failures inside an update step.
            db.close()
        log('done.')

        log('FINISHED NFLDB UPDATE AT %s' % now())
        log('-' * 79)

    if interval is None:
        doit()
    else:
        if interval < 15 and simulate is None:
            log('WARNING: Interval %d is shorter than 15 seconds and is '
                'probably wasteful.\nAre you sure you know what you are doing?'
                % interval)
        while True:
            done = doit()
            if done:
                sys.exit(0)
            time.sleep(interval)
def update_games(db, batch_size=5):
    """
    Performs one monolithic update of games, drives and plays.

    When this function returns, the database is up to date with all
    current NFL data known by `nflgame`.

    While it runs, the tables are locked (see `lock_tables`) so that
    writes to player, game, drive, play and play_player are blocked and no
    races are introduced by concurrent updaters. Other clients can still
    read from the database.

    `batch_size` is forwarded to `bulk_insert_game_data`.
    """
    # Why this is split into three phases: performance. Infrequent updates
    # should be quick (bulk inserts of game, drive and play data), yet
    # incremental updates must also work while games are being played.
    #
    # Phase 1 -- games absent from the database. Only their *schedule*
    # data is bulk-inserted as a placeholder in the `game` table, which
    # leaves every `home_*` and `away_*` field at 0. No
    # `nflgame.game.Game` object is created for this, since doing so can
    # be costly.
    #
    # Phase 2 -- games that have schedule data and nothing else. In
    # practice these are games of the current season that have not started
    # yet, or every game when the database is empty. Their drives and
    # plays are bulk-inserted.
    #
    # Phase 3 -- games currently being played. This is the slowest path,
    # because every drive and play is "upserted": inserted when missing,
    # updated otherwise. For a handful of games that is acceptable.
    # (Updates are required because the NFL corrects mistakes on the fly.)
    #
    # Player meta data changes rarely by comparison, so it is updated on a
    # larger interval elsewhere and with less concern for performance.
    with nfldb.Tx(db) as cursor:
        lock_tables(cursor)

        log('Updating season phase, year and week... ', end='')
        update_season_state(cursor)
        log('done.')

        # Phase 1: placeholder schedule rows for unknown games.
        missing = games_missing(cursor)
        if len(missing) > 0:
            log('Adding schedule data for %d games... ' % len(missing), end='')
            rows_by_table = OrderedDict()
            for gsis_id in missing:
                placeholder = game_from_schedule(cursor, gsis_id)
                for table, prim, vals in placeholder._rows:
                    if table not in rows_by_table:
                        rows_by_table[table] = []
                    rows_by_table[table].append(vals)
            for table in rows_by_table:
                nfldb.db._big_insert(cursor, table, rows_by_table[table])
            log('done.')

        # Phase 2: bulk insert for games that only have schedule data.
        pending = games_scheduled(cursor)
        if len(pending) > 0:
            log('Bulk inserting data for %d games...' % len(pending))
            bulk_insert_game_data(cursor, pending, batch_size=batch_size)
            log('done.')

        # Phase 3: upsert the games in progress one at a time.
        live = games_in_progress(cursor)
        if len(live) > 0:
            log('Updating %d games in progress...' % len(live))
            for gsis_id in live:
                game = game_from_id(cursor, gsis_id)
                log('\t%s' % game)
                game._save(cursor)
            log('done.')

    # This *must* come after everything else because it could set the
    # 'finished' flag to true on a game that hasn't been completely
    # updated yet.
    #
    # See issue #42.
    update_current_week_schedule(db)
def training_wr(period):
    """
    Builds a table of wide-receiver training features for `period`.

    Two queries are run over regular-season games from `period.offset(-10)`
    up to `period`:

    * team passing totals per game, stored once keyed by the defending
      team (`def_*` columns) and once by the offensive team (`off_*`
      columns);
    * receiving and rushing totals per player whose position is 'WR'.

    Every column name ends in `_<week_offset>`, where the offset is
    computed in SQL as `period` minus the game's `season_year * 17 + week`.
    For a player's offset-0 row, the team feature dicts of that game's
    defending and offensive teams (all of their offsets) are merged into
    the player's features.

    `period` must provide `season_year`, `week` and `offset(n)`.
    NOTE(review): presumably a project "season week" object -- confirm
    against the caller.

    Returns a `pandas.DataFrame` with one row per (player_id, player_name)
    and one column per feature.
    """
    def prefixed(row, prefix, columns, offset):
        # Builds {'<prefix><column>_<offset>': row[column]} in column order.
        return {prefix + col + '_' + offset: row[col] for col in columns}

    passing_columns = ('passing_yds', 'passing_att', 'passing_cmp',
                       'passing_tds', 'passing_int', 'total_plays')
    player_columns = ('receiving_tar', 'receiving_yds', 'receiving_rec',
                      'receiving_tds', 'rushing_yds', 'rushing_tds',
                      'rushing_att')

    start_period = period.offset(-10)
    # The same three integers parameterise both queries.
    sql_args = (period.season_year * 17 + period.week,
                start_period.season_year * 100 + start_period.week,
                period.season_year * 100 + period.week)

    data = {}
    team_def_data = {}
    team_off_data = {}

    db = nfldb.connect()
    try:
        with nfldb.Tx(db) as cursor:
            cursor.execute('''
                SELECT p.pos_team AS team,
                       CASE WHEN p.pos_team = g.home_team THEN g.away_team
                            ELSE g.home_team END AS def_team,
                       %d - (g.season_year * 17 + g.week) as week_offset,
                       g.season_year,
                       g.week,
                       sum(ap.passing_yds) AS passing_yds,
                       sum(ap.passing_att) AS passing_att,
                       sum(ap.passing_cmp) AS passing_cmp,
                       sum(ap.passing_tds) AS passing_tds,
                       sum(ap.passing_int) AS passing_int,
                       count(distinct p.play_id) AS total_plays
                FROM agg_play ap, play p, game g
                WHERE g.gsis_id = p.gsis_id
                  AND g.gsis_id = ap.gsis_id
                  AND p.play_id = ap.play_id
                  AND (g.season_year * 100 + g.week) BETWEEN %d AND %d
                  AND g.season_type='Regular'
                GROUP By 1, 2, 3, 4, 5
            ''' % sql_args)
            for row in cursor.fetchall():
                offset = str(row['week_offset'])
                team_def_data.setdefault(row['def_team'], {}).update(
                    prefixed(row, 'def_', passing_columns, offset))
                team_off_data.setdefault(row['team'], {}).update(
                    prefixed(row, 'off_', passing_columns, offset))

        with nfldb.Tx(db) as cursor:
            cursor.execute('''
                SELECT p.pos_team AS team,
                       CASE WHEN p.pos_team = g.home_team THEN g.away_team
                            ELSE g.home_team END AS def_team,
                       %d - (g.season_year * 17 + g.week) as week_offset,
                       g.season_year,
                       g.week,
                       player.full_name,
                       player.player_id,
                       sum(pp.receiving_tar) AS receiving_tar,
                       sum(pp.receiving_yds) AS receiving_yds,
                       sum(pp.receiving_rec) AS receiving_rec,
                       sum(pp.receiving_tds) AS receiving_tds,
                       sum(pp.rushing_yds) AS rushing_yds,
                       sum(pp.rushing_tds) AS rushing_tds,
                       sum(pp.rushing_att) AS rushing_att
                FROM game g, play p, play_player pp, player player
                WHERE g.gsis_id = p.gsis_id
                  AND g.gsis_id = pp.gsis_id
                  AND p.play_id = pp.play_id
                  AND pp.player_id = player.player_id
                  AND (g.season_year * 100 + g.week) BETWEEN %d AND %d
                  AND g.season_type='Regular'
                  AND player.position = 'WR'
                GROUP By 1, 2, 3, 4, 5, 6, 7
            ''' % sql_args)
            for row in cursor.fetchall():
                player_key = (row['player_id'], row['full_name'])
                offset = str(row['week_offset'])
                features = data.setdefault(player_key, {})
                features.update(prefixed(row, '', player_columns, offset))
                if offset == '0':
                    # Attach the team-level context of the target week's
                    # game to the player.
                    features.update(team_def_data[row['def_team']])
                    features.update(team_off_data[row['team']])
    finally:
        # Everything needed is in plain dicts now; release the connection.
        db.close()

    # Pivot {player: {stat: value}} into {stat: {player: value}} so that
    # each stat becomes a DataFrame column.
    formatted = {}
    for player_tuple, stats in data.items():
        for stat, value in stats.items():
            # `stat not in formatted` replaces `dict.has_key`, which does
            # not exist in Python 3.
            formatted.setdefault(stat, {})[player_tuple] = value

    df = pd.DataFrame(formatted)
    df.index.set_names(['player_id', 'player_name'], inplace=True)
    return df
# The first command line argument is the base name shared by the SQL
# input file and the JSON output file.
name = sys.argv[1]
sql_file = '{}.sql'.format(name)
out_file = '{}.json'.format(name)

# Open a database connection.
db = nfldb.connect()

# Load the query text from disk.
with open(sql_file, 'r') as file:
    sql_query = file.read()

# Execute the query and keep every returned row.
play_players = []
with nfldb.Tx(db) as cursor:
    cursor.execute(sql_query)
    play_players.extend(cursor.fetchall())

# Calculate replacements: fantasy points per game played for each
# position, best first.
qbs = sorted(
    (pp['fantasy_points'] / pp['games_played']
     for pp in play_players
     if pp['position'] == 'QB'),
    reverse=True,
)

rbs = sorted(
    (pp['fantasy_points'] / pp['games_played']
     for pp in play_players
     if pp['position'] == 'RB'),
    reverse=True,
)
from settings import positions
from settings import positions_sort

# get a DB connection
db = nfldb.connect()

# get the current status of the season with a single call;
# nfldb.current(db) returns the tuple (phase, year, week)
#   db_season_phase: can be preseason, regular, or postseason
#   db_season_year:  current season year
#   db_current_week: current week of this season phase
db_season_phase, db_season_year, db_current_week = nfldb.current(db)

# search the teams table and return the teams in the DB
# the DB includes an Unknown, old Jacksonville, and St Louis Rams teams we
# want to filter out since they aren't part of the current season
with nfldb.Tx(db) as cur:
    cur.execute(
        "SELECT * FROM team WHERE team_id NOT IN ('UNK', 'JAX', 'STL')")
    raw_teams = cur.fetchall()
    cur.close()

# take the rows returned from the DB query and convert them to plain
# dictionaries keyed by team_id, with an extra counter (starting at 0) for
# each player category we can work with
teams = {}
for raw_team in raw_teams:
    team = dict(raw_team)
    for position in positions:
        team[position] = 0
    teams[team['team_id']] = team
def test_num_turnovers(db):
    """
    Checks the turnover counts of game 2013090800 as built by
    `nfldb.update.game_from_id`: 2 for the home team and 3 for the away
    team.
    """
    import nfldb.update

    with nfldb.Tx(db) as cursor:
        game = nfldb.update.game_from_id(cursor, '2013090800')
        home, away = game.home_turnovers, game.away_turnovers
        assert home == 2
        assert away == 3