예제 #1
0
def update_simulate(db):
    """
    Runs one simulation pass over the games tracked in the module-level
    `_simulate` state.

    Within a single transaction, every gsis id in `_simulate['gsis_ids']`
    is turned into a game with `game_from_id_simulate` and saved.

    Returns `True` when `_simulate['gsis_ids']` is empty once the pass is
    over. Otherwise `_simulate['drives']` is incremented by one and
    `False` is returned.
    """
    with nfldb.Tx(db) as cursor:
        log('Simulating %d games...' % len(_simulate['gsis_ids']))
        for gsis_id in _simulate['gsis_ids']:
            game = game_from_id_simulate(cursor, gsis_id)
            log('\t%s' % game)
            game._save(cursor)
        log('done.')

        # The emptiness check is made against the state as it is *after*
        # the loop, so the list is looked up again here.
        nothing_left = len(_simulate['gsis_ids']) == 0

    if nothing_left:
        return True
    _simulate['drives'] += 1
    return False
예제 #2
0
def update_game_schedules(db):
    """
    Refreshes the schedule rows of every game known to `nflgame`.

    After `update_nflgame_schedules` runs, each gsis id found in
    `nflgame.sched.games` is loaded with `game_from_id` and all of its
    rows are upserted inside one transaction, with `lock_tables` applied
    to the transaction's cursor first.
    """
    update_nflgame_schedules()
    log('Updating all game schedules... ', end='')
    with nfldb.Tx(db) as cursor:
        lock_tables(cursor)
        for gsis_id in nflgame.sched.games:
            game = game_from_id(cursor, gsis_id)
            for table_name, primary_key, values in game._rows:
                nfldb.db._upsert(cursor, table_name, values, primary_key)
    log('done.')
예제 #3
0
def update_current_week_schedule(db):
    """
    Upserts the rows of every scheduled game that belongs to the current
    season phase, year and week as reported by `nfldb.current`.

    The `nflgame` schedule is refreshed first with
    `update_nflgame_schedules`.
    """
    update_nflgame_schedules()

    to_nfldb_phase = nfldb.types.Enums._nflgame_season_phase
    phase, year, week = nfldb.current(db)
    log('Updating schedule for (%s, %d, %d)' % (phase, year, week))
    with nfldb.Tx(db) as cursor:
        # Iterate over a copy of the schedule mapping.
        for gsis_id, info in dict(nflgame.sched.games).items():
            if not (year == info['year'] and week == info['week']):
                continue
            # The phase lookup is only attempted for games whose year and
            # week already match.
            if not (phase == to_nfldb_phase[info['season_type']]):
                continue
            game = game_from_id(cursor, gsis_id)
            for table_name, primary_key, values in game._rows:
                nfldb.db._upsert(cursor, table_name, values, primary_key)
    log('done.')
예제 #4
0
파일: update.py 프로젝트: gt6796c/nfldb
    def doit():
        """
        Performs one update pass on a freshly opened database connection.

        Depending on the enclosing scope's settings, the pass updates
        turnovers, updates schedules, advances the simulation, or updates
        players followed by games.

        The connection is closed on every exit path, including the early
        return taken when the simulation finishes and any error raised
        during the pass.

        Returns `True` when `update_simulate` reports that the simulation
        is complete. In every other case the function returns `None`.
        """
        log('-' * 79)
        log('STARTING NFLDB UPDATE AT %s' % now())

        log('Connecting to nfldb... ', end='')
        db = nfldb.connect()
        log('done.')

        try:
            # We always insert dates and times as UTC.
            log('Setting timezone to UTC... ', end='')
            nfldb.set_timezone(db, 'UTC')
            log('done.')

            if update_turnovers:
                update_game_turnovers(db, update_turnovers)
            elif update_schedules:
                update_game_schedules(db)
            elif simulate is not None:
                if update_simulate(db):
                    log('Simulation complete.')
                    return True
            else:
                with nfldb.Tx(db) as cursor:
                    # Update players first. This is important because if
                    # an unknown player is discovered in the game data,
                    # the player will be upserted. We'd like to avoid that
                    # because it's slow.
                    update_players(cursor, player_interval)

                # Now update games.
                update_games(db, batch_size=batch_size)
        finally:
            # Runs for the early return above and for errors as well, so
            # the connection opened for this pass is never left open.
            log('Closing database connection... ', end='')
            db.close()
            log('done.')

        log('FINISHED NFLDB UPDATE AT %s' % now())
        log('-' * 79)
예제 #5
0
파일: update.py 프로젝트: gt6796c/nfldb
def _team_turnovers(game, side):
    """
    Reads the `trnovr` team stat for `side` (`'home'` or `'away'`) out of
    `game.data`. Returns 0 when any key along the path is missing.
    """
    try:
        return int(game.data[side]['stats']['team']['trnovr'])
    except KeyError:
        return 0


def update_game_turnovers(db, since):
    """
    Updates the turnover data of every game in the database.

    For each season year between the smallest and largest `season_year`
    found in the `game` table, the games reported by `nflgame.games` are
    compared with their database counterparts. A game is saved only when
    its home or away turnover count differs from the stored one.

    `since` is accepted for the caller's benefit but is not used by this
    implementation.
    """
    log('Updating all game turnovers... ', end='')

    with nfldb.Tx(db) as cursor:
        cursor.execute(
            'SELECT MAX(season_year) as max, MIN(season_year) as min from game'
        )
        start_year = None
        stop_year = None
        for row in cursor.fetchall():
            start_year = row['min']
            stop_year = row['max']
        if not start_year:
            # No season year could be read, so there is nothing to update.
            log('done.')
            return

        lock_tables(cursor)
        cursor.execute("SET TIME ZONE 'UTC'")
        for year in range(start_year, stop_year + 1):
            for game in nflgame.games(year):
                dbg = nfldb.Game.from_id(db, game.eid)
                if dbg is None:
                    # NOTE(review): assumes `from_id` yields None for a
                    # game that is not in the database -- confirm. Such a
                    # game has no row to update.
                    continue
                home = _team_turnovers(game, 'home')
                away = _team_turnovers(game, 'away')
                # Compare against the stored game (`dbg`), not the
                # connection (`db`).
                if home != dbg.home_turnovers or away != dbg.away_turnovers:
                    dbg.home_turnovers = home
                    dbg.away_turnovers = away
                    dbg._save(cursor)
    log('done.')
예제 #6
0
def run(player_interval=43200, interval=None, update_schedules=False,
        batch_size=5, simulate=None):
    """
    Entry point for updating the database, either once or repeatedly.

    `player_interval` is forwarded to `update_players`.

    `interval` is the number of seconds to sleep between update passes.
    When it is `None`, a single pass is made, except in simulation mode,
    where it defaults to 10 seconds.

    `update_schedules`, when true, makes each pass update game schedules
    only. It cannot be combined with `simulate`.

    `batch_size` is forwarded to `update_games`.

    `simulate`, when not `None`, selects the games to simulate (it is
    matched against `gsis_id`). After an interactive confirmation the
    selected games are deleted and then re-added pass by pass. The process
    exits with status 0 once the simulation is complete or if the user
    declines, and with status 1 if no game matches.
    """
    global _simulate

    if simulate is not None:
        assert not update_schedules, \
            "update_schedules is incompatible with simulate"

        db = nfldb.connect()
        try:
            # NOTE: expanding gsis id prefixes into ranges is currently
            # disabled, so `simulate` is matched for equality only. The
            # check that every selected game has finished is disabled
            # as well.
            q = nfldb.Query(db).game(gsis_id__eq=simulate)
            games = sorted(q.as_games(), key=lambda g: g.gsis_id)
            simulate = [g.gsis_id for g in games]

            if not simulate:
                # An empty selection would produce `... IN ()` below,
                # which is not valid SQL, so stop with a clear message.
                log('No games matched, so there is nothing to simulate.')
                sys.exit(1)

            yesno = input(
                '*** PLEASE READ! ***\n\n'
                'Simulation mode will simulate games being played by deleting\n'
                'games from the database and slowly re-adding drives in the game\n'
                'one-by-one at a constant rate indicated by --interval.\n'
                'You may cancel the simulation at any time and run \n'
                '`nfldb-update` to bring the database back up to date.\n\n'
                'Please make sure that no other `nfldb-update` processes are\n'
                'running during a simulation.\n\n'
                '    %s\n\n'
                'Are you sure you want to simulate these games? [y/n] '
                % '\n    '.join(simulate))
            # `startswith` treats an empty answer as "no" instead of
            # failing on an out-of-range index.
            if not yesno.strip().lower().startswith('y'):
                sys.exit(0)

            _simulate = {
                'gsis_ids': simulate,
                'drives': 0,
            }

            log('Running simulation... Deleting games: %s'
                % ', '.join(simulate))
            with nfldb.Tx(db) as cursor:
                cursor.execute('DELETE FROM game WHERE gsis_id IN %s',
                               (tuple(simulate),))
        finally:
            # Each update pass opens its own connection, so this one is
            # only needed for the setup above.
            db.close()

        if interval is None:
            # Simulation implies a repeated update at some interval.
            interval = 10
            log('--interval not set, so using default simulation '
                'interval of %d seconds.' % interval)

    def doit():
        """
        Performs one update pass on a freshly opened connection and
        returns `True` when the simulation reports completion.
        """
        log('-' * 79)
        log('STARTING NFLDB UPDATE AT %s' % now())

        log('Connecting to nfldb... ', end='')
        db = nfldb.connect()
        log('done.')

        try:
            # We always insert dates and times as UTC.
            log('Setting timezone to UTC... ', end='')
            nfldb.set_timezone(db, 'UTC')
            log('done.')

            if update_schedules:
                update_game_schedules(db)
            elif simulate is not None:
                if update_simulate(db):
                    log('Simulation complete.')
                    return True
            else:
                with nfldb.Tx(db) as cursor:
                    # Update players first. This is important because if
                    # an unknown player is discovered in the game data,
                    # the player will be upserted. We'd like to avoid that
                    # because it's slow.
                    update_players(cursor, player_interval)

                # Now update games.
                update_games(db, batch_size=batch_size)
        finally:
            # Runs for the early return above and for errors as well, so
            # the connection opened for this pass is never left open.
            log('Closing database connection... ', end='')
            db.close()
            log('done.')

        log('FINISHED NFLDB UPDATE AT %s' % now())
        log('-' * 79)

    if interval is None:
        doit()
    else:
        if interval < 15 and simulate is None:
            log('WARNING: Interval %d is shorter than 15 seconds and is '
                'probably wasteful.\nAre you sure you know what you are doing?'
                % interval)
        while True:
            done = doit()
            if done:
                sys.exit(0)
            time.sleep(interval)
예제 #7
0
def update_games(db, batch_size=5):
    """
    Brings season state, game, drive and play data up to date inside a
    single transaction.

    `lock_tables` is applied to the transaction's cursor before any work
    is done. Per the notes kept with this routine, writes to the player,
    game, drive, play and play_player tables are blocked while it runs so
    that no races are introduced; other clients can still read.

    `batch_size` is forwarded to `bulk_insert_game_data`.
    """
    # Why three separate passes? Performance. Infrequent updates should be
    # fast (bulk inserts), yet incremental updates must also work while
    # games are being played.
    #
    #   1. Games that are not in the database at all. Only their
    #      *schedule* data is bulk-inserted, as a place holder in the
    #      `game` table, which leaves every `home_*` and `away_*` field at
    #      0. No `nflgame.game.Game` object is built for this, since that
    #      can be costly.
    #
    #   2. Games that have schedule data and nothing else: games of the
    #      current season that have not started yet or, more usefully,
    #      every game when the database is empty. Their drives and plays
    #      are bulk-inserted.
    #
    #   3. Games being played right now. This is the slowest path because
    #      each drive and play is "upserted" (inserted when absent,
    #      updated when present). Data has to be updated because the NFL
    #      corrects mistakes on the fly. For a handful of games the cost
    #      should be reasonable.
    #
    # Player meta data changes rarely, so it is updated elsewhere on a
    # longer interval with less concern for performance.
    with nfldb.Tx(db) as cursor:
        lock_tables(cursor)

        log('Updating season phase, year and week... ', end='')
        update_season_state(cursor)
        log('done.')

        # Pass 1: place-holder schedule rows for unknown games.
        missing = games_missing(cursor)
        if len(missing) > 0:
            log('Adding schedule data for %d games... ' % len(missing),
                end='')
            rows_by_table = OrderedDict()
            for gsis_id in missing:
                game = game_from_schedule(cursor, gsis_id)
                for table, _prim, row in game._rows:
                    if table not in rows_by_table:
                        rows_by_table[table] = []
                    rows_by_table[table].append(row)
            for table in rows_by_table:
                nfldb.db._big_insert(cursor, table, rows_by_table[table])
            log('done.')

        # Pass 2: bulk insert for games that only have schedule data.
        scheduled = games_scheduled(cursor)
        if len(scheduled) > 0:
            log('Bulk inserting data for %d games...' % len(scheduled))
            bulk_insert_game_data(cursor, scheduled, batch_size=batch_size)
            log('done.')

        # Pass 3: save each game that is in progress.
        in_progress = games_in_progress(cursor)
        if len(in_progress) > 0:
            log('Updating %d games in progress...' % len(in_progress))
            for gsis_id in in_progress:
                game = game_from_id(cursor, gsis_id)
                log('\t%s' % game)
                game._save(cursor)
            log('done.')

        # Keep this as the very last step: it could set the 'finished'
        # flag to true on a game that hasn't been completely updated yet.
        #
        # See issue #42.
        update_current_week_schedule(db)
예제 #8
0
def training_wr(period):
    """
    Builds a training table of wide receiver statistics for the weeks
    leading up to `period`.

    `period` must provide `season_year`, `week` and `offset(n)`. The
    window runs from `period.offset(-10)` through `period`, regular season
    only.

    Every column name ends in `_<week_offset>`, where the week offset is
    the value computed by the queries
    (`period.season_year * 17 + period.week` minus the same expression
    for the game). Per-player columns hold receiving and rushing totals.
    Players that have a row at offset 0 additionally receive the
    `def_*` columns of their opponent and the `off_*` columns of their
    own team for every offset in the window.

    Returns a `pandas.DataFrame` indexed by `(player_id, player_name)`.
    When no player rows are found, an empty frame with that same
    two-level index is returned.
    """
    start_period = period.offset(-10)
    # Both queries take the same three integers: the reference point for
    # the week offset, then the lower and upper bound of the window.
    query_params = (
        period.season_year * 17 + period.week,
        start_period.season_year * 100 + start_period.week,
        period.season_year * 100 + period.week,
    )
    team_stat_names = (
        'passing_yds', 'passing_att', 'passing_cmp',
        'passing_tds', 'passing_int', 'total_plays',
    )
    player_stat_names = (
        'receiving_tar', 'receiving_yds', 'receiving_rec', 'receiving_tds',
        'rushing_yds', 'rushing_tds', 'rushing_att',
    )

    data = {}
    team_def_data = {}
    team_off_data = {}

    db = nfldb.connect()
    try:
        # Team passing totals, recorded once for the defending team
        # (`def_*`) and once for the team in possession (`off_*`).
        with nfldb.Tx(db) as cursor:
            cursor.execute('''
                SELECT p.pos_team AS team,
                    CASE WHEN p.pos_team = g.home_team THEN g.away_team
                        ELSE g.home_team
                        END AS def_team,
                    %d - (g.season_year * 17 + g.week) as week_offset,
                    g.season_year,
                    g.week,
                    sum(ap.passing_yds) AS passing_yds,
                    sum(ap.passing_att) AS passing_att,
                    sum(ap.passing_cmp) AS passing_cmp,
                    sum(ap.passing_tds) AS passing_tds,
                    sum(ap.passing_int) AS passing_int,
                    count(distinct p.play_id) AS total_plays
                FROM agg_play ap,
                    play p,
                    game g
                WHERE g.gsis_id = p.gsis_id
                    AND g.gsis_id = ap.gsis_id
                    AND p.play_id = ap.play_id
                    AND (g.season_year * 100 + g.week) BETWEEN %d AND %d
                    AND g.season_type='Regular'
                GROUP By 1, 2, 3, 4, 5
            ''' % query_params)
            for row in cursor.fetchall():
                offset = str(row['week_offset'])
                def_stats = team_def_data.setdefault(row['def_team'], {})
                off_stats = team_off_data.setdefault(row['team'], {})
                for stat in team_stat_names:
                    def_stats['def_' + stat + '_' + offset] = row[stat]
                    off_stats['off_' + stat + '_' + offset] = row[stat]

        # Per-player receiving and rushing totals for wide receivers.
        with nfldb.Tx(db) as cursor:
            cursor.execute('''
                SELECT p.pos_team AS team,
                    CASE WHEN p.pos_team = g.home_team THEN g.away_team
                        ELSE g.home_team
                        END AS def_team,
                    %d - (g.season_year * 17 + g.week) as week_offset,
                    g.season_year,
                    g.week,
                    player.full_name,
                    player.player_id,
                    sum(pp.receiving_tar) AS receiving_tar,
                    sum(pp.receiving_yds) AS receiving_yds,
                    sum(pp.receiving_rec) AS receiving_rec,
                    sum(pp.receiving_tds) AS receiving_tds,
                    sum(pp.rushing_yds) AS rushing_yds,
                    sum(pp.rushing_tds) AS rushing_tds,
                    sum(pp.rushing_att) AS rushing_att
                FROM game g,
                    play p,
                    play_player pp,
                    player player
                WHERE g.gsis_id = p.gsis_id
                    AND g.gsis_id = pp.gsis_id
                    AND p.play_id = pp.play_id
                    AND pp.player_id = player.player_id
                    AND (g.season_year * 100 + g.week) BETWEEN %d AND %d
                    AND g.season_type='Regular'
                    AND player.position = 'WR'
                GROUP By 1, 2, 3, 4, 5, 6, 7
            ''' % query_params)
            for row in cursor.fetchall():
                player_key = (row['player_id'], row['full_name'])
                player_stats = data.setdefault(player_key, {})
                offset = str(row['week_offset'])
                for stat in player_stat_names:
                    player_stats[stat + '_' + offset] = row[stat]
                if offset == '0':
                    # Attach the team-level context of the week being
                    # predicted: the opponent's defensive numbers and the
                    # player's own team's offensive numbers.
                    player_stats.update(team_def_data[row['def_team']])
                    player_stats.update(team_off_data[row['team']])
    finally:
        db.close()

    index_names = ['player_id', 'player_name']
    if not data:
        # Without rows the frame would get a one-level index, which
        # cannot take two names. Build the two-level index explicitly.
        empty_index = pd.MultiIndex.from_arrays([[], []], names=index_names)
        return pd.DataFrame(index=empty_index)

    # Pivot {player: {stat: value}} into {stat: {player: value}} so that
    # stats become columns and players become rows.
    formatted = {}
    for player_key, stats in data.items():
        for stat, value in stats.items():
            formatted.setdefault(stat, {})[player_key] = value

    df = pd.DataFrame(formatted)
    df.index.set_names(index_names, inplace=True)
    return df
예제 #9
0
# The base name given on the command line selects both the SQL file to
# read and the JSON file name.
name = sys.argv[1]
sql_file, out_file = name + '.sql', name + '.json'

# Connect to database
db = nfldb.connect()

# Load the query text
with open(sql_file, 'r') as file:
    sql_query = file.read()

# Execute the query and keep every returned row
with nfldb.Tx(db) as cursor:
    cursor.execute(sql_query)
    play_players = list(cursor.fetchall())


def _points_per_game(position):
    """
    Returns fantasy points per game played for every row of
    `play_players` at `position`, highest value first.
    """
    return sorted(
        (pp['fantasy_points'] / pp['games_played']
         for pp in play_players if pp['position'] == position),
        reverse=True)


# Calculate replacements
qbs = _points_per_game('QB')
rbs = _points_per_game('RB')
예제 #10
0
from settings import positions
from settings import positions_sort

#get a DB connection
db = nfldb.connect()
#get current status of the season with a single lookup, so that the three
#values always describe the same moment:
#  db_season_phase - can be preseason, regular, or postseason
#  db_season_year  - current season year
#  db_current_week - current week of this season phase
db_season_phase, db_season_year, db_current_week = nfldb.current(db)

#search the teams table and return the teams in the DB
#the DB includes an Unknown, old Jacksonville, and St Louis Rams teams we want
#to filter out since they aren't part of the current season
with nfldb.Tx(db) as cur:
    cur.execute(
        "SELECT * FROM team WHERE team_id NOT IN ('UNK', 'JAX', 'STL')")
    raw_teams = cur.fetchall()
    cur.close()

#take the rows returned from the DB query and convert them to dictionaries
#keyed by team_id, with an extra counter (starting at 0) for each player
#position we can work with
teams = {}
for raw_team in raw_teams:
    team = dict(raw_team)
    for position in positions:
        team[position] = 0
    teams[team['team_id']] = team
예제 #11
0
파일: test_query.py 프로젝트: gt6796c/nfldb
def test_num_turnovers(db):
    """
    Game 2013090800, built with `nfldb.update.game_from_id`, must report
    2 home turnovers and 3 away turnovers.
    """
    import nfldb.update

    with nfldb.Tx(db) as cursor:
        game = nfldb.update.game_from_id(cursor, '2013090800')
        home, away = game.home_turnovers, game.away_turnovers
        assert home == 2
        assert away == 3