def connect(**kwargs):
    """
    A wrapper around nfldb.connect.

    Returns a `psycopg2._psycopg.connection` object from the
    `psycopg2.connect` function. All keyword arguments are forwarded
    unchanged to `nfldb.connect`.

    If `database` is `None`, then `nfldb.connect` will look for a
    configuration file using `nfldb.config` with `config_path`.
    Otherwise, the connection will use the parameters given. If
    `database` is `None` and no config file can be found, then an
    `IOError` exception is raised.

    In addition to what `nfldb.connect` does, this function compares
    the current nfldbproj schema version of the database against the
    library version `nfldbproj_api_version`. If the schema version is
    less than the library version, then the schema will be
    automatically upgraded. An `AssertionError` is raised if:

      * the schema version is newer than the library version,
      * the nfldb API version is not the one nfldbproj expects, or
      * the schema version is 0 and the nfldbproj schema is not empty.

    These checks are raised explicitly (rather than via `assert`) so
    that they are still enforced when Python runs with `-O`. If any
    check, the migration, or the type binding fails, the connection is
    closed before the exception propagates.

    N.B. The `timezone` parameter should be set to a value that
    PostgreSQL will accept. Select from the `pg_timezone_names` view to
    get a list of valid time zones.
    """
    conn = nfldb_connect(**kwargs)
    try:
        # Migration.
        nfldbproj_sversion = nfldbproj_schema_version(conn)
        if nfldbproj_sversion > nfldbproj_api_version:
            raise AssertionError(
                'nfldbproj library version {} is older than schema with version {}'.format(
                    nfldbproj_api_version, nfldbproj_sversion
                )
            )
        if api_version != nfldb_api_version:
            raise AssertionError(
                'nfldbproj expects nfldb version {}, encountered nfldb version {}'.format(
                    nfldb_api_version, api_version
                )
            )
        schema_is_fresh = nfldbproj_sversion == 0 and _nfldbproj_is_empty(conn)
        if not (nfldbproj_sversion > 0 or schema_is_fresh):
            raise AssertionError('nfldbproj schema has version 0 but is not empty')

        # Migrations run with the session in UTC.
        set_timezone(conn, 'UTC')
        _migrate_nfldbproj(conn, nfldbproj_api_version)

        # NOTE(review): the session time zone is only changed back when a
        # truthy `timezone` keyword was passed explicitly. If nfldb_connect
        # applied a time zone from a config file, the session presumably
        # stays in UTC after this point -- confirm that is intended.
        if kwargs.get('timezone'):
            set_timezone(conn, kwargs['timezone'])

        # Bind SQL -> Python casting functions for additional types.
        _bind_type(conn, 'fantasy_position',
                   _Enum._pg_cast(ProjEnums.fantasy_position))
        _bind_type(conn, 'proj_scope',
                   _Enum._pg_cast(ProjEnums.proj_scope))
    except BaseException:
        # The caller never receives `conn` on failure, so release it here
        # instead of leaving an open connection behind.
        conn.close()
        raise
    return conn
def doit():
    """
    Run a single nfldb update pass and log its progress.

    Connects to the database, switches the session to UTC, and then
    performs exactly one of the following, depending on names defined
    outside this function:

      * `update_turnovers` truthy: `update_game_turnovers`
      * `update_schedules` truthy: `update_game_schedules`
      * `simulate` is not `None`: `update_simulate`
      * otherwise: `update_players` inside a transaction, followed by
        `update_games`

    The database connection is always closed before this function
    exits, including on the early return and when an update step
    raises.

    Returns `True` when `update_simulate` reports that the simulation
    is complete; otherwise returns `None`.
    """
    log('-' * 79)
    log('STARTING NFLDB UPDATE AT %s' % now())

    log('Connecting to nfldb... ', end='')
    db = nfldb.connect()
    log('done.')

    try:
        # We always insert dates and times as UTC.
        log('Setting timezone to UTC... ', end='')
        nfldb.set_timezone(db, 'UTC')
        log('done.')

        if update_turnovers:
            update_game_turnovers(db, update_turnovers)
        elif update_schedules:
            update_game_schedules(db)
        elif simulate is not None:
            done = update_simulate(db)
            if done:
                log('Simulation complete.')
                return True
        else:
            with nfldb.Tx(db) as cursor:
                # Update players first. This is important because if an
                # unknown player is discovered in the game data, the player
                # will be upserted. We'd like to avoid that because it's slow.
                update_players(cursor, player_interval)

            # Now update games.
            update_games(db, batch_size=batch_size)
    finally:
        # Runs on the normal path, on the early `return True` above, and
        # when an update step raises, so the connection is never leaked.
        log('Closing database connection... ', end='')
        db.close()
        log('done.')

    log('FINISHED NFLDB UPDATE AT %s' % now())
    log('-' * 79)
def doit():
    """
    Run a single nfldb update pass and log its progress.

    Connects to the database, switches the session to UTC, and then
    performs exactly one of the following, depending on names defined
    outside this function:

      * `update_schedules` truthy: `update_game_schedules`
      * `simulate` is not `None`: `update_simulate`
      * otherwise: `update_players` inside a transaction, followed by
        `update_games`

    The database connection is always closed before this function
    exits, including on the early return and when an update step
    raises.

    Returns `True` when `update_simulate` reports that the simulation
    is complete; otherwise returns `None`.
    """
    log('-' * 79)
    log('STARTING NFLDB UPDATE AT %s' % now())

    log('Connecting to nfldb... ', end='')
    db = nfldb.connect()
    log('done.')

    try:
        # We always insert dates and times as UTC.
        log('Setting timezone to UTC... ', end='')
        nfldb.set_timezone(db, 'UTC')
        log('done.')

        if update_schedules:
            update_game_schedules(db)
        elif simulate is not None:
            done = update_simulate(db)
            if done:
                log('Simulation complete.')
                return True
        else:
            with nfldb.Tx(db) as cursor:
                # Update players first. This is important because if an
                # unknown player is discovered in the game data, the player
                # will be upserted. We'd like to avoid that because it's slow.
                update_players(cursor, player_interval)

            # Now update games.
            update_games(db, batch_size=batch_size)
    finally:
        # Runs on the normal path, on the early `return True` above, and
        # when an update step raises, so the connection is never leaked.
        log('Closing database connection... ', end='')
        db.close()
        log('done.')

    log('FINISHED NFLDB UPDATE AT %s' % now())
    log('-' * 79)