def get_results(fuzzy, q, name_type, name):
    # Helper closed over the enclosing search's scope: `db`, `team`,
    # `position` and `limit` are expected to be defined by the caller.
    fuzzy = fuzzy % name_type
    similar = 'LOWER(%s) LIKE %%s' % name_type
    qteam, qposition = '', ''
    results = []
    with Tx(db) as cursor:
        if team is not None:
            qteam = cursor.mogrify('team = %s', (team,))
        if position is not None:
            qposition = cursor.mogrify('position = %s', (position,))
        fuzzy_filled = cursor.mogrify(fuzzy, (name,))
        similar_filled = cursor.mogrify(similar, (name + '%',))
        columns = types.Player._sql_select_fields(types.Player.sql_fields())
        columns.append('%s AS distance' % fuzzy_filled)
        q = q.format(columns=', '.join(columns),
                     where=sql.ands(similar_filled,
                                    fuzzy_filled + ' IS NOT NULL',
                                    'team != \'UNK\'',
                                    qteam, qposition),
                     limit=limit)
        cursor.execute(q)
        for row in cursor.fetchall():
            results.append(
                (types.Player.from_row_dict(db, row), row['distance']))
    return results

def as_drives(self):
    """
    Executes the query and returns the results as a list of
    `nfldb.Drive` objects.
    """
    self._assert_no_aggregate()

    results = []
    with Tx(self._db, factory=tuple_cursor) as cursor:
        q = self._make_join_query(cursor, types.Drive)
        cursor.execute(q)
        for row in cursor.fetchall():
            results.append(types.Drive.from_row_tuple(self._db, row))
    return results

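
# A minimal usage sketch for `as_drives`, assuming a working nfldb
# database; the game criteria below are illustrative.
import nfldb

db = nfldb.connect()
q = nfldb.Query(db)
q.game(season_year=2012, season_type='Regular', week=1)
for drive in q.as_drives():
    print(drive)
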
def show_where(self, aggregate=False):
    """
    Returns an approximate WHERE clause corresponding to the criteria
    specified in `self`. Note that the WHERE clause given is never
    explicitly used for performance reasons, but one hopes that it
    describes the criteria in `self`.

    If `aggregate` is `True`, then aggregate criteria for the `play`
    and `play_player` tables are shown with aggregate functions
    applied.
    """
    with Tx(self._db) as cur:
        return self._sql_where(cur, aggregate=aggregate)
    return ''

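
# Illustrative sketch of `show_where`, assuming an nfldb database; it
# only prints the approximate WHERE clause so the criteria can be
# inspected before running the query.
import nfldb

db = nfldb.connect()
q = nfldb.Query(db)
q.game(season_year=2012, season_type='Regular')
q.player(full_name='Tom Brady')
print(q.show_where())
print(q.show_where(aggregate=True))
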
def as_players(self):
    """
    Executes the query and returns the results as a list of
    `nfldb.Player` objects.
    """
    self._assert_no_aggregate()

    results = []
    with Tx(self._db) as cursor:
        q = self._make_join_query(cursor, types.Player)
        cursor.execute(q)
        for row in cursor.fetchall():
            results.append(types.Player.from_row_dict(self._db, row))
    return results

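
# Usage sketch for `as_players`, assuming an nfldb database; it lists
# every player matching the (illustrative) criteria below.
import nfldb

db = nfldb.connect()
q = nfldb.Query(db)
q.game(season_year=2012, season_type='Regular', week=1)
q.player(team='NE')
for player in q.as_players():
    print(player)
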
def current(db):
    """
    Returns a triple of `nfldb.Enums.season_phase`, season year and
    week corresponding to values that `nfldb` thinks are current.

    Note that this only queries the database. Only the `nfldb-update`
    script fetches the current state from NFL.com.

    The values retrieved may be `None` if the season is over or if
    they haven't been updated yet by the `nfldb-update` script.
    """
    with Tx(db, factory=tuple_cursor) as cursor:
        cursor.execute('SELECT season_type, season_year, week FROM meta')
        return cursor.fetchone()
    return tuple([None] * 3)

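
# Sketch of `current`, assuming the `meta` table has been populated by
# the `nfldb-update` script; the unpacking mirrors the documented
# return triple.
import nfldb

db = nfldb.connect()
phase, year, week = nfldb.current(db)
print('Current phase: %s, year: %s, week: %s' % (phase, year, week))
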
def as_play_players(self):
    """
    Executes the query and returns the results as a list of
    `nfldb.PlayPlayer` objects. This provides a way to access player
    statistics directly by bypassing play data.

    Usually the results of this method are passed to `nfldb.aggregate`.
    It is recommended to use `nfldb.Query.aggregate` and
    `nfldb.Query.as_aggregate` when possible, since it is significantly
    faster to sum statistics in the database as opposed to Python.
    """
    self._assert_no_aggregate()

    results = []
    with Tx(self._db, factory=tuple_cursor) as cursor:
        init = types.PlayPlayer.from_row_tuple
        q = self._make_join_query(cursor, types.PlayPlayer)
        cursor.execute(q)
        for row in cursor.fetchall():
            results.append(init(self._db, row))
    return results

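
# Minimal sketch of `as_play_players`, assuming an nfldb database; it
# prints per-play rushing yards for one (illustrative) player.
import nfldb

db = nfldb.connect()
q = nfldb.Query(db)
q.game(season_year=2012, season_type='Regular')
q.player(full_name='Adrian Peterson')
for pp in q.as_play_players():
    print(pp.rushing_yds)
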
def as_aggregate(self):
    """
    Executes the query and returns the results as aggregated
    `nfldb.PlayPlayer` objects. This method is meant to be a more
    restricted but much faster version of `nfldb.aggregate`. Namely,
    this method uses PostgreSQL to compute the aggregate statistics
    while `nfldb.aggregate` computes them in Python code.

    If any sorting criteria are specified, they are applied to the
    aggregate *player* values only.
    """
    class AggPP(types.PlayPlayer):
        @classmethod
        def _sql_field(cls, name, aliases=None):
            if name in cls._derived_combined:
                fields = cls._derived_combined[name]
                fields = [cls._sql_field(f, aliases=aliases) for f in fields]
                return ' + '.join(fields)
            elif name == 'points':
                fields = ['(%s * %d)' % (cls._sql_field(f, aliases=aliases), pval)
                          for f, pval in cls._point_values]
                return ' + '.join(fields)
            else:
                sql = super(AggPP, cls)._sql_field(name, aliases=aliases)
                return 'SUM(%s)' % sql

    joins = ''
    results = []
    with Tx(self._db) as cur:
        for ent in self._entities():
            if ent is types.PlayPlayer:
                continue
            joins += types.PlayPlayer._sql_join_to_all(ent)

        sum_fields = (list(types._player_categories.keys())
                      + AggPP._sql_tables['derived'])
        select_sum_fields = AggPP._sql_select_fields(sum_fields)
        where = self._sql_where(cur)
        having = self._sql_where(cur, aggregate=True)
        q = '''
            SELECT play_player.player_id AS play_player_player_id, {sum_fields}
            FROM play_player
            {joins}
            WHERE {where}
            GROUP BY play_player.player_id
            HAVING {having}
            {order}
        '''.format(
            sum_fields=', '.join(select_sum_fields),
            joins=joins,
            where=sql.ands(where),
            having=sql.ands(having),
            order=self._sorter(AggPP).sql(),
        )
        init = AggPP.from_row_dict
        cur.execute(q)
        for row in cur.fetchall():
            results.append(init(self._db, row))
    return results

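
# Sketch of `as_aggregate`, assuming an nfldb database; this mirrors
# the leaderboard-style query used elsewhere in this file, but summed
# in PostgreSQL rather than in Python.
import nfldb

db = nfldb.connect()
q = nfldb.Query(db)
q.game(season_year=2012, season_type='Regular')
for pp in q.sort('passing_yds').limit(5).as_aggregate():
    print(pp.player, pp.passing_yds)
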
def as_plays(self, fill=True):
    """
    Executes the query and returns the results as a list of
    `nfldb.Play` objects. When `fill` is `True` (the default), each
    play's `play_player` records are fetched with a second query and
    attached to the play; when `fill` is `False`, plays are returned
    without player statistics. Internally, plays are keyed by the id
    tuple `(gsis_id, drive_id, play_id)` so they can be paired with
    their `play_player` rows.
    """
    def make_pid(play):
        return (play.gsis_id, play.drive_id, play.play_id)

    self._assert_no_aggregate()

    # This is pretty terrifying.
    # Apparently PostgreSQL can change the order of rows returned
    # depending on the columns selected. So e.g., if you sort by `down`
    # and limit to 20 results, you might get a different 20 plays if
    # you change which columns you're selecting.
    # This is pertinent here because if we're filling plays with player
    # statistics, then we are assuming that this order never changes.
    # To make the ordering consistent, we add the play's primary key to
    # the existing sort criteria, which guarantees that the sort will
    # always be the same.
    # (We are careful not to override the user specified
    # `self._sort_exprs`.)
    #
    # That was a lie. We override the user settings if the user asks
    # to sort by `gsis_id`, `drive_id` or `play_id`.
    consistent = [(c, 'asc') for c in ['gsis_id', 'drive_id', 'play_id']]
    sorter = Sorter(types.Play, self._sort_exprs, self._limit)
    sorter.add_exprs(*consistent)

    if not fill:
        results = []
        with Tx(self._db, factory=tuple_cursor) as cursor:
            init = types.Play.from_row_tuple
            q = self._make_join_query(cursor, types.Play, sorter=sorter)
            cursor.execute(q)
            for row in cursor.fetchall():
                results.append(init(self._db, row))
        return results
    else:
        plays = OrderedDict()
        with Tx(self._db, factory=tuple_cursor) as cursor:
            init_play = types.Play.from_row_tuple
            q = self._make_join_query(cursor, types.Play, sorter=sorter)
            cursor.execute(q)
            for row in cursor.fetchall():
                play = init_play(self._db, row)
                play._play_players = []
                plays[make_pid(play)] = play

            # Run the above query *again* as a subquery.
            # This time, only fetch the primary key, and use that to
            # fetch all the `play_player` records in one swoop.
            aliases = {'play_player': 'pp'}
            ids = self._make_join_query(cursor, types.Play,
                                        only_prim=True, sorter=sorter)
            from_tables = types.PlayPlayer._sql_from(aliases=aliases)
            columns = types.PlayPlayer._sql_select_fields(
                fields=types.PlayPlayer.sql_fields(), aliases=aliases)
            q = '''
                SELECT {columns} {from_tables}
                WHERE (pp.gsis_id, pp.drive_id, pp.play_id) IN ({ids})
            '''.format(columns=', '.join(columns),
                       from_tables=from_tables,
                       ids=ids)
            init_pp = types.PlayPlayer.from_row_tuple
            cursor.execute(q)
            for row in cursor.fetchall():
                pp = init_pp(self._db, row)
                plays[make_pid(pp)]._play_players.append(pp)
        return plays.values()

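
# Sketch of `as_plays`, assuming an nfldb database; the gsis_id below
# is illustrative. `fill=False` skips the second query that attaches
# `play_player` statistics to each play.
import nfldb

db = nfldb.connect()
q = nfldb.Query(db)
q.game(gsis_id='2012090500')
for play in q.as_plays(fill=False):
    print(play)
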
def player_search(db, full_name, team=None, position=None,
                  limit=1, soundex=False):
    """
    Given a database handle and a player's full name, this function
    searches the database for players with full names *similar* to the
    one given. Similarity is measured by the
    [Levenshtein distance](http://en.wikipedia.org/wiki/Levenshtein_distance),
    or by [Soundex similarity](http://en.wikipedia.org/wiki/Soundex).

    Results are returned as tuples. The first element is a
    `nfldb.Player` object and the second element is the Levenshtein
    (or Soundex) distance.

    When `limit` is `1` (the default), then the return value is a
    tuple. When `limit` is more than `1`, then the return value is a
    list of tuples.

    If no results are found, then `(None, None)` is returned when
    `limit == 1` or the empty list is returned when `limit > 1`.

    If `team` is not `None`, then only players **currently** on the
    team provided will be returned. Any players with an unknown team
    are therefore omitted.

    If `position` is not `None`, then only players **currently** at
    that position will be returned. Any players with an unknown
    position are therefore omitted.

    In order to use this function, the PostgreSQL `levenshtein`
    function must be available. If running this function gives you an
    error about "No function matches the given name and argument
    types", then you can install the `levenshtein` function into your
    database by running the SQL query `CREATE EXTENSION fuzzystrmatch`
    as a superuser like `postgres`. For example:

        #!bash
        psql -U postgres -c 'CREATE EXTENSION fuzzystrmatch;' nfldb

    Note that enabling the `fuzzystrmatch` extension also provides
    functions for comparing using Soundex.
    """
    assert isinstance(limit, int) and limit >= 1

    if soundex:
        # Careful, soundex distances are sorted in reverse of
        # Levenshtein distances.
        # `difference` yields an integer in [0, 4], where 4 is an
        # exact match.
        fuzzy = 'difference(full_name, %s)'
        q = '''
            SELECT {columns} FROM player
            WHERE {where}
            ORDER BY distance DESC LIMIT {limit}
        '''
    else:
        fuzzy = 'levenshtein(full_name, %s)'
        q = '''
            SELECT {columns} FROM player
            WHERE {where}
            ORDER BY distance ASC LIMIT {limit}
        '''
    qteam, qposition = '', ''
    results = []
    with Tx(db) as cursor:
        if team is not None:
            qteam = cursor.mogrify('team = %s', (team,)).decode('utf-8')
        if position is not None:
            qposition = cursor.mogrify('position = %s',
                                       (position,)).decode('utf-8')
        fuzzy_filled = cursor.mogrify(fuzzy, (full_name,)).decode('utf-8')
        columns = types.Player._sql_select_fields(types.Player.sql_fields())
        columns.append('%s AS distance' % fuzzy_filled)
        q = q.format(columns=', '.join(columns),
                     where=sql.ands(fuzzy_filled + ' IS NOT NULL',
                                    qteam, qposition),
                     limit=limit)
        # `fuzzy_filled` is already interpolated into the query, so no
        # parameters are passed to `execute`.
        cursor.execute(q)
        for row in cursor.fetchall():
            r = (types.Player.from_row_dict(db, row), row['distance'])
            results.append(r)

    if limit == 1:
        if len(results) == 0:
            return (None, None)
        return results[0]
    return results

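
# Usage sketch for `player_search`, following the docstring above;
# assumes the PostgreSQL `fuzzystrmatch` extension is installed.
import nfldb

db = nfldb.connect()

# With the default limit of 1, a single (player, distance) tuple is
# returned (or (None, None) if there is no match).
player, dist = nfldb.player_search(db, 'Tom Brady')
print(player, dist)

# With limit > 1, a list of (player, distance) tuples is returned.
for player, dist in nfldb.player_search(db, 'tom brady', limit=5,
                                        soundex=True):
    print(dist, player)
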
import nfldb
from nfldb.db import Tx
from nfldb.update import *

thisDB = nfldb.connect()

update_game_schedules(thisDB)
update_games(thisDB, batch_size=16)

with Tx(thisDB) as cursor:
    update_season_state(cursor)
    update_players(cursor, 30)

queryObject = nfldb.Query(thisDB)
queryObject.game(season_year=2012, season_type='Regular')
for pp in queryObject.sort('rushing_yds').limit(5).as_aggregate():
    print(pp.player, pp.rushing_yds)

# simulatedGame = game_from_id(Tx(thisDB), "2009080950")
with Tx(thisDB) as cursor:
    gamesInProgress = games_in_progress(cursor)
    simulatedGame = game_from_id(cursor, "2009080950")
print(simulatedGame)