def as_aggregate(self):
    """
    Executes the query and returns the results as aggregated
    `nfldb.PlayPlayer` objects. This method is meant to be a more
    restricted but much faster version of `nfldb.aggregate`.
    Namely, this method uses PostgreSQL to compute the aggregate
    statistics while `nfldb.aggregate` computes them in Python
    code.

    If any sorting criteria is specified, it is applied to the
    aggregate *player* values only.

    The return value is a list with one `nfldb.PlayPlayer` object per
    `player_id` group. Each object carries only the summed statistics
    that are non-zero.
    """
    joins = ''
    results = []
    with Tx(self._db) as cur:
        # Join every entity reported by `self._entities()` (other than
        # `PlayPlayer` itself) onto `play_player`.
        for ent in self._entities():
            if ent is types.PlayPlayer:
                continue
            joins += types.PlayPlayer._sql_join_to_all(ent)

        # `dict.keys()` is a view on Python 3 and cannot be concatenated
        # with a list, so it is materialized first.
        sum_fields = list(types._player_categories.keys()) \
            + types.PlayPlayer._sql_tables['derived']
        select_sum_fields = types.PlayPlayer._sql_select_fields(
            sum_fields, wrap=lambda f: 'SUM(%s)' % f)
        where = self._sql_where(cur)
        having = self._sql_where(cur, aggregate=True)
        q = '''
            SELECT play_player.player_id, {sum_fields}
            FROM play_player
            {joins}
            WHERE {where}
            GROUP BY play_player.player_id
            HAVING {having}
            {order}
        '''.format(
            sum_fields=', '.join(select_sum_fields),
            joins=joins,
            where=sql.ands(where),
            having=sql.ands(having),
            order=self._sorter.sql(types.PlayPlayer,
                                   aliases={'play_player': ''}),
        )

        cur.execute(q)
        for row in cur.fetchall():
            # Only statistics with a non-zero sum are recorded.
            stats = {}
            for f in sum_fields:
                v = row[f]
                if v != 0:
                    stats[f] = v
            pp = types.PlayPlayer(self._db, None, None, None,
                                  row['player_id'], None, stats)
            results.append(pp)
    return results
def get_results(fuzzy, q, name_type, name):
    """
    Runs one fuzzy player search and returns a list of
    `(player, distance)` tuples.

    `fuzzy` is a SQL expression template. The column name `name_type`
    is substituted into it first, and the resulting expression is then
    filled with `name` as a query parameter. Rows are additionally
    restricted to those where `LOWER(name_type)` starts with `name`.

    `q` is a query template with `{columns}`, `{where}` and `{limit}`
    slots.

    The names `db`, `team`, `position` and `limit` are taken from the
    enclosing scope.
    """
    fuzzy = fuzzy % name_type
    similar = 'LOWER(%s) LIKE %%s' % name_type
    qteam, qposition = '', ''
    results = []
    with Tx(db) as cursor:
        def fill(template, params):
            # `mogrify` may hand back bytes (as psycopg2 does on
            # Python 3). The fragments below are combined with text, so
            # anything that is not already `str` is decoded.
            filled = cursor.mogrify(template, params)
            if not isinstance(filled, str):
                filled = filled.decode('utf-8')
            return filled

        if team is not None:
            qteam = fill('team = %s', (team, ))
        if position is not None:
            qposition = fill('position = %s', (position, ))

        fuzzy_filled = fill(fuzzy, (name, ))
        similar_filled = fill(similar, (name + '%', ))
        columns = types.Player._sql_select_fields(
            types.Player.sql_fields())
        columns.append('%s AS distance' % fuzzy_filled)
        q = q.format(columns=', '.join(columns),
                     where=sql.ands(similar_filled,
                                    fuzzy_filled + ' IS NOT NULL',
                                    'team != \'UNK\'',
                                    qteam, qposition),
                     limit=limit)
        # The query is already fully interpolated, so no parameters are
        # passed here. That also keeps the `%` of the LIKE pattern
        # literal.
        cursor.execute(q)

        for row in cursor.fetchall():
            results.append(
                (types.Player.from_row_dict(db, row), row['distance']))
    return results
def get_results(fuzzy, q, name_type, name):
    """
    Runs one fuzzy player search and returns a list of
    `(player, distance)` tuples.

    `fuzzy` is a SQL expression template. The column name `name_type`
    is substituted into it first, and the resulting expression is then
    filled with `name` as a query parameter. Rows are additionally
    restricted to those where `LOWER(name_type)` starts with `name`.

    `q` is a query template with `{columns}`, `{where}` and `{limit}`
    slots.

    The names `db`, `team`, `position` and `limit` are taken from the
    enclosing scope.
    """
    fuzzy = fuzzy % name_type
    similar = "LOWER(%s) LIKE %%s" % name_type
    qteam, qposition = "", ""
    results = []
    with Tx(db) as cursor:

        def fill(template, params):
            # `mogrify` may hand back bytes (as psycopg2 does on
            # Python 3). The fragments below are combined with text, so
            # anything that is not already `str` is decoded.
            filled = cursor.mogrify(template, params)
            if not isinstance(filled, str):
                filled = filled.decode("utf-8")
            return filled

        if team is not None:
            qteam = fill("team = %s", (team,))
        if position is not None:
            qposition = fill("position = %s", (position,))

        fuzzy_filled = fill(fuzzy, (name,))
        similar_filled = fill(similar, (name + "%",))
        columns = types.Player._sql_select_fields(types.Player.sql_fields())
        columns.append("%s AS distance" % fuzzy_filled)
        q = q.format(
            columns=", ".join(columns),
            where=sql.ands(
                similar_filled,
                fuzzy_filled + " IS NOT NULL",
                "team != 'UNK'",
                qteam,
                qposition,
            ),
            limit=limit,
        )
        # The query is already fully interpolated, so no parameters are
        # passed here. That also keeps the `%` of the LIKE pattern
        # literal.
        cursor.execute(q)

        for row in cursor.fetchall():
            results.append(
                (types.Player.from_row_dict(db, row), row["distance"])
            )
    return results
def _make_join_query(self, cursor, entity, only_prim=False, sorter=None,
                     ent_fillers=None):
    """
    Builds and returns the SQL `SELECT` statement (as a string) that
    selects `entity` joined with every other entity needed by this
    query.

    `cursor` is passed on to `self._sql_where` to build the `WHERE`
    clause.

    If `only_prim` is `True`, only the primary key columns of `entity`
    are selected. Otherwise all of its fields are selected, preceded
    by all fields of each entity in `ent_fillers`.

    `sorter` provides the sorting clause and may contribute additional
    entities to join. It defaults to `self._sorter(entity)`.

    `ent_fillers` is an optional iterable of extra entities to join.
    """
    if sorter is None:
        sorter = self._sorter(entity)

    entities = self._entities()
    entities.update(sorter.entities)
    for ent in ent_fillers or []:
        entities.add(ent)
    entities.discard(entity)

    # If we're joining the `player` table with any other table except
    # `play_player`, then we MUST add `play_player` as a joining table.
    # It is the only way to bridge players and games/drives/plays.
    #
    # TODO: This could probably be automatically deduced in general case,
    # but we only have one case so just check for it manually.
    if (entity is not types.PlayPlayer and types.Player in entities) \
            or (entity is types.Player and len(entities) > 0):
        entities.add(types.PlayPlayer)

    if only_prim:
        columns = entity._sql_tables['primary']
        fields = entity._sql_select_fields(fields=columns)
    else:
        fields = []
        for ent in ent_fillers or []:
            fields += ent._sql_select_fields(fields=ent.sql_fields())
        fields += entity._sql_select_fields(fields=entity.sql_fields())
    args = {
        'columns': ', '.join(fields),
        'from': entity._sql_from(),
        'joins': entity._sql_join_all(entities),
        'where': sql.ands(self._sql_where(cursor)),
        'groupby': '',
        'sortby': sorter.sql(),
    }

    # We need a GROUP BY if we're joining with a table that has more
    # specific information. e.g., selecting from game with criteria
    # for plays.
    #
    # A distance of `None` is skipped rather than compared: `None > 0`
    # raises a `TypeError` on Python 3.
    distances = (entity._sql_relation_distance(to) for to in entities)
    if any(d is not None and d > 0 for d in distances):
        fields = []
        for table, _ in entity._sql_tables['tables']:
            fields += entity._sql_primary_key(table)
        args['groupby'] = 'GROUP BY ' + ', '.join(fields)

    q = """
        SELECT {columns} {from} {joins}
        WHERE {where}
        {groupby}
        {sortby}
    """.format(**args)
    return q
def _make_join_query(self, cursor, entity, only_prim=False, sorter=None,
                     ent_fillers=None):
    """
    Builds and returns the SQL `SELECT` statement (as a string) that
    selects `entity` joined with every other entity needed by this
    query.

    `cursor` is passed on to `self._sql_where` to build the `WHERE`
    clause.

    If `only_prim` is `True`, only the primary key columns of `entity`
    are selected. Otherwise all of its fields are selected, preceded
    by all fields of each entity in `ent_fillers`.

    `sorter` provides the sorting clause and may contribute additional
    entities to join. It defaults to `self._sorter(entity)`.

    `ent_fillers` is an optional iterable of extra entities to join.
    """
    if sorter is None:
        sorter = self._sorter(entity)

    entities = self._entities()
    entities.update(sorter.entities)
    for ent in ent_fillers or []:
        entities.add(ent)
    entities.discard(entity)

    # If we're joining the `player` table with any other table except
    # `play_player`, then we MUST add `play_player` as a joining table.
    # It is the only way to bridge players and games/drives/plays.
    #
    # TODO: This could probably be automatically deduced in general case,
    # but we only have one case so just check for it manually.
    if (entity is not types.PlayPlayer and types.Player in entities) \
            or (entity is types.Player and len(entities) > 0):
        entities.add(types.PlayPlayer)

    if only_prim:
        columns = entity._sql_tables['primary']
        fields = entity._sql_select_fields(fields=columns)
    else:
        fields = []
        for ent in ent_fillers or []:
            fields += ent._sql_select_fields(fields=ent.sql_fields())
        fields += entity._sql_select_fields(fields=entity.sql_fields())
    args = {
        'columns': ', '.join(fields),
        'from': entity._sql_from(),
        'joins': entity._sql_join_all(entities),
        'where': sql.ands(self._sql_where(cursor)),
        'groupby': '',
        'sortby': sorter.sql(),
    }

    # We need a GROUP BY if we're joining with a table that has more
    # specific information. e.g., selecting from game with criteria
    # for plays.
    #
    # The relation distance is computed once per entity. A distance of
    # `None` is ignored.
    distances = (entity._sql_relation_distance(to) for to in entities)
    if any(d is not None and d > 0 for d in distances):
        fields = []
        for table, _ in entity._sql_tables['tables']:
            fields += entity._sql_primary_key(table)
        args['groupby'] = 'GROUP BY ' + ', '.join(fields)

    q = '''
        SELECT {columns} {from} {joins}
        WHERE {where}
        {groupby}
        {sortby}
    '''.format(**args)
    return q
def as_aggregate(self):
    """
    Runs the query and returns a list of aggregated `nfldb.PlayPlayer`
    objects, one per player.

    This is a more restricted but much faster alternative to
    `nfldb.aggregate`: the statistics are summed by PostgreSQL, whereas
    `nfldb.aggregate` sums them in Python code.

    If any sorting criteria is specified, it is applied to the
    aggregate *player* values only.
    """
    class AggPP (types.PlayPlayer):
        # Same entity as `PlayPlayer`, except that plain fields are
        # rendered as `SUM(...)` expressions.
        @classmethod
        def _sql_field(cls, name, aliases=None):
            if name in cls._derived_combined:
                # A combined field is the sum of its component fields.
                parts = [cls._sql_field(part, aliases=aliases)
                         for part in cls._derived_combined[name]]
                return ' + '.join(parts)
            if name == 'points':
                # Each scoring field is weighted by its point value.
                terms = []
                for field, value in cls._point_values:
                    expr = cls._sql_field(field, aliases=aliases)
                    terms.append('(%s * %d)' % (expr, value))
                return ' + '.join(terms)
            plain = super(AggPP, cls)._sql_field(name, aliases=aliases)
            return 'SUM(%s)' % plain

    results = []
    with Tx(self._db) as cur:
        # Every entity other than `PlayPlayer` is joined onto
        # `play_player`.
        joins = ''.join(
            types.PlayPlayer._sql_join_to_all(ent)
            for ent in self._entities()
            if ent is not types.PlayPlayer
        )

        sum_fields = list(types._player_categories.keys())
        sum_fields = sum_fields + AggPP._sql_tables['derived']
        select_sum_fields = AggPP._sql_select_fields(sum_fields)

        where = self._sql_where(cur)
        having = self._sql_where(cur, aggregate=True)
        template = '''
            SELECT
                play_player.player_id AS play_player_player_id, {sum_fields}
            FROM play_player
            {joins}
            WHERE {where}
            GROUP BY play_player.player_id
            HAVING {having}
            {order}
        '''
        q = template.format(
            sum_fields=', '.join(select_sum_fields),
            joins=joins,
            where=sql.ands(where),
            having=sql.ands(having),
            order=self._sorter(AggPP).sql(),
        )

        init = AggPP.from_row_dict
        cur.execute(q)
        results.extend(init(self._db, row) for row in cur.fetchall())
    return results
def player_search(db, full_name, team=None, position=None,
                  limit=1, soundex=False):
    """
    Given a database handle and a player's full name, this function
    searches the database for players with full names *similar* to the
    one given. Similarity is measured by the
    [Levenshtein distance](http://en.wikipedia.org/wiki/Levenshtein_distance),
    or by [Soundex similarity](http://en.wikipedia.org/wiki/Soundex).

    Results are returned as tuples. The first element is a
    `nfldb.Player` object and the second element is the Levenshtein
    (or Soundex) distance. When `limit` is `1` (the default), then the
    return value is a tuple. When `limit` is more than `1`, then the
    return value is a list of tuples.

    If no results are found, then `(None, None)` is returned when
    `limit == 1` or the empty list is returned when `limit > 1`.

    If `team` is not `None`, then only players **currently** on the
    team provided will be returned. Any players with an unknown team
    are therefore omitted.

    If `position` is not `None`, then only players **currently**
    at that position will be returned. Any players with an unknown
    position are therefore omitted.

    In order to use this function, the PostgreSQL `levenshtein`
    function must be available. If running this function gives
    you an error about "No function matches the given name and
    argument types", then you can install the `levenshtein` function
    into your database by running the SQL query `CREATE EXTENSION
    fuzzystrmatch` as a superuser like `postgres`. For example:

        #!bash
        psql -U postgres -c 'CREATE EXTENSION fuzzystrmatch;' nfldb

    Note that enabling the `fuzzystrmatch` extension also provides
    functions for comparing using Soundex.
    """
    assert isinstance(limit, int) and limit >= 1

    if soundex:
        # Careful, soundex distances are sorted in reverse of Levenshtein
        # distances.
        # Difference yields an integer in [0, 4].
        # A 4 is an exact match.
        fuzzy = 'difference(full_name, %s)'
        q = '''
            SELECT {columns}
            FROM player
            WHERE {where}
            ORDER BY distance DESC LIMIT {limit}
        '''
    else:
        fuzzy = 'levenshtein(full_name, %s)'
        q = '''
            SELECT {columns}
            FROM player
            WHERE {where}
            ORDER BY distance ASC LIMIT {limit}
        '''

    qteam, qposition = '', ''
    results = []
    with Tx(db) as cursor:
        if team is not None:
            qteam = cursor.mogrify('team = %s', (team,)).decode('utf-8')
        if position is not None:
            qposition = cursor.mogrify(
                'position = %s', (position,)).decode('utf-8')

        fuzzy_filled = cursor.mogrify(fuzzy, (full_name,)).decode('utf-8')
        columns = types.Player._sql_select_fields(types.Player.sql_fields())
        columns.append('%s AS distance' % fuzzy_filled)
        q = q.format(
            columns=', '.join(columns),
            where=sql.ands(fuzzy_filled + ' IS NOT NULL', qteam, qposition),
            limit=limit)

        # Every value has already been interpolated with `mogrify`, so
        # the query is executed without parameters. Passing parameters
        # would make the driver scan the finished query for `%`
        # placeholders again, which breaks on any value containing `%`.
        cursor.execute(q)

        for row in cursor.fetchall():
            r = (types.Player.from_row_dict(db, row), row['distance'])
            results.append(r)

    if limit == 1:
        if not results:
            return (None, None)
        return results[0]
    return results
def as_aggregate(self):
    """
    Runs the query and returns a list of aggregated `nfldb.PlayPlayer`
    objects, one per player.

    This is a more restricted but much faster alternative to
    `nfldb.aggregate`: the statistics are summed by PostgreSQL, whereas
    `nfldb.aggregate` sums them in Python code.

    If any sorting criteria is specified, it is applied to the
    aggregate *player* values only.
    """
    class AggPP(types.PlayPlayer):
        # Same entity as `PlayPlayer`, except that plain fields are
        # rendered as `SUM(...)` expressions.
        @classmethod
        def _sql_field(cls, name, aliases=None):
            if name in cls._derived_combined:
                # A combined field is the sum of its component fields.
                components = cls._derived_combined[name]
                return ' + '.join(
                    [cls._sql_field(c, aliases=aliases) for c in components]
                )
            if name == 'points':
                # Each scoring field is weighted by its point value.
                weighted = []
                for field, pval in cls._point_values:
                    expr = cls._sql_field(field, aliases=aliases)
                    weighted.append('(%s * %d)' % (expr, pval))
                return ' + '.join(weighted)
            base = super(AggPP, cls)._sql_field(name, aliases=aliases)
            return 'SUM(%s)' % base

    results = []
    with Tx(self._db) as cur:
        # Every entity other than `PlayPlayer` is joined onto
        # `play_player`.
        join_parts = []
        for ent in self._entities():
            if ent is not types.PlayPlayer:
                join_parts.append(types.PlayPlayer._sql_join_to_all(ent))
        joins = ''.join(join_parts)

        sum_fields = list(types._player_categories.keys())
        sum_fields = sum_fields + AggPP._sql_tables['derived']
        select_sum_fields = AggPP._sql_select_fields(sum_fields)

        where = self._sql_where(cur)
        having = self._sql_where(cur, aggregate=True)
        template = '''
            SELECT
                play_player.player_id AS play_player_player_id, {sum_fields}
            FROM play_player
            {joins}
            WHERE {where}
            GROUP BY play_player.player_id
            HAVING {having}
            {order}
        '''
        q = template.format(
            sum_fields=', '.join(select_sum_fields),
            joins=joins,
            where=sql.ands(where),
            having=sql.ands(having),
            order=self._sorter(AggPP).sql(),
        )

        init = AggPP.from_row_dict
        cur.execute(q)
        for row in cur.fetchall():
            results.append(init(self._db, row))
    return results
def player_search(db, full_name, team=None, position=None,
                  limit=1, soundex=False):
    """
    Given a database handle and a player's full name, this function
    searches the database for players with full names *similar* to the
    one given. Similarity is measured by the
    [Levenshtein distance](http://en.wikipedia.org/wiki/Levenshtein_distance),
    or by [Soundex similarity](http://en.wikipedia.org/wiki/Soundex).

    Results are returned as tuples. The first element is a
    `nfldb.Player` object and the second element is the Levenshtein
    (or Soundex) distance. When `limit` is `1` (the default), then the
    return value is a tuple. When `limit` is more than `1`, then the
    return value is a list of tuples.

    If no results are found, then `(None, None)` is returned when
    `limit == 1` or the empty list is returned when `limit > 1`.

    If `team` is not `None`, then only players **currently** on the
    team provided will be returned. Any players with an unknown team
    are therefore omitted.

    If `position` is not `None`, then only players **currently**
    at that position will be returned. Any players with an unknown
    position are therefore omitted.

    In order to use this function, the PostgreSQL `levenshtein`
    function must be available. If running this function gives
    you an error about "No function matches the given name and
    argument types", then you can install the `levenshtein` function
    into your database by running the SQL query `CREATE EXTENSION
    fuzzystrmatch` as a superuser like `postgres`. For example:

        #!bash
        psql -U postgres -c 'CREATE EXTENSION fuzzystrmatch;' nfldb

    Note that enabling the `fuzzystrmatch` extension also provides
    functions for comparing using Soundex.
    """
    assert isinstance(limit, int) and limit >= 1

    if soundex:
        # Careful, soundex distances are sorted in reverse of Levenshtein
        # distances.
        # Difference yields an integer in [0, 4].
        # A 4 is an exact match.
        fuzzy = 'difference(full_name, %s)'
        q = '''
            SELECT {columns}
            FROM player
            WHERE {where}
            ORDER BY distance DESC LIMIT {limit}
        '''
    else:
        fuzzy = 'levenshtein(full_name, %s)'
        q = '''
            SELECT {columns}
            FROM player
            WHERE {where}
            ORDER BY distance ASC LIMIT {limit}
        '''

    qteam, qposition = '', ''
    results = []
    with Tx(db) as cursor:
        if team is not None:
            qteam = cursor.mogrify('team = %s', (team, )).decode('utf-8')
        if position is not None:
            qposition = cursor.mogrify('position = %s',
                                       (position, )).decode('utf-8')

        fuzzy_filled = cursor.mogrify(fuzzy, (full_name, )).decode('utf-8')
        columns = types.Player._sql_select_fields(types.Player.sql_fields())
        columns.append('%s AS distance' % fuzzy_filled)
        q = q.format(columns=', '.join(columns),
                     where=sql.ands(fuzzy_filled + ' IS NOT NULL',
                                    qteam, qposition),
                     limit=limit)

        # Every value has already been interpolated with `mogrify`, so
        # the query is executed without parameters. Passing parameters
        # would make the driver scan the finished query for `%`
        # placeholders again, which breaks on any value containing `%`.
        cursor.execute(q)

        for row in cursor.fetchall():
            r = (types.Player.from_row_dict(db, row), row['distance'])
            results.append(r)

    if limit == 1:
        if not results:
            return (None, None)
        return results[0]
    return results