Example #1
0
    def get_results(fuzzy, q, name_type, name):
        """
        Runs a fuzzy player search against the column `name_type` and
        returns a list of `(player, distance)` tuples.

        `fuzzy` is a SQL distance expression template. It is first
        filled with the column name `name_type` via `%` formatting and
        then filled with the search term `name` via `cursor.mogrify`.
        `q` is a query template with `{columns}`, `{where}` and
        `{limit}` fields.

        Only rows whose `name_type` column (lower-cased) starts with
        `name` and whose team is not `'UNK'` are considered.

        `db`, `team`, `position` and `limit` are not parameters; they
        are read from the enclosing scope.
        """
        def fill(cursor, template, args):
            # `mogrify` hands back bytes on Python 3 (psycopg2). Mixing
            # bytes into the `str` formatting below would either raise
            # a TypeError or embed a literal "b'...'" in the SQL, so
            # normalize to the native string type. On Python 2 the
            # result already is a `str` and is left untouched.
            filled = cursor.mogrify(template, args)
            if not isinstance(filled, str):
                filled = filled.decode('utf-8')
            return filled

        fuzzy = fuzzy % name_type
        similar = 'LOWER(%s) LIKE %%s' % name_type
        qteam, qposition = '', ''
        results = []
        with Tx(db) as cursor:
            if team is not None:
                qteam = fill(cursor, 'team = %s', (team, ))
            if position is not None:
                qposition = fill(cursor, 'position = %s', (position, ))

            fuzzy_filled = fill(cursor, fuzzy, (name, ))
            # Prefix match: the trailing '%' is the LIKE wildcard.
            similar_filled = fill(cursor, similar, (name + '%', ))
            columns = types.Player._sql_select_fields(
                types.Player.sql_fields())
            columns.append('%s AS distance' % fuzzy_filled)
            q = q.format(columns=', '.join(columns),
                         where=sql.ands(similar_filled,
                                        fuzzy_filled + ' IS NOT NULL',
                                        'team != \'UNK\'', qteam, qposition),
                         limit=limit)
            # Every value is already quoted into `q`, so no parameters
            # are passed here.
            cursor.execute(q)

            for row in cursor.fetchall():
                results.append(
                    (types.Player.from_row_dict(db, row), row['distance']))
        return results
Example #2
0
    def as_drives(self):
        """
        Runs this query against the database and collects every
        returned row into a list of `nfldb.Drive` objects.
        """
        self._assert_no_aggregate()

        drives = []
        with Tx(self._db, factory=tuple_cursor) as cur:
            cur.execute(self._make_join_query(cur, types.Drive))
            # Rows come back as tuples, hence `from_row_tuple`.
            drives.extend(
                types.Drive.from_row_tuple(self._db, record)
                for record in cur.fetchall())
        return drives
Example #3
0
    def show_where(self, aggregate=False):
        """
        Builds and returns an approximate WHERE clause describing the
        criteria held by `self`. For performance reasons this clause
        is never the one actually executed; it is only meant to be a
        faithful description of the criteria.

        When `aggregate` is `True`, the criteria for the `play` and
        `play_player` tables are rendered with aggregate functions
        applied.
        """
        # Falls back to the empty string if the transaction block
        # finishes without producing a clause.
        clause = ''
        with Tx(self._db) as cur:
            clause = self._sql_where(cur, aggregate=aggregate)
        return clause
Example #4
0
    def as_players(self):
        """
        Runs this query against the database and collects every
        returned row into a list of `nfldb.Player` objects.
        """
        self._assert_no_aggregate()

        players = []
        with Tx(self._db) as cur:
            cur.execute(self._make_join_query(cur, types.Player))
            # Rows are consumed through `from_row_dict`.
            players.extend(
                types.Player.from_row_dict(self._db, record)
                for record in cur.fetchall())
        return players
Example #5
0
def current(db):
    """
    Returns a triple of `nfldb.Enums.season_phase`, season year and week
    corresponding to values that `nfldb` thinks are current.

    Note that this only queries the database. Only the `nfldb-update`
    script fetches the current state from NFL.com.

    The values retrieved may be `None` if the season is over or if they
    haven't been updated yet by the `nfldb-update` script. If the `meta`
    table contains no row at all, `(None, None, None)` is returned so
    that callers can always unpack the result as a triple.
    """
    with Tx(db, factory=tuple_cursor) as cursor:
        cursor.execute('SELECT season_type, season_year, week FROM meta')
        row = cursor.fetchone()
        # `fetchone` yields `None` when the result set is empty; fall
        # through to the all-`None` triple instead of returning `None`.
        if row is not None:
            return row
    return (None, None, None)
Example #6
0
    def as_play_players(self):
        """
        Runs this query against the database and collects every
        returned row into a list of `nfldb.PlayPlayer` objects.

        This gives direct access to player statistics without going
        through play data. The results are typically handed to
        `nfldb.aggregate`. Prefer `nfldb.Query.aggregate` together
        with `nfldb.Query.as_aggregate` where possible: summing
        statistics in the database is significantly faster than doing
        it in Python.
        """
        self._assert_no_aggregate()

        play_players = []
        with Tx(self._db, factory=tuple_cursor) as cur:
            # Look the constructor up once, before building the query.
            build = types.PlayPlayer.from_row_tuple
            cur.execute(self._make_join_query(cur, types.PlayPlayer))
            play_players.extend(
                build(self._db, record) for record in cur.fetchall())
        return play_players
Example #7
0
    def as_aggregate(self):
        """
        Runs the query and returns a list of aggregated
        `nfldb.PlayPlayer` objects, one per `player_id`. It is a more
        restricted but much faster alternative to `nfldb.aggregate`:
        the sums are computed by PostgreSQL rather than in Python.

        Any sorting criteria are applied to the aggregated *player*
        values only.
        """
        class AggPP(types.PlayPlayer):
            @classmethod
            def _sql_field(cls, name, aliases=None):
                # A combined field is the sum of its component fields,
                # each rendered through this same method.
                if name in cls._derived_combined:
                    return ' + '.join(
                        cls._sql_field(part, aliases=aliases)
                        for part in cls._derived_combined[name])

                # `points` is a weighted sum: every field listed in
                # `_point_values` multiplied by its integer weight.
                if name == 'points':
                    return ' + '.join(
                        '(%s * %d)'
                        % (cls._sql_field(stat, aliases=aliases), weight)
                        for stat, weight in cls._point_values)

                # Everything else is the parent's expression wrapped
                # in SUM().
                plain = super(AggPP, cls)._sql_field(name, aliases=aliases)
                return 'SUM(%s)' % plain

        aggregated = []
        with Tx(self._db) as cur:
            # Join `play_player` to every other entity in the query.
            joins = ''.join(
                types.PlayPlayer._sql_join_to_all(entity)
                for entity in self._entities()
                if entity is not types.PlayPlayer)

            category_fields = list(types._player_categories.keys())
            sum_fields = category_fields + AggPP._sql_tables['derived']
            select_sum_fields = AggPP._sql_select_fields(sum_fields)
            # Plain criteria go in WHERE, aggregate criteria in HAVING.
            where = self._sql_where(cur)
            having = self._sql_where(cur, aggregate=True)
            template = '''
                SELECT
                    play_player.player_id AS play_player_player_id, {sum_fields}
                FROM play_player
                {joins}
                WHERE {where}
                GROUP BY play_player.player_id
                HAVING {having}
                {order}
            '''
            query = template.format(
                sum_fields=', '.join(select_sum_fields),
                joins=joins,
                where=sql.ands(where),
                having=sql.ands(having),
                order=self._sorter(AggPP).sql(),
            )

            build = AggPP.from_row_dict
            cur.execute(query)
            aggregated.extend(
                build(self._db, record) for record in cur.fetchall())
        return aggregated
Example #8
0
    def as_plays(self, fill=True):
        """
        Executes the query and returns the results as a list of
        `nfldb.Play` objects, in the order produced by the query's
        sort criteria (with the play primary key appended to make the
        ordering deterministic).

        If `fill` is `True` (the default), a second query fetches the
        `play_player` rows belonging to the selected plays and each
        play's `_play_players` list is populated with the resulting
        `nfldb.PlayPlayer` objects.

        If `fill` is `False`, only the first query is run and
        `_play_players` is not set by this method.
        """
        def make_pid(play):
            # Primary key of a play; `PlayPlayer` objects expose the
            # same three attributes, so this also maps them to a play.
            return (play.gsis_id, play.drive_id, play.play_id)

        self._assert_no_aggregate()

        # This is pretty terrifying.
        # Apparently PostgreSQL can change the order of rows returned
        # depending on the columns selected. So e.g., if you sort by `down`
        # and limit to 20 results, you might get a different 20 plays if
        # you change which columns you're selecting.
        # This is pertinent here because if we're filling plays with player
        # statistics, then we are assuming that this order never changes.
        # To make the ordering consistent, we add the play's primary key to
        # the existing sort criteria, which guarantees that the sort will
        # always be the same.
        # (We are careful not to override the user specified
        # `self._sort_exprs`.)
        #
        # That was a lie. We override the user settings if the user asks
        # to sort by `gsis_id`, `drive_id` or `play_id`.
        consistent = [(c, 'asc') for c in ['gsis_id', 'drive_id', 'play_id']]
        sorter = Sorter(types.Play, self._sort_exprs, self._limit)
        sorter.add_exprs(*consistent)

        if not fill:
            results = []
            with Tx(self._db, factory=tuple_cursor) as cursor:
                init = types.Play.from_row_tuple
                q = self._make_join_query(cursor, types.Play, sorter=sorter)
                cursor.execute(q)
                for row in cursor.fetchall():
                    results.append(init(self._db, row))
            return results

        # Keyed by primary key so `play_player` rows can be attached to
        # their play; ordered so the query's sort order is preserved.
        plays = OrderedDict()
        with Tx(self._db, factory=tuple_cursor) as cursor:
            init_play = types.Play.from_row_tuple
            q = self._make_join_query(cursor, types.Play, sorter=sorter)
            cursor.execute(q)
            for row in cursor.fetchall():
                play = init_play(self._db, row)
                play._play_players = []
                plays[make_pid(play)] = play

            # Run the above query *again* as a subquery.
            # This time, only fetch the primary key, and use that to
            # fetch all the `play_player` records in one swoop.
            aliases = {'play_player': 'pp'}
            ids = self._make_join_query(cursor,
                                        types.Play,
                                        only_prim=True,
                                        sorter=sorter)
            from_tables = types.PlayPlayer._sql_from(aliases=aliases)
            columns = types.PlayPlayer._sql_select_fields(
                fields=types.PlayPlayer.sql_fields(), aliases=aliases)
            q = '''
                    SELECT {columns} {from_tables}
                    WHERE (pp.gsis_id, pp.drive_id, pp.play_id) IN ({ids})
                '''.format(columns=', '.join(columns),
                           from_tables=from_tables,
                           ids=ids)

            init_pp = types.PlayPlayer.from_row_tuple
            cursor.execute(q)
            for row in cursor.fetchall():
                pp = init_pp(self._db, row)
                plays[make_pid(pp)]._play_players.append(pp)
        # Materialize the values so both branches return a real list
        # (on Python 3 `dict.values()` is only a view).
        return list(plays.values())
Example #9
0
def player_search(db,
                  full_name,
                  team=None,
                  position=None,
                  limit=1,
                  soundex=False):
    """
    Given a database handle and a player's full name, this function
    searches the database for players with full names *similar* to the
    one given. Similarity is measured by the
    [Levenshtein distance](http://en.wikipedia.org/wiki/Levenshtein_distance),
    or by [Soundex similarity](http://en.wikipedia.org/wiki/Soundex)
    when `soundex` is `True`.

    Results are returned as tuples. The first element is a
    `nfldb.Player` object and the second element is the Levenshtein
    (or Soundex) distance. When `limit` is `1` (the default), then the
    return value is a tuple.  When `limit` is more than `1`, then the
    return value is a list of tuples.

    If no results are found, then `(None, None)` is returned when
    `limit == 1` or the empty list is returned when `limit > 1`.

    If `team` is not `None`, then only players **currently** on the
    team provided will be returned. Any players with an unknown team
    are therefore omitted.

    If `position` is not `None`, then only players **currently**
    at that position will be returned. Any players with an unknown
    position are therefore omitted.

    In order to use this function, the PostgreSQL `levenshtein`
    function must be available. If running this function gives
    you an error about "No function matches the given name and
    argument types", then you can install the `levenshtein` function
    into your database by running the SQL query `CREATE EXTENSION
    fuzzystrmatch` as a superuser like `postgres`. For example:

        #!bash
        psql -U postgres -c 'CREATE EXTENSION fuzzystrmatch;' nfldb

    Note that enabling the `fuzzystrmatch` extension also provides
    functions for comparing using Soundex.
    """
    assert isinstance(limit, int) and limit >= 1

    if soundex:
        # Careful, soundex distances are sorted in reverse of Levenshtein
        # distances.
        # Difference yields an integer in [0, 4].
        # A 4 is an exact match.
        fuzzy = 'difference(full_name, %s)'
        q = '''
            SELECT {columns}
            FROM player
            WHERE {where}
            ORDER BY distance DESC LIMIT {limit}
        '''
    else:
        fuzzy = 'levenshtein(full_name, %s)'
        q = '''
            SELECT {columns}
            FROM player
            WHERE {where}
            ORDER BY distance ASC LIMIT {limit}
        '''
    qteam, qposition = '', ''
    results = []
    with Tx(db) as cursor:
        # `mogrify` returns bytes, so each filled fragment is decoded
        # before being spliced into the (text) query template.
        if team is not None:
            qteam = cursor.mogrify('team = %s', (team, )).decode('utf-8')
        if position is not None:
            qposition = cursor.mogrify('position = %s',
                                       (position, )).decode('utf-8')

        fuzzy_filled = cursor.mogrify(fuzzy, (full_name, )).decode('utf-8')
        columns = types.Player._sql_select_fields(types.Player.sql_fields())
        columns.append('%s AS distance' % fuzzy_filled)
        q = q.format(columns=', '.join(columns),
                     where=sql.ands(fuzzy_filled + ' IS NOT NULL', qteam,
                                    qposition),
                     limit=limit)
        # Every value has already been quoted into `q` above, so the
        # query is executed without parameters. Passing parameters a
        # second time would make a literal `%` inside a name, team or
        # position be interpreted as a placeholder.
        cursor.execute(q)

        for row in cursor.fetchall():
            r = (types.Player.from_row_dict(db, row), row['distance'])
            results.append(r)
    if limit == 1:
        # Single-result mode: unwrap the list, or signal "not found".
        return results[0] if results else (None, None)
    return results
Example #10
0
import nfldb
from nfldb.db import Tx
from nfldb.update import *

# Demo script: refresh the database, then run two sample look-ups.
# The update helpers below are not defined in this file; presumably they
# arrive via the `from nfldb.update import *` star import -- TODO confirm.

# Database handle shared by every call below.
thisDB = nfldb.connect()
update_game_schedules(thisDB)
update_games(thisDB, batch_size=16)
# Season state and players are updated through one shared `Tx` cursor.
with Tx(thisDB) as cursor:
    update_season_state(cursor)
    # NOTE(review): the meaning of the positional `30` is not visible
    # here -- confirm against `update_players`.
    update_players(cursor, 30)
queryObject = nfldb.Query(thisDB)

# Add game criteria for the 2012 regular season, then sort by
# `rushing_yds`, limit to 5 and print each aggregated row's player and
# rushing yards.
queryObject.game(season_year=2012, season_type='Regular')
for pp in queryObject.sort('rushing_yds').limit(5).as_aggregate():
    print(pp.player, pp.rushing_yds)

# simulatedGame = game_from_id(Tx(thisDB), "2009080950")
with Tx(thisDB) as cursor:
    # NOTE(review): `gamesInProgress` is assigned but never read in the
    # visible part of this script.
    gamesInProgress = games_in_progress(cursor)
    # Look up a single game by its id string and print it.
    simulatedGame = game_from_id(cursor, "2009080950")
    print(simulatedGame)