Exemple #1
0
    def as_aggregate(self):
        """
        Executes the query and returns the results as aggregated
        `nfldb.PlayPlayer` objects. This method is meant to be a more
        restricted but much faster version of `nfldb.aggregate`.
        Namely, this method uses PostgreSQL to compute the aggregate
        statistics while `nfldb.aggregate` computes them in Python
        code.

        If any sorting criteria is specified, it is applied to the
        aggregate *player* values only.

        The return value is a list containing one `nfldb.PlayPlayer`
        per row of the result set, where rows are grouped by
        `play_player.player_id`. Only summed statistics that are not
        equal to `0` are passed to each `PlayPlayer` constructor.
        """
        results = []

        with Tx(self._db) as cur:
            # Join in every entity reported by `self._entities()` other
            # than `PlayPlayer` itself, which is already the FROM table.
            joins = ''.join(
                types.PlayPlayer._sql_join_to_all(ent)
                for ent in self._entities()
                if ent is not types.PlayPlayer)

            # `dict.keys()` is a view on Python 3 and cannot be
            # concatenated with a list, so materialize it first. On
            # Python 2 this is just a copy of the key list.
            sum_fields = list(types._player_categories.keys()) \
                + types.PlayPlayer._sql_tables['derived']
            select_sum_fields = types.PlayPlayer._sql_select_fields(
                sum_fields, wrap=lambda f: 'SUM(%s)' % f)
            where = self._sql_where(cur)
            having = self._sql_where(cur, aggregate=True)
            q = '''
                SELECT play_player.player_id, {sum_fields}
                FROM play_player
                {joins}
                WHERE {where}
                GROUP BY play_player.player_id
                HAVING {having}
                {order}
            '''.format(
                sum_fields=', '.join(select_sum_fields),
                joins=joins,
                where=sql.ands(where),
                having=sql.ands(having),
                order=self._sorter.sql(types.PlayPlayer,
                                       aliases={'play_player': ''}),
            )
            cur.execute(q)

            for row in cur.fetchall():
                # Keep only the statistics that are not equal to zero.
                stats = {}
                for f in sum_fields:
                    v = row[f]
                    if v != 0:
                        stats[f] = v
                pp = types.PlayPlayer(self._db, None, None, None,
                                      row['player_id'], None, stats)
                results.append(pp)
        return results
Exemple #2
0
    def get_results(fuzzy, q, name_type, name):
        """
        Runs a single fuzzy player search and returns a list of
        `(player, distance)` tuples, one per row fetched.

        `fuzzy` is a SQL expression template. It is first %-formatted
        with `name_type` and the result is then filled with `name` via
        `cursor.mogrify`. The filled expression is selected under the
        alias `distance`.

        `q` is a query template with `{columns}`, `{where}` and
        `{limit}` replacement fields.

        `name_type` is interpolated directly into the SQL text, so it
        must be a trusted column name. `name` is also used as a prefix
        for a `LOWER(<name_type>) LIKE` filter.

        `db`, `team`, `position` and `limit` are read from the
        enclosing scope.
        """
        def as_text(fragment):
            # `cursor.mogrify` returns `bytes` on Python 3, which cannot
            # be mixed with the `str` SQL fragments below. The second
            # check keeps Python 2 (where `bytes is str`) untouched.
            # NOTE(review): assumes a UTF-8 client encoding, matching
            # the other search helpers - confirm.
            if isinstance(fragment, bytes) and not isinstance(fragment, str):
                return fragment.decode('utf-8')
            return fragment

        fuzzy = fuzzy % name_type
        similar = 'LOWER(%s) LIKE %%s' % name_type
        qteam, qposition = '', ''
        results = []
        with Tx(db) as cursor:
            if team is not None:
                qteam = as_text(cursor.mogrify('team = %s', (team, )))
            if position is not None:
                qposition = as_text(
                    cursor.mogrify('position = %s', (position, )))

            fuzzy_filled = as_text(cursor.mogrify(fuzzy, (name, )))
            similar_filled = as_text(
                cursor.mogrify(similar, (name + '%', )))
            columns = types.Player._sql_select_fields(
                types.Player.sql_fields())
            columns.append('%s AS distance' % fuzzy_filled)
            q = q.format(columns=', '.join(columns),
                         where=sql.ands(similar_filled,
                                        fuzzy_filled + ' IS NOT NULL',
                                        'team != \'UNK\'', qteam, qposition),
                         limit=limit)
            # Every value is already interpolated into `q`, so it is
            # executed without parameters.
            cursor.execute(q)

            for row in cursor.fetchall():
                results.append(
                    (types.Player.from_row_dict(db, row), row['distance']))
        return results
Exemple #3
0
    def get_results(fuzzy, q, name_type, name):
        """
        Runs a single fuzzy player search and returns a list of
        `(player, distance)` tuples, one per row fetched.

        `fuzzy` is a SQL expression template. It is first %-formatted
        with `name_type` and the result is then filled with `name` via
        `cursor.mogrify`. The filled expression is selected under the
        alias `distance`.

        `q` is a query template with `{columns}`, `{where}` and
        `{limit}` replacement fields.

        `name_type` is interpolated directly into the SQL text, so it
        must be a trusted column name. `name` is also used as a prefix
        for a `LOWER(<name_type>) LIKE` filter.

        `db`, `team`, `position` and `limit` are read from the
        enclosing scope.
        """

        def mogrify_text(cursor, template, params):
            # `cursor.mogrify` returns `bytes` on Python 3, which cannot
            # be concatenated with the `str` fragments built below. On
            # Python 2 `bytes is str`, so the value is passed through.
            # NOTE(review): assumes a UTF-8 client encoding, matching
            # the other search helpers - confirm.
            filled = cursor.mogrify(template, params)
            if isinstance(filled, bytes) and not isinstance(filled, str):
                filled = filled.decode("utf-8")
            return filled

        fuzzy = fuzzy % name_type
        similar = "LOWER(%s) LIKE %%s" % name_type
        qteam, qposition = "", ""
        results = []
        with Tx(db) as cursor:
            if team is not None:
                qteam = mogrify_text(cursor, "team = %s", (team,))
            if position is not None:
                qposition = mogrify_text(cursor, "position = %s", (position,))

            fuzzy_filled = mogrify_text(cursor, fuzzy, (name,))
            similar_filled = mogrify_text(cursor, similar, (name + "%",))
            columns = types.Player._sql_select_fields(types.Player.sql_fields())
            columns.append("%s AS distance" % fuzzy_filled)
            q = q.format(
                columns=", ".join(columns),
                where=sql.ands(
                    similar_filled,
                    fuzzy_filled + " IS NOT NULL",
                    "team != 'UNK'",
                    qteam,
                    qposition,
                ),
                limit=limit,
            )
            # Every value is already interpolated into `q`, so it is
            # executed without parameters.
            cursor.execute(q)

            for row in cursor.fetchall():
                results.append((types.Player.from_row_dict(db, row), row["distance"]))
        return results
Exemple #4
0
    def _make_join_query(self,
                         cursor,
                         entity,
                         only_prim=False,
                         sorter=None,
                         ent_fillers=None):
        """
        Builds and returns the SQL text of a `SELECT` over `entity`
        joined with every other entity needed by this query.

        `cursor` is forwarded to `self._sql_where` to build the
        `WHERE` clause.

        `entity` is the class being selected from; its `_sql_from()`
        supplies the `FROM` clause.

        When `only_prim` is true, only the columns listed in
        `entity._sql_tables['primary']` are selected. Otherwise the
        `sql_fields()` of every entity in `ent_fillers` are selected,
        followed by those of `entity`.

        `sorter` defaults to `self._sorter(entity)`. Its entities are
        added to the join set and its `sql()` output ends the query.

        `ent_fillers` is an optional iterable of additional entities
        that are always joined.
        """
        if sorter is None:
            sorter = self._sorter(entity)

        # NOTE(review): this mutates the set returned by
        # `self._entities()`; presumably a fresh set is returned on each
        # call - confirm.
        entities = self._entities()
        entities.update(sorter.entities)
        for ent in ent_fillers or []:
            entities.add(ent)
        entities.discard(entity)

        # If we're joining the `player` table with any other table except
        # `play_player`, then we MUST add `play_player` as a joining table.
        # It is the only way to bridge players and games/drives/plays.
        #
        # TODO: This could probably be automatically deduced in general case,
        # but we only have one case so just check for it manually.
        if (entity is not types.PlayPlayer and types.Player in entities) \
                or (entity is types.Player and len(entities) > 0):
            entities.add(types.PlayPlayer)

        if only_prim:
            columns = entity._sql_tables['primary']
            fields = entity._sql_select_fields(fields=columns)
        else:
            fields = []
            for ent in ent_fillers or []:
                fields += ent._sql_select_fields(fields=ent.sql_fields())
            fields += entity._sql_select_fields(fields=entity.sql_fields())
        args = {
            'columns': ', '.join(fields),
            'from': entity._sql_from(),
            'joins': entity._sql_join_all(entities),
            'where': sql.ands(self._sql_where(cursor)),
            'groupby': '',
            'sortby': sorter.sql(),
        }

        # We need a GROUP BY if we're joining with a table that has more
        # specific information. e.g., selecting from game with criteria
        # for plays.
        #
        # A distance of `None` is skipped explicitly: comparing `None`
        # with an integer raises `TypeError` on Python 3 (on Python 2 it
        # silently evaluated to false, which this guard reproduces).
        distances = (entity._sql_relation_distance(to) for to in entities)
        if any(dist is not None and dist > 0 for dist in distances):
            fields = []
            for table, _ in entity._sql_tables['tables']:
                fields += entity._sql_primary_key(table)
            args['groupby'] = 'GROUP BY ' + ', '.join(fields)

        q = """
        SELECT {columns} {from} {joins}
        WHERE {where}
        {groupby}
        {sortby}
        """.format(**args)
        return q
Exemple #5
0
    def _make_join_query(self, cursor, entity, only_prim=False, sorter=None,
                         ent_fillers=None):
        """
        Builds and returns the SQL text of a `SELECT` over `entity`
        joined with every other entity needed by this query.

        `cursor` is forwarded to `self._sql_where` to build the
        `WHERE` clause.

        `entity` is the class being selected from; its `_sql_from()`
        supplies the `FROM` clause.

        When `only_prim` is true, only the columns listed in
        `entity._sql_tables['primary']` are selected. Otherwise the
        `sql_fields()` of every entity in `ent_fillers` are selected,
        followed by those of `entity`.

        `sorter` defaults to `self._sorter(entity)`. Its entities are
        added to the join set and its `sql()` output ends the query.

        `ent_fillers` is an optional iterable of additional entities
        that are always joined.
        """
        if sorter is None:
            sorter = self._sorter(entity)

        # NOTE(review): this mutates the set returned by
        # `self._entities()`; presumably a fresh set is returned on each
        # call - confirm.
        entities = self._entities()
        entities.update(sorter.entities)
        for ent in ent_fillers or []:
            entities.add(ent)
        entities.discard(entity)

        # If we're joining the `player` table with any other table except
        # `play_player`, then we MUST add `play_player` as a joining table.
        # It is the only way to bridge players and games/drives/plays.
        #
        # TODO: This could probably be automatically deduced in general case,
        # but we only have one case so just check for it manually.
        if (entity is not types.PlayPlayer and types.Player in entities) \
                or (entity is types.Player and len(entities) > 0):
            entities.add(types.PlayPlayer)

        if only_prim:
            columns = entity._sql_tables['primary']
            fields = entity._sql_select_fields(fields=columns)
        else:
            fields = []
            for ent in ent_fillers or []:
                fields += ent._sql_select_fields(fields=ent.sql_fields())
            fields += entity._sql_select_fields(fields=entity.sql_fields())
        args = {
            'columns': ', '.join(fields),
            'from': entity._sql_from(),
            'joins': entity._sql_join_all(entities),
            'where': sql.ands(self._sql_where(cursor)),
            'groupby': '',
            'sortby': sorter.sql(),
        }

        # We need a GROUP BY if we're joining with a table that has more
        # specific information. e.g., selecting from game with criteria
        # for plays.
        #
        # Each distance is computed once; `None` distances are skipped
        # because `None > 0` raises `TypeError` on Python 3.
        distances = (entity._sql_relation_distance(to) for to in entities)
        if any(dist is not None and dist > 0 for dist in distances):
            fields = []
            for table, _ in entity._sql_tables['tables']:
                fields += entity._sql_primary_key(table)
            args['groupby'] = 'GROUP BY ' + ', '.join(fields)

        q = '''
            SELECT {columns} {from} {joins}
            WHERE {where}
            {groupby}
            {sortby}
        '''.format(**args)
        return q
Exemple #6
0
    def as_aggregate(self):
        """
        Runs the query and returns its results as aggregated
        `nfldb.PlayPlayer` objects. It is intended as a more limited
        but much faster alternative to `nfldb.aggregate`: the aggregate
        statistics are computed by PostgreSQL here, whereas
        `nfldb.aggregate` computes them in Python code.

        Any sorting criteria given are applied to the aggregate
        *player* values only.
        """
        class AggPP (types.PlayPlayer):
            @classmethod
            def _sql_field(cls, name, aliases=None):
                # A combined derived field is the sum of the SQL
                # expressions of its component fields.
                if name in cls._derived_combined:
                    return ' + '.join(
                        cls._sql_field(part, aliases=aliases)
                        for part in cls._derived_combined[name])

                # `points` is each scoring field multiplied by its
                # point value, all added together.
                if name == 'points':
                    weighted = []
                    for stat, pval in cls._point_values:
                        expr = cls._sql_field(stat, aliases=aliases)
                        weighted.append('(%s * %d)' % (expr, pval))
                    return ' + '.join(weighted)

                # Every other field is summed over the group.
                plain = super(AggPP, cls)._sql_field(name, aliases=aliases)
                return 'SUM(%s)' % plain

        template = '''
                SELECT
                    play_player.player_id AS play_player_player_id, {sum_fields}
                FROM play_player
                {joins}
                WHERE {where}
                GROUP BY play_player.player_id
                HAVING {having}
                {order}
            '''

        aggregated = []
        with Tx(self._db) as cur:
            # Join every entity other than `PlayPlayer`, which is
            # already the FROM table.
            join_sql = ''.join(
                types.PlayPlayer._sql_join_to_all(ent)
                for ent in self._entities()
                if ent is not types.PlayPlayer)

            field_names = list(types._player_categories.keys())
            field_names = field_names + AggPP._sql_tables['derived']
            selected = AggPP._sql_select_fields(field_names)
            where_conds = self._sql_where(cur)
            having_conds = self._sql_where(cur, aggregate=True)

            query = template.format(
                sum_fields=', '.join(selected),
                joins=join_sql,
                where=sql.ands(where_conds),
                having=sql.ands(having_conds),
                order=self._sorter(AggPP).sql(),
            )

            make = AggPP.from_row_dict
            cur.execute(query)
            aggregated.extend(make(self._db, row) for row in cur.fetchall())
        return aggregated
Exemple #7
0
def player_search(db, full_name, team=None, position=None,
                  limit=1, soundex=False):
    """
    Given a database handle and a player's full name, this function
    searches the database for players with full names *similar* to the
    one given. Similarity is measured by the
    [Levenshtein distance](http://en.wikipedia.org/wiki/Levenshtein_distance),
    or by [Soundex similarity](http://en.wikipedia.org/wiki/Soundex).

    Results are returned as tuples. The first element is a
    `nfldb.Player` object and the second element is the Levenshtein
    (or Soundex) distance. When `limit` is `1` (the default), then the
    return value is a tuple.  When `limit` is more than `1`, then the
    return value is a list of tuples.

    If no results are found, then `(None, None)` is returned when
    `limit == 1` or the empty list is returned when `limit > 1`.

    If `team` is not `None`, then only players **currently** on the
    team provided will be returned. Any players with an unknown team
    are therefore omitted.

    If `position` is not `None`, then only players **currently**
    at that position will be returned. Any players with an unknown
    position are therefore omitted.

    In order to use this function, the PostgreSQL `levenshtein`
    function must be available. If running this function gives
    you an error about "No function matches the given name and
    argument types", then you can install the `levenshtein` function
    into your database by running the SQL query `CREATE EXTENSION
    fuzzystrmatch` as a superuser like `postgres`. For example:

        #!bash
        psql -U postgres -c 'CREATE EXTENSION fuzzystrmatch;' nfldb

    Note that enabling the `fuzzystrmatch` extension also provides
    functions for comparing using Soundex.
    """
    # `limit` is formatted directly into the SQL text below, so it must
    # be a positive integer.
    assert isinstance(limit, int) and limit >= 1

    if soundex:
        # Careful, soundex distances are sorted in reverse of Levenshtein
        # distances.
        # Difference yields an integer in [0, 4].
        # A 4 is an exact match.
        fuzzy = 'difference(full_name, %s)'
        q = '''
            SELECT {columns}
            FROM player
            WHERE {where}
            ORDER BY distance DESC LIMIT {limit}
        '''
    else:
        fuzzy = 'levenshtein(full_name, %s)'
        q = '''
            SELECT {columns}
            FROM player
            WHERE {where}
            ORDER BY distance ASC LIMIT {limit}
        '''
    qteam, qposition = '', ''
    results = []
    with Tx(db) as cursor:
        # `mogrify` returns the escaped SQL as bytes; decode it so it
        # can be spliced into the text query.
        if team is not None:
            qteam = cursor.mogrify('team = %s', (team,)).decode('utf-8')
        if position is not None:
            qposition = cursor.mogrify(
                'position = %s', (position,)).decode('utf-8')

        fuzzy_filled = cursor.mogrify(fuzzy, (full_name,)).decode('utf-8')
        columns = types.Player._sql_select_fields(types.Player.sql_fields())
        columns.append('%s AS distance' % fuzzy_filled)
        q = q.format(
            columns=', '.join(columns),
            where=sql.ands(fuzzy_filled + ' IS NOT NULL', qteam, qposition),
            limit=limit)
        # Every value has already been interpolated by `mogrify`, so the
        # query is executed without parameters. Passing parameters here
        # would make the driver scan the finished text for `%`
        # placeholders again, which breaks on names containing `%`.
        cursor.execute(q)

        for row in cursor.fetchall():
            r = (types.Player.from_row_dict(db, row), row['distance'])
            results.append(r)
    if limit == 1:
        if not results:
            return (None, None)
        return results[0]
    return results
Exemple #8
0
    def as_aggregate(self):
        """
        Executes this query, letting PostgreSQL compute the aggregate
        statistics, and returns the rows as aggregated
        `nfldb.PlayPlayer` objects. Compared with `nfldb.aggregate`,
        which aggregates in Python code, this method is more restricted
        but much faster.

        Sorting criteria, if any, apply to the aggregate *player*
        values only.
        """
        class AggPP(types.PlayPlayer):
            @classmethod
            def _sql_field(cls, name, aliases=None):
                def expand(field):
                    # SQL expression for one component field.
                    return cls._sql_field(field, aliases=aliases)

                if name in cls._derived_combined:
                    # Combined field: add up its component expressions.
                    return ' + '.join(map(expand, cls._derived_combined[name]))
                if name == 'points':
                    # Points: each field multiplied by its point value.
                    scored = ['(%s * %d)' % (expand(stat), pval)
                              for stat, pval in cls._point_values]
                    return ' + '.join(scored)
                # Any other field is summed over the group.
                base = super(AggPP, cls)._sql_field(name, aliases=aliases)
                return 'SUM(%s)' % base

        rows_out = []
        with Tx(self._db) as cur:
            # Join every entity except `PlayPlayer`, which is already
            # the FROM table.
            pieces = []
            for ent in self._entities():
                if ent is not types.PlayPlayer:
                    pieces.append(types.PlayPlayer._sql_join_to_all(ent))

            categories = list(types._player_categories.keys())
            select_exprs = AggPP._sql_select_fields(
                categories + AggPP._sql_tables['derived'])
            where_parts = self._sql_where(cur)
            having_parts = self._sql_where(cur, aggregate=True)
            params = {
                'sum_fields': ', '.join(select_exprs),
                'joins': ''.join(pieces),
                'where': sql.ands(where_parts),
                'having': sql.ands(having_parts),
                'order': self._sorter(AggPP).sql(),
            }
            statement = '''
                SELECT
                    play_player.player_id AS play_player_player_id, {sum_fields}
                FROM play_player
                {joins}
                WHERE {where}
                GROUP BY play_player.player_id
                HAVING {having}
                {order}
            '''.format(**params)

            build = AggPP.from_row_dict
            cur.execute(statement)
            rows_out += [build(self._db, row) for row in cur.fetchall()]
        return rows_out
Exemple #9
0
def player_search(db,
                  full_name,
                  team=None,
                  position=None,
                  limit=1,
                  soundex=False):
    """
    Given a database handle and a player's full name, this function
    searches the database for players with full names *similar* to the
    one given. Similarity is measured by the
    [Levenshtein distance](http://en.wikipedia.org/wiki/Levenshtein_distance),
    or by [Soundex similarity](http://en.wikipedia.org/wiki/Soundex).

    Results are returned as tuples. The first element is a
    `nfldb.Player` object and the second element is the Levenshtein
    (or Soundex) distance. When `limit` is `1` (the default), then the
    return value is a tuple.  When `limit` is more than `1`, then the
    return value is a list of tuples.

    If no results are found, then `(None, None)` is returned when
    `limit == 1` or the empty list is returned when `limit > 1`.

    If `team` is not `None`, then only players **currently** on the
    team provided will be returned. Any players with an unknown team
    are therefore omitted.

    If `position` is not `None`, then only players **currently**
    at that position will be returned. Any players with an unknown
    position are therefore omitted.

    In order to use this function, the PostgreSQL `levenshtein`
    function must be available. If running this function gives
    you an error about "No function matches the given name and
    argument types", then you can install the `levenshtein` function
    into your database by running the SQL query `CREATE EXTENSION
    fuzzystrmatch` as a superuser like `postgres`. For example:

        #!bash
        psql -U postgres -c 'CREATE EXTENSION fuzzystrmatch;' nfldb

    Note that enabling the `fuzzystrmatch` extension also provides
    functions for comparing using Soundex.
    """
    # `limit` is formatted directly into the SQL text below, so it must
    # be a positive integer.
    assert isinstance(limit, int) and limit >= 1

    if soundex:
        # Careful, soundex distances are sorted in reverse of Levenshtein
        # distances.
        # Difference yields an integer in [0, 4].
        # A 4 is an exact match.
        fuzzy = 'difference(full_name, %s)'
        q = '''
            SELECT {columns}
            FROM player
            WHERE {where}
            ORDER BY distance DESC LIMIT {limit}
        '''
    else:
        fuzzy = 'levenshtein(full_name, %s)'
        q = '''
            SELECT {columns}
            FROM player
            WHERE {where}
            ORDER BY distance ASC LIMIT {limit}
        '''
    qteam, qposition = '', ''
    results = []
    with Tx(db) as cursor:
        # `mogrify` returns the escaped SQL as bytes; decode it so it
        # can be spliced into the text query.
        if team is not None:
            qteam = cursor.mogrify('team = %s', (team, )).decode('utf-8')
        if position is not None:
            qposition = cursor.mogrify('position = %s',
                                       (position, )).decode('utf-8')

        fuzzy_filled = cursor.mogrify(fuzzy, (full_name, )).decode('utf-8')
        columns = types.Player._sql_select_fields(types.Player.sql_fields())
        columns.append('%s AS distance' % fuzzy_filled)
        q = q.format(columns=', '.join(columns),
                     where=sql.ands(fuzzy_filled + ' IS NOT NULL', qteam,
                                    qposition),
                     limit=limit)
        # Every value has already been interpolated by `mogrify`, so the
        # query is executed without parameters. Passing parameters here
        # would make the driver scan the finished text for `%`
        # placeholders again, which breaks on names containing `%`.
        cursor.execute(q)

        for row in cursor.fetchall():
            r = (types.Player.from_row_dict(db, row), row['distance'])
            results.append(r)
    if limit == 1:
        if not results:
            return (None, None)
        return results[0]
    return results