Example #1
0
def _drop_stat_indexes(c):
    """Drop the per-statistic indexes of `play_player` and `play`.

    For every value of `_player_categories` a
    `DROP INDEX play_player_in_<category>` statement is executed on the
    cursor `c`, followed by a `DROP INDEX play_in_<category>` statement
    for every value of `_play_categories`.
    """
    from nfldb.types import _play_categories, _player_categories

    # (statement template, categories) pairs, processed in this order.
    index_groups = (
        ('DROP INDEX play_player_in_%s', _player_categories),
        ('DROP INDEX play_in_%s', _play_categories),
    )
    for template, categories in index_groups:
        for category in categories.values():
            c.execute(template % category)
Example #2
0
def _drop_stat_indexes(c):
    """Execute a `DROP INDEX` on cursor `c` for each statistic index.

    The `play_player_in_*` indexes (one per value of
    `_player_categories`) are dropped first, then the `play_in_*`
    indexes (one per value of `_play_categories`).
    """
    from nfldb.types import _play_categories, _player_categories

    def drop_statements():
        # Lazily yield the statements so each one is formatted right
        # before it is executed.
        for player_stat in _player_categories.values():
            yield 'DROP INDEX play_player_in_%s' % player_stat
        for play_stat in _play_categories.values():
            yield 'DROP INDEX play_in_%s' % play_stat

    for statement in drop_statements():
        c.execute(statement)
Example #3
0
def _create_stat_indexes(c):
    """Create one ascending index per statistic column.

    Executes, on cursor `c`, a `CREATE INDEX play_player_in_<category>`
    statement on `play_player` for every value of `_player_categories`,
    then a `CREATE INDEX play_in_<category>` statement on `play` for
    every value of `_play_categories`.
    """
    from nfldb.types import _play_categories, _player_categories

    # (statement template, categories) pairs, processed in this order.
    index_groups = (
        ("CREATE INDEX play_player_in_%s ON play_player (%s ASC)",
         _player_categories),
        ("CREATE INDEX play_in_%s ON play (%s ASC)",
         _play_categories),
    )
    for template, categories in index_groups:
        for category in categories.values():
            # The category names both the index suffix and the column.
            c.execute(template % (category, category))
Example #4
0
def _create_stat_indexes(c):
    """Execute a `CREATE INDEX` on cursor `c` for each statistic column.

    Indexes on `play_player` (one per value of `_player_categories`) are
    created first, then indexes on `play` (one per value of
    `_play_categories`).  Every index is ascending and is named after
    the category it covers.
    """
    from nfldb.types import _play_categories, _player_categories

    def index_statements():
        # Lazily yield the statements so each one is formatted right
        # before it is executed.
        for player_stat in _player_categories.values():
            yield ('CREATE INDEX play_player_in_%s ON play_player (%s ASC)'
                   % (player_stat, player_stat))
        for play_stat in _play_categories.values():
            yield ('CREATE INDEX play_in_%s ON play (%s ASC)'
                   % (play_stat, play_stat))

    for statement in index_statements():
        c.execute(statement)
Example #5
0
def _migrate_7(c):
    """Create, populate, index and keep in sync the `agg_play` table.

    Using cursor `c`, this migration performs the following, in order:

    1. creates `agg_play`, keyed by (gsis_id, drive_id, play_id), with
       one column per value of `_player_categories`;
    2. fills it with the per-play sums of the `play_player` statistics;
    3. adds indexes on the key columns and on every statistic column;
    4. installs triggers so that an insert on `play` adds a row to
       `agg_play` and an insert or update on `play_player` recomputes
       the sums of the affected play.

    Progress messages are written to standard error.
    """
    from nfldb.types import _player_categories

    def announce(text):
        print(text, file=sys.stderr)

    announce('''
MIGRATING DATABASE... PLEASE WAIT

THIS WILL ONLY HAPPEN ONCE.

This is currently adding a play aggregation table (a materialized view) derived
from the `play` and `play_player` tables. Depending on your machine, this
should take less than two minutes (this includes aggregating the data and
adding indexes).

This aggregation table will automatically update itself when data is added or
changed.
''')

    # Column definitions for every player statistic.
    stat_columns = ', '.join(
        stat._sql_field for stat in _player_categories.values())
    c.execute('''
        CREATE TABLE agg_play (
            gsis_id gameid NOT NULL,
            drive_id usmallint NOT NULL,
            play_id usmallint NOT NULL,
            %s,
            PRIMARY KEY (gsis_id, drive_id, play_id),
            FOREIGN KEY (gsis_id, drive_id, play_id)
                REFERENCES play (gsis_id, drive_id, play_id)
                ON DELETE CASCADE,
            FOREIGN KEY (gsis_id, drive_id)
                REFERENCES drive (gsis_id, drive_id)
                ON DELETE CASCADE,
            FOREIGN KEY (gsis_id)
                REFERENCES game (gsis_id)
                ON DELETE CASCADE
        )
    ''' % stat_columns)

    # Initial population: the play key followed by one zero-defaulted sum
    # per statistic, in the same order as the table's columns.
    play_key = ['play.gsis_id', 'play.drive_id', 'play.play_id']
    stat_sums = ['COALESCE(SUM(play_player.%s), 0)' % stat.category_id
                 for stat in _player_categories.values()]
    populate_sql = '''
        INSERT INTO agg_play
        SELECT {select}
        FROM play
        LEFT JOIN play_player
        ON (play.gsis_id, play.drive_id, play.play_id)
           = (play_player.gsis_id, play_player.drive_id, play_player.play_id)
        GROUP BY play.gsis_id, play.drive_id, play.play_id
    '''.format(select=', '.join(play_key + stat_sums))
    c.execute(populate_sql)

    announce('Aggregation complete. Adding indexes...')
    c.execute('''
        CREATE INDEX agg_play_in_gsis_id
            ON agg_play (gsis_id ASC);
        CREATE INDEX agg_play_in_gsis_drive_id
            ON agg_play (gsis_id ASC, drive_id ASC);
    ''')
    for stat in _player_categories.values():
        index_sql = ('CREATE INDEX agg_play_in_%s ON agg_play (%s ASC)'
                     % (stat, stat))
        c.execute(index_sql)

    announce('Indexing complete. Adding triggers...')
    # A new play gets an `agg_play` row containing only its key.
    c.execute('''
        CREATE FUNCTION agg_play_insert() RETURNS trigger AS $$
            BEGIN
                INSERT INTO
                    agg_play (gsis_id, drive_id, play_id)
                    VALUES   (NEW.gsis_id, NEW.drive_id, NEW.play_id);
                RETURN NULL;
            END;
        $$ LANGUAGE 'plpgsql';
    ''')
    c.execute('''
        CREATE TRIGGER agg_play_sync_insert
        AFTER INSERT ON play
        FOR EACH ROW EXECUTE PROCEDURE agg_play_insert();
    ''')

    # A changed `play_player` row triggers a recomputation of every sum
    # for the play it belongs to.
    stat_ids = [stat.category_id for stat in _player_categories.values()]
    summed = ', '.join(
        'COALESCE(SUM(play_player.{f}), 0) AS {f}'.format(f=stat_id)
        for stat_id in stat_ids)
    assigned = ', '.join(
        '{f} = s.{f}'.format(f=stat_id) for stat_id in stat_ids)
    c.execute('''
        CREATE FUNCTION agg_play_update() RETURNS trigger AS $$
            BEGIN
                UPDATE agg_play SET {set_columns}
                FROM (
                    SELECT {select}
                    FROM play
                    LEFT JOIN play_player
                    ON (play.gsis_id, play.drive_id, play.play_id)
                       = (play_player.gsis_id, play_player.drive_id,
                          play_player.play_id)
                    WHERE (play.gsis_id, play.drive_id, play.play_id)
                          = (NEW.gsis_id, NEW.drive_id, NEW.play_id)
                ) s
                WHERE (agg_play.gsis_id, agg_play.drive_id, agg_play.play_id)
                      = (NEW.gsis_id, NEW.drive_id, NEW.play_id);
                RETURN NULL;
            END;
        $$ LANGUAGE 'plpgsql';
    '''.format(set_columns=assigned, select=summed))
    c.execute('''
        CREATE TRIGGER agg_play_sync_update
        AFTER INSERT OR UPDATE ON play_player
        FOR EACH ROW EXECUTE PROCEDURE agg_play_update();
    ''')
Example #6
0
def _migrate_2(c):
    """Create the core nfldb types and tables using cursor `c`.

    Statements are executed in this order: the `gameid`, `usmallint`,
    `game_clock` and `field_offset` domains; the enum types built from
    `Enums`; the composite types `game_time`, `pos_period` and
    `field_pos`; three new columns on `meta`; and finally the `team`
    (populated from `nfldb.team.teams1`), `player`, `game`, `drive`,
    `play` and `play_player` tables.  Later statements reference objects
    created by earlier ones, so the order is significant.
    """
    from nfldb.types import Enums, _play_categories, _player_categories

    # Create some types and common constraints.
    domain_ddl = (
        '''
        CREATE DOMAIN gameid AS character varying (10)
                          CHECK (char_length(VALUE) = 10)
    ''',
        '''
        CREATE DOMAIN usmallint AS smallint
                          CHECK (VALUE >= 0)
    ''',
        '''
        CREATE DOMAIN game_clock AS smallint
                          CHECK (VALUE >= 0 AND VALUE <= 900)
    ''',
        '''
        CREATE DOMAIN field_offset AS smallint
                          CHECK (VALUE >= -50 AND VALUE <= 50)
    ''',
    )
    for ddl in domain_ddl:
        c.execute(ddl)

    # Each enum type is named after the `Enums` attribute it is built from.
    enum_template = '''
        CREATE TYPE %s AS ENUM %s
    '''
    for enum_name in ('game_phase', 'season_phase', 'game_day',
                      'player_pos', 'player_status'):
        enum_values = _mogrify(c, getattr(Enums, enum_name))
        c.execute(enum_template % (enum_name, enum_values))

    composite_ddl = (
        '''
        CREATE TYPE game_time AS (
            phase game_phase,
            elapsed game_clock
        )
    ''',
        '''
        CREATE TYPE pos_period AS (
            elapsed usmallint
        )
    ''',
        '''
        CREATE TYPE field_pos AS (
            pos field_offset
        )
    ''',
    )
    for ddl in composite_ddl:
        c.execute(ddl)

    # Now that some types have been made, add current state to meta table.
    c.execute('''
        ALTER TABLE meta
            ADD season_type season_phase NULL,
            ADD season_year usmallint NULL
                    CHECK (season_year >= 1960 AND season_year <= 2100),
            ADD week usmallint NULL
                    CHECK (week >= 1 AND week <= 25)
    ''')

    # Create the team table and populate it.
    c.execute('''
        CREATE TABLE team (
            team_id character varying (3) NOT NULL,
            city character varying (50) NOT NULL,
            name character varying (50) NOT NULL,
            PRIMARY KEY (team_id)
        )
    ''')
    # Only the first three fields of each team entry are inserted.
    team_rows = ', '.join(
        _mogrify(c, entry[0:3]) for entry in nfldb.team.teams1)
    c.execute('''
        INSERT INTO team (team_id, city, name) VALUES %s
    ''' % team_rows)

    c.execute('''
        CREATE TABLE player (
            player_id character varying (10) NOT NULL
                CHECK (char_length(player_id) = 10),
            gsis_name character varying (75) NULL,
            full_name character varying (100) NULL,
            first_name character varying (100) NULL,
            last_name character varying (100) NULL,
            team character varying (3) NOT NULL,
            position player_pos NOT NULL,
            profile_id integer NULL,
            profile_url character varying (255) NULL,
            uniform_number usmallint NULL,
            birthdate character varying (75) NULL,
            college character varying (255) NULL,
            height character varying (100) NULL,
            weight character varying (100) NULL,
            years_pro usmallint NULL,
            status player_status NOT NULL,
            PRIMARY KEY (player_id),
            FOREIGN KEY (team)
                REFERENCES team (team_id)
                ON DELETE RESTRICT
                ON UPDATE CASCADE
        )
    ''')
    c.execute('''
        CREATE TABLE game (
            gsis_id gameid NOT NULL,
            gamekey character varying (5) NULL,
            start_time utctime NOT NULL,
            week usmallint NOT NULL
                CHECK (week >= 1 AND week <= 25),
            day_of_week game_day NOT NULL,
            season_year usmallint NOT NULL
                CHECK (season_year >= 1960 AND season_year <= 2100),
            season_type season_phase NOT NULL,
            finished boolean NOT NULL,
            home_team character varying (3) NOT NULL,
            home_score usmallint NOT NULL,
            home_score_q1 usmallint NULL,
            home_score_q2 usmallint NULL,
            home_score_q3 usmallint NULL,
            home_score_q4 usmallint NULL,
            home_score_q5 usmallint NULL,
            home_turnovers usmallint NOT NULL,
            away_team character varying (3) NOT NULL,
            away_score usmallint NOT NULL,
            away_score_q1 usmallint NULL,
            away_score_q2 usmallint NULL,
            away_score_q3 usmallint NULL,
            away_score_q4 usmallint NULL,
            away_score_q5 usmallint NULL,
            away_turnovers usmallint NOT NULL,
            time_inserted utctime NOT NULL,
            time_updated utctime NOT NULL,
            PRIMARY KEY (gsis_id),
            FOREIGN KEY (home_team)
                REFERENCES team (team_id)
                ON DELETE RESTRICT
                ON UPDATE CASCADE,
            FOREIGN KEY (away_team)
                REFERENCES team (team_id)
                ON DELETE RESTRICT
                ON UPDATE CASCADE
        )
    ''')
    c.execute('''
        CREATE TABLE drive (
            gsis_id gameid NOT NULL,
            drive_id usmallint NOT NULL,
            start_field field_pos NULL,
            start_time game_time NOT NULL,
            end_field field_pos NULL,
            end_time game_time NOT NULL,
            pos_team character varying (3) NOT NULL,
            pos_time pos_period NULL,
            first_downs usmallint NOT NULL,
            result text NULL,
            penalty_yards smallint NOT NULL,
            yards_gained smallint NOT NULL,
            play_count usmallint NOT NULL,
            time_inserted utctime NOT NULL,
            time_updated utctime NOT NULL,
            PRIMARY KEY (gsis_id, drive_id),
            FOREIGN KEY (gsis_id)
                REFERENCES game (gsis_id)
                ON DELETE CASCADE,
            FOREIGN KEY (pos_team)
                REFERENCES team (team_id)
                ON DELETE RESTRICT
                ON UPDATE CASCADE
        )
    ''')

    # Sparse play statistic data is represented with a sparse table: one
    # column per statistical category. See issue #2:
    # https://github.com/BurntSushi/nfldb/issues/2
    play_stat_columns = ', '.join(
        [stat._sql_field for stat in _play_categories.values()])
    c.execute('''
        CREATE TABLE play (
            gsis_id gameid NOT NULL,
            drive_id usmallint NOT NULL,
            play_id usmallint NOT NULL,
            time game_time NOT NULL,
            pos_team character varying (3) NOT NULL,
            yardline field_pos NULL,
            down smallint NULL
                CHECK (down >= 1 AND down <= 4),
            yards_to_go smallint NULL
                CHECK (yards_to_go >= 0 AND yards_to_go <= 100),
            description text NULL,
            note text NULL,
            time_inserted utctime NOT NULL,
            time_updated utctime NOT NULL,
            %s,
            PRIMARY KEY (gsis_id, drive_id, play_id),
            FOREIGN KEY (gsis_id, drive_id)
                REFERENCES drive (gsis_id, drive_id)
                ON DELETE CASCADE,
            FOREIGN KEY (gsis_id)
                REFERENCES game (gsis_id)
                ON DELETE CASCADE,
            FOREIGN KEY (pos_team)
                REFERENCES team (team_id)
                ON DELETE RESTRICT
                ON UPDATE CASCADE
        )
    ''' % play_stat_columns)

    player_stat_columns = ', '.join(
        stat._sql_field for stat in _player_categories.values())
    c.execute('''
        CREATE TABLE play_player (
            gsis_id gameid NOT NULL,
            drive_id usmallint NOT NULL,
            play_id usmallint NOT NULL,
            player_id character varying (10) NOT NULL,
            team character varying (3) NOT NULL,
            %s,
            PRIMARY KEY (gsis_id, drive_id, play_id, player_id),
            FOREIGN KEY (gsis_id, drive_id, play_id)
                REFERENCES play (gsis_id, drive_id, play_id)
                ON DELETE CASCADE,
            FOREIGN KEY (gsis_id, drive_id)
                REFERENCES drive (gsis_id, drive_id)
                ON DELETE CASCADE,
            FOREIGN KEY (gsis_id)
                REFERENCES game (gsis_id)
                ON DELETE CASCADE,
            FOREIGN KEY (player_id)
                REFERENCES player (player_id)
                ON DELETE RESTRICT,
            FOREIGN KEY (team)
                REFERENCES team (team_id)
                ON DELETE RESTRICT
                ON UPDATE CASCADE
        )
    ''' % player_stat_columns)
Example #7
0
def _migrate_2(c):
    """Build the base nfldb schema on cursor `c`.

    The migration first creates the domains, enum types and composite
    types used as column types, then adds the current-season columns to
    `meta`, and finally creates the `team`, `player`, `game`, `drive`,
    `play` and `play_player` tables.  `team` is populated from
    `nfldb.team.teams`; the statistic columns of `play` and
    `play_player` come from `_play_categories` and `_player_categories`.
    Statements depend on objects created by earlier ones, so they must
    run in the order written.
    """
    from nfldb.types import Enums, _play_categories, _player_categories

    enum_sql = '''
        CREATE TYPE %s AS ENUM %s
    '''

    def create_enum(type_name, labels):
        # Every enum type is created from the same statement template.
        c.execute(enum_sql % (type_name, _mogrify(c, labels)))

    # Create some types and common constraints.
    c.execute('''
        CREATE DOMAIN gameid AS character varying (10)
                          CHECK (char_length(VALUE) = 10)
    ''')
    c.execute('''
        CREATE DOMAIN usmallint AS smallint
                          CHECK (VALUE >= 0)
    ''')
    c.execute('''
        CREATE DOMAIN game_clock AS smallint
                          CHECK (VALUE >= 0 AND VALUE <= 900)
    ''')
    c.execute('''
        CREATE DOMAIN field_offset AS smallint
                          CHECK (VALUE >= -50 AND VALUE <= 50)
    ''')

    create_enum('game_phase', Enums.game_phase)
    create_enum('season_phase', Enums.season_phase)
    create_enum('game_day', Enums.game_day)
    create_enum('player_pos', Enums.player_pos)
    create_enum('player_status', Enums.player_status)

    c.execute('''
        CREATE TYPE game_time AS (
            phase game_phase,
            elapsed game_clock
        )
    ''')
    c.execute('''
        CREATE TYPE pos_period AS (
            elapsed usmallint
        )
    ''')
    c.execute('''
        CREATE TYPE field_pos AS (
            pos field_offset
        )
    ''')

    # Now that some types have been made, add current state to meta table.
    c.execute('''
        ALTER TABLE meta
            ADD season_type season_phase NULL,
            ADD season_year usmallint NULL
                    CHECK (season_year >= 1960 AND season_year <= 2100),
            ADD week usmallint NULL
                    CHECK (week >= 1 AND week <= 25)
    ''')

    # Create the team table and populate it.
    c.execute('''
        CREATE TABLE team (
            team_id character varying (3) NOT NULL,
            city character varying (50) NOT NULL,
            name character varying (50) NOT NULL,
            PRIMARY KEY (team_id)
        )
    ''')
    # Only the first three fields of each team entry are inserted.
    team_values = ', '.join(
        _mogrify(c, record[0:3]) for record in nfldb.team.teams)
    c.execute('''
        INSERT INTO team (team_id, city, name) VALUES %s
    ''' % team_values)

    # Tables whose definitions need no interpolation, in dependency order.
    static_tables = (
        '''
        CREATE TABLE player (
            player_id character varying (10) NOT NULL
                CHECK (char_length(player_id) = 10),
            gsis_name character varying (75) NULL,
            full_name character varying (100) NULL,
            first_name character varying (100) NULL,
            last_name character varying (100) NULL,
            team character varying (3) NOT NULL,
            position player_pos NOT NULL,
            profile_id integer NULL,
            profile_url character varying (255) NULL,
            uniform_number usmallint NULL,
            birthdate character varying (75) NULL,
            college character varying (255) NULL,
            height character varying (100) NULL,
            weight character varying (100) NULL,
            years_pro usmallint NULL,
            status player_status NOT NULL,
            PRIMARY KEY (player_id),
            FOREIGN KEY (team)
                REFERENCES team (team_id)
                ON DELETE RESTRICT
                ON UPDATE CASCADE
        )
    ''',
        '''
        CREATE TABLE game (
            gsis_id gameid NOT NULL,
            gamekey character varying (5) NULL,
            start_time utctime NOT NULL,
            week usmallint NOT NULL
                CHECK (week >= 1 AND week <= 25),
            day_of_week game_day NOT NULL,
            season_year usmallint NOT NULL
                CHECK (season_year >= 1960 AND season_year <= 2100),
            season_type season_phase NOT NULL,
            finished boolean NOT NULL,
            home_team character varying (3) NOT NULL,
            home_score usmallint NOT NULL,
            home_score_q1 usmallint NULL,
            home_score_q2 usmallint NULL,
            home_score_q3 usmallint NULL,
            home_score_q4 usmallint NULL,
            home_score_q5 usmallint NULL,
            home_turnovers usmallint NOT NULL,
            away_team character varying (3) NOT NULL,
            away_score usmallint NOT NULL,
            away_score_q1 usmallint NULL,
            away_score_q2 usmallint NULL,
            away_score_q3 usmallint NULL,
            away_score_q4 usmallint NULL,
            away_score_q5 usmallint NULL,
            away_turnovers usmallint NOT NULL,
            time_inserted utctime NOT NULL,
            time_updated utctime NOT NULL,
            PRIMARY KEY (gsis_id),
            FOREIGN KEY (home_team)
                REFERENCES team (team_id)
                ON DELETE RESTRICT
                ON UPDATE CASCADE,
            FOREIGN KEY (away_team)
                REFERENCES team (team_id)
                ON DELETE RESTRICT
                ON UPDATE CASCADE
        )
    ''',
        '''
        CREATE TABLE drive (
            gsis_id gameid NOT NULL,
            drive_id usmallint NOT NULL,
            start_field field_pos NULL,
            start_time game_time NOT NULL,
            end_field field_pos NULL,
            end_time game_time NOT NULL,
            pos_team character varying (3) NOT NULL,
            pos_time pos_period NULL,
            first_downs usmallint NOT NULL,
            result text NULL,
            penalty_yards smallint NOT NULL,
            yards_gained smallint NOT NULL,
            play_count usmallint NOT NULL,
            time_inserted utctime NOT NULL,
            time_updated utctime NOT NULL,
            PRIMARY KEY (gsis_id, drive_id),
            FOREIGN KEY (gsis_id)
                REFERENCES game (gsis_id)
                ON DELETE CASCADE,
            FOREIGN KEY (pos_team)
                REFERENCES team (team_id)
                ON DELETE RESTRICT
                ON UPDATE CASCADE
        )
    ''',
    )
    for table_sql in static_tables:
        c.execute(table_sql)

    # A sparse table (one column per category) is used to represent
    # sparse play statistic data. See issue #2:
    # https://github.com/BurntSushi/nfldb/issues/2
    play_fields = [field._sql_field for field in _play_categories.values()]
    c.execute('''
        CREATE TABLE play (
            gsis_id gameid NOT NULL,
            drive_id usmallint NOT NULL,
            play_id usmallint NOT NULL,
            time game_time NOT NULL,
            pos_team character varying (3) NOT NULL,
            yardline field_pos NULL,
            down smallint NULL
                CHECK (down >= 1 AND down <= 4),
            yards_to_go smallint NULL
                CHECK (yards_to_go >= 0 AND yards_to_go <= 100),
            description text NULL,
            note text NULL,
            time_inserted utctime NOT NULL,
            time_updated utctime NOT NULL,
            %s,
            PRIMARY KEY (gsis_id, drive_id, play_id),
            FOREIGN KEY (gsis_id, drive_id)
                REFERENCES drive (gsis_id, drive_id)
                ON DELETE CASCADE,
            FOREIGN KEY (gsis_id)
                REFERENCES game (gsis_id)
                ON DELETE CASCADE,
            FOREIGN KEY (pos_team)
                REFERENCES team (team_id)
                ON DELETE RESTRICT
                ON UPDATE CASCADE
        )
    ''' % ', '.join(play_fields))

    player_fields = [
        field._sql_field for field in _player_categories.values()]
    c.execute('''
        CREATE TABLE play_player (
            gsis_id gameid NOT NULL,
            drive_id usmallint NOT NULL,
            play_id usmallint NOT NULL,
            player_id character varying (10) NOT NULL,
            team character varying (3) NOT NULL,
            %s,
            PRIMARY KEY (gsis_id, drive_id, play_id, player_id),
            FOREIGN KEY (gsis_id, drive_id, play_id)
                REFERENCES play (gsis_id, drive_id, play_id)
                ON DELETE CASCADE,
            FOREIGN KEY (gsis_id, drive_id)
                REFERENCES drive (gsis_id, drive_id)
                ON DELETE CASCADE,
            FOREIGN KEY (gsis_id)
                REFERENCES game (gsis_id)
                ON DELETE CASCADE,
            FOREIGN KEY (player_id)
                REFERENCES player (player_id)
                ON DELETE RESTRICT,
            FOREIGN KEY (team)
                REFERENCES team (team_id)
                ON DELETE RESTRICT
                ON UPDATE CASCADE
        )
    ''' % ', '.join(player_fields))
Example #8
0
def _migrate_nfldbproj_1(c):
    """Create the nfldb-projections schema objects using cursor `c`.

    In order, this migration creates: the `fantasy_position` and
    `proj_scope` enums, the `uinteger` domain, a `nfldbproj_meta` version
    table, the `projection_source` and `fp_system` tables, the
    `fantasy_player` table together with triggers that mirror new
    `player` and `team` rows into it, the `dfs_site`, `dfs_salary`,
    `projection_set`, `stat_projection`, `fp_projection` and `fp_score`
    tables, and finally `name_disambiguation`, which is seeded from
    `teams`.

    The statements reference tables and types created by earlier
    statements (and by the base nfldb schema: `player`, `team`, `game`,
    `usmallint`, `season_phase`, `gameid`, `utctime`), so their order is
    significant.

    `_create_enum`, `ProjEnums`, `_category_sql_field`,
    `_player_categories`, `teams` and `sys` are not defined in this
    function; they are resolved from the enclosing module scope.
    """
    print('Adding nfldb-projections tables to the database...', file=sys.stderr)

    _create_enum(c, ProjEnums.fantasy_position)
    _create_enum(c, ProjEnums.proj_scope)

    # Non-negative integer domain; used for `dfs_salary.salary` below.
    c.execute('''
        CREATE DOMAIN uinteger AS integer
            CHECK (VALUE >= 0)
    ''')

    # Version bookkeeping for this extension; starts at version 0.
    c.execute('''
        CREATE TABLE nfldbproj_meta (
            nfldbproj_version smallint
        )
    ''')
    c.execute('''
        INSERT INTO nfldbproj_meta (nfldbproj_version) VALUES (0)
    ''')

    c.execute('''
        CREATE TABLE projection_source (
            source_name character varying (100) NOT NULL,
            source_url character varying (255) NULL,
            source_notes text NULL,
            PRIMARY KEY (source_name)
        )
    ''')

    c.execute('''
        CREATE TABLE fp_system (
            fpsys_name character varying (100) NOT NULL,
            fpsys_url character varying (255) NULL,
            PRIMARY KEY (fpsys_name)
        )
    ''')
    # Handle stat projections by allowing them to reference a fantasy-point
    # system "None". `stat_projection` requires fpsys_name = 'None', while
    # `dfs_site`, `fp_projection` and `fp_score` forbid it.
    c.execute('''
        INSERT INTO fp_system (fpsys_name) VALUES ('None')
    ''')

    # A fantasy player is either a real player or a team defense: the CHECK
    # requires exactly one of player_id / dst_team to be set, and the
    # fantasy_player_id to equal whichever one is set.
    c.execute('''
        CREATE TABLE fantasy_player (
            fantasy_player_id character varying (10) NOT NULL,
            player_id character varying (10) NULL,
            dst_team character varying (3) NULL,
            PRIMARY KEY (fantasy_player_id),
            FOREIGN KEY (player_id)
                REFERENCES player (player_id)
                ON DELETE CASCADE
                ON UPDATE CASCADE,
            FOREIGN KEY (dst_team)
                REFERENCES team (team_id)
                ON DELETE CASCADE
                ON UPDATE CASCADE,
            CHECK (
                (player_id IS NULL AND dst_team IS NOT NULL AND fantasy_player_id = dst_team) OR
                (player_id IS NOT NULL AND dst_team IS NULL AND fantasy_player_id = player_id)
            )
        )
    ''')
    # Trigger function shared by the `player` and `team` triggers below; it
    # branches on the name of the table that fired it.  For a new team it
    # also inserts into `name_disambiguation`, a table that is only created
    # further down in this migration.
    c.execute('''
        CREATE FUNCTION add_fantasy_player() RETURNS trigger AS $add_fantasy_player$
            BEGIN
                IF TG_TABLE_NAME = 'player' THEN
                    INSERT INTO fantasy_player (fantasy_player_id, player_id)
                        VALUES (NEW.player_id, NEW.player_id);
                    RETURN NEW;
                ELSIF TG_TABLE_NAME = 'team' THEN
                    INSERT INTO fantasy_player (fantasy_player_id, dst_team)
                        VALUES (NEW.team_id, NEW.team_id);
                    INSERT INTO name_disambiguation (name_as_scraped, fantasy_player_id)
                        VALUES (NEW.team_id, NEW.team_id);
                    RETURN NEW;
                END IF;
            END;
        $add_fantasy_player$ LANGUAGE plpgsql
    ''')
    c.execute('''
        CREATE TRIGGER fantasy_player_mirror_player
            AFTER INSERT ON player
            FOR EACH ROW
            EXECUTE PROCEDURE add_fantasy_player()
    ''')
    c.execute('''
        CREATE TRIGGER fantasy_player_mirror_team
            AFTER INSERT ON team
            FOR EACH ROW
            EXECUTE PROCEDURE add_fantasy_player()
    ''')
    # Backfill: the triggers only fire on new inserts, so rows already
    # present in `player` and `team` are copied over explicitly.
    c.execute('''
        INSERT INTO fantasy_player (fantasy_player_id, player_id)
          SELECT player_id, player_id FROM player
    ''')
    c.execute('''
        INSERT INTO fantasy_player (fantasy_player_id, dst_team)
          SELECT team_id, team_id FROM team
    ''')


    # A DFS site is identified by its name together with a real (non-'None')
    # fantasy-point system.
    c.execute('''
        CREATE TABLE dfs_site (
            fpsys_name character varying (100) NOT NULL CHECK (fpsys_name != 'None'),
            dfs_name character varying (100) NOT NULL,
            dfs_url character varying (255) NOT NULL,
            PRIMARY KEY (fpsys_name, dfs_name),
            FOREIGN KEY (fpsys_name)
                REFERENCES fp_system (fpsys_name)
                ON DELETE RESTRICT
        )
    ''')

    # One salary per site, fantasy player and (season_year, season_type, week).
    c.execute('''
        CREATE TABLE dfs_salary (
            fpsys_name character varying (100) NOT NULL,
            dfs_name character varying (100) NOT NULL,
            fantasy_player_id character varying (10) NOT NULL,
            season_year usmallint NOT NULL,
            season_type season_phase NOT NULL,
            week usmallint NOT NULL,
            salary uinteger NOT NULL,
            PRIMARY KEY (fpsys_name, dfs_name, fantasy_player_id, season_year, season_type, week),
            FOREIGN KEY (fpsys_name, dfs_name)
                REFERENCES dfs_site (fpsys_name, dfs_name)
                ON DELETE CASCADE,
            FOREIGN KEY (fantasy_player_id)
                REFERENCES fantasy_player (fantasy_player_id)
                ON DELETE RESTRICT
        )
    ''')

    # Parent table of `stat_projection` and `fp_projection`, which both
    # reference (source_name, fpsys_name, set_id).  `week` is nullable and
    # `date_accessed` defaults to the current UTC time.
    c.execute('''
        CREATE TABLE projection_set (
            source_name character varying (100) NOT NULL,
            fpsys_name character varying (100) NOT NULL,
            set_id SERIAL NOT NULL,
            projection_scope proj_scope NOT NULL,
            season_year usmallint NOT NULL,
            season_type season_phase NOT NULL,
            week usmallint NULL,
            date_accessed utctime NOT NULL DEFAULT (now() AT TIME ZONE 'utc'),
            known_incomplete bool NOT NULL DEFAULT FALSE,
            PRIMARY KEY (source_name, fpsys_name, set_id),
            FOREIGN KEY (source_name)
                REFERENCES projection_source (source_name)
                ON DELETE CASCADE,
            FOREIGN KEY (fpsys_name)
                REFERENCES fp_system (fpsys_name)
                ON DELETE CASCADE
        )
    ''')

    c.execute('''
        CREATE INDEX projection_set_in_year_phase_week ON projection_set
            (season_year DESC, season_type DESC, week DESC)
    ''')

    # Projected raw statistics: the `{}` placeholder is filled with one
    # column definition per player category, produced by
    # `_category_sql_field`.
    # NOTE(review): `set_id` is declared SERIAL here although it is a
    # foreign key into `projection_set`, while `fp_projection.set_id` below
    # is `usmallint` -- confirm which type is intended.
    c.execute('''
        CREATE TABLE stat_projection (
            source_name character varying (100) NOT NULL,
            fpsys_name character varying (100) NOT NULL CHECK (fpsys_name = 'None'),
            set_id SERIAL NOT NULL,
            fantasy_player_id character varying (10) NOT NULL,
            gsis_id gameid NULL,
            team character varying (3) NOT NULL,
            fantasy_pos fantasy_position NOT NULL,
            {},
            PRIMARY KEY (source_name, fpsys_name, set_id, fantasy_player_id),
            FOREIGN KEY (source_name)
                REFERENCES projection_source (source_name)
                ON DELETE CASCADE,
            FOREIGN KEY (source_name, fpsys_name, set_id)
                REFERENCES projection_set (source_name, fpsys_name, set_id)
                ON DELETE CASCADE,
            FOREIGN KEY (fantasy_player_id)
                REFERENCES fantasy_player (fantasy_player_id)
                ON DELETE RESTRICT,
            FOREIGN KEY (gsis_id)
                REFERENCES game (gsis_id)
                ON DELETE RESTRICT,
            FOREIGN KEY (team)
                REFERENCES team (team_id)
                ON DELETE RESTRICT
                ON UPDATE CASCADE
        )
    '''.format(
        ', '.join(_category_sql_field(cat) for cat in _player_categories.values())
    ))

    # Projected fantasy points under a real (non-'None') scoring system,
    # with an optional non-negative variance.
    c.execute('''
        CREATE TABLE fp_projection (
            source_name character varying (100) NOT NULL,
            fpsys_name character varying (100) NOT NULL CHECK (fpsys_name != 'None'),
            set_id usmallint NOT NULL,
            fantasy_player_id character varying (10) NOT NULL,
            gsis_id gameid NULL,
            team character varying (3) NOT NULL,
            fantasy_pos fantasy_position NOT NULL,
            projected_fp real NOT NULL,
            fp_variance real NULL CHECK (fp_variance >= 0),
            PRIMARY KEY (source_name, fpsys_name, set_id, fantasy_player_id),
            FOREIGN KEY (source_name)
                REFERENCES projection_source (source_name)
                ON DELETE CASCADE,
            FOREIGN KEY (source_name, fpsys_name, set_id)
                REFERENCES projection_set (source_name, fpsys_name, set_id)
                ON DELETE CASCADE,
            FOREIGN KEY (fpsys_name)
                REFERENCES fp_system (fpsys_name)
                ON DELETE CASCADE,
            FOREIGN KEY (fantasy_player_id)
                REFERENCES fantasy_player (fantasy_player_id)
                ON DELETE RESTRICT,
            FOREIGN KEY (gsis_id)
                REFERENCES game (gsis_id)
                ON DELETE RESTRICT,
            FOREIGN KEY (team)
                REFERENCES team (team_id)
                ON DELETE RESTRICT
                ON UPDATE CASCADE
        )
    ''')

    # Actual fantasy points per scoring system, game and fantasy player.
    c.execute('''
        CREATE TABLE fp_score (
            fpsys_name character varying (100) NOT NULL CHECK (fpsys_name != 'None'),
            gsis_id gameid NOT NULL,
            fantasy_player_id character varying (10) NOT NULL,
            team character varying (3) NOT NULL,
            fantasy_pos fantasy_position NOT NULL,
            actual_fp real NOT NULL,
            PRIMARY KEY (fpsys_name, gsis_id, fantasy_player_id),
            FOREIGN KEY (fpsys_name)
                REFERENCES fp_system (fpsys_name)
                ON DELETE CASCADE,
            FOREIGN KEY (gsis_id)
                REFERENCES game (gsis_id)
                ON DELETE RESTRICT,
            FOREIGN KEY (fantasy_player_id)
                REFERENCES fantasy_player (fantasy_player_id)
                ON DELETE RESTRICT,
            FOREIGN KEY (team)
                REFERENCES team (team_id)
                ON DELETE RESTRICT
                ON UPDATE CASCADE
        )
    ''')

    # Maps a scraped name to a single fantasy player; the scraped name is
    # the primary key, so each name can resolve to only one player.
    c.execute('''
        CREATE TABLE name_disambiguation (
            name_as_scraped character varying (100) NOT NULL,
            fantasy_player_id character varying (10) NOT NULL,
            PRIMARY KEY (name_as_scraped),
            FOREIGN KEY (fantasy_player_id)
                REFERENCES fantasy_player (fantasy_player_id)
                ON DELETE CASCADE
        )
    ''')

    # Name disambiguations for all team names.  Every element of a `teams`
    # entry is registered as a scraped name pointing at the entry's first
    # element; entries whose first element is 'UNK' are skipped entirely.
    for team_names in teams:
        if team_names[0] == 'UNK':
            continue

        for team_name in team_names:
            if team_name == 'New York':  # 'New York' remains ambiguous.
                continue

            # Values are passed as query parameters rather than being
            # interpolated into the SQL text.
            c.execute('''
                INSERT INTO name_disambiguation (name_as_scraped, fantasy_player_id)
                  VALUES (%s, %s)
            ''', (team_name, team_names[0]))
Example #9
0
def _migrate_7(c):
    """Add the ``agg_play`` aggregation table and keep it in sync via triggers.

    Steps, in order:

    1. Print a one-time migration notice to stderr.
    2. Create ``agg_play`` keyed by ``(gsis_id, drive_id, play_id)`` with one
       column per player statistical category.
    3. Populate it with the per-play sum of every category over
       ``play_player``; plays without any ``play_player`` row get zeros
       (``LEFT JOIN`` + ``COALESCE``).
    4. Index ``gsis_id``, ``(gsis_id, drive_id)`` and every category column.
    5. Install a trigger on ``play`` that adds the matching ``agg_play`` row
       on insert, and a trigger on ``play_player`` that recomputes the sums
       of the affected play on insert or update.

    NOTE(review): this migration installs no trigger for DELETE on
    ``play_player``; confirm whether stale sums after a delete are handled
    elsewhere.

    :param c: database cursor; only its ``execute`` method is used. The SQL
        uses PL/pgSQL trigger functions, so a PostgreSQL connection is needed.
    """
    from nfldb.types import _player_categories

    # The same categories drive the table columns, the initial aggregation,
    # the indexes and the update trigger, so resolve them exactly once.
    categories = list(_player_categories.values())
    category_ids = [cat.category_id for cat in categories]

    print(
        """
MIGRATING DATABASE... PLEASE WAIT

THIS WILL ONLY HAPPEN ONCE.

This is currently adding a play aggregation table (a materialized view) derived
from the `play` and `play_player` tables. Depending on your machine, this
should take less than two minutes (this includes aggregating the data and
adding indexes).

This aggregation table will automatically update itself when data is added or
changed.
""",
        file=sys.stderr,
    )

    # One column definition per category, as provided by the category itself.
    column_defs = ", ".join(cat._sql_field for cat in categories)
    c.execute(
        """
        CREATE TABLE agg_play (
            gsis_id gameid NOT NULL,
            drive_id usmallint NOT NULL,
            play_id usmallint NOT NULL,
            %s,
            PRIMARY KEY (gsis_id, drive_id, play_id),
            FOREIGN KEY (gsis_id, drive_id, play_id)
                REFERENCES play (gsis_id, drive_id, play_id)
                ON DELETE CASCADE,
            FOREIGN KEY (gsis_id, drive_id)
                REFERENCES drive (gsis_id, drive_id)
                ON DELETE CASCADE,
            FOREIGN KEY (gsis_id)
                REFERENCES game (gsis_id)
                ON DELETE CASCADE
        )
    """
        % column_defs
    )

    # The INSERT is positional: the select list must follow the column order
    # of the CREATE TABLE above (keys first, then categories in order).
    initial_select = ["play.gsis_id", "play.drive_id", "play.play_id"] + [
        "COALESCE(SUM(play_player.%s), 0)" % field for field in category_ids
    ]
    c.execute(
        """
        INSERT INTO agg_play
        SELECT {select}
        FROM play
        LEFT JOIN play_player
        ON (play.gsis_id, play.drive_id, play.play_id)
           = (play_player.gsis_id, play_player.drive_id, play_player.play_id)
        GROUP BY play.gsis_id, play.drive_id, play.play_id
    """.format(
            select=", ".join(initial_select)
        )
    )

    print("Aggregation complete. Adding indexes...", file=sys.stderr)
    c.execute(
        """
        CREATE INDEX agg_play_in_gsis_id
            ON agg_play (gsis_id ASC);
        CREATE INDEX agg_play_in_gsis_drive_id
            ON agg_play (gsis_id ASC, drive_id ASC);
    """
    )
    # Use the explicit column name (as everywhere else in this migration)
    # rather than relying on the category object's string conversion.
    for field in category_ids:
        c.execute("CREATE INDEX agg_play_in_%s ON agg_play (%s ASC)" % (field, field))

    print("Indexing complete. Adding triggers...", file=sys.stderr)
    # A new play gets an agg_play row holding only its key; the category
    # columns take whatever defaults their column definitions declare.
    c.execute(
        """
        CREATE FUNCTION agg_play_insert() RETURNS trigger AS $$
            BEGIN
                INSERT INTO
                    agg_play (gsis_id, drive_id, play_id)
                    VALUES   (NEW.gsis_id, NEW.drive_id, NEW.play_id);
                RETURN NULL;
            END;
        $$ LANGUAGE 'plpgsql';
    """
    )
    c.execute(
        """
        CREATE TRIGGER agg_play_sync_insert
        AFTER INSERT ON play
        FOR EACH ROW EXECUTE PROCEDURE agg_play_insert();
    """
    )

    # The update trigger recomputes every sum for the single play touched by
    # the new/changed play_player row and copies them into agg_play.
    trigger_select = [
        "COALESCE(SUM(play_player.{f}), 0) AS {f}".format(f=field) for field in category_ids
    ]
    set_columns = ["{f} = s.{f}".format(f=field) for field in category_ids]
    c.execute(
        """
        CREATE FUNCTION agg_play_update() RETURNS trigger AS $$
            BEGIN
                UPDATE agg_play SET {set_columns}
                FROM (
                    SELECT {select}
                    FROM play
                    LEFT JOIN play_player
                    ON (play.gsis_id, play.drive_id, play.play_id)
                       = (play_player.gsis_id, play_player.drive_id,
                          play_player.play_id)
                    WHERE (play.gsis_id, play.drive_id, play.play_id)
                          = (NEW.gsis_id, NEW.drive_id, NEW.play_id)
                ) s
                WHERE (agg_play.gsis_id, agg_play.drive_id, agg_play.play_id)
                      = (NEW.gsis_id, NEW.drive_id, NEW.play_id);
                RETURN NULL;
            END;
        $$ LANGUAGE 'plpgsql';
    """.format(
            set_columns=", ".join(set_columns), select=", ".join(trigger_select)
        )
    )
    c.execute(
        """
        CREATE TRIGGER agg_play_sync_update
        AFTER INSERT OR UPDATE ON play_player
        FOR EACH ROW EXECUTE PROCEDURE agg_play_update();
    """
    )
Example #10
0
def _migrate_nfldbproj_1(c):
    """Create the nfldb-projections schema (migration step 1).

    Executes, in dependency order, the DDL for:

    * the ``fantasy_position`` and ``proj_scope`` enums and the ``uinteger``
      domain;
    * the version table ``nfldbproj_meta`` (seeded with version 0);
    * ``projection_source``, ``fp_system`` (seeded with the sentinel system
      ``'None'``), ``fantasy_player``, ``dfs_site``, ``dfs_salary``,
      ``projection_set``, ``stat_projection``, ``fp_projection``,
      ``fp_score`` and ``name_disambiguation``;
    * triggers that mirror every newly inserted ``player`` / ``team`` row
      into ``fantasy_player``.

    It also back-fills ``fantasy_player`` from the existing ``player`` and
    ``team`` rows and seeds ``name_disambiguation`` from ``teams``.

    Statement order matters: each table is created before the tables whose
    foreign keys reference it.

    The names ``_create_enum``, ``ProjEnums``, ``teams``,
    ``_category_sql_field`` and ``_player_categories`` are not defined in this
    function; they are expected to be in scope at module level.

    :param c: database cursor; only its ``execute`` method is used here (it is
        also handed to ``_create_enum``). The SQL uses PL/pgSQL, so a
        PostgreSQL connection is needed.
    """
    print('Adding nfldb-projections tables to the database...',
          file=sys.stderr)

    # Enum types used by the tables below (fantasy_pos / projection_scope).
    _create_enum(c, ProjEnums.fantasy_position)
    _create_enum(c, ProjEnums.proj_scope)

    # Non-negative integer domain; used for dfs_salary.salary.
    c.execute('''
        CREATE DOMAIN uinteger AS integer
            CHECK (VALUE >= 0)
    ''')

    # Single-row schema-version table, seeded with 0.
    # NOTE(review): presumably the migration driver bumps this value once the
    # migration succeeds -- confirm against the caller.
    c.execute('''
        CREATE TABLE nfldbproj_meta (
            nfldbproj_version smallint
        )
    ''')
    c.execute('''
        INSERT INTO nfldbproj_meta (nfldbproj_version) VALUES (0)
    ''')

    # Where a set of projections came from.
    c.execute('''
        CREATE TABLE projection_source (
            source_name character varying (100) NOT NULL,
            source_url character varying (255) NULL,
            source_notes text NULL,
            PRIMARY KEY (source_name)
        )
    ''')

    # Fantasy-point scoring systems.
    c.execute('''
        CREATE TABLE fp_system (
            fpsys_name character varying (100) NOT NULL,
            fpsys_url character varying (255) NULL,
            PRIMARY KEY (fpsys_name)
        )
    ''')
    # Handle stat projections by allowing them to reference a fantasy-point system "None".
    # The CHECK constraints below enforce the split: stat_projection requires
    # fpsys_name = 'None', while dfs_site, fp_projection and fp_score forbid it.
    c.execute('''
        INSERT INTO fp_system (fpsys_name) VALUES ('None')
    ''')

    # A fantasy "player" is either a real player or a team defense. The CHECK
    # requires exactly one of player_id / dst_team to be set and
    # fantasy_player_id to equal whichever one is set.
    c.execute('''
        CREATE TABLE fantasy_player (
            fantasy_player_id character varying (10) NOT NULL,
            player_id character varying (10) NULL,
            dst_team character varying (3) NULL,
            PRIMARY KEY (fantasy_player_id),
            FOREIGN KEY (player_id)
                REFERENCES player (player_id)
                ON DELETE CASCADE
                ON UPDATE CASCADE,
            FOREIGN KEY (dst_team)
                REFERENCES team (team_id)
                ON DELETE CASCADE
                ON UPDATE CASCADE,
            CHECK (
                (player_id IS NULL AND dst_team IS NOT NULL AND fantasy_player_id = dst_team) OR
                (player_id IS NOT NULL AND dst_team IS NULL AND fantasy_player_id = player_id)
            )
        )
    ''')
    # One trigger function shared by both mirror triggers below; it branches
    # on TG_TABLE_NAME. For a new team it additionally maps the team_id to
    # itself in name_disambiguation.
    # NOTE(review): name_disambiguation is only created further down in this
    # function; this presumably works because the function body is resolved
    # when the trigger fires, not when the function is created -- confirm.
    c.execute('''
        CREATE FUNCTION add_fantasy_player() RETURNS trigger AS $add_fantasy_player$
            BEGIN
                IF TG_TABLE_NAME = 'player' THEN
                    INSERT INTO fantasy_player (fantasy_player_id, player_id)
                        VALUES (NEW.player_id, NEW.player_id);
                    RETURN NEW;
                ELSIF TG_TABLE_NAME = 'team' THEN
                    INSERT INTO fantasy_player (fantasy_player_id, dst_team)
                        VALUES (NEW.team_id, NEW.team_id);
                    INSERT INTO name_disambiguation (name_as_scraped, fantasy_player_id)
                        VALUES (NEW.team_id, NEW.team_id);
                    RETURN NEW;
                END IF;
            END;
        $add_fantasy_player$ LANGUAGE plpgsql
    ''')
    c.execute('''
        CREATE TRIGGER fantasy_player_mirror_player
            AFTER INSERT ON player
            FOR EACH ROW
            EXECUTE PROCEDURE add_fantasy_player()
    ''')
    c.execute('''
        CREATE TRIGGER fantasy_player_mirror_team
            AFTER INSERT ON team
            FOR EACH ROW
            EXECUTE PROCEDURE add_fantasy_player()
    ''')
    # Back-fill fantasy_player from rows that already exist; the triggers
    # above only cover rows inserted from now on.
    c.execute('''
        INSERT INTO fantasy_player (fantasy_player_id, player_id)
          SELECT player_id, player_id FROM player
    ''')
    c.execute('''
        INSERT INTO fantasy_player (fantasy_player_id, dst_team)
          SELECT team_id, team_id FROM team
    ''')

    # Daily-fantasy sites; each is tied to a real (non-'None') scoring system.
    c.execute('''
        CREATE TABLE dfs_site (
            fpsys_name character varying (100) NOT NULL CHECK (fpsys_name != 'None'),
            dfs_name character varying (100) NOT NULL,
            dfs_url character varying (255) NOT NULL,
            PRIMARY KEY (fpsys_name, dfs_name),
            FOREIGN KEY (fpsys_name)
                REFERENCES fp_system (fpsys_name)
                ON DELETE RESTRICT
        )
    ''')

    # Salary of a fantasy player on a given site for a given week.
    c.execute('''
        CREATE TABLE dfs_salary (
            fpsys_name character varying (100) NOT NULL,
            dfs_name character varying (100) NOT NULL,
            fantasy_player_id character varying (10) NOT NULL,
            season_year usmallint NOT NULL,
            season_type season_phase NOT NULL,
            week usmallint NOT NULL,
            salary uinteger NOT NULL,
            PRIMARY KEY (fpsys_name, dfs_name, fantasy_player_id, season_year, season_type, week),
            FOREIGN KEY (fpsys_name, dfs_name)
                REFERENCES dfs_site (fpsys_name, dfs_name)
                ON DELETE CASCADE,
            FOREIGN KEY (fantasy_player_id)
                REFERENCES fantasy_player (fantasy_player_id)
                ON DELETE RESTRICT
        )
    ''')

    # Header row for a batch of projections; stat_projection and
    # fp_projection rows reference it via (source_name, fpsys_name, set_id).
    # date_accessed defaults to the current time in UTC.
    c.execute('''
        CREATE TABLE projection_set (
            source_name character varying (100) NOT NULL,
            fpsys_name character varying (100) NOT NULL,
            set_id SERIAL NOT NULL,
            projection_scope proj_scope NOT NULL,
            season_year usmallint NOT NULL,
            season_type season_phase NOT NULL,
            week usmallint NULL,
            date_accessed utctime NOT NULL DEFAULT (now() AT TIME ZONE 'utc'),
            known_incomplete bool NOT NULL DEFAULT FALSE,
            PRIMARY KEY (source_name, fpsys_name, set_id),
            FOREIGN KEY (source_name)
                REFERENCES projection_source (source_name)
                ON DELETE CASCADE,
            FOREIGN KEY (fpsys_name)
                REFERENCES fp_system (fpsys_name)
                ON DELETE CASCADE
        )
    ''')

    c.execute('''
        CREATE INDEX projection_set_in_year_phase_week ON projection_set
            (season_year DESC, season_type DESC, week DESC)
    ''')

    # Projected raw statistics. The `{}` placeholder is filled with one column
    # definition per player category, produced by _category_sql_field.
    # NOTE(review): set_id is declared SERIAL here although it is part of the
    # foreign key to projection_set (fp_projection declares it usmallint);
    # an auto-generated default on a referencing column looks unintended.
    c.execute('''
        CREATE TABLE stat_projection (
            source_name character varying (100) NOT NULL,
            fpsys_name character varying (100) NOT NULL CHECK (fpsys_name = 'None'),
            set_id SERIAL NOT NULL,
            fantasy_player_id character varying (10) NOT NULL,
            gsis_id gameid NULL,
            team character varying (3) NOT NULL,
            fantasy_pos fantasy_position NOT NULL,
            {},
            PRIMARY KEY (source_name, fpsys_name, set_id, fantasy_player_id),
            FOREIGN KEY (source_name)
                REFERENCES projection_source (source_name)
                ON DELETE CASCADE,
            FOREIGN KEY (source_name, fpsys_name, set_id)
                REFERENCES projection_set (source_name, fpsys_name, set_id)
                ON DELETE CASCADE,
            FOREIGN KEY (fantasy_player_id)
                REFERENCES fantasy_player (fantasy_player_id)
                ON DELETE RESTRICT,
            FOREIGN KEY (gsis_id)
                REFERENCES game (gsis_id)
                ON DELETE RESTRICT,
            FOREIGN KEY (team)
                REFERENCES team (team_id)
                ON DELETE RESTRICT
                ON UPDATE CASCADE
        )
    '''.format(', '.join(
        _category_sql_field(cat) for cat in _player_categories.values())))

    # Projected fantasy points under a real (non-'None') scoring system, with
    # an optional non-negative variance.
    c.execute('''
        CREATE TABLE fp_projection (
            source_name character varying (100) NOT NULL,
            fpsys_name character varying (100) NOT NULL CHECK (fpsys_name != 'None'),
            set_id usmallint NOT NULL,
            fantasy_player_id character varying (10) NOT NULL,
            gsis_id gameid NULL,
            team character varying (3) NOT NULL,
            fantasy_pos fantasy_position NOT NULL,
            projected_fp real NOT NULL,
            fp_variance real NULL CHECK (fp_variance >= 0),
            PRIMARY KEY (source_name, fpsys_name, set_id, fantasy_player_id),
            FOREIGN KEY (source_name)
                REFERENCES projection_source (source_name)
                ON DELETE CASCADE,
            FOREIGN KEY (source_name, fpsys_name, set_id)
                REFERENCES projection_set (source_name, fpsys_name, set_id)
                ON DELETE CASCADE,
            FOREIGN KEY (fpsys_name)
                REFERENCES fp_system (fpsys_name)
                ON DELETE CASCADE,
            FOREIGN KEY (fantasy_player_id)
                REFERENCES fantasy_player (fantasy_player_id)
                ON DELETE RESTRICT,
            FOREIGN KEY (gsis_id)
                REFERENCES game (gsis_id)
                ON DELETE RESTRICT,
            FOREIGN KEY (team)
                REFERENCES team (team_id)
                ON DELETE RESTRICT
                ON UPDATE CASCADE
        )
    ''')

    # Actual fantasy points scored by a fantasy player in a game, per
    # (non-'None') scoring system.
    c.execute('''
        CREATE TABLE fp_score (
            fpsys_name character varying (100) NOT NULL CHECK (fpsys_name != 'None'),
            gsis_id gameid NOT NULL,
            fantasy_player_id character varying (10) NOT NULL,
            team character varying (3) NOT NULL,
            fantasy_pos fantasy_position NOT NULL,
            actual_fp real NOT NULL,
            PRIMARY KEY (fpsys_name, gsis_id, fantasy_player_id),
            FOREIGN KEY (fpsys_name)
                REFERENCES fp_system (fpsys_name)
                ON DELETE CASCADE,
            FOREIGN KEY (gsis_id)
                REFERENCES game (gsis_id)
                ON DELETE RESTRICT,
            FOREIGN KEY (fantasy_player_id)
                REFERENCES fantasy_player (fantasy_player_id)
                ON DELETE RESTRICT,
            FOREIGN KEY (team)
                REFERENCES team (team_id)
                ON DELETE RESTRICT
                ON UPDATE CASCADE
        )
    ''')

    # Maps a name as it appears in scraped data to a fantasy_player_id. The
    # scraped name is the primary key, so each name maps to one player only.
    c.execute('''
        CREATE TABLE name_disambiguation (
            name_as_scraped character varying (100) NOT NULL,
            fantasy_player_id character varying (10) NOT NULL,
            PRIMARY KEY (name_as_scraped),
            FOREIGN KEY (fantasy_player_id)
                REFERENCES fantasy_player (fantasy_player_id)
                ON DELETE CASCADE
        )
    ''')

    # Name disambiguations for all team names.
    # Each entry of `teams` is a sequence of names for one team; every name in
    # it (including the first) is mapped to the first element.
    # NOTE(review): the first element is presumably the team_id that the
    # back-fill above stored as fantasy_player_id -- confirm.
    for team_names in teams:
        # Skip the 'UNK' entry (presumably the "unknown team" placeholder).
        if team_names[0] == 'UNK':
            continue

        for team_name in team_names:
            if team_name == 'New York':  # 'New York' remains ambiguous.
                continue

            # The %s markers are driver-side parameter placeholders; the
            # values are passed separately as the second argument.
            c.execute(
                '''
                INSERT INTO name_disambiguation (name_as_scraped, fantasy_player_id)
                  VALUES (%s, %s)
            ''', (team_name, team_names[0]))