Ejemplo n.º 1
0
def process_division(year):
    print 'division'
    for _type in ('roster', 'projected'):
        # print '\t', _type
        for div in ('AL East', 'AL Central', 'AL West', 'NL East',
                    'NL Central', 'NL West'):

            qry = """SELECT 
            team_abb, team_name, 
            mean_W/162.0, var, year, games_played
            FROM __playoff_probabilities
            JOIN (SELECT team_abb, MAX(year) AS year, MAX(games_played) AS games_played FROM __playoff_probabilities GROUP BY team_abb, year) t2 USING (team_abb, year, games_played)
            WHERE strength_type = '%s'
            AND division = '%s'
            AND year = %s;"""
            query = qry % (_type, div, year)

            # raw_input(query)

            res = db.query(query)

            div_dict = {}
            for row in res:
                team_abb, team_name, strength_pct, var, year, games_played = row
                # print '\t\t', team_name

                if games_played > 162:
                    strength_pct = float(
                        (float(strength_pct) * 162.0) / float(games_played))
                else:
                    strength_pct = float(strength_pct)

                division, div_teams, conf_teams, non_conf_teams = helper.get_division(
                    team_name, year)

                win_division_prob = np.prod(
                    get_probabilities(team_name,
                                      div_teams, strength_pct, games_played,
                                      float(var), _type, year)[0])

                div_dict[team_name] = [
                    win_division_prob, 1.0, False, year, games_played
                ]

            col_name = 'win_division'
            adjust_probabilities(div_dict, col_name, 1.0, _type)
Ejemplo n.º 2
0
def process_top_seed(year):
    print "top seed"

    for _type in ('roster', 'projected'):
        # print '\t', _type
        for conf in ('AL', 'NL'):
            team_qry = """SELECT 
            team_abb, team_name, win_division,
            mean_W/162.0, var, year, games_played
            FROM __playoff_probabilities
            JOIN (SELECT team_abb, MAX(year) AS year, MAX(games_played) AS games_played FROM __playoff_probabilities GROUP BY team_abb, year) t2 USING (team_abb, year, games_played)
            WHERE strength_type = '%s' 
            AND LEFT(division,2) = '%s'
            AND year = %s;"""
            team_query = team_qry % (_type, conf, year)
            # raw_input(team_query)
            team_res = db.query(team_query)

            top_dict = {}
            for team_row in team_res:
                team_abb, team_name, max_prob, strength_pct, var, year, games_played = team_row
                max_prob = float(max_prob)
                # print '\t\t', team_name

                if games_played > 162:
                    strength_pct = float(
                        (float(strength_pct) * 162.0) / float(games_played))
                else:
                    strength_pct = float(strength_pct)

                division, div_teams, conf_teams, non_conf_teams = helper.get_division(
                    team_name, year)

                top_seed_prob = np.prod(
                    get_probabilities(team_name,
                                      conf_teams, strength_pct, games_played,
                                      float(var), _type, year)[0])

                top_dict[team_name] = [
                    top_seed_prob, max_prob, False, year, games_played
                ]

            col_name = 'top_seed'
            adjust_probabilities(top_dict, col_name, 1.0, _type)
Ejemplo n.º 3
0
def process_basic(year):
    print 'initial table setup'
    for _type in ('roster', 'projected'):

        basic_query = """SELECT
        team_abb, team_name,
        year, season_gp, games_played, current_W, current_L,
        overall_var,
        roster_W, roster_L, roster_pct,
        ros_W, ros_L, ros_pct,
        projected_W, projected_L, projected_pct
        FROM __team_strength t1
        JOIN (SELECT team_abb, MAX(year) AS year, MAX(games_played) AS games_played FROM __team_strength GROUP BY team_abb, year) t2 USING (team_abb, year, games_played)
        WHERE year = %s;"""

        basic_query = basic_query % (year)

        # raw_input(basic_query)

        basic_res = db.query(basic_query)

        for basic_row in basic_res:
            entry = {}
            team_abb, team_name, year, season_gp, games_played, cur_W, cur_L, overall_var, roster_W, roster_L, roster_pct, ros_W, ros_L, ros_pct, projected_W, projected_L, projected_pct = basic_row

            games_played = float(games_played)
            games_remaining = float(max(0.0, 162.0 - games_played))

            # linearly scaled variance (no variance at game 162, full variance at game 0)
            projected_var = max(0.001,
                                float(overall_var) * (games_remaining / 162.0))

            projected_std = max(
                0.001,
                math.sqrt(float(overall_var)) * (games_remaining / 162.0))

            division, div_teams, conf_teams, non_conf_teams = helper.get_division(
                team_name, year)

            if _type == 'roster':
                p_95 = float(roster_W) + 1.96 * math.sqrt(float(overall_var))
                p_75 = float(roster_W) + 1.15 * math.sqrt(float(overall_var))
                p_25 = float(roster_W) - 1.15 * math.sqrt(float(overall_var))
                p_05 = float(roster_W) - 1.96 * math.sqrt(float(overall_var))
                entry['team_abb'] = team_abb
                entry['team_name'] = team_name
                entry['year'] = year
                entry['season_gp'] = season_gp
                entry['games_played'] = games_played
                entry['division'] = division
                entry['strength_type'] = _type
                entry['strength_pct'] = roster_pct
                entry['var'] = overall_var
                entry['mean_W'] = roster_W
                entry['mean_L'] = roster_L
                entry['p_95'] = p_95
                entry['p_75'] = p_75
                entry['p_25'] = p_25
                entry['p_05'] = p_05

            elif _type == 'projected':
                p_95 = float(projected_W) + 1.96 * (projected_std)
                p_75 = float(projected_W) + 1.15 * (projected_std)
                p_25 = float(projected_W) - 1.15 * (projected_std)
                p_05 = float(projected_W) - 1.96 * (projected_std)

                entry['team_abb'] = team_abb
                entry['team_name'] = team_name
                entry['year'] = year
                entry['season_gp'] = season_gp
                entry['games_played'] = games_played
                entry['division'] = division
                entry['strength_type'] = _type
                entry['strength_pct'] = ros_pct
                entry['var'] = projected_var
                entry['mean_W'] = projected_W
                entry['mean_L'] = projected_L
                entry['p_95'] = p_95
                entry['p_75'] = p_75
                entry['p_25'] = p_25
                entry['p_05'] = p_05

            db.insertRowDict(entry,
                             '__playoff_probabilities',
                             insertMany=False,
                             replace=True,
                             rid=0,
                             debug=1)
            db.conn.commit()
Ejemplo n.º 4
0
def process_wc2(year):
    print "wc2"
    for _type in ('roster', 'projected'):
        print '\t', _type
        for conf in ('AL', 'NL'):

            team_query = "SELECT team_abb, team_name, (win_division+wc_1), mean_W/162.0, var, year, games_played FROM __playoff_probabilities JOIN (SELECT team_abb, MAX(year) AS year, MAX(games_played) AS games_played FROM __playoff_probabilities GROUP BY team_abb, year) t2 USING (team_abb, year, games_played) WHERE strength_type = '%s' AND LEFT(division,2) = '%s'AND year = %s" % (
                _type, conf, year)

            team_res = db.query(team_query)

            wc2_dict = {}
            for team_row in team_res:
                team_abb, team_name, po_prob, strength_pct, var, year, games_played = team_row
                print '\t\t', team_name

                if games_played > 162:
                    strength_pct = float(
                        (float(strength_pct) * 162.0) / float(games_played))
                else:
                    strength_pct = float(strength_pct)

                division, div_teams, conf_teams, non_conf_teams = helper.get_division(
                    team_name, year)

                div_winners_qry = """SELECT 
                p1.team_name,
                p2.team_name,
                p3.team_name,
                p4.team_name,
                (p1.win_division+p1.wc_1)*(p2.win_division+p2.wc_1)*(p3.win_division+p3.wc_1)*(p4.win_division+p4.wc_1)
                FROM __playoff_probabilities p1
                JOIN __playoff_probabilities p2
                JOIN __playoff_probabilities p3
                JOIN __playoff_probabilities p4
                JOIN (SELECT team_abb, MAX(year) AS year, MAX(games_played) AS games_played FROM __playoff_probabilities GROUP BY team_abb, year) t1 ON (p1.team_abb=t1.team_abb AND p1.year=t1.year AND  p1.games_played=t1.games_played)
                JOIN (SELECT team_abb, MAX(YEAR) AS YEAR, MAX(games_played) AS games_played FROM __playoff_probabilities GROUP BY team_abb, year) t2 ON (p2.team_abb=t2.team_abb AND p2.year=t2.year AND  p2.games_played=t2.games_played)
                JOIN (SELECT team_abb, MAX(YEAR) AS YEAR, MAX(games_played) AS games_played FROM __playoff_probabilities GROUP BY team_abb, year) t3 ON (p3.team_abb=t3.team_abb AND p3.year=t3.year AND  p3.games_played=t3.games_played)
                JOIN (SELECT team_abb, MAX(YEAR) AS YEAR, MAX(games_played) AS games_played FROM __playoff_probabilities GROUP BY team_abb, year) t4 ON (p4.team_abb=t4.team_abb AND p4.year=t4.year AND  p4.games_played=t4.games_played)
                WHERE 1
                AND p1.strength_type = '%s'
                AND p2.strength_type = '%s'
                AND p3.strength_type = '%s'
                AND p4.strength_type = '%s'
                AND p1.division = '%s West'
                AND p2.division = '%s Central'
                AND p3.division = '%s East'
                AND LEFT(p4.division,2) = '%s'
                AND p1.team_name != '%s'
                AND p2.team_name != '%s'
                AND p3.team_name != '%s'
                AND p4.team_name != '%s'
                AND p1.team_name != p4.team_name
                AND p2.team_name != p4.team_name
                AND p3.team_name != p4.team_name
                AND p1.year = %s
                AND p2.year = %s
                AND p3.year = %s
                AND p4.year = %s;"""

                div_winners_query = div_winners_qry % (
                    _type, _type, _type, _type, conf, conf, conf, conf,
                    team_name, team_name, team_name, team_name, year, year,
                    year, year)
                div_winners_res = db.query(div_winners_query)

                wc2_pre_prob = float(0.0)
                for div_row in div_winners_res:
                    div1_team, div2_team, div3_team, div4_team, situation_prob = div_row

                    set_teams = []
                    for tm in conf_teams:
                        if tm not in (div1_team, div2_team, div3_team,
                                      div4_team):
                            set_teams.append(tm)

                    win_wc2_prob = np.prod(
                        get_probabilities(team_name, set_teams, strength_pct,
                                          games_played, float(var), _type,
                                          year)[0])

                    wc2_pre_prob += (float(situation_prob) *
                                     float(win_wc2_prob))

                wc2_pre_prob = wc2_pre_prob * (1.0 - float(po_prob))
                wc2_dict[team_name] = [
                    wc2_pre_prob, (1.0 - float(po_prob)), False, year,
                    games_played
                ]

            col_name = 'wc_2'
            adjust_probabilities(wc2_dict, col_name, 1.0, _type)