def current_series(year, timestamp):
    """Recompute series win probabilities for the latest playoff bracket.

    Reads the rows of ``__in_playoff_bracket`` for ``year`` that carry the
    most recent ``update_time``, computes for each (team, opponent) row the
    probability of the team winning the series in each possible number of
    games, and writes the row back to ``__in_playoff_bracket`` (with
    ``replace=True``) stamped with ``timestamp``. Each row is committed
    individually.

    Args:
        year: Season used to filter both the game-results and bracket queries.
        timestamp: Value stored as ``update_time`` on every written row.
    """
    # Single-argument print() behaves identically on Python 2 and Python 3.
    print('\tdetermining current series probabilities')

    games_query = "SELECT IFNULL(SUM(IF(winning_team IS NOT NULL,1,0)),0) FROM __in_playoff_game_results WHERE year = %s;" % year

    # Number of playoff games for the year that already have a winner recorded.
    total_playoff_games_played = db.query(games_query)[0][0]

    bracket_query = """SELECT 
    series_id, year, strength_type, 
    team, opponent,
    series_wins, series_losses
    FROM __in_playoff_bracket
    WHERE update_time = (SELECT MAX(update_time) FROM __in_playoff_bracket)
    AND year = %s;""" % year

    # Maximum series length keyed by the two-letter series type.
    games_by_series_type = {'WC': 1, 'DS': 5, 'CS': 7, 'WS': 7}

    for row in db.query(bracket_query):
        # The row's own year is used from here on (as before); it gets a
        # distinct name so the ``year`` parameter is no longer overwritten.
        (series_id, row_year, strength_type, team, opponent,
         series_wins, series_losses) = row

        # Strip the league prefix, then keep the two-letter series type.
        series_type = series_id.replace('AL', '').replace('NL', '')[:2]
        # NOTE(review): an unrecognised series type yields None here and the
        # arithmetic below raises TypeError, exactly as it did before.
        series_games = games_by_series_type.get(series_type)
        # Explicit floor division: same result as Python 2's int ``/`` and
        # still an int on Python 3 (required by range() below).
        wins_needed = series_games // 2 + 1

        team_abb = helper.get_team_abb(team, row_year)
        oppn_abb = helper.get_team_abb(opponent, row_year)

        team_winProb = get_single_game_win_prob(team_abb, oppn_abb,
                                                strength_type, row_year)

        entry = {
            'update_time': timestamp,
            'series_id': series_id,
            'year': row_year,
            'team': team,
            'opponent': opponent,
            'series_wins': series_wins,
            'series_losses': series_losses,
            'strength_type': strength_type,
            'team_winProb': team_winProb,
            'total_playoff_games_played': total_playoff_games_played
        }

        team_probs = []
        team_clinched = series_wins == wins_needed
        team_eliminated = series_losses == wins_needed

        if team_clinched:
            # Series already won: certainty, recorded against the actual
            # number of games it took.
            team_probs.append(1)
            total_games = series_wins + series_losses
            if total_games > 2:
                entry['team_in' + str(total_games)] = 1

        if team_eliminated:
            team_probs.append(0)

        if not (team_clinched or team_eliminated):
            # ``end_game`` is the series game number of the clinching win
            # minus the losses already taken, so ``end_game + series_losses``
            # is the absolute game number used in the column name.
            for end_game in range(wins_needed,
                                  series_games + 1 - series_losses):
                # Probability of winning all but one of the wins still needed
                # in the games before the clincher, times the probability of
                # winning the clincher itself. BinomDist.pmf is presumably
                # the binomial PMF -- confirm against its definition.
                team_in_N = BinomDist.pmf(n=end_game - 1 - series_wins,
                                          k=(wins_needed - 1 - series_wins),
                                          p=team_winProb) * team_winProb

                team_probs.append(team_in_N)

                # Single-game series have no per-length column.
                if end_game > 2:
                    entry['team_in' + str(end_game + series_losses)] = team_in_N

        entry['team_seriesProb'] = sum(team_probs)

        db.insertRowDict(entry,
                         '__in_playoff_bracket',
                         insertMany=False,
                         replace=True,
                         rid=0,
                         debug=1)
        db.conn.commit()
# Example #2
# 0
def _safe_win_pct(wins, losses, default=0.5):
    """Return wins / (wins + losses) as a float, or ``default`` if that total is zero."""
    try:
        return float(wins) / float(wins + losses)
    except ZeroDivisionError:
        return default


def get_optimal_lineups(year, season_gp):
    """Compute and store each team's roster strength and projected record.

    For every team returned by the optimal pitching/lineup query, combines the
    roster value with the team's current standings (from
    ``get_standing_metrics``) into roster, rest-of-season and full-season
    projections, and writes one row per team to ``__team_strength`` (with
    ``replace=True``). Each row is committed individually.

    Args:
        year: Season passed to the helper lookups and stored on each row.
        season_gp: Stored unchanged in the ``season_gp`` column.
    """
    optimal_query = """SELECT team_abb, 
    starter_val, bullpen_val, 
    l.lineup_val AS lineup_vsL, r.lineup_val AS lineup_vsR,
    total_val + 0.25*(l.lineup_val) + 0.75*(r.lineup_val) AS roster_WAR,
    starter_var, bullpen_var,
    l.lineup_var AS vsL_var, r.lineup_var AS vsR_var,
    total_var + 0.25*l.lineup_var + 0.75*r.lineup_var AS roster_var
    FROM __optimal_pitching p
    JOIN __optimal_lineups l USING (team_abb)
    JOIN __optimal_lineups r USING (team_abb)
    WHERE l.vs_hand = 'l'
    AND r.vs_hand = 'r'
    AND l.dh_name IS NOT NULL
    AND r.dh_name IS NOT NULL
    ORDER BY team_abb ASC;"""

    total_roster_war_query = """SELECT
    SUM(p.total_val + 0.25*(l.lineup_val) + 0.75*(r.lineup_val)) AS roster_WAR
    FROM __optimal_pitching p
    JOIN __optimal_lineups l USING (team_abb)
    JOIN __optimal_lineups r USING (team_abb)
    WHERE l.vs_hand = 'l'
    AND r.vs_hand = 'r'
    AND l.dh_name IS NOT NULL
    AND r.dh_name IS NOT NULL;"""

    # should be around ~1000
    total_roster_war = db.query(total_roster_war_query)[0][0]

    # Wins left over once all roster WAR is removed from the 2430 total,
    # split evenly across 30 teams.
    # should be around 48 (48-114 replacement level?)
    replacement_team_wins = (2430 - float(total_roster_war)) / 30

    # should be around .300
    rep_team_win_pct = float(replacement_team_wins) / 162

    for row in db.query(optimal_query):
        (team_abb, starter_val, bullpen_val, lu_vsL, lu_vsR, roster_WAR,
         starter_var, bullpen_var, vsL_var, vsR_var, roster_var) = row

        # Resolve the canonical team name/abbreviation through the standings.
        # NOTE(review): an earlier, commented-out variant did the two helper
        # lookups with ``year-1``; presumably for runs before the season's
        # names are available -- confirm before reinstating.
        mascot_name = helper.get_mascot_names(team_abb.upper(), year)
        (team_name, games_played, _rep_WAR, _oWAR, _dWAR, _FIP_WAR,
         W, L, py_W, py_L) = get_standing_metrics(year, mascot_name)
        team_abb = helper.get_team_abb(team_name, year)

        games_played = float(games_played)

        # Season length is 162 games unless the team has already played more,
        # in which case the games played are used and nothing remains.
        if games_played > 162.0:
            season_games = games_played
        else:
            season_games = 162.0

        roster_W = float(roster_WAR) + rep_team_win_pct * season_games
        roster_pct = roster_W / season_games
        ros_g = season_games - games_played

        # Each percentage falls back to .500 on its own, so an empty
        # Pythagorean record no longer discards a valid observed win%.
        w_pct = _safe_win_pct(W, L)
        py_pct = _safe_win_pct(py_W, py_L)

        # logistic weights for pythag% and win%
        # rest of season projected win% = (1-2w)*(roster%) + w(pythag%) + w(win%)
        # where w = (0.25) / (1+20e^(-0.035*games_played)),
        # replaced by a linear ramp of 0.0015 per game over the first 10 games
        if games_played <= 10:
            current_weight = 0.0015 * games_played
        else:
            current_weight = 0.25 / (1 + 20 * math.exp(-0.035 * games_played))

        # Observed and pythag percentages are floored at .250 before blending.
        ros_pct = ((1 - 2 * current_weight) * roster_pct
                   + current_weight * max(py_pct, 0.25)
                   + current_weight * max(w_pct, 0.25))

        ros_W = ros_pct * ros_g

        # For the total variance of the team, first take the variance from the
        # team projections (based on the variance in each individual player's
        # projection).
        total_roster_var = float(roster_var)
        # Then add a measure of variance based on the difference between true
        # talent record (pythag record) and observed record (see
        # /variance_research/Full Season Pythag Standings std research.png).
        total_roster_var += -0.0055021865 * (ros_pct * 162) + 3.4706743014
        # Finally add 5.0 to the STANDARD DEVIATION (not the variance). With
        # std = sqrt(v) and std + 5.0 = sqrt(v + c), solving for c gives
        # c = 10*sqrt(v) + 25 (https://tinyurl.com/y8tk64ez).
        # NB. the value of 5.0 is a guess (~0.33 win for each starter plus a
        # small amount for bench players and relief pitchers). It is hack-y
        # and should be cleaned up, or at least weighted more towards
        # defensive numbers than wOBA numbers. It reflects uncertainty (mostly
        # from defense) in the conversion from raw ZiPS to DMB WAR: a team
        # projected as a true-talent 90-win team is treated as having +/- 5.0
        # wins of standard deviation in that projection.
        total_roster_var += 10 * math.sqrt(total_roster_var) + 25

        # float(W) keeps the sum valid when the database driver returns a
        # Decimal; for integer W the result is unchanged.
        projected_W = float(W) + ros_W

        roster_L = season_games - roster_W
        projected_L = season_games - projected_W
        projected_pct = projected_W / season_games

        entry = {
            'team_abb': team_abb,
            'team_name': team_name,
            'year': year,
            'season_gp': season_gp,
            'games_played': games_played,
            'starter_val': starter_val,
            'bullpen_val': bullpen_val,
            'vsR_val': lu_vsR,
            'vsL_val': lu_vsL,
            'roster_strength': roster_WAR,
            'starter_var': starter_var,
            'bullpen_var': bullpen_var,
            'vsR_var': vsR_var,
            'vsL_var': vsL_var,
            'roster_var': roster_var,
            'overall_var': total_roster_var,
            'roster_W': roster_W,
            'roster_L': roster_L,
            'roster_pct': roster_pct,
            'current_W': W,
            'current_L': L,
            'current_pct': w_pct,
            'ros_W': ros_W,
            'ros_L': ros_g - ros_W,
            'ros_pct': ros_pct,
            'projected_W': projected_W,
            'projected_L': projected_L,
            'projected_pct': projected_pct,
        }

        db.insertRowDict(entry, '__team_strength', insertMany=False, replace=True, rid=0, debug=1)
        db.conn.commit()