def current_series(year, timestamp):
    """Compute and store series win probabilities for the current bracket.

    Reads the rows of ``__in_playoff_bracket`` whose ``update_time`` equals
    the table-wide maximum and whose ``year`` matches, derives for each row
    the probability that ``team`` wins the series in exactly N total games
    (``team_inN``) and overall (``team_seriesProb``), and writes one row per
    input row back to ``__in_playoff_bracket`` with ``update_time`` set to
    ``timestamp``.

    Args:
        year: Season used to filter both queries (interpolated into the SQL
            text with ``%``).
        timestamp: Value stored in the ``update_time`` column of every row
            written.

    Returns:
        None. Results are persisted through ``db.insertRowDict``.

    Raises:
        ValueError: If a ``series_id`` does not reduce to one of the known
            series types (WC, DS, CS, WS).
    """
    # Single-argument parenthesised form prints the same text under both the
    # Python 2 print statement and the Python 3 print function.
    print('\tdetermining current series probabilities')

    games_query = "SELECT IFNULL(SUM(IF(winning_team IS NOT NULL,1,0)),0) FROM __in_playoff_game_results WHERE year = %s;" % (year)
    total_playoff_games_played = db.query(games_query)[0][0]

    qry = """SELECT series_id, year, strength_type, team, opponent, series_wins, series_losses
    FROM __in_playoff_bracket
    WHERE update_time = (SELECT MAX(update_time) FROM __in_playoff_bracket)
    AND year = %s;"""
    query = qry % (year)
    res = db.query(query)

    # Maximum number of games per series type (loop-invariant, so built once).
    games_dict = {'WC': 1, 'DS': 5, 'CS': 7, 'WS': 7}

    for row in res:
        # The row's year is kept separate so the ``year`` argument is not
        # silently overwritten while iterating.
        series_id, row_year, strength_type, team, opponent, series_wins, series_losses = row

        # Strip the league prefix and keep the two-letter series type.
        series_type = series_id.replace('AL', '').replace('NL', '')[:2]
        series_games = games_dict.get(series_type)
        if series_games is None:
            raise ValueError(
                'unrecognized series type %r derived from series_id %r'
                % (series_type, series_id)
            )

        # Floor division keeps this an integer on Python 2 and Python 3.
        wins_needed = series_games // 2 + 1

        team_abb = helper.get_team_abb(team, row_year)
        oppn_abb = helper.get_team_abb(opponent, row_year)
        team_winProb = get_single_game_win_prob(team_abb, oppn_abb, strength_type, row_year)

        entry = {
            'update_time': timestamp,
            'series_id': series_id,
            'year': row_year,
            'team': team,
            'opponent': opponent,
            'series_wins': series_wins,
            'series_losses': series_losses,
            'strength_type': strength_type,
            'team_winProb': team_winProb,
            'total_playoff_games_played': total_playoff_games_played,
        }

        team_probs = []

        # Series already won by this team: certain outcome, and the length
        # column for the actual number of games played is set to 1.
        if series_wins == wins_needed:
            team_probs.append(1)
            total_games = series_wins + series_losses
            if total_games > 2:
                colName = 'team_in' + str(total_games)
                entry[colName] = 1

        # Series already lost by this team.
        if series_losses == wins_needed:
            team_probs.append(0)

        # Series still undecided: sum over every total length at which the
        # team can still clinch.
        if series_wins != wins_needed and series_losses != wins_needed:
            # ``end_game`` excludes the losses already on the board; the
            # total series length for the column name is end_game + losses.
            for end_game in range(wins_needed, series_games + 1 - series_losses):
                # Probability of collecting all but the final required win in
                # the games before the clincher, times winning the clincher.
                # NOTE(review): BinomDist is presumably a binomial
                # distribution object exposing pmf(k, n, p) -- confirm.
                team_in_N = BinomDist.pmf(
                    n=end_game - 1 - series_wins,
                    k=(wins_needed - 1 - series_wins),
                    p=team_winProb,
                ) * team_winProb
                col_name = 'team_in' + str(end_game + series_losses)
                team_probs.append(team_in_N)
                # Only lengths above 2 get their own column; in practice this
                # skips the one-game WC series.
                if end_game > 2:
                    entry[col_name] = team_in_N

        entry['team_seriesProb'] = sum(team_probs)

        db.insertRowDict(entry, '__in_playoff_bracket', insertMany=False, replace=True, rid=0, debug=1)
        db.conn.commit()
def get_optimal_lineups(year, season_gp):
    """Project every team's record from its optimal roster and store it.

    Joins ``__optimal_pitching`` with the vs-left and vs-right rows of
    ``__optimal_lineups``, blends the roster-based win percentage with the
    team's observed and pythagorean win percentages, estimates the variance
    of the projection, and writes one row per team to ``__team_strength``.

    Args:
        year: Season passed to the name/standings helpers and stored on each
            row.
        season_gp: Stored unchanged in the ``season_gp`` column.

    Returns:
        None. Results are persisted through ``db.insertRowDict``.
    """
    optimal_query = """SELECT team_abb,
    starter_val, bullpen_val, l.lineup_val AS lineup_vsL, r.lineup_val AS lineup_vsR,
    total_val + 0.25*(l.lineup_val) + 0.75*(r.lineup_val) AS roster_WAR,
    starter_var, bullpen_var, l.lineup_var AS vsL_var, r.lineup_var AS vsR_var,
    total_var + 0.25*l.lineup_var + 0.75*r.lineup_var AS roster_var
    FROM __optimal_pitching p
    JOIN __optimal_lineups l USING (team_abb)
    JOIN __optimal_lineups r USING (team_abb)
    WHERE l.vs_hand = 'l'
    AND r.vs_hand = 'r'
    AND l.dh_name IS NOT NULL
    AND r.dh_name IS NOT NULL
    ORDER BY team_abb ASC;"""

    total_roster_war_query = """SELECT
    SUM(p.total_val + 0.25*(l.lineup_val) + 0.75*(r.lineup_val)) AS roster_WAR
    FROM __optimal_pitching p
    JOIN __optimal_lineups l USING (team_abb)
    JOIN __optimal_lineups r USING (team_abb)
    WHERE l.vs_hand = 'l'
    AND r.vs_hand = 'r'
    AND l.dh_name IS NOT NULL
    AND r.dh_name IS NOT NULL;"""

    # Summed roster WAR across all teams; should be around ~1000.
    league_roster_war = float(db.query(total_roster_war_query)[0][0])

    # Wins of a replacement-level team: the 2430 available wins minus all
    # roster WAR, split across 30 teams. Should be around 48
    # (48-114 replacement level?).
    replacement_team_wins = (2430 - league_roster_war) / 30

    # Should be around .300.
    rep_team_win_pct = replacement_team_wins / 162

    for team_row in db.query(optimal_query):
        (team_abb, starter_val, bullpen_val, lu_vsL, lu_vsR, roster_WAR,
         starter_var, bullpen_var, vsL_var, vsR_var, roster_var) = team_row

        # Resolve the standings entry through the mascot name, then take the
        # abbreviation back from the resolved team name.
        # (An alternative lookup against the previous season would pass
        # year-1 to both helper calls.)
        mascot_name = helper.get_mascot_names(team_abb.upper(), year)
        standings = get_standing_metrics(year, mascot_name)
        (team_name, games_played, rep_WAR, oWAR, dWAR, FIP_WAR,
         W, L, py_W, py_L) = standings
        team_abb = helper.get_team_abb(team_name, year)

        games_played = float(games_played)

        # Everything is scaled to a 162-game schedule unless the team has
        # already played more than that, in which case its actual game count
        # is the schedule and no games remain.
        schedule_len = games_played if games_played > 162.0 else 162.0
        ros_g = schedule_len - games_played

        roster_W = float(roster_WAR) + rep_team_win_pct * schedule_len
        roster_pct = roster_W / schedule_len

        # With no decisions on record yet, both observed rates default to .500.
        try:
            w_pct = float(W) / float(W + L)
            py_pct = float(py_W) / float(py_W + py_L)
        except ZeroDivisionError:
            w_pct = py_pct = 0.5

        # Logistic weights for pythag% and win%:
        #   rest-of-season win% = (1-2w)*(roster%) + w*(pythag%) + w*(win%)
        #   where w = 0.25 / (1 + 20e^(-0.035*games_played))
        # Through the first 10 games a small linear ramp is used instead.
        if games_played <= 10:
            current_weight = 0.0015 * games_played
        else:
            current_weight = 0.25 / (1 + 20 * math.exp(-0.035 * games_played))

        # Observed rates are floored at .250 before blending.
        roster_share = 1 - 2 * current_weight
        ros_pct = (roster_share * roster_pct
                   + current_weight * max(py_pct, 0.25)
                   + current_weight * max(w_pct, 0.25))
        ros_W = ros_pct * ros_g

        # Variance, step 1: start from the variance of the roster projection
        # (built from each individual player's projection variance).
        total_roster_var = float(roster_var)

        # Step 2: add a term for the gap between true-talent (pythag) record
        # and observed record (see /variance_research/Full Season Pythag
        # Standings std research.png).
        total_roster_var += -0.0055021865 * (ros_pct * 162) + 3.4706743014

        # Step 3: add 5.0 wins to the STANDARD DEVIATION (not the variance).
        # From {std = sqrt(v), std + 5.0 = sqrt(v + c)} the variance to add
        # is c = 10*sqrt(v) + 25 (https://tinyurl.com/y8tk64ez).
        # NB: 5.0 is a guess (~0.33 win per starter plus a little for bench
        # players and relievers) reflecting uncertainty, mostly on defense,
        # in the conversion from raw ZiPS to DMB WAR. It is hack-y and should
        # be cleaned up, or at least weighted more toward defensive numbers
        # than wOBA numbers.
        total_roster_var += 10 * math.sqrt(total_roster_var) + 25

        projected_W = W + ros_W
        roster_L = schedule_len - roster_W
        projected_L = schedule_len - projected_W
        projected_pct = projected_W / schedule_len

        entry = {
            'team_abb': team_abb,
            'team_name': team_name,
            'year': year,
            'season_gp': season_gp,
            'games_played': games_played,
            'starter_val': starter_val,
            'bullpen_val': bullpen_val,
            'vsR_val': lu_vsR,
            'vsL_val': lu_vsL,
            'roster_strength': roster_WAR,
            'starter_var': starter_var,
            'bullpen_var': bullpen_var,
            'vsR_var': vsR_var,
            'vsL_var': vsL_var,
            'roster_var': roster_var,
            'overall_var': total_roster_var,
            'roster_W': roster_W,
            'roster_L': roster_L,
            'roster_pct': roster_pct,
            'current_W': W,
            'current_L': L,
            'current_pct': w_pct,
            'ros_W': ros_W,
            'ros_L': ros_g - ros_W,
            'ros_pct': ros_pct,
            'projected_W': projected_W,
            'projected_L': projected_L,
            'projected_pct': projected_pct,
        }

        db.insertRowDict(entry, '__team_strength', insertMany=False, replace=True, rid=0, debug=1)
        db.conn.commit()