def get_optimal_lineups(year, season_gp): optimal_query = """SELECT team_abb, starter_val, bullpen_val, l.lineup_val AS lineup_vsL, r.lineup_val AS lineup_vsR, total_val + 0.25*(l.lineup_val) + 0.75*(r.lineup_val) AS roster_WAR, starter_var, bullpen_var, l.lineup_var AS vsL_var, r.lineup_var AS vsR_var, total_var + 0.25*l.lineup_var + 0.75*r.lineup_var AS roster_var FROM __optimal_pitching p JOIN __optimal_lineups l USING (team_abb) JOIN __optimal_lineups r USING (team_abb) WHERE l.vs_hand = 'l' AND r.vs_hand = 'r' AND l.dh_name IS NOT NULL AND r.dh_name IS NOT NULL ORDER BY team_abb ASC;""" total_roster_war_query = """SELECT SUM(p.total_val + 0.25*(l.lineup_val) + 0.75*(r.lineup_val)) AS roster_WAR FROM __optimal_pitching p JOIN __optimal_lineups l USING (team_abb) JOIN __optimal_lineups r USING (team_abb) WHERE l.vs_hand = 'l' AND r.vs_hand = 'r' AND l.dh_name IS NOT NULL AND r.dh_name IS NOT NULL;""" # should be around ~1000 total_roster_war = db.query(total_roster_war_query)[0][0] # should be around 48 (48-114 replacement level?) replacement_team_wins = (2430-float(total_roster_war))/30 # should be around .300 rep_team_win_pct = float(replacement_team_wins)/162 optimal_res = db.query(optimal_query) for row in optimal_res: entry = {} team_abb, starter_val, bullpen_val, lu_vsL, lu_vsR, roster_WAR, starter_var, bullpen_var, vsL_var, vsR_var, roster_var = row mascot_name = helper.get_mascot_names(team_abb.upper(), year) team_name, games_played, rep_WAR, oWAR, dWAR, FIP_WAR, W, L, py_W, py_L = get_standing_metrics(year, mascot_name) team_abb = helper.get_team_abb(team_name, year) # mascot_name = helper.get_mascot_names(team_abb.upper(), year-1) # team_name, games_played, rep_WAR, oWAR, dWAR, FIP_WAR, W, L, py_W, py_L = get_standing_metrics(year, mascot_name) # team_abb = helper.get_team_abb(team_name, year-1) games_played = float(games_played) if games_played > 162.0: roster_W = float(roster_WAR) + rep_team_win_pct*games_played roster_pct = roster_W/games_played ros_g = 0 else: roster_W = float(roster_WAR) + rep_team_win_pct*162 roster_pct = roster_W/162.0 ros_g = 162-games_played try: w_pct = float(W)/float(W+L) py_pct = float(py_W)/float(py_W+py_L) except ZeroDivisionError: w_pct = 0.5 py_pct = 0.5 # logistic weights for pythag% and win& # rest of season projected win% = (1-2w)*(roster%) + w(pythag%) + w(win%) # where w = (0.25) / (1+20e^(-0.035*games_played)) if games_played <= 10: current_weight = 0.0015*float(games_played) else: current_weight = 0.25 / (1 + 20*math.exp(-0.035*float(games_played))) ros_pct = (1-2*current_weight)*roster_pct + (current_weight)*max(py_pct, 0.25) + (current_weight)*max(w_pct, 0.25) ros_W = ros_pct*ros_g # for the total amount of variance for the team, we first take the total amount of variance from team projections (based on the variance in each individual player's projection) total_roster_var = float(roster_var) # then we add a measure of variance based on the difference between true talent record (pythag record) and observed record (see /variance_research/Full Season Pythag Standings std research.png) total_roster_var += -0.0055021865*(ros_pct*162) + 3.4706743014 # Finally we add a value of 5.0 to the STANDARD DEVIATION (not variance). We can express the amount of variance desired to add in the set of equations {std = sqrt(v), std+5.0 = sqrt(v+c)}, and then solving for c (https://tinyurl.com/y8tk64ez) # NB. the value of 5.0 is a guess (~0.33 win for each starter plus a small amount for bench players and relief pitchers) and hack-y and should be cleaned up, or at least weighted more towards defensive #s over wOBA numbers) wins to the variance due to my uncertain nature (mostly from defense) of my conversion from raw ZiPS to DMB WAR (i.e., I think if my projection says the team is a true talent 90 win team, I think there is +/- 5.0 wins of standard deviation in that projection) total_roster_var += 10*math.sqrt(total_roster_var) + 25 projected_W = W + ros_W if games_played > 162.0: roster_L = games_played - roster_W projected_L = games_played - projected_W projected_pct = projected_W/games_played else: roster_L = 162.0 - roster_W projected_L = 162.0 - projected_W projected_pct = projected_W/162.0 entry['team_abb'] = team_abb entry['team_name'] = team_name entry['year'] = year entry['season_gp'] = season_gp entry['games_played'] = games_played entry['starter_val'] = starter_val entry['bullpen_val'] = bullpen_val entry['vsR_val'] = lu_vsR entry['vsL_val'] = lu_vsL entry['roster_strength'] = roster_WAR entry['starter_var'] = starter_var entry['bullpen_var'] = bullpen_var entry['vsR_var'] = vsR_var entry['vsL_var'] = vsL_var entry['roster_var'] = roster_var entry['overall_var'] = total_roster_var entry['roster_W'] = roster_W entry['roster_L'] = roster_L entry['roster_pct'] = roster_pct entry['current_W'] = W entry['current_L'] = L entry['current_pct'] = w_pct entry['ros_W'] = ros_W entry['ros_L'] = ros_g - ros_W entry['ros_pct'] = ros_pct entry['projected_W'] = projected_W entry['projected_L'] = projected_L entry['projected_pct'] = projected_pct # raw_input(entry) db.insertRowDict(entry, '__team_strength', insertMany=False, replace=True, rid=0,debug=1) db.conn.commit()
def process(): print "processed_team_standings_advanced" table = 'processed_team_standings_advanced' db.query("TRUNCATE TABLE `" + table + "`") entries = [] teamWAR_qry = """SELECT year, team_abb, dWAR, oWAR, (replacement/10) as repWAR, FIP_WAR, ERA_WAR FROM processed_WAR_team """ team_WAR_list = db.query(teamWAR_qry) for team in team_WAR_list: year, team_abb, dWAR, oWAR, repWAR, FIP_WAR, ERA_WAR = team mascot_name = helper.get_mascot_names(team_abb.upper(), year) #a full season is ~17 replacement wins? repWAR = float(repWAR) pos_WAR = float(dWAR) + float(oWAR) + repWAR fWAR = pos_WAR + float(FIP_WAR) rWAR = pos_WAR + float(ERA_WAR) if team_abb == '': continue else: record_q = """SELECT year, team_name, games_played, w, l, rf, ra FROM team_standings WHERE team_name LIKE '%%%s%%' AND year = %s AND games_played = (SELECT MAX(games_played) FROM team_standings WHERE team_name LIKE '%%%s%%' AND year = %s) """ record_qry = record_q % (mascot_name, year, mascot_name, year) # raw_input(record_qry) record = db.query(record_qry)[0] year, team_name, games_played, w, l, rf, ra = record # http://www.had2know.com/sports/pythagorean-expectation-win-percentage-baseball.html pythag_x = ((float(rf) + float(ra)) / (float(w) + float(l)))**(float(0.285)) pythag_win_pct = (float(rf)**pythag_x) / ((float(rf)**pythag_x) + (float(ra)**pythag_x)) pythag_wins = (w + l) * pythag_win_pct pythag_losses = games_played - (pythag_wins) if year < 2017: rep_team_win_pct = 0.300 else: rep_team_win_pct = 0.325 rep_team_wins = rep_team_win_pct * games_played # f_wins = (pos_WAR/repWAR)*17.0 + float(FIP_WAR) + rep_team_wins # f_losses = games_played - (f_wins) # r_wins = (pos_WAR/repWAR)*17.0 + float(ERA_WAR) + rep_team_wins # r_losses = games_played - (r_wins) f_wins = fWAR + rep_team_wins f_losses = games_played - (f_wins) r_wins = rWAR + rep_team_wins r_losses = games_played - (r_wins) entry = { "year": year, "team_name": team_name, "games_played": games_played, "repWAR": repWAR, "oWAR": oWAR, "dWAR": dWAR, "FIP_WAR": FIP_WAR, "ERA_WAR": ERA_WAR, "RF": rf, "RA": ra, "f_Wins": f_wins, "f_Losses": f_losses, "r_Wins": r_wins, "r_Losses": r_losses, "py_Wins": pythag_wins, "py_Losses": pythag_losses, "W": w, "L": l } entries.append(entry) if entries != []: db.insertRowDict(entries, table, replace=True, insertMany=True, rid=0) db.conn.commit()