def _calculate_normalized_play_pct(positon_players, normalized_data):
    """Attach workload-normalized fielding numbers to every player, in place.

    For each difficulty bucket the player's own conversion rate
    (``<bucket>field / <bucket>``) is multiplied by ``normalized_data[bucket]``,
    so every player is scored against the same mix of chances.  The five
    products are added up and divided by ``normalized_data["totbiz"]`` to give
    ``adj_play_pct``.

    ``adj_fieldedballs``, ``adj_totbiz`` and ``adj_zr`` are the player's raw
    totals divided by ``ip_to_num(player["fieldingip"])`` and multiplied by
    9 * 162.

    Any figure whose denominator is not positive is stored as 0.

    Args:
        positon_players: mapping whose values are mutable player stat dicts.
        normalized_data: mapping with one entry per bucket plus "totbiz".

    Returns:
        None; the player dicts are updated in place.
    """
    buckets = (
        "routinebiz",
        "likelybiz",
        "evenbiz",
        "unlikelybiz",
        "veryunlikelybiz",
    )

    for player in positon_players.values():
        # Plays this player would be expected to make on the shared workload.
        expected = []
        for bucket in buckets:
            if player[bucket] > 0:
                conversion_rate = player[bucket + "field"] / player[bucket]
                expected.append(conversion_rate * normalized_data[bucket])
            else:
                expected.append(0)

        if normalized_data["totbiz"] > 0:
            routine, likely, even, unlikely, very_unlikely = expected
            made = routine + likely + even + unlikely + very_unlikely
            player["adj_play_pct"] = made / normalized_data["totbiz"]
        else:
            player["adj_play_pct"] = 0

        if player["fieldingip"] > 0:
            fielded = (player["routinebizfield"]
                       + player["likelybizfield"]
                       + player["evenbizfield"]
                       + player["unlikelybizfield"]
                       + player["veryunlikelybizfield"])
            innings = ip_to_num(player["fieldingip"])
            # Keep "* 9 * 162" as two multiplications so rounding is unchanged.
            player["adj_fieldedballs"] = fielded / innings * 9 * 162
            player["adj_totbiz"] = player["totalbiz"] / innings * 9 * 162
            player["adj_zr"] = player["zr"] / innings * 9 * 162
        else:
            player["adj_fieldedballs"] = 0
            player["adj_totbiz"] = 0
            player["adj_zr"] = 0
def _blend_by_vr_share(vr_share, vs_righty, vs_lefty):
    """Average two platoon-split values, weighting the vs-RHP one by vr_share."""
    return vr_share * vs_righty + (1 - vr_share) * vs_lefty


def calculate_league_stats(ovr_data, vl_data, vr_data, splits):
    """Add derived rate stats to every card and blend the platoon splits.

    The per-card stat formulas are built here and handed to
    ``_calculate_stats_per_data`` for the vL, vR and overall data sets.  Each
    overall card then gets ``batr_600_pa_*`` / ``war_600_pa_*`` values for the
    "ft", "vr" and "vl" split profiles, blending the card's vR and vL numbers
    by ``splits[<profile>]["vR%"][<"catcher" or "fielder">]``.  Cards without
    both splits, or with fewer than 10 PA in either one, get -10 instead.

    Args:
        ovr_data: overall stats, keyed by card id string.
        vl_data: stats versus left-handers, same keying.
        vr_data: stats versus right-handers, same keying.
        splits: nested mapping read as ``splits[profile]["vR%"][position]``
            for the profiles "FT", "vR" and "vL".

    Returns:
        None; the card dicts are updated in place.
    """
    tot_batr = 0.0
    tot_bsr = 0.0
    tot_zr = 0.0
    tot_pas = 0
    for player_data in ovr_data.values():
        tot_batr += player_data["batr"]
        tot_bsr += player_data["baserunningruns"]
        tot_zr += player_data["zr"]
        tot_pas += player_data["pa"]

    # Offsets the league-wide run total so it nets out to zero per PA.
    # With no plate appearances every war_600_pa is 0 anyway, so 0.0 is safe.
    if tot_pas > 0:
        league_adj = -1 * (tot_batr + tot_bsr + tot_zr) / tot_pas
    else:
        league_adj = 0.0
    replacement_player_adj = 228000 / 185553

    batter_stats = {
        "avg": lambda pd: pd["hits"] / pd["ab"] if pd["ab"] > 0 else 0,
        "obp": lambda pd: (
            (pd["hits"] + pd["walks"] + pd["timeshitbypitch"]) / pd["pa"]
            if pd["pa"] > 0 else 0
        ),
        "ops": lambda pd: (
            (pd["hits"] + pd["doubles"] + pd["triples"] * 2
             + pd["homeruns"] * 3) / pd["ab"]
            + (pd["hits"] + pd["walks"] + pd["timeshitbypitch"]) / pd["pa"]
            if pd["ab"] > 0 else 0
        ),
        # Baserunning always comes from the overall record of the same card,
        # even when pd is a vL / vR split row.
        "bsr_600_pa": lambda pd: (
            ovr_data[str(pd["t_CID"])]["baserunningruns"] * 600
            / ovr_data[str(pd["t_CID"])]["pa"]
            if pd["pa"] > 0 else 0
        ),
        "zr_600_pa": lambda pd: pd["zr"] * 600 / pd["pa"] if pd["pa"] > 0 else 0,
        "batr_600_pa": lambda pd: (
            pd["batr"] * 600 / pd["pa"] if pd["pa"] > 0 else 0
        ),
        # last number is replacement adjustment, so it's not based on average
        # NOTE(review): league_adj is a per-PA figure but is added once to the
        # card's season totals here — confirm that is intended.
        "war_600_pa": lambda pd: (
            (pd["batr"] + pd["zr"] + pd["baserunningruns"] + league_adj
             + replacement_player_adj) * 600 / (pd["pa"] * 10)
            if pd["pa"] > 0 else 0
        ),
    }

    pitcher_stats = {
        "sp_k_per_9": lambda pd: (
            pd["sp_k"] * 9 / ip_to_num(pd["sp_ip"]) if pd["sp_ip"] > 0 else 0
        ),
        "sp_bb_with_hbp_per_9": lambda pd: (
            (pd["sp_bb"] + pd["sp_playershitbypitch"]) * 9
            / ip_to_num(pd["sp_ip"])
            if pd["sp_ip"] > 0 else 0
        ),
        "sp_hr_per_9": lambda pd: (
            pd["sp_hra"] * 9 / ip_to_num(pd["sp_ip"]) if pd["sp_ip"] > 0 else 0
        ),
        "rp_k_per_9": lambda pd: (
            pd["rp_k"] * 9 / ip_to_num(pd["rp_ip"]) if pd["rp_ip"] > 0 else 0
        ),
        "rp_bb_with_hbp_per_9": lambda pd: (
            (pd["rp_bb"] + pd["rp_playershitbypitch"]) * 9
            / ip_to_num(pd["rp_ip"])
            if pd["rp_ip"] > 0 else 0
        ),
        "rp_hr_per_9": lambda pd: (
            pd["rp_hra"] * 9 / ip_to_num(pd["rp_ip"]) if pd["rp_ip"] > 0 else 0
        ),
        # Only pure starters / pure relievers get a per-game innings figure.
        # Innings go through ip_to_num like every other rate in this table.
        "ip_per_gamesstarted": lambda pd: (
            ip_to_num(pd["sp_ip"]) / pd["gamesstarted"]
            if pd["gamesstarted"] > 0 and pd["gamesstarted"] == pd["games"]
            else 0
        ),
        "ip_per_gamesrelieved": lambda pd: (
            ip_to_num(pd["rp_ip"]) / pd["games"]
            if pd["games"] > 0 and pd["gamesstarted"] == 0
            else 0
        ),
        "sp_war_per_220_ip": lambda pd: (
            pd["sp_war"] * 220 / ip_to_num(pd["sp_ip"])
            if pd["sp_ip"] > 0 else -50
        ),
        "rp_war_per_100_ip": lambda pd: (
            pd["rp_war"] * 100 / ip_to_num(pd["rp_ip"])
            if pd["rp_ip"] > 0 else 0
        ),
    }

    _calculate_stats_per_data(vl_data, batter_stats, pitcher_stats, "vL")
    _calculate_stats_per_data(vr_data, batter_stats, pitcher_stats, "vR")
    _calculate_stats_per_data(ovr_data, batter_stats, pitcher_stats, "ovr")

    blended_stats = ("batr_600_pa", "war_600_pa")
    # (output suffix, key into ``splits``)
    split_profiles = (("ft", "FT"), ("vr", "vR"), ("vl", "vL"))

    progress_bar = ProgressBar(len(ovr_data), "Adding extra ovr data")
    for card in ovr_data.values():
        cid = str(card["t_CID"])
        lefty_stats = vl_data.get(cid)
        righty_stats = vr_data.get(cid)

        too_small = (lefty_stats is None or righty_stats is None
                     or lefty_stats["pa"] < 10 or righty_stats["pa"] < 10)
        if too_small:
            # Set to some default low values
            for stat in blended_stats:
                for suffix, _ in split_profiles:
                    card[stat + "_" + suffix] = -10
        else:
            position = "catcher" if card["position"] == "C" else "fielder"
            for stat in blended_stats:
                for suffix, profile in split_profiles:
                    vr_share = splits[profile]["vR%"][position]
                    card[stat + "_" + suffix] = _blend_by_vr_share(
                        vr_share, righty_stats[stat], lefty_stats[stat])
        progress_bar.increment()
    progress_bar.finish()
    print()
def merge_player_data(old_info, new_info):
    """Fold ``new_info`` into ``old_info`` in place and return ``old_info``.

    Rate stats are combined first, as averages weighted by the matching
    volume stat of each record, before any totals are touched:

      * cera                -> fieldingip
      * sp_fip              -> sp_bf
      * rp_fip, leverage    -> rp_bf
      * woba                -> pa
      * sp_era, sp_whip     -> sp_ip
      * rp_era, rp_whip     -> rp_ip

    A rate is left as it was when the combined weight is not positive.
    Counting stats are then summed, innings fields are combined with
    ``add_ip``, and the fielding percentages are recomputed from the merged
    totals.
    """

    def merge_rates_by_innings(ip_key, rate_keys):
        # Weights are converted innings; the total uses the add_ip'd innings.
        combined = ip_to_num(add_ip(old_info[ip_key], new_info[ip_key]))
        if combined > 0:
            old_weight = ip_to_num(old_info[ip_key])
            new_weight = ip_to_num(new_info[ip_key])
            for rate_key in rate_keys:
                old_info[rate_key] = (
                    old_info[rate_key] * old_weight
                    + new_info[rate_key] * new_weight
                ) / combined

    def merge_rates_by_count(count_key, rate_keys):
        new_weight = new_info[count_key]
        old_weight = old_info[count_key]
        combined = new_weight + old_weight
        if combined > 0:
            for rate_key in rate_keys:
                old_info[rate_key] = (
                    new_weight * new_info[rate_key]
                    + old_weight * old_info[rate_key]
                ) / combined

    # Rates must be merged while both records still hold their own volumes.
    merge_rates_by_innings("fieldingip", ("cera",))
    merge_rates_by_count("sp_bf", ("sp_fip",))
    merge_rates_by_count("rp_bf", ("rp_fip", "leverage"))
    merge_rates_by_count("pa", ("woba",))
    merge_rates_by_innings("sp_ip", ("sp_era", "sp_whip"))
    merge_rates_by_innings("rp_ip", ("rp_era", "rp_whip"))

    innings_keys = ("sp_ip", "rp_ip", "fieldingip")
    merge_order = (
        "sp_hra", "sp_hitsallowed", "sp_k", "sp_bb",
        "rp_hra", "rp_hitsallowed", "rp_k", "rp_bb",
        "pa", "gs", "g", "fielding_gs", "runsscored", "rbi",
        "intentionallywalked", "sacflies", "sacbunts", "gidp",
        "stolenbases", "caughtstealing", "ab",
        "sp_ip", "sp_war", "rp_ip", "rp_war", "fieldingip",
        "bwar", "wraa", "batr", "sp_bf", "rp_bf", "games", "gamesstarted",
        "wsb", "ubr", "baserunningruns", "bsrunchances", "stealchances",
        "zr", "strikeouts", "walks", "homeruns", "hits", "doubles", "triples",
        "routinebiz", "routinebizfield",
        "likelybiz", "likelybizfield",
        "evenbiz", "evenbizfield",
        "unlikelybiz", "unlikelybizfield",
        "veryunlikelybiz", "veryunlikelybizfield",
        "impossiblebiz", "totalbiz", "totalchances",
        "assists", "putouts", "stolenbaseattempts", "runnersthrownout",
        "errors", "doubleplays",
        "sp_playershitbypitch", "rp_playershitbypitch", "timeshitbypitch",
        "totalplaysfielded",
        "sp_wildpitches", "sp_pitch_stealchances", "sp_pitch_basesstolen",
        "sp_pitch_stealattempts", "sp_pitch_basesstolen_wild_pitch",
        "sp_earned_runs", "sp_pitch_non_hr_hits", "sp_bb_hp",
        "sp_pitcherdoubleplays",
        "rp_wildpitches", "rp_pitch_stealchances", "rp_pitch_basesstolen",
        "rp_pitch_stealattempts", "rp_pitch_basesstolen_wild_pitch",
        "rp_earned_runs", "rp_pitch_non_hr_hits", "rp_bb_hp",
        "rp_pitcherdoubleplays",
    )
    for key in merge_order:
        if key in innings_keys:
            # Innings cannot be added with "+"; add_ip does the combining.
            old_info[key] = add_ip(old_info[key], new_info[key])
        else:
            old_info[key] += new_info[key]

    # Percentages are rebuilt from the merged totals rather than averaged.
    merged_biz = old_info["totalbiz"]
    if merged_biz > 0:
        old_info["playpct"] = old_info["totalplaysfielded"] / merged_biz
        old_info["routinefieldpct"] = old_info["routinebiz"] / merged_biz

    for bucket in ("routinebiz", "likelybiz", "evenbiz", "unlikelybiz",
                   "veryunlikelybiz"):
        if old_info[bucket] > 0:
            old_info[bucket + "pct"] = (
                old_info[bucket + "field"] / old_info[bucket])
        else:
            old_info[bucket + "pct"] = 0

    return old_info
def _fit_ols_without_outliers(X, y):
    """Fit y ~ const + X, dropping high-influence points when there are >= 4.

    With four or more points a first fit is used only to get Cook's distance
    for each point; points at or above 4 / (n - 2) are dropped and the model
    is refitted on the rest.  With fewer points everything is kept.
    """
    n_points = len(X)
    if n_points >= 4:
        # initial attempt - we still need to throw out outliers
        first_pass = sm.OLS(y, sm.add_constant(X)).fit()
        cooks_distance = first_pass.get_influence().cooks_distance[0]
        cutoff = 4.0 / (n_points - 2)
        # zip keeps each distance paired with the point it was computed for.
        kept = [
            (x, target)
            for x, target, distance in zip(X, y, cooks_distance)
            if distance < cutoff
        ]
        X = [x for x, _ in kept]
        y = [target for _, target in kept]
    # Real prediction
    return sm.OLS(y, sm.add_constant(X)).fit()


def regress_rto(player_data):
    """Regress steal attempts and caught-stealing rate on the ``carm`` rating.

    Players are skipped unless they have at least 1 runner thrown out,
    ``cera`` >= 2.0, ``carm`` >= 15 and ``fieldingip`` >= 10.  The remaining
    players are pooled by ``carm`` value and two regressions are fitted with
    ``carm`` as the only predictor (plus a constant):

      * stolen-base attempts per inning
      * runners thrown out per stolen-base attempt

    Args:
        player_data: mapping whose values are player stat dicts.

    Returns:
        Tuple ``(attempt_params, attempt_r2, rto_params, rto_r2, avg_carm,
        avg_cabi)`` where the two averages are weighted by innings.
    """
    avg_cabi = 0.0
    avg_carm = 0.0
    avg_ip = 0.0
    # carm -> [stolen base attempts, fielding innings, runners thrown out]
    totals_by_arm = {}

    for player_info in player_data.values():
        if (player_info["runnersthrownout"] < 1
                or player_info["cera"] < 2.0
                or player_info["carm"] < 15
                or player_info["fieldingip"] < 10.0):
            continue

        carm = player_info["carm"]
        cabi = player_info["cabi"]
        innings = ip_to_num(player_info["fieldingip"])

        # Running innings-weighted means of both ratings.
        avg_carm = (avg_carm * avg_ip + carm * innings) / (avg_ip + innings)
        avg_cabi = (avg_cabi * avg_ip + cabi * innings) / (avg_ip + innings)
        avg_ip += innings

        if carm in totals_by_arm:
            totals = totals_by_arm[carm]
            totals[0] += player_info["stolenbaseattempts"]
            totals[1] = add_ip(player_info["fieldingip"], totals[1])
            totals[2] += player_info["runnersthrownout"]
        else:
            totals_by_arm[carm] = [
                player_info["stolenbaseattempts"],
                player_info["fieldingip"],
                player_info["runnersthrownout"],
            ]

    att_X, att_y = [], []
    rto_X, rto_y = [], []
    for carm, (attempts, raw_innings, thrown_out) in totals_by_arm.items():
        # Converted with ip_to_num, as the weighted averages above already do.
        pooled_innings = ip_to_num(raw_innings)
        if pooled_innings > 0:
            att_X.append(carm)
            att_y.append(attempts / pooled_innings)
        # Guard the denominator this rate actually divides by.
        if attempts > 0:
            rto_X.append(carm)
            rto_y.append(thrown_out / attempts)

    att_results = _fit_ols_without_outliers(att_X, att_y)
    rto_results = _fit_ols_without_outliers(rto_X, rto_y)

    # attempts by arm, r2, rto/att by arm, r2
    return (att_results.params, att_results.rsquared, rto_results.params,
            rto_results.rsquared, avg_carm, avg_cabi)
def calculate_defensive_stats(tourney_data, cards):
    """Fit per-position fielding regressions and project them onto cards.

    Steps:
      1. Pool tournament players by ``pi["pos"]``, skipping anyone with more
         than one positive ``<position>x`` entry or a position outside the
         eight fielding positions.
      2. Turn each position's pooled ball-in-zone counts into per-9-innings
         figures.
      3. Call ``_calculate_normalized_play_pct`` for each position and fit
         ``adj_play_pct``, ``adj_zr`` and ``adj_totbiz`` with
         ``regress_defensive_stats``; print every r2.
      4. Write ``<POS>_expected_zr`` and ``<POS>_expected_outs_above_avg`` to
         every card for every position.

    Args:
        tourney_data: mapping of key -> player stat dict.
        cards: sized iterable of mutable card dicts.

    Returns:
        None; ``cards`` (and the pooled player dicts) are updated in place.
    """
    positions = ["C", "1B", "2B", "3B", "SS", "LF", "CF", "RF"]
    biz_keys = (
        "routinebiz", "likelybiz", "evenbiz", "unlikelybiz",
        "veryunlikelybiz",
        "routinebizfield", "likelybizfield", "evenbizfield",
        "unlikelybizfield", "veryunlikelybizfield",
    )

    position_players_data = {position: {} for position in positions}
    position_biz_breakdown = {
        position: dict.fromkeys(("totbiz",) + biz_keys, 0)
        for position in positions
    }
    # Every position starts at 0 so one without qualifying players can still
    # be looked up during normalization.
    total_fielding_ip_per_pos = {position: 0 for position in positions}

    # Used to calculate outputs
    regression_data = {position: {} for position in positions}
    r2_data = {position: {} for position in positions}

    progress_bar = ProgressBar(len(tourney_data),
                               "Reading tourney players for fielding data")
    for key, pi in tourney_data.items():
        progress_bar.increment()

        positions_played = sum(
            1 for position in positions if pi[position.lower() + "x"] > 0)
        if positions_played > 1:
            continue

        pos = pi["pos"]
        if pos not in position_players_data:
            continue

        breakdown = position_biz_breakdown[pos]
        breakdown["totbiz"] += pi["totalbiz"]
        for biz_key in biz_keys:
            breakdown[biz_key] += pi[biz_key]

        total_fielding_ip_per_pos[pos] = add_ip(
            pi["fieldingip"], total_fielding_ip_per_pos[pos])
        position_players_data[pos][key] = pi
    progress_bar.finish()

    # Per game stats
    for position, breakdown in position_biz_breakdown.items():
        pooled_innings = ip_to_num(total_fielding_ip_per_pos[position])
        if pooled_innings <= 0:
            # Nothing was pooled here; the counts are already all zero.
            continue
        for biz_key in breakdown:
            breakdown[biz_key] = breakdown[biz_key] / pooled_innings * 9

    infield_ratings = ["ifrng", "ifarm", "iferr", "tdp"]
    outfield_ratings = ["ofrng", "ofarm", "oferr"]
    skill_ratings = {
        "C": ["carm", "cabi"],
        "1B": infield_ratings + ["height"],
        "2B": infield_ratings,
        "3B": infield_ratings,
        "SS": infield_ratings,
        "LF": outfield_ratings,
        "CF": outfield_ratings,
        "RF": outfield_ratings,
    }
    outfield = ("LF", "CF", "RF")
    # Each regression gets its own list object, as in a hand-written table.
    position_info = [
        (position, [
            ("adj_play_pct", list(skill_ratings[position])),
            ("adj_zr", list(skill_ratings[position])),
            ("adj_totbiz", ["ofrng" if position in outfield else "ifrng"]),
        ])
        for position in positions
    ]

    progress_bar = ProgressBar(len(position_info) * len(position_info[0][1]),
                               "Calculating fielding regressions")
    for position, pos_regs in position_info:
        _calculate_normalized_play_pct(position_players_data[position],
                                       position_biz_breakdown[position])
        for outcome_stat, pos_ratings in pos_regs:
            reg, r2 = regress_defensive_stats(
                position_players_data[position], pos_ratings, outcome_stat)
            regression_data[position][outcome_stat] = reg
            r2_data[position][outcome_stat] = r2
            progress_bar.increment()
    progress_bar.finish()

    for position, r2_by_stat in r2_data.items():
        for outcome_stat, r2 in r2_by_stat.items():
            print(position, outcome_stat, "r2:", r2)
    print()

    outs_above_average_per_162 = {}
    zr_per_162 = {}
    for position in regression_data:
        outs_above_average_per_162[position] = _get_oaa_fn(
            position, regression_data, position_biz_breakdown)
        zr_per_162[position] = _get_zr_fn(position, regression_data)

    progress_bar = ProgressBar(len(cards),
                               "Calculate in defensive stats to cards")
    for card in cards:
        for position in positions:
            card[position + "_expected_zr"] = zr_per_162[position](card)
            card[position + "_expected_outs_above_avg"] = (
                outs_above_average_per_162[position](card))
        progress_bar.increment()
    progress_bar.finish()
def _calc_woba_factors(player_data):
    """Estimate wOBA and wSB weights by least squares over the players.

    League totals are accumulated over every player.  Players with at least
    20 PA additionally contribute one row to each regression:

      * wOBA: ``woba * (pa - intentionallywalked)`` on
        ``[1, walks, hbp, singles, doubles, triples, homeruns]``
      * wSB: ``wsb`` on
        ``[stolenbases, caughtstealing, singles + walks + hbp - ibb]``

    Args:
        player_data: mapping whose values are player stat dicts.

    Returns:
        Dict with the fitted weights, both r-squared values, the league wOBA
        implied by the weights, and the ``outs_per_run`` / ``runs_per_win``
        figures derived from them.
    """
    woba_rows = []
    woba_targets = []
    wsb_rows = []
    wsb_targets = []

    lg_ip = 0
    lg_runs = 0
    lg_pa = 0
    lg_walks = 0
    lg_hbp = 0
    lg_singles = 0
    lg_doubles = 0
    lg_triples = 0
    lg_homeruns = 0
    lg_ibb = 0

    progress_bar = ProgressBar(len(player_data), "Reading wOBA values")
    for player in player_data.values():
        # Singles are not stored; they are hits minus extra-base hits.
        player_singles = player["hits"] - (
            player["homeruns"] + player["doubles"] + player["triples"])

        lg_pa += player["pa"]
        lg_walks += player["walks"]
        lg_hbp += player["timeshitbypitch"]
        lg_singles += player_singles
        lg_doubles += player["doubles"]
        lg_triples += player["triples"]
        lg_homeruns += player["homeruns"]
        lg_ibb += player["intentionallywalked"]
        lg_ip = add_ip(add_ip(lg_ip, player["sp_ip"]), player["rp_ip"])
        lg_runs += player["runsscored"]
        progress_bar.increment()

        # Small samples count toward league totals but not the regressions.
        if player["pa"] >= 20:
            woba_rows.append([
                1,
                player["walks"],
                player["timeshitbypitch"],
                player_singles,
                player["doubles"],
                player["triples"],
                player["homeruns"],
            ])
            woba_targets.append(
                player["woba"]
                * (player["pa"] - player["intentionallywalked"]))
            wsb_rows.append([
                player["stolenbases"],
                player["caughtstealing"],
                (player_singles + player["walks"] + player["timeshitbypitch"]
                 - player["intentionallywalked"]),
            ])
            wsb_targets.append(player["wsb"])
    progress_bar.finish("\n")

    results = sm.OLS(woba_targets, woba_rows).fit()
    wsb_results = sm.OLS(wsb_targets, wsb_rows).fit()

    intercept, w_bb, w_hbp, w_1b, w_2b, w_3b, w_hr = results.params
    run_sb, run_cs, lg_wsb = wsb_results.params

    # Apply the fitted weights to the league totals, term by term.
    avg_woba = (intercept
                + w_bb * lg_walks
                + w_hbp * lg_hbp
                + w_1b * lg_singles
                + w_2b * lg_doubles
                + w_3b * lg_triples
                + w_hr * lg_homeruns) / (lg_pa - lg_ibb)

    return {
        "lg_woba": avg_woba,
        "woba_intcpt": intercept,
        "walks_factor": w_bb,
        "hbp_factor": w_hbp,
        "singles_factor": w_1b,
        "doubles_factor": w_2b,
        "triples_factor": w_3b,
        "homeruns_factor": w_hr,
        "r_2": results.rsquared,
        "runSB": run_sb,
        "runCS": run_cs,
        "lgwSB": lg_wsb,
        "wsb_r_2": wsb_results.rsquared,
        "outs_per_run": (run_cs - 0.075) / 2,
        "runs_per_win": 9 * (lg_runs / ip_to_num(lg_ip)) * 1.5 + 3,
    }