# Labels every pitch in the table exposed by PitchFxDataset with categorical
# bins for the ball/strike count and for horizontal/vertical movement. The
# bins are written as new columns on that table.
import sys

import pandas as pd

# The project root must be on sys.path *before* the project-local imports
# below are executed; extending the path afterwards (as the original ordering
# did) only works when the script is launched from inside that directory.
sys.path.extend(['/home/simon/Documents/601-Project/code'])

from data.pitchfx import PitchFxDataset
from tables.utils import add_header, change_fontsize, add_divider

pitchfx = PitchFxDataset()
# Plain alias of the dataset attribute: the assignments below add columns
# through this name.
pitches = pitchfx.pitchfx

# ---------------- compute experiments --------------------
# count
# include_lowest=True closes the first interval on the left, so a value equal
# to the lowest bin edge lands in the first bin instead of becoming NaN.
pitches["ball_bin"] = pd.cut(
    x=pitches["b_count"],
    bins=[0, 2, 3],
    labels=["[0,2]", "{3}"],
    include_lowest=True,
)
pitches["strike_bin"] = pd.cut(
    x=pitches["s_count"],
    bins=[0, 1, 2],
    labels=["[0,1]", "{2}"],
    include_lowest=True,
)

# movement
pitches["move_horiz"] = pd.cut(
    x=pitches["pfx_x_std"],
    bins=[-60, 0, 60],
    labels=["Inward", "Outward"],
    include_lowest=True,
)
# NOTE(review): "Upward" is attached to the [-20, 5] bin and "Downward" to
# (5, 20]; confirm this matches the sign convention of pfx_z.
# NOTE(review): the visible text of this call stops after the `labels`
# argument. It is closed here with include_lowest=True to mirror the three
# calls above; confirm against the full file.
pitches["move_vert"] = pd.cut(
    x=pitches["pfx_z"],
    bins=[-20, 5, 20],
    labels=["Upward", "Downward"],
    include_lowest=True,
)
# Exploratory checks on the pitch table: cross-tabulates two call columns,
# prints the size of every umpire/stand group, and plots pz against pz_std.
import sys

# Register the project root first so that data.pitchfx can be imported.
sys.path.extend(['/home/simon/Documents/601-Project/code'])

from data.pitchfx import PitchFxDataset
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

pitchfx = PitchFxDataset()

# The cross-tabulation is evaluated but neither stored nor printed, so it is
# only visible when this line is run in an interactive session.
pd.crosstab(
    index=pitchfx.pitchfx["type"],
    columns=pitchfx.pitchfx["type_from_sz"],
)

df = pitchfx.group_by(
    umpire_HP="all",
    stand="all",
)

# to iterate through all:
# each item yielded by the grouping is a (levels, sub-table) pair
for levels, d in df:
    print(len(d), levels)

pz_raw = pitchfx.pitchfx["pz"]
pz_standardized = pitchfx.pitchfx["pz_std"]

# First 1000 pitches only: raw against standardized height.
plt.scatter(pz_raw[:1000], pz_standardized[:1000])
# Distribution of the per-pitch difference, drawn on the same current axes as
# the scatter because no new figure is opened in between.
plt.hist(pz_standardized - pz_raw)
plt.show()
# Column 2 of the first N colour rows ramps linearly from 2/256 to 76/256;
# every row from N onwards repeats row N - 1.
vals[:N, 2] = np.linspace(2 / 256, 76 / 256, N)
vals[N:, :] = vals[N - 1, :]
newcmp16 = matplotlib.colors.ListedColormap(vals)
# Masked / invalid cells are drawn in white.
newcmp16.set_bad(color="white")

# Widen the pandas console output.
for _option, _value in (
    ("display.max_rows", 100),
    ("display.max_columns", 20),
    ("display.width", 1000),
):
    pd.set_option(_option, _value)

# ---------------- SETUP ------------------
plt.style.use("seaborn")
sys.path.extend(['/home/simon/Documents/601-Project/code'])
encoder_path = "./data/models/encoding/all_fit.txt"
pitchfx = PitchFxDataset()

# Balls and strike counts
# NOTE(review): the lists passed for b_count / s_count look like bin edges;
# confirm against PitchFxDataset.group_by.
df = pitchfx.group_by(
    umpire_HP="all",
    b_count=[0, 2, 3],
    s_count=[0, 1, 2],
)

# The pickled object unpacks into five items; only the embeddings and their
# group keys (2nd and 3rd) are kept. pickle.load can execute arbitrary code,
# so this file must come from a trusted source.
with open(encoder_path, "rb") as f:
    _, embeddings, groups, _, _ = pickle.load(f)

# Positions, within the stored group list, of the groups that also occur in
# the current grouping (in the grouping's iteration order).
ids = [groups.index(key) for key, _ in df if key in groups]
embeddings = embeddings[ids, :]
groups = [groups[pos] for pos in ids]

# One row per retained group; the group keys are turned from a MultiIndex
# into ordinary columns by reset_index().
group_index = pd.MultiIndex.from_tuples(groups)
df = pd.DataFrame(embeddings, index=group_index).reset_index()
# Builds a summary table with one row per experiment split: the number of
# groups in the split and the min / median / max number of pitches per group.
import sys

import pandas as pd

# The project root must be on sys.path *before* the project-local imports
# below are executed; extending the path afterwards (as the original ordering
# did) only works when the script is launched from inside that directory.
sys.path.extend(['/home/simon/Documents/601-Project/code'])

from data.pitchfx import PitchFxDataset
from tables.utils import add_header, change_fontsize, add_divider

pitchfx = PitchFxDataset()

# Keep pandas from truncating the long split descriptions.
pd.options.display.max_colwidth = 10000
summary = pd.DataFrame(columns=["split", "count", "min", "med", "max"])

# ---------------- compute experiments --------------------
# count
df = pitchfx.group_by(umpire_HP="all", b_count=[0, 2, 3], s_count=[0, 1, 2])
# Number of non-null "px" values per group, i.e. the group sizes.
counts = df.agg("count")["px"]
# [number of groups, smallest group, median group, largest group]
stats = counts.agg(["count", "min", "median", "max"]).to_numpy()
# Raw string: in a normal literal "\n" is a newline escape, so "\newline"
# would become a line break followed by "ewline" instead of the LaTeX
# \newline command.
summary.loc[1] = [
    r"Umpire (39),\newline Ball count ([0,2], {3}),\newline Strike count ([0,1], {2})",
    *stats,
]

# movement
df = pitchfx.group_by(umpire_HP="all", pfx_x=[-60, 0, 60], pfx_z=[-20, 5, 20])
counts = df.agg("count")["px"]
stats = counts.agg(["count", "min", "median", "max"]).to_numpy()
summary.loc[2] = [
    r"Umpire (39),\newline Horiz. movement (inward, outward),\newline Vert. movement (upward, downward)",
    *stats,
]

# batter/pitcher
# Column 2 of the first N colour rows ramps linearly from 2/256 to 76/256;
# every row from N onwards repeats row N - 1.
vals[:N, 2] = np.linspace(2 / 256, 76 / 256, N)
vals[N:, :] = vals[N - 1, :]
newcmp16 = matplotlib.colors.ListedColormap(vals)
# Masked / invalid cells are drawn in white.
newcmp16.set_bad(color="white")

# Widen the pandas console output.
for _option, _value in (
    ("display.max_rows", 100),
    ("display.max_columns", 20),
    ("display.width", 1000),
):
    pd.set_option(_option, _value)

# ---------------- SETUP ------------------
plt.style.use("seaborn")
sys.path.extend(['/home/simon/Documents/601-Project/code'])
encoder_path = "./data/models/encoding/all_fit.txt"
pitchfx = PitchFxDataset()

# Score difference and inning
# New column: b_score minus p_score for every pitch.
score_gap = pitchfx.pitchfx["b_score"] - pitchfx.pitchfx["p_score"]
pitchfx.pitchfx["score_diff_b_p"] = score_gap

# NOTE(review): the lists passed for score_diff_b_p / inning look like bin
# edges; confirm against PitchFxDataset.group_by.
df = pitchfx.group_by(
    umpire_HP="all",
    score_diff_b_p=[-25, -2, 1, 25],
    inning=[1, 6, 18],
)

# The pickled object unpacks into five items; only the embeddings and their
# group keys (2nd and 3rd) are kept. pickle.load can execute arbitrary code,
# so this file must come from a trusted source.
with open(encoder_path, "rb") as f:
    _, embeddings, groups, _, _ = pickle.load(f)

# Positions, within the stored group list, of the groups that also occur in
# the current grouping (in the grouping's iteration order).
ids = [groups.index(key) for key, _ in df if key in groups]
# Loads a pickled classifier object and builds several groupings of the
# pitch table (by count, by movement, by handedness, by score/inning).
import pickle
import sys  # harmless if `sys` is already imported earlier in the file

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# The project root must be on sys.path *before* the project-local imports
# below are executed; the original ordering extended the path only after
# those imports had already run.
sys.path.extend(['/home/simon/Documents/601-Project/code'])

from data.pitchfx import PitchFxDataset
# NOTE(review): StrikeZoneLearner is not referenced by name below; presumably
# it is imported so the pickled object can be reconstructed — confirm.
from models.classification.StrikeZoneLearner import StrikeZoneLearner
from plot.utils import plot_pitches

plt.style.use("seaborn")

# pickle.load can execute arbitrary code: only load model files that come
# from a trusted source.
with open("./data/models/classifiers/umpire_balls_strikes_roc_auc_svc_klr.txt", "rb") as f:
    szl = pickle.load(f)

pitchfx = PitchFxDataset()

# Each assignment to `df` below replaces the previous grouping, so only the
# last one remains bound once this section has run.
df = pitchfx.group_by(umpire_HP="all", b_count=[0, 2, 3], s_count=[0, 1, 2])

# Negate horizontal movement for rows whose stand is "L". The column is
# overwritten in place, so executing this statement twice on the same
# dataset object flips the sign back.
pitchfx.pitchfx["pfx_x"] = np.where(
    pitchfx.pitchfx["stand"] == "L",
    -pitchfx.pitchfx["pfx_x"],
    pitchfx.pitchfx["pfx_x"],
)
df = pitchfx.group_by(umpire_HP="all", pfx_x=[-60, 0, 60], pfx_z=[-20, 5, 20])

df = pitchfx.group_by(umpire_HP="all", p_throws="all", stand="all")

# New column: b_score minus p_score for every pitch.
pitchfx.pitchfx["score_diff_b_p"] = (
    pitchfx.pitchfx["b_score"] - pitchfx.pitchfx["p_score"]
)
df = pitchfx.group_by(
    umpire_HP="all",
    score_diff_b_p=[-25, -2, 1, 25],
    inning=[1, 6, 18],
)