def strike_zone(dataframe):
    """Plot a player's pitches and the strike zone learned by an RBF-kernel SVM.

    Pitches are scattered by ``plate_x`` / ``plate_z`` and coloured by their
    relabelled ``type`` ('S' -> 1, 'B' -> 0). An ``SVC`` is trained on a
    train/validation split, handed to ``draw_boundary`` (defined elsewhere)
    for plotting, and its score on the validation split is printed.

    Args:
        dataframe: pandas DataFrame with ``plate_x``, ``plate_z``, ``type``
            and ``player_name`` columns. The caller's frame is not modified.
    """
    fig, ax = plt.subplots()

    # Relabel on a copy: mapping the caller's column in place would turn every
    # label into NaN if the same frame were passed in a second time.
    dataframe = dataframe.copy()
    dataframe['type'] = dataframe['type'].map({'S': 1, 'B': 0})

    # Any `type` other than 'S'/'B' became NaN above and is dropped here too.
    dataframe = dataframe.dropna(subset=['plate_x', 'plate_z', 'type'])

    plt.scatter(
        x=dataframe['plate_x'],
        y=dataframe['plate_z'],
        c=dataframe['type'],
        cmap=plt.cm.coolwarm,
        alpha=0.25,
    )
    # Positional lookup: a fixed row label (the original used label 4) may no
    # longer exist after dropna. Assumes the frame holds a single player.
    plt.title(dataframe['player_name'].iloc[0] + ' Strike Zone')

    training_set, validation_set = train_test_split(dataframe, random_state=1)

    classifier = SVC(kernel='rbf', gamma=0.5, C=1)
    classifier.fit(training_set[['plate_x', 'plate_z']], training_set['type'])

    draw_boundary(ax, classifier)

    print(classifier.score(
        validation_set[['plate_x', 'plate_z']], validation_set['type']))

    # Fixed limits so plots of different players share the same axes.
    ax.set_ylim(-2, 6)
    ax.set_xlim(-3, 3)
    plt.show()
def strike_zone(player):
    """Scatter a player's pitches, fit an RBF SVM and draw its boundary.

    Prints the relabelled ``type`` column and the ``plate_x`` column (before
    incomplete rows are dropped), shows the plot, then prints the classifier's
    score on the validation split.

    Args:
        player: pandas DataFrame with ``plate_x``, ``plate_z`` and ``type``
            columns, where ``type`` holds 'S' / 'B'. The caller's frame is
            not modified.
    """
    fig, ax = plt.subplots()

    # Copy first: relabelling the caller's frame in place would make a second
    # call on the same frame map every label to NaN.
    player = player.copy()
    player["type"] = player["type"].map({'S': 1, 'B': 0})
    print(player["type"])
    print(player["plate_x"])

    player = player.dropna(subset=['plate_x', 'plate_z', 'type'])

    plt.scatter(
        x=player["plate_x"],
        y=player["plate_z"],
        c=player["type"],
        cmap=plt.cm.coolwarm,
        alpha=0.25,
    )

    training_set, validation_set = train_test_split(player, random_state=1)

    classifier = SVC(kernel='rbf', gamma=3, C=1)
    classifier.fit(training_set[['plate_x', 'plate_z']], training_set['type'])

    # Limits are fixed before draw_boundary (defined elsewhere) is called.
    ax.set_ylim(-2, 6)
    ax.set_xlim(-3, 3)
    draw_boundary(ax, classifier)
    plt.show()

    print(classifier.score(
        validation_set[['plate_x', 'plate_z']], validation_set['type']))
def svc(player):
    """Plot a player's pitches with the boundary of an RBF SVM and print its score.

    Args:
        player: pandas DataFrame with ``plate_x``, ``plate_z`` and ``type``
            columns, where ``type`` holds 'S' / 'B'. The caller's frame is
            not modified.
    """
    # Relabel a copy so the caller keeps the raw 'S'/'B' values; in-place
    # mapping would produce only NaN labels on a repeated call.
    player = player.copy()
    player['type'] = player['type'].map({'S': 1, 'B': 0})
    player = player.dropna(subset=['plate_x', 'plate_z', 'type'])

    fig, ax = plt.subplots()
    plt.scatter(
        x=player['plate_x'],
        y=player['plate_z'],
        c=player['type'],
        cmap=plt.cm.coolwarm,
        alpha=0.25,
    )

    training_set, validation_set = train_test_split(player, random_state=1)

    classifier = SVC(kernel='rbf', gamma=3, C=1)
    classifier.fit(training_set[['plate_x', 'plate_z']], training_set['type'])

    # draw_boundary is defined elsewhere; it receives the axes and fitted model.
    draw_boundary(ax, classifier)

    print(classifier.score(
        validation_set[['plate_x', 'plate_z']], validation_set['type']))

    ax.set_ylim(-2, 6)
    ax.set_xlim(-3, 3)
    plt.show()
def find_strike_zone(player):
    """Fit a default-parameter RBF SVM to a player's pitches and plot it.

    Prints the classifier's score on the validation split, then draws the
    boundary over the scatter plot and shows the figure.

    Args:
        player: pandas DataFrame with ``plate_x``, ``plate_z`` and ``type``
            columns, where ``type`` holds 'S' / 'B'. The caller's frame is
            not modified.
    """
    # Relabel a copy; mapping the caller's column in place would leave only
    # NaN labels if the same frame were passed in again.
    player = player.copy()
    player['type'] = player['type'].map({'S': 1, 'B': 0})
    player = player.dropna(subset=['plate_x', 'plate_z', 'type'])

    plt.scatter(
        x=player['plate_x'],
        y=player['plate_z'],
        c=player['type'],
        cmap=plt.cm.coolwarm,
        alpha=0.25,
    )
    # The original used an `ax` that this function never defined. Take the
    # axes plt.scatter just drew on, so the boundary lands on the same plot
    # even after an earlier plt.show().
    ax = plt.gca()

    training_set, validation_set = train_test_split(player, random_state=1)

    classifier = SVC(kernel='rbf')
    classifier.fit(training_set[["plate_x", "plate_z"]], training_set['type'])

    score = classifier.score(
        validation_set[['plate_x', 'plate_z']], validation_set['type'])
    print(score)

    draw_boundary(ax, classifier)
    plt.show()
def investigate_strike_zone(player):
    """Grid-search gamma and C for an RBF SVM on a player's pitches and plot it.

    Every integer ``gamma`` and ``C`` in 1..20 is tried; the pair with the
    highest score on the validation split wins (the first pair wins ties).
    The best parameters and score are printed, the model is retrained with
    them, and its boundary is drawn over the scatter plot.

    Args:
        player: pandas DataFrame with ``plate_x``, ``plate_z`` and ``type``
            columns, where ``type`` holds 'S' / 'B'. The caller's frame is
            not modified.
    """
    fig, ax = plt.subplots()

    # Clean and relabel a copy so the caller's frame keeps 'S'/'B'; in-place
    # mapping would yield only NaN labels on a repeated call.
    player = player.copy()
    player['type'] = player['type'].map({'S': 1, 'B': 0})
    player = player.dropna(subset=['plate_x', 'plate_z', 'type'])

    plt.scatter(player['plate_x'], player['plate_z'], c=player['type'],
                cmap=plt.cm.coolwarm, alpha=0.25)

    training_set, validation_set = train_test_split(player, random_state=1)
    train_points = training_set[['plate_x', 'plate_z']]
    train_labels = training_set['type']
    valid_points = validation_set[['plate_x', 'plate_z']]
    valid_labels = validation_set['type']

    # Start from a valid parameter pair: the original fallback of gamma=0 /
    # C=0 would make the retraining step fail if no score ever exceeded 0.
    best_gamma = 1
    best_C = 1
    best_score = 0
    for gamma in range(1, 21):
        for C in range(1, 21):
            candidate = SVC(kernel='rbf', gamma=gamma, C=C)
            candidate.fit(train_points, train_labels)
            score = candidate.score(valid_points, valid_labels)
            if score > best_score:
                best_gamma = gamma
                best_C = C
                best_score = score

    print('Best gamma: ' + str(best_gamma))
    print('Best C: ' + str(best_C))
    print('Best score: ' + str(best_score))

    # Retrain with the winning parameters before drawing the boundary.
    classifier = SVC(kernel='rbf', gamma=best_gamma, C=best_C)
    classifier.fit(train_points, train_labels)

    draw_boundary(ax, classifier)
    plt.show()
    plt.clf()
def test(aaron_judge):
    """Plot a player's pitches with an RBF SVM boundary and print its score.

    Args:
        aaron_judge: pandas DataFrame with ``plate_x``, ``plate_z`` and
            ``type`` columns, where ``type`` holds "S" / "B". The caller's
            frame is not modified.
    """
    fig, ax = plt.subplots()

    # Relabel a copy; mapping the caller's column in place would leave only
    # NaN labels when the same frame is passed in a second time.
    aaron_judge = aaron_judge.copy()
    aaron_judge["type"] = aaron_judge["type"].map({"S": 1, "B": 0})
    aaron_judge = aaron_judge.dropna(subset=["plate_x", "plate_z", "type"])

    plt.scatter(
        x=aaron_judge["plate_x"],
        y=aaron_judge["plate_z"],
        c=aaron_judge["type"],
        cmap=plt.cm.coolwarm,
        alpha=0.25,
    )

    training_set, test_set = train_test_split(aaron_judge, random_state=1)

    classifier = SVC(kernel="rbf", gamma=3, C=1)
    classifier.fit(training_set[["plate_x", "plate_z"]], training_set["type"])

    # draw_boundary is defined elsewhere; it receives the axes and fitted model.
    draw_boundary(ax, classifier)

    print(classifier.score(test_set[["plate_x", "plate_z"]], test_set["type"]))

    ax.set_ylim(-2, 6)
    ax.set_xlim(-3, 3)
    plt.show()
def find_strike_zone(data_set):
    """Grid-search an RBF SVM on pitch locations and plot the best boundary.

    Integer ``gamma`` and ``C`` values in 1..4 are tried; the pair with the
    highest score on the validation split is kept in a dict with keys
    ``'value'``, ``'gamma'`` and ``'C'``, which is printed. The model is then
    retrained with that pair and drawn over the scatter plot.

    Args:
        data_set: pandas DataFrame with ``plate_x``, ``plate_z`` and ``type``
            columns, where ``type`` holds 'S' / 'B'. The caller's frame is
            not modified.
    """
    # Relabel a copy so a repeated call does not see already-mapped labels.
    # Balls map to 0 (the original used 2, which made strikes the *low* end
    # of the colour map and so inverted the plot colours).
    data_set = data_set.copy()
    data_set['type'] = data_set['type'].map({'S': 1, 'B': 0})
    print(data_set['type'])
    print(data_set['plate_x'])

    data_set = data_set.dropna(subset=['plate_x', 'plate_z', 'type'])

    fig, ax = plt.subplots()
    plt.scatter(
        x=data_set['plate_x'],
        y=data_set['plate_z'],
        c=data_set['type'],
        cmap=plt.cm.coolwarm,
        alpha=0.25,
    )

    training_set, validation_set = train_test_split(data_set, random_state=1)
    train_points = training_set[['plate_x', 'plate_z']]
    train_labels = training_set['type']
    valid_points = validation_set[['plate_x', 'plate_z']]
    # 1-D labels; the original passed a one-column DataFrame here.
    valid_labels = validation_set['type']

    largest = {'value': 0, 'gamma': 1, 'C': 1}
    for gamma in range(1, 5):
        for C in range(1, 5):
            candidate = SVC(kernel='rbf', gamma=gamma, C=C)
            candidate.fit(train_points, train_labels)
            score = candidate.score(valid_points, valid_labels)
            if score > largest['value']:
                largest['value'] = score
                largest['gamma'] = gamma
                largest['C'] = C
    print(largest)

    # Retrain with the winning parameters. The original drew whichever model
    # the loop fitted last (gamma=4, C=4), not the one it reported as best.
    classifier = SVC(kernel='rbf', gamma=largest['gamma'], C=largest['C'])
    classifier.fit(train_points, train_labels)

    ax.set_ylim(-2, 6)
    ax.set_xlim(-3, 3)
    draw_boundary(ax, classifier)
    plt.show()
def svm(dataset):
    """Plot pitch locations with an RBF SVM boundary, then print its score.

    The score on the validation split is printed after ``plt.show()`` returns.

    Args:
        dataset: pandas DataFrame with ``plate_x``, ``plate_z`` and ``type``
            columns, where ``type`` holds 'S' / 'B'. The caller's frame is
            not modified.
    """
    # Relabel a copy; in-place mapping would turn every label into NaN if the
    # same frame were passed in again.
    dataset = dataset.copy()
    dataset['type'] = dataset['type'].map({'S': 1, 'B': 0})
    dataset = dataset.dropna(subset=['plate_x', 'plate_z', 'type'])

    fig, ax = plt.subplots()
    plt.scatter(dataset['plate_x'], dataset['plate_z'], c=dataset['type'],
                cmap=plt.cm.coolwarm, alpha=0.25)

    training_set, validation_set = train_test_split(dataset, random_state=1)
    training_data = training_set[['plate_x', 'plate_z']]
    training_labels = training_set['type']

    classifier = SVC(kernel='rbf', gamma=3, C=1)
    classifier.fit(training_data, training_labels)

    # draw_boundary is defined elsewhere; it receives the axes and fitted model.
    draw_boundary(ax, classifier)
    ax.set_ylim(-2, 6)
    ax.set_xlim(-3, 3)
    plt.show()

    print(classifier.score(
        validation_set[['plate_x', 'plate_z']], validation_set['type']))
def find_strike_zone(data_set):
    """Fit an RBF SVM (gamma=1, C=3) to pitch locations and plot its boundary.

    Prints the classifier's score on the validation split before drawing.

    Args:
        data_set: pandas DataFrame with ``plate_x``, ``plate_z`` and ``type``
            columns, where ``type`` holds 'S' / 'B'. The caller's frame is
            not modified.
    """
    # Relabel a copy; in-place mapping would leave only NaN labels on a
    # second call with the same frame.
    data_set = data_set.copy()
    data_set['type'] = data_set['type'].map({'S': 1, 'B': 0})
    data_set = data_set.dropna(subset=['type', 'plate_x', 'plate_z'])

    plt.scatter(
        x=data_set['plate_x'],
        y=data_set['plate_z'],
        c=data_set['type'],
        cmap=plt.cm.coolwarm,
        alpha=0.5,
    )
    # The original used an `ax` that this function never defined. Take the
    # axes plt.scatter just drew on, so the boundary and the limits apply to
    # the same plot even after an earlier plt.show().
    ax = plt.gca()

    training_set, validation_set = train_test_split(data_set, random_state=1)

    classifier = SVC(kernel='rbf', gamma=1, C=3)
    classifier.fit(training_set[['plate_x', 'plate_z']], training_set['type'])

    score = classifier.score(
        validation_set[['plate_x', 'plate_z']], validation_set['type'])
    print(score)

    draw_boundary(ax, classifier)
    ax.set_ylim(-2, 6)
    ax.set_xlim(-3, 3)
    plt.show()
def graph_player(index, player_name, player):
    """Train an RBF SVM on a player's pitches and plot them with its boundary.

    Prints ``Score:`` followed by the classifier's score on the validation
    split, then shows a scatter plot titled with ``player_name``.

    Args:
        index: Not used by this function; kept so existing callers still work.
        player_name: Text used as the plot title.
        player: pandas DataFrame with ``plate_x``, ``plate_z`` and ``type``
            columns, where ``type`` holds 'S' / 'B'. The caller's frame is
            not modified.
    """
    fig, ax = plt.subplots()
    plt.title(player_name)

    # Relabel a copy (strike -> 1, ball -> 0); mapping the caller's frame in
    # place would leave only NaN labels on a repeated call.
    player = player.copy()
    player['type'] = player['type'].map({'S': 1, 'B': 0})
    player = player.dropna(subset=['plate_x', 'plate_z', 'type'])

    training_set, validation_set = train_test_split(player, random_state=1)

    classifier = SVC(kernel='rbf', gamma=3, C=1)
    classifier.fit(training_set[['plate_x', 'plate_z']], training_set['type'])

    score = classifier.score(
        validation_set[['plate_x', 'plate_z']], validation_set['type'])
    print('Score:', score)

    # Fix the limits first, then scatter, then draw the boundary on top.
    ax.set_ylim(-2, 6)
    ax.set_xlim(-3, 3)
    plt.scatter(player['plate_x'], player['plate_z'], c=player['type'],
                cmap=plt.cm.coolwarm, alpha=0.5)

    draw_boundary(ax, classifier)
    plt.show()
def strike_zone(pitcher, color):
    """Scatter a pitcher's pitches and draw an RBF SVM boundary on shared axes.

    This function neither creates a figure nor calls ``plt.show()``; it reads
    ``ax`` from the enclosing module scope, presumably so that several
    players can be overlaid on one plot (confirm against the caller).

    Args:
        pitcher: pandas DataFrame with ``plate_x``, ``plate_z`` and ``type``
            columns, where ``type`` holds 'B' / 'S'. The caller's frame is
            not modified.
        color: Colour map passed to ``plt.scatter`` as ``cmap``.
    """
    # Remap the strings to binary labels on a copy; mapping the caller's frame
    # in place would leave only NaN labels on a repeated call.
    pitcher = pitcher.copy()
    pitcher['type'] = pitcher['type'].map({'B': 0, 'S': 1})

    # Keep only rows that are balls or strikes with a known location.
    pitcher = pitcher.dropna(subset=['type', 'plate_x', 'plate_z'])

    plt.scatter(pitcher['plate_x'], pitcher['plate_z'], c=pitcher['type'],
                cmap=color, alpha=0.15)

    # Only the training part of the 80/20 split is used here.
    training_set, _validation_set = train_test_split(
        pitcher, train_size=0.8, test_size=0.2, random_state=1)

    classifier = SVC(kernel='rbf', C=1, gamma=7)
    classifier.fit(training_set[['plate_x', 'plate_z']], training_set['type'])

    # Comment this call out for a cleaner overlay of several players.
    # Original author's note: it also cannot function with more than 2
    # parameters.
    draw_boundary(ax, classifier)
def find_strike_zone(data_set):
    """Fit an RBF SVM (gamma=3.2, C=0.5) to pitch locations and plot it.

    Draws the boundary over the scatter plot, prints the classifier's score
    on the validation split, and shows the figure.

    Args:
        data_set: pandas DataFrame with ``plate_x``, ``plate_z`` and ``type``
            columns, where ``type`` holds 'S' / 'B'. The caller's frame is
            not modified.
    """
    # Relabel a copy; in-place mapping would leave only NaN labels on a
    # second call with the same frame.
    data_set = data_set.copy()
    data_set['type'] = data_set['type'].map({'S': 1, 'B': 0})
    data_set = data_set.dropna(subset=['type', 'plate_x', 'plate_z'])

    plt.scatter(
        x=data_set['plate_x'],
        y=data_set['plate_z'],
        c=data_set['type'],
        cmap=plt.cm.coolwarm,
        alpha=0.25,
    )
    # The original used an `ax` that this function never defined. Take the
    # axes plt.scatter just drew on, so the boundary and the limits apply to
    # the same plot even after an earlier plt.show().
    ax = plt.gca()

    training_set, validation_set = train_test_split(data_set, random_state=1)

    classifier = SVC(kernel='rbf', gamma=3.2, C=0.5)
    classifier.fit(training_set[['plate_x', 'plate_z']], training_set['type'])

    draw_boundary(ax, classifier)

    print(classifier.score(
        validation_set[['plate_x', 'plate_z']], validation_set['type']))

    ax.set_ylim(-2, 6)
    ax.set_xlim(-3, 3)
    plt.show()
''' 10. Next, create an SVC named classifier with kernel = 'rbf'. For right now, don’t worry about setting the C or gamma parameters. The SVC should have kernel = 'rbf'. 11. Call classifier’s .fit() method. This method should take two parameters: The training data. This is the plate_x column and the plate_z column in training_set. The labels. This is the type column in training_set. The code below shows an example of selecting two columns from a DataFrame: ''' two_columns = data_frame[['A', 'B']] ''' The first parameter should be training_set[['plate_x', 'plate_z']]. The second parameter should be training_set['type']. 12. To visualize the SVM, call the draw_boundary function. This is a function that we wrote ourselves - you won’t find it in scikit-learn. This function takes two parameters: The axes of your graph. For us, this is the ax variable that we defined at the top of your code. The trained SVM. For us, this is classifier. Make sure you’ve called .fit() before trying to visualize the decision boundary. Run your code to see the predicted strike zone! Note that the decision boundary will be drawn based on the size of the current axes. So if you call draw_boundary before calling the scatter function, you will only see the boundary as a small square.
# Scatter the pitches in `aaron_judge`, coloured by the `type` column.
# NOTE(review): `aaron_judge` and its numeric `type` labels are prepared
# outside this excerpt.
fig, ax = plt.subplots()
plt.scatter(aaron_judge['plate_x'], aaron_judge['plate_z'],
            c=aaron_judge['type'], cmap=plt.cm.coolwarm, alpha=0.25)
plt.xlabel('How far left or right the pitch is from the center of home plate')
plt.ylabel('How high off the ground the pitch was')
plt.title('Graph where the strikes are red and the balls blue')

## Building the SVM for Aaron Judge
training_set, validation_set = train_test_split(aaron_judge, random_state=1)
train_points = training_set[['plate_x', 'plate_z']]
valid_points = validation_set[['plate_x', 'plate_z']]
# Labels are passed as 1-D Series. The original passed one-column DataFrames
# (`[['type']]`), while scikit-learn expects labels of shape (n_samples,).
train_labels = training_set['type']
valid_labels = validation_set['type']

classifier = SVC(kernel='rbf', gamma=100, C=100)
classifier.fit(train_points, train_labels)
draw_boundary(ax, classifier)
plt.show()
plt.clf()

## Optimising the SVM
# Try gamma and C in {1, 4, 7, 10}.
for i in range(1, 11, 3):
    for n in range(1, 11, 3):
        classifier = SVC(kernel='rbf', gamma=i, C=n)
        classifier.fit(train_points, train_labels)
        # NOTE(review): `score` is not used in the visible code; the loop
        # body may continue beyond this excerpt.
        score = classifier.score(valid_points, valid_labels)