def strike_zone(dataframe):
    """Plot a player's pitches and the strike zone learned by an RBF SVM.

    Strikes ('S') are labelled 1 and balls ('B') 0; every other ``type``
    value becomes NaN and is dropped together with rows that are missing
    ``plate_x`` or ``plate_z``. The accuracy on the held-out validation
    split is printed and the figure is shown.

    The labels are encoded on a copy, so the caller's frame is left
    untouched and the function can safely be called twice with the same
    frame (re-mapping an already encoded column would turn every label
    into NaN).

    Args:
        dataframe: pandas DataFrame with ``type``, ``plate_x``, ``plate_z``
            and ``player_name`` columns.
    """
    fig, ax = plt.subplots()

    features = ['plate_x', 'plate_z']

    # encode the pitch type as 1/0 so it can be used as the model's labels
    pitches = dataframe.assign(type=dataframe['type'].map({'S': 1, 'B': 0}))

    # drop NaN values from the columns we need
    pitches = pitches.dropna(subset=features + ['type'])

    # scatter plot of pitch locations, colored according to type
    plt.scatter(x=pitches['plate_x'], y=pitches['plate_z'],
                c=pitches['type'], cmap=plt.cm.coolwarm, alpha=0.25)

    # Row label 4 may have been removed by dropna above; fall back to the
    # first remaining row instead of raising KeyError.
    names = pitches['player_name']
    player_name = names[4] if 4 in names.index else names.iloc[0]
    plt.title(player_name + ' Strike Zone')

    # split data into training and validation sets
    training_set, validation_set = train_test_split(pitches, random_state=1)

    # create and train the model
    classifier = SVC(kernel='rbf', gamma=0.5, C=1)
    classifier.fit(training_set[features], training_set['type'])

    # draw boundary onto graph
    draw_boundary(ax, classifier)

    # print the score of the model
    print(classifier.score(validation_set[features], validation_set['type']))

    # use the same axis limits for every player, then display the graph
    ax.set_ylim(-2, 6)
    ax.set_xlim(-3, 3)
    plt.show()
def strike_zone(player):
  """Fit an RBF SVM to a player's pitches and show the learned strike zone.

  The ``type`` column of the frame passed in is overwritten in place with
  1 for 'S', 0 for 'B' and NaN for anything else. The encoded ``type``
  column and the ``plate_x`` column are printed, the pitches are plotted
  with the decision boundary, and the validation accuracy is printed once
  ``plt.show()`` has returned.
  """
  location_columns = ['plate_x', 'plate_z']
  label_column = 'type'

  fig, ax = plt.subplots()

  # Encode strikes/balls numerically (written back to the caller's frame).
  encoded = player[label_column].map({'S':1, 'B':0})
  player[label_column] = encoded
  print(player[label_column])

  print(player["plate_x"])

  # Keep only rows that have a location and a strike/ball label.
  player = player.dropna(subset=location_columns + [label_column])

  plt.scatter(
      x=player["plate_x"],
      y=player["plate_z"],
      c=player[label_column],
      cmap=plt.cm.coolwarm,
      alpha=0.25,
  )

  train_rows, held_out_rows = train_test_split(player, random_state=1)

  classifier = SVC(kernel='rbf', gamma=3, C=1)
  classifier.fit(train_rows[location_columns], train_rows[label_column])

  # Fix the viewport before drawing the boundary.
  ax.set_ylim(-2, 6)
  ax.set_xlim(-3, 3)
  draw_boundary(ax, classifier)

  plt.show()

  accuracy = classifier.score(held_out_rows[location_columns],
                              held_out_rows[label_column])
  print(accuracy)
# Example #3
0
def svc(player):
  """Plot a player's pitches with the strike zone predicted by an RBF SVM.

  The ``type`` column of the frame passed in is overwritten in place
  ('S' -> 1, 'B' -> 0, anything else -> NaN). The validation accuracy is
  printed before the figure is shown.
  """
  coords = ['plate_x', 'plate_z']

  # Numeric labels are written back to the caller's frame.
  player['type'] = player['type'].map({'S': 1, 'B': 0})
  player = player.dropna(subset=coords + ['type'])

  fig, ax = plt.subplots()
  plt.scatter(
      x=player.plate_x,
      y=player.plate_z,
      c=player.type,
      cmap=plt.cm.coolwarm,
      alpha=0.25,
  )

  train_part, validation_part = train_test_split(player, random_state=1)

  model = SVC(kernel='rbf', gamma=3, C=1)
  model.fit(train_part[coords], train_part['type'])
  draw_boundary(ax, model)

  accuracy = model.score(validation_part[coords], validation_part['type'])
  print(accuracy)

  ax.set_ylim(-2, 6)
  ax.set_xlim(-3, 3)
  plt.show()
# Example #4
0
def find_strike_zone(player):
    """Plot a player's pitches and the strike zone learned by an RBF SVM.

    Strikes ('S') are labelled 1 and balls ('B') 0; rows with any other
    ``type`` or with a missing ``plate_x`` / ``plate_z`` are dropped. The
    validation accuracy is printed and the figure is shown.

    The labels are encoded on a copy so the caller's frame is not modified
    and the function can be called repeatedly with the same frame.

    Args:
        player: pandas DataFrame with ``type``, ``plate_x`` and ``plate_z``
            columns.
    """
    features = ['plate_x', 'plate_z']

    pitches = player.assign(type=player['type'].map({'S': 1, 'B': 0}))
    pitches = pitches.dropna(subset=features + ['type'])

    plt.scatter(x=pitches['plate_x'],
                y=pitches['plate_z'],
                c=pitches['type'],
                cmap=plt.cm.coolwarm,
                alpha=0.25)

    # ``ax`` was never defined in this function; use the axes the scatter
    # plot was just drawn on so the boundary lands on the same graph.
    ax = plt.gca()

    training_set, validation_set = train_test_split(pitches, random_state=1)

    classifier = SVC(kernel='rbf')
    classifier.fit(training_set[features], training_set['type'])

    score = classifier.score(validation_set[features],
                             validation_set['type'])
    print(score)

    draw_boundary(ax, classifier)
    plt.show()
# Example #5
0
def investigate_strike_zone(player):
    """Grid-search gamma and C for an RBF SVM and plot the best strike zone.

    The ``type`` column of the frame passed in is overwritten in place
    ('S' -> 1, 'B' -> 0, anything else -> NaN). Every integer gamma and C
    from 1 to 20 is tried; the first pair reaching the highest validation
    score is reported, a model is refitted with it, and its decision
    boundary is drawn over the scatter plot.
    """
    fig, ax = plt.subplots()
    inputs = ['plate_x', 'plate_z']

    # Relabel, then discard rows that cannot be used.
    player.type = player.type.map({'S': 1, 'B': 0})
    player = player.dropna(subset=inputs + ['type'])

    plt.scatter(player.plate_x, player.plate_z,
                c=player.type, cmap=plt.cm.coolwarm, alpha=0.25)

    train_rows, validation_rows = train_test_split(player, random_state=1)
    train_x, train_y = train_rows[inputs], train_rows['type']
    validation_x, validation_y = validation_rows[inputs], validation_rows['type']

    # (score, gamma, C) of the best candidate so far; only a strictly
    # better score replaces it, so the earliest best pair wins ties.
    best = (0, 0, 0)
    for gamma_value in range(1, 21):
        for c_value in range(1, 21):
            candidate = SVC(kernel='rbf', gamma=gamma_value, C=c_value)
            candidate.fit(train_x, train_y)
            candidate_score = candidate.score(validation_x, validation_y)
            if candidate_score > best[0]:
                best = (candidate_score, gamma_value, c_value)

    best_score, best_gamma, best_C = best
    print(f'Best gamma: {best_gamma}')
    print(f'Best C: {best_C}')
    print(f'Best score: {best_score}')

    # Refit with the winning parameters before drawing.
    classifier = SVC(kernel='rbf', gamma=best_gamma, C=best_C)
    classifier.fit(train_x, train_y)

    draw_boundary(ax, classifier)
    plt.show()
    plt.clf()
# Example #6
0
def test(aaron_judge):
    """Plot the pitches in ``aaron_judge`` with an RBF-SVM strike zone.

    The ``type`` column of the frame passed in is overwritten in place
    ("S" -> 1, "B" -> 0, anything else -> NaN). The accuracy on the test
    split is printed before the figure is shown.
    """
    location = ["plate_x", "plate_z"]
    fig, ax = plt.subplots()

    # Numeric labels are written back to the caller's frame.
    labels = aaron_judge["type"].map({"S": 1, "B": 0})
    aaron_judge["type"] = labels

    aaron_judge = aaron_judge.dropna(subset=location + ["type"])

    plt.scatter(x=aaron_judge.plate_x, y=aaron_judge.plate_z,
                c=aaron_judge.type, cmap=plt.cm.coolwarm, alpha=0.25)

    train_part, test_part = train_test_split(aaron_judge, random_state=1)

    model = SVC(kernel="rbf", gamma=3, C=1)
    model.fit(train_part[location], train_part["type"])
    draw_boundary(ax, model)

    accuracy = model.score(test_part[location], test_part["type"])
    print(accuracy)

    ax.set_ylim(-2, 6)
    ax.set_xlim(-3, 3)
    plt.show()
# Example #7
0
def find_strike_zone(data_set):
  """Grid-search an RBF SVM on a player's pitches and plot the best boundary.

  Integer gamma and C values from 1 to 4 are tried. The best validation
  score and its parameters are printed as a dict, and the decision
  boundary of that best model is drawn over the scatter plot. (Previously
  the boundary of the last model fitted, gamma=4 / C=4, was drawn
  regardless of which one scored best.)

  The labels are encoded on a copy so the caller's frame is not modified
  and the function can be called repeatedly with the same frame.

  Args:
    data_set: pandas DataFrame with ``type``, ``plate_x`` and ``plate_z``
      columns.
  """
  features = ['plate_x', 'plate_z']

  # NOTE(review): balls are encoded as 2 here rather than 0; kept as the
  # author wrote it. Confirm whether this was intentional.
  pitches = data_set.assign(type=data_set['type'].map({'S': 1, 'B': 2}))

  # 5
  print(pitches['type'])

  # 6
  print(pitches['plate_x'])

  # 7
  pitches = pitches.dropna(subset=features + ['type'])

  # 8
  fig, ax = plt.subplots()

  plt.scatter(x=pitches['plate_x'], y=pitches['plate_z'], c=pitches['type'],
              cmap=plt.cm.coolwarm, alpha=0.25)

  # 9
  training_set, validation_set = train_test_split(pitches, random_state=1)

  # 10
  largest = {'value': 0, 'gamma': 1, 'C': 1}
  best_classifier = None
  for gamma in range(1, 5):
    for C in range(1, 5):
      classifier = SVC(kernel='rbf', gamma=gamma, C=C)
      classifier.fit(training_set[features], training_set['type'])
      # Labels are passed as a 1-D Series, matching what fit() received.
      score = classifier.score(validation_set[features], validation_set['type'])
      if score > largest['value']:
        largest.update(value=score, gamma=gamma, C=C)
        best_classifier = classifier

  print(largest)

  # No candidate beat a score of 0: fall back to the default parameters
  # recorded in ``largest`` so there is still a fitted model to draw.
  if best_classifier is None:
    best_classifier = SVC(kernel='rbf', gamma=largest['gamma'], C=largest['C'])
    best_classifier.fit(training_set[features], training_set['type'])

  # 16
  ax.set_ylim(-2, 6)
  ax.set_xlim(-3, 3)
  draw_boundary(ax, best_classifier)
  plt.show()
# Example #8
0
def svm(dataset):
  """Show a player's pitches with an RBF-SVM strike zone, then print accuracy.

  The ``type`` column of the frame passed in is overwritten in place
  ('S' -> 1, 'B' -> 0, anything else -> NaN). The validation accuracy is
  printed after ``plt.show()`` returns.
  """
  feature_names = ['plate_x', 'plate_z']

  # Numeric labels are written back to the caller's frame.
  dataset['type'] = dataset['type'].map({'S': 1, 'B': 0})
  dataset = dataset.dropna(subset=feature_names + ['type'])

  fig, ax = plt.subplots()
  plt.scatter(
      dataset.plate_x,
      dataset.plate_z,
      c=dataset.type,
      cmap=plt.cm.coolwarm,
      alpha=0.25,
  )

  train_rows, validation_rows = train_test_split(dataset, random_state=1)

  model = SVC(kernel='rbf', gamma=3, C=1)
  model.fit(train_rows[feature_names], train_rows['type'])

  draw_boundary(ax, model)
  ax.set_ylim(-2, 6)
  ax.set_xlim(-3, 3)
  plt.show()

  accuracy = model.score(validation_rows[feature_names], validation_rows.type)
  print(accuracy)
def find_strike_zone(data_set):
    """Plot a player's pitches and the strike zone learned by an RBF SVM.

    Strikes ('S') are labelled 1 and balls ('B') 0; rows with any other
    ``type`` or with a missing ``plate_x`` / ``plate_z`` are dropped. The
    validation accuracy is printed and the figure is shown.

    The labels are encoded on a copy so the caller's frame is not modified
    and the function can be called repeatedly with the same frame.

    Args:
        data_set: pandas DataFrame with ``type``, ``plate_x`` and
            ``plate_z`` columns.
    """
    features = ['plate_x', 'plate_z']

    pitches = data_set.assign(type=data_set['type'].map({'S': 1, 'B': 0}))
    pitches = pitches.dropna(subset=['type'] + features)

    plt.scatter(x=pitches['plate_x'],
                y=pitches['plate_z'],
                c=pitches['type'],
                cmap=plt.cm.coolwarm,
                alpha=0.5)

    # ``ax`` was never defined in this function; use the axes the scatter
    # plot was just drawn on so the boundary lands on the same graph.
    ax = plt.gca()

    training_set, validation_set = train_test_split(pitches, random_state=1)

    classifier = SVC(kernel='rbf', gamma=1, C=3)
    classifier.fit(training_set[features], training_set['type'])

    score = classifier.score(validation_set[features],
                             validation_set['type'])
    print(score)

    draw_boundary(ax, classifier)
    ax.set_ylim(-2, 6)
    ax.set_xlim(-3, 3)
    plt.show()
# Example #10
0
def graph_player(index, player_name, player):
    """Train an RBF SVM on a player's pitches and graph the result.

    The figure is titled with ``player_name``. The ``type`` column of the
    frame passed in is overwritten in place ('S' -> 1, 'B' -> 0, anything
    else -> NaN). The validation score is printed before plotting.
    ``index`` is accepted but not used by this function.
    """
    fig, ax = plt.subplots()
    plt.title(player_name)

    location_columns = ['plate_x', 'plate_z']

    # Encode strike/ball labels and discard unusable rows.
    player['type'] = player['type'].map({'S': 1, 'B': 0})
    player = player.dropna(subset=location_columns + ['type'])

    train_rows, validation_rows = train_test_split(player, random_state=1)

    # RBF-kernel support vector classifier fitted on the training rows.
    model = SVC(kernel='rbf', gamma=3, C=1)
    model.fit(train_rows[location_columns], train_rows['type'])

    score = model.score(validation_rows[location_columns],
                        validation_rows['type'])
    print('Score:', score)

    # Pitch locations colored by label via the coolwarm colormap.
    ax.set_ylim(-2, 6)
    ax.set_xlim(-3, 3)
    plt.scatter(player.plate_x, player.plate_z,
                c=player.type, cmap=plt.cm.coolwarm, alpha=0.5)

    # Overlay the SVM decision boundary.
    draw_boundary(ax, model)

    plt.show()
def strike_zone(pitcher, color):
    """Add a pitcher's pitches and SVM strike-zone boundary to the current plot.

    Draws on the current matplotlib axes and neither creates a figure nor
    calls ``plt.show()``, so several pitchers can be overlaid by calling
    this function repeatedly before showing the figure.

    Balls ('B') are labelled 0 and strikes ('S') 1; rows with any other
    ``type`` or with a missing ``plate_x`` / ``plate_z`` are dropped. The
    labels are encoded on a copy so the caller's frame is not modified.

    Args:
        pitcher: pandas DataFrame with ``type``, ``plate_x`` and
            ``plate_z`` columns.
        color: colormap passed to ``plt.scatter`` as ``cmap``.
    """
    features = ['plate_x', 'plate_z']

    # First - remap the strings to binary numbers (on a copy)
    pitches = pitcher.assign(type=pitcher['type'].map({'B': 0, 'S': 1}))
    # Next - drop NA values to only keep balls and strikes
    pitches = pitches.dropna(subset=['type'] + features)

    # Set up scatter plot data
    plt.scatter(pitches['plate_x'],
                pitches['plate_z'],
                c=pitches['type'],
                cmap=color,
                alpha=0.15)

    # ``ax`` was never defined in this function; use the axes the scatter
    # plot was just drawn on.
    ax = plt.gca()

    # Only the training part is used here; the split is kept so the model
    # is fitted on the same 80% of rows as before.
    training_set, _ = train_test_split(pitches,
                                       train_size=0.8,
                                       test_size=0.2,
                                       random_state=1)

    # Establish the classifier and fit it
    classifier = SVC(kernel='rbf', C=1, gamma=7)
    classifier.fit(training_set[features], training_set['type'])

    # Draw the boundaries. Comment this call out for a cleaner overlay when
    # plotting several pitchers.
    draw_boundary(ax, classifier)
# Example #12
0
def find_strike_zone(data_set):
    """Plot a player's pitches and the strike zone learned by an RBF SVM.

    Strikes ('S') are labelled 1 and balls ('B') 0; rows with any other
    ``type`` or with a missing ``plate_x`` / ``plate_z`` are dropped. The
    validation accuracy is printed and the figure is shown.

    The labels are encoded on a copy so the caller's frame is not modified
    and the function can be called repeatedly with the same frame.

    Args:
        data_set: pandas DataFrame with ``type``, ``plate_x`` and
            ``plate_z`` columns.
    """
    features = ['plate_x', 'plate_z']

    pitches = data_set.assign(type=data_set['type'].map({'S': 1, 'B': 0}))
    pitches = pitches.dropna(subset=['type'] + features)

    plt.scatter(x=pitches['plate_x'],
                y=pitches['plate_z'],
                c=pitches['type'],
                cmap=plt.cm.coolwarm,
                alpha=0.25)

    # ``ax`` was never defined in this function; use the axes the scatter
    # plot was just drawn on so the boundary lands on the same graph.
    ax = plt.gca()

    training_set, validation_set = train_test_split(pitches, random_state=1)

    classifier = SVC(kernel='rbf', gamma=3.2, C=0.5)
    classifier.fit(training_set[features], training_set['type'])
    draw_boundary(ax, classifier)

    print(classifier.score(validation_set[features], validation_set['type']))

    ax.set_ylim(-2, 6)
    ax.set_xlim(-3, 3)

    plt.show()
'''
10.
Next, create an SVC named classifier with kernel = 'rbf'. For right now, don’t worry about setting the C or gamma parameters.


The SVC should have kernel = 'rbf'.

11.
Call classifier's .fit() method. This method should take two parameters:

The training data. This is the plate_x column and the plate_z column in training_set.
The labels. This is the type column in training_set.
The code below shows an example of selecting two columns from a DataFrame:
'''
# Example referred to by the exercise text: select columns 'A' and 'B' with a list of names.
# NOTE(review): `data_frame` is not defined anywhere in the visible code; this line
# looks like a tutorial snippet that ended up outside the surrounding string.
two_columns = data_frame[['A', 'B']]
'''
The first parameter should be training_set[['plate_x', 'plate_z']].

The second parameter should be training_set['type'].

12.
To visualize the SVM, call the draw_boundary function. This is a function that we wrote ourselves - you won’t find it in scikit-learn.

This function takes two parameters:

The axes of your graph. For us, this is the ax variable that we defined at the top of your code.
The trained SVM. For us, this is classifier. Make sure you’ve called .fit() before trying to visualize the decision boundary.
Run your code to see the predicted strike zone!

Note that the decision boundary will be drawn based on the size of the current axes. So if you call draw_boundary before calling the scatter function, you will only see the boundary as a small square.
fig, ax = plt.subplots()
plt.scatter(aaron_judge.plate_x,
            aaron_judge.plate_z,
            c=aaron_judge.type,
            cmap=plt.cm.coolwarm,
            alpha=0.25)
plt.xlabel('How far left or right the pitch is from the center of home plate')
plt.ylabel('How high off the ground the pitch was')
plt.title('Graph where the strikes are red and the balls blue')

## Building the SVM for AAron Judge
training_set, validation_set, = train_test_split(aaron_judge, random_state=1)

classifier = SVC(kernel='rbf', gamma=100, C=100)
classifier.fit(training_set[['plate_x', 'plate_z']], training_set[['type']])
draw_boundary(ax, classifier)

#print(classifier.score(validation_set[['plate_x', 'plate_z']], validation_set[['type']]))

plt.show()
plt.clf()

## Optimising the SVM

for i in range(1, 11, 3):
    for n in range(1, 11, 3):
        classifier = SVC(kernel='rbf', gamma=i, C=n)
        classifier.fit(training_set[['plate_x', 'plate_z']],
                       training_set[['type']])
        score = classifier.score(validation_set[['plate_x', 'plate_z']],
                                 validation_set[['type']])