forked from enalisnick/NBA_shot_analysis
-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
130 lines (103 loc) · 5.04 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
import os

import numpy as np

from data.data_accessors import load_seasons
from models.classifiers import *
"""
This file contains the logic for experiment #1 (prediction of shot
outcome using only spatial features), experiment #2 (prediction
based on spatial and position features), and experiment #3 (using
spatial, position, and shot-type features)--but not the neural
network component of #3.
Running this file will output accuracy results to logs created in
the current directory. Per-position visualizations will be placed
in the following directory structure, which is assumed to exist:
./position_graphs/[position abbreviation]/
written by Eric Nalisnick, enalisnick@gmail.com, Nov 2014
"""
def experiment_1(train_seasons, test_seasons):
    """Experiment #1: predict shot outcome from spatial features only.

    Trains four classifiers on data loaded for `train_seasons`, then tests
    and visualizes each one using data loaded for `test_seasons`:
    logistic regression, k-nearest neighbors, a 2-D Gaussian classifier
    and a mixture of 2-D Gaussians.

    Args:
        train_seasons: seasons handed to `load_seasons` for training,
            e.g. ['2006-2007', '2007-2008'].
        test_seasons: seasons handed to `load_seasons` for testing.
    """
    # Training data is loaded twice: once split into made/missed shots
    # (for the Gaussian models) and once as features/labels.
    made_shots, missed_shots = load_seasons(train_seasons, split_flag=True)
    fit_features, fit_labels = load_seasons(train_seasons)
    eval_features, eval_labels = load_seasons(test_seasons)

    def run(classifier, *training_args):
        # Shared train -> test -> visualize sequence for every model.
        classifier.train(*training_args)
        classifier.test(eval_features, eval_labels)
        classifier.visualize()

    neighbor_count = 5
    mixture_count = 3
    # Third argument to the mixture model's train();
    # presumably the number of fitting iterations -- TODO confirm.
    mixture_train_setting = 10

    # Models trained on the combined features/labels.
    run(LogisticRegression(), fit_features, fit_labels)
    run(kNN(neighbor_count), fit_features, fit_labels)

    # Models trained on the made/missed split.
    run(Gaussian2DClassifier(), made_shots, missed_shots)
    run(GaussianMixtureClassifier(mixture_count),
        made_shots, missed_shots, mixture_train_setting)
def experiment_2(train_seasons, test_seasons):
    """Experiment #2: Use spatial and position features for prediction.

    For each position abbreviation ('G', 'F', 'C') the data is loaded with
    that position as the only entry of `attributes`, and four classifiers
    (logistic regression, k-nearest neighbors, a 2-D Gaussian classifier
    and a mixture of 2-D Gaussians) are trained, tested and visualized.
    Visualizations are directed to ./position_graphs/<position>/, which
    is created if it does not already exist.

    Args:
        train_seasons: seasons handed to `load_seasons` for training,
            e.g. ['2006-2007', '2007-2008'].
        test_seasons: seasons handed to `load_seasons` for testing.
    """
    positions = ['G', 'F', 'C']
    for position in positions:
        # load training data (made/missed split and features/labels)
        made_data, missed_data = load_seasons(seasons=train_seasons, split_flag=True, attributes=[position])
        train_features, train_labels = load_seasons(seasons=train_seasons, attributes=[position])
        # load test data
        test_features, test_labels = load_seasons(seasons=test_seasons, attributes=[position])

        output_directory = "./position_graphs/%s/" % (position)
        # Make sure the per-position output directory is present, so the
        # visualize() calls below do not depend on it being set up by hand.
        if not os.path.isdir(output_directory):
            os.makedirs(output_directory)

        ###### models trained on features/labels #####
        # Logistic Regression
        log_reg = LogisticRegression()
        log_reg.train(train_features, train_labels)
        log_reg.test(test_features, test_labels)
        log_reg.visualize(output_dir=output_directory)

        # k-Nearest Neighbors
        num_of_neighbors = 5
        kNN_model = kNN(num_of_neighbors)
        kNN_model.train(train_features, train_labels)
        kNN_model.test(test_features, test_labels)
        kNN_model.visualize(output_dir=output_directory)

        ###### models trained on the made/missed split #####
        # 2-D Gaussian Classifier
        gauss_model = Gaussian2DClassifier()
        gauss_model.train(made_data, missed_data)
        gauss_model.test(test_features, test_labels)
        gauss_model.visualize(output_dir=output_directory)

        # Mixture of 2-D Gaussians Classifier
        num_of_mixtures = 3
        gauss_mixture_model = GaussianMixtureClassifier(num_of_mixtures)
        # The third argument is presumably the number of fitting
        # iterations -- TODO confirm against the classifier.
        gauss_mixture_model.train(made_data, missed_data, 10)
        gauss_mixture_model.test(test_features, test_labels)
        gauss_mixture_model.visualize(output_dir=output_directory)
def experiment_3(train_seasons, test_seasons):
    """Experiment #3: predict from spatial, position and shot-type features.

    Loads training and test data with the position and shot-type
    attributes requested, then trains and tests a logistic regression
    model. The neural network part of this experiment is not included.

    Args:
        train_seasons: seasons handed to `load_seasons` for training.
        test_seasons: seasons handed to `load_seasons` for testing.
    """
    court_positions = ('G', 'F', 'C')
    shot_kinds = ('3pt', 'fade away', 'hook', 'layup', 'jump', 'dunk')

    # Positions first, then shot types; the same list object is passed to
    # both load_seasons calls.
    requested_attributes = list(court_positions + shot_kinds)
    loader_options = dict(attributes=requested_attributes,
                          with_attributes_flag=True)

    fit_features, fit_labels = load_seasons(train_seasons, **loader_options)
    eval_features, eval_labels = load_seasons(test_seasons, **loader_options)

    classifier = LogisticRegression()
    classifier.train(fit_features, fit_labels)
    classifier.test(eval_features, eval_labels)
    # No visualize() call is made for this experiment.
if __name__ == '__main__':
    # Every experiment trains on the same three seasons and is tested on
    # the season that follows them.
    training_years = ('2006-2007', '2007-2008', '2008-2009')
    held_out_years = ('2009-2010',)

    # Run order:
    #   experiment_1 -- spatial features only
    #   experiment_2 -- spatial and position features
    #   experiment_3 -- spatial, position and shot-type features
    for run_experiment in (experiment_1, experiment_2, experiment_3):
        # Hand each experiment its own freshly built lists.
        run_experiment(train_seasons=list(training_years),
                       test_seasons=list(held_out_years))