Example #1
0
def getSeasonsValues(event, seasons):
  """ Loads the comparable values of `event` for one or several seasons.

  `seasons` is either a single season name (a string) or an iterable of
  season names. For an iterable, the per-season results of
  GameDataPoss.readSeasonEventComparables are joined with np.concatenate
  (default axis, i.e. along the first axis). Returns a Values object wrapping
  the result, with the passes / shot flags derived from the event name. """
  if isinstance(seasons, basestring):
    v = GameDataPoss.readSeasonEventComparables(seasons, event)
  else:
    per_season = [GameDataPoss.readSeasonEventComparables(s, event) for s in seasons]
    if not per_season:
      # No seasons given: keep the empty-list placeholder used so far.
      v = []
    elif len(per_season) == 1:
      # A single season is handed through untouched, exactly as before.
      v = per_season[0]
    else:
      # np.concatenate expects ONE sequence of arrays. The old call passed the
      # second array as a separate positional argument, which numpy reads as
      # `axis`, so every multi-season call failed.
      v = np.concatenate(per_season)
  return Values(v, passes = event == 'pass', shot = event == 'shot')
Example #2
0
def getSeasonsValues(event, seasons):
    """ Loads the comparable values of `event` for one or several seasons.

    `seasons` is either a single season name (a string) or an iterable of
    season names. For an iterable, the per-season results of
    GameDataPoss.readSeasonEventComparables are joined with np.concatenate
    (default axis, i.e. along the first axis). Returns a Values object
    wrapping the result, with the passes / shot flags derived from the
    event name. """
    if isinstance(seasons, basestring):
        v = GameDataPoss.readSeasonEventComparables(seasons, event)
    else:
        per_season = [
            GameDataPoss.readSeasonEventComparables(s, event) for s in seasons
        ]
        if not per_season:
            # No seasons given: keep the empty-list placeholder used so far.
            v = []
        elif len(per_season) == 1:
            # A single season is handed through untouched, exactly as before.
            v = per_season[0]
        else:
            # np.concatenate expects ONE sequence of arrays. The old call
            # passed the second array as a separate positional argument,
            # which numpy reads as `axis`, so every multi-season call failed.
            v = np.concatenate(per_season)
    return Values(v, passes=event == 'pass', shot=event == 'shot')
Example #3
0
def getAllTeamValues(event, seasons):
  """ Builds a dict mapping each team id to its Values for `event`.

  Walks every season in `seasons`, asks GameDataPoss for that season's team
  ids and loads each team through getTeamValues. A team id that shows up in
  more than one season keeps only the values of the last season processed. """
  values_by_team = {}
  for current_season in seasons:
    for team_id in GameDataPoss.getSeasonTeamIDs(current_season):
      values_by_team[team_id] = getTeamValues(event, team_id, current_season)
  return values_by_team
Example #4
0
def getAllTeamValues(event, seasons):
    """ Builds a dict mapping each team id to its Values for `event`.

    Walks every season in `seasons`, asks GameDataPoss for that season's
    team ids and loads each team through getTeamValues. A team id that shows
    up in more than one season keeps only the values of the last season
    processed. """
    collected = {}
    for season in seasons:
        team_ids = GameDataPoss.getSeasonTeamIDs(season)
        # Pairs are consumed in order, so later seasons overwrite earlier ones.
        collected.update(
            (team_id, getTeamValues(event, team_id, season))
            for team_id in team_ids)
    return collected
Example #5
0
def plot_histo2d_all_teams(event, season, goals = False, outcome = False):
  print 'plotting hist2d for teams'
  teams = GameDataPoss.getSeasonTeamIDs(season)
  for t in teams:
    print 'Getting team', t
    v = getTeamValues(event, t, season)
    data = (v.getValues('x'), v.getValues('y'))
    fname = t + '_' + event + '-hist2d.png'
    histo2d(data, ranged = False, title = event.capitalize() + ' frequencies for team ' + t, numBins = get_bins(), save = True, filename = fname)
Example #6
0
def plot_histo_passes_all_teams(season, save = False):
  print 'plotting hist for all teams'
  teams = GameDataPoss.getSeasonTeamIDs(season)
  for t in teams:
    print 'Team', t
    v = getTeamValues('pass', t, season)
    all_distances = v.getValues('distance')
    outcomes = v.getValues('outcome')
    success = all_distances[outcomes == 1]
    data = [all_distances, success]
    labels = ['All', 'Successful']
    multiHisto(data, labels, numBins = 20, title = 'Distances of Passes for ' + str(t), xlabel = 'Approximate Distance in yards')
Example #7
0
def draw_season_graph(season='liga12'):
    """ Draws the team graph for `season` and returns it.

    Reads the comma-separated matrix stored in
    DATA_PATH + '<season>-average-distances.txt', builds a graph with
    coloured_edge_graph, positions it with get_position and renders it with
    draw_colored_graph. """
    distances = np.loadtxt(DATA_PATH + season + '-average-distances.txt',
                           delimiter=',',
                           dtype='float')
    teams = GameDataPoss.getSeasonTeamIDs(season)
    team_dict = get_team_dict(season)

    # Alternative builder left disabled in the original:
    # G = create_distance_graph(teams, distances)
    graph = coloured_edge_graph(teams, distances, team_dict)

    layout = get_position(graph, team_dict)
    draw_colored_graph(graph, layout)
    return graph
Example #8
0
def plot_histo2d_all_teams(event, season, goals=False, outcome=False):
    print 'plotting hist2d for teams'
    teams = GameDataPoss.getSeasonTeamIDs(season)
    for t in teams:
        print 'Getting team', t
        v = getTeamValues(event, t, season)
        data = (v.getValues('x'), v.getValues('y'))
        fname = t + '_' + event + '-hist2d.png'
        histo2d(data,
                ranged=False,
                title=event.capitalize() + ' frequencies for team ' + t,
                numBins=get_bins(),
                save=True,
                filename=fname)
Example #9
0
def draw_season_graph(season='liga12'):
  """ Draws the team graph for `season` and returns it.

  Reads the comma-separated matrix stored in
  DATA_PATH + '<season>-average-distances.txt', builds a graph with
  coloured_edge_graph, positions it with get_position and renders it with
  draw_colored_graph. """
  distances = np.loadtxt(DATA_PATH + season + '-average-distances.txt', delimiter=',', dtype='float')
  teams = GameDataPoss.getSeasonTeamIDs(season)
  team_dict = get_team_dict(season)

  # Alternative builder left disabled in the original:
  # G = create_distance_graph(teams, distances)
  graph = coloured_edge_graph(teams, distances, team_dict)

  layout = get_position(graph, team_dict)
  draw_colored_graph(graph, layout)
  return graph
Example #10
0
def plot_histo_passes_all_teams(season, save=False):
    print 'plotting hist for all teams'
    teams = GameDataPoss.getSeasonTeamIDs(season)
    for t in teams:
        print 'Team', t
        v = getTeamValues('pass', t, season)
        all_distances = v.getValues('distance')
        outcomes = v.getValues('outcome')
        success = all_distances[outcomes == 1]
        data = [all_distances, success]
        labels = ['All', 'Successful']
        multiHisto(data,
                   labels,
                   numBins=20,
                   title='Distances of Passes for ' + str(t),
                   xlabel='Approximate Distance in yards')
Example #11
0
def clustering_experiment(season, event, NUM_EXPERIMENTS=10, NUM_SAMPLES=10):
    """ Clusters the 2d-histogram features of a season's teams with K-Means.

    Features and their team labels come from get_team_histo2d_features;
    KMeans is fitted once with as many clusters as the season has teams.
    Returns (labels, y, label_dict), where label_dict maps every cluster
    label to the list of entries of y that were assigned to it.
    NUM_EXPERIMENTS and NUM_SAMPLES are accepted but not used here. """
    data_teams = getAllTeamValues(event, [season])
    teams = GameDataPoss.getSeasonTeamIDs(season)

    X, y = get_team_histo2d_features(data_teams, teams)
    labels = KMeans(n_clusters=len(teams)).fit_predict(X)

    # Group the sample owners by the cluster they ended up in.
    label_dict = {}
    for idx, cluster in enumerate(labels):
        label_dict.setdefault(cluster, []).append(y[idx])

    return labels, y, label_dict
Example #12
0
def distances_experiment(season, event, NUM_EXPERIMENTS = 10, NUM_SAMPLES = 10):
  """ Averages the team-to-team feature distances over repeated runs.

  Each of the NUM_EXPERIMENTS runs builds features with
  get_team_histo2d_features, turns them into a distance matrix and reduces
  that to a (num_teams x num_teams) matrix of average distances via
  get_average_distances_between_teams, which is told to expect NUM_SAMPLES
  samples per team (presumably what the feature builder produces -- confirm).
  The mean over all runs is written with saveArrayAsCsv to
  '<season>-average-distances.txt' and returned. """
  random.seed(0)
  data_teams = getAllTeamValues(event, [season])
  teams = GameDataPoss.getSeasonTeamIDs(season)
  num_teams = len(teams)

  totals = np.zeros((num_teams, num_teams))
  for _ in xrange(NUM_EXPERIMENTS):
    features, _owners = get_team_histo2d_features(data_teams, teams)
    pairwise = create_distance_matrix(features)
    totals = totals + get_average_distances_between_teams(pairwise, num_teams, NUM_SAMPLES)

  averages = totals / float(NUM_EXPERIMENTS)
  saveArrayAsCsv(averages, season + '-average-distances.txt', precision = 10)
  return averages
Example #13
0
def clustering_experiment(season, event, NUM_EXPERIMENTS = 10, NUM_SAMPLES = 10):
  """ Clusters the 2d-histogram features of a season's teams with K-Means.

  Features and their team labels come from get_team_histo2d_features; KMeans
  is fitted once with as many clusters as the season has teams. Returns
  (labels, y, label_dict), where label_dict maps every cluster label to the
  list of entries of y that were assigned to it. NUM_EXPERIMENTS and
  NUM_SAMPLES are accepted but not used here. """
  data_teams = getAllTeamValues(event, [season])
  teams = GameDataPoss.getSeasonTeamIDs(season)

  X, y = get_team_histo2d_features(data_teams, teams)
  labels = KMeans(n_clusters = len(teams)).fit_predict(X)

  # Group the sample owners by the cluster they ended up in.
  label_dict = {}
  for idx, cluster in enumerate(labels):
    label_dict.setdefault(cluster, []).append(y[idx])

  return labels, y, label_dict
Example #14
0
def knn_classification(event, seasons):
  """ Randomly resamples with replacement a season's worth of events for each team. Calculates the 2d histogram of each sample to create a feature vector,
  then performs knn classification """ 
  # Get Teams, Data
  print 'Getting data'
  data_teams = getAllTeamValues(event, seasons)
  teams = []
  for s in seasons:
    teams += GameDataPoss.getSeasonTeamIDs(s)
    
  print 'Splitting data'
  X, y = get_team_histo2d_features(data_teams, teams)
  # X,y = get_team_histo_pass_features(data_teams, teams)
  print X.shape
  print y.shape
  X_train, X_test, y_train, y_test = create_test_split(X, y, test_size=0.3)

  # Number of Neighbours
  print 'Classifying'
  x_predict = knn_classify(X_train, X_test, y_train, y_test, k = 5)
  return X, y, x_predict, X_test, y_test, X_train, y_train
Example #15
0
def knn_classification(event, seasons):
    """ Randomly resamples with replacement a season's worth of events for each team. Calculates the 2d histogram of each sample to create a feature vector,
  then performs knn classification """
    # Get Teams, Data
    print 'Getting data'
    data_teams = getAllTeamValues(event, seasons)
    teams = []
    for s in seasons:
        teams += GameDataPoss.getSeasonTeamIDs(s)

    print 'Splitting data'
    X, y = get_team_histo2d_features(data_teams, teams)
    # X,y = get_team_histo_pass_features(data_teams, teams)
    print X.shape
    print y.shape
    X_train, X_test, y_train, y_test = create_test_split(X, y, test_size=0.3)

    # Number of Neighbours
    print 'Classifying'
    x_predict = knn_classify(X_train, X_test, y_train, y_test, k=5)
    return X, y, x_predict, X_test, y_test, X_train, y_train
Example #16
0
def distances_experiment(season, event, NUM_EXPERIMENTS=10, NUM_SAMPLES=10):
    """ Averages the team-to-team feature distances over repeated runs.

    Each of the NUM_EXPERIMENTS runs builds features with
    get_team_histo2d_features, turns them into a distance matrix and reduces
    that to a (num_teams x num_teams) matrix of average distances via
    get_average_distances_between_teams, which is told to expect NUM_SAMPLES
    samples per team (presumably what the feature builder produces --
    confirm). The mean over all runs is written with saveArrayAsCsv to
    '<season>-average-distances.txt' and returned. """
    random.seed(0)
    data_teams = getAllTeamValues(event, [season])
    teams = GameDataPoss.getSeasonTeamIDs(season)
    num_teams = len(teams)

    totals = np.zeros((num_teams, num_teams))
    for _ in xrange(NUM_EXPERIMENTS):
        features, _owners = get_team_histo2d_features(data_teams, teams)
        pairwise = create_distance_matrix(features)
        totals = totals + get_average_distances_between_teams(
            pairwise, num_teams, NUM_SAMPLES)

    averages = totals / float(NUM_EXPERIMENTS)
    saveArrayAsCsv(averages, season + '-average-distances.txt', precision=10)
    return averages
Example #17
0
def getGameValues(event, game, season):
    """ Wraps the `event` entries of one game in a Values object.

    The game is loaded with GameDataPoss.getGameAsSplitValues and indexed by
    `event`; the passes / shot flags are set from the event name. """
    split_values = GameDataPoss.getGameAsSplitValues(season, game)
    event_values = split_values[event]
    is_pass = event == 'pass'
    is_shot = event == 'shot'
    return Values(event_values, passes=is_pass, shot=is_shot)
Example #18
0
def getTeamValues(event, team, season):
    """ Wraps a team's `event` entries for `season` in a Values object.

    Data comes from GameDataPoss.readTeamEventSplitValues; the passes / shot
    flags are set from the event name. """
    is_pass = event == 'pass'
    is_shot = event == 'shot'
    return Values(
        GameDataPoss.readTeamEventSplitValues(season, team, event),
        passes=is_pass,
        shot=is_shot)
Example #19
0
def knn_classification_experiment(season, NUM_EXPERIMENTS = 200):
  np.random.seed(0)

  data_teams = getAllTeamValues('pass', [season])
  teams = GameDataPoss.getSeasonTeamIDs(season)

  num_passes = {}
  total_passes = 0
  for t in teams:
    team_values = data_teams[t]
    team_values = team_values.getSlicedValues(['x', 'y'])
    num_passes[t] = team_values.shape[0]
    team_values = team_values[np.where(team_values[:,0] >= 38.33333)]
    team_values = team_values[np.where(team_values[:,0] <= 76.66667)]
    num_passes[t] = (num_passes[t] - team_values.shape[0]) / float(num_passes[t])
    # total_passes += team_values.shape[0]
  print num_passes
  return num_passes
  

  accuracy = []
  confusion = []
  precision = []
  recall = []
  k = []

  tuned_params = [{'n_neighbors': [2, 3, 4, 5, 6, 7]}]
  # tuned_params = [{'n_neighbors': [1, 2, 3]}]

  for i in range(10):

    X, y = get_team_histo2d_features(data_teams, teams)

    for i in range(NUM_EXPERIMENTS):
      # print '-------------Experiment #', i, '-----------------'
      X_train, X_test, y_train, y_test = create_test_split(X, y, test_size=0.3)

      clf = GridSearchCV(KNeighborsClassifier(weights='distance'), param_grid = tuned_params, cv = cross_validation.StratifiedKFold(y_train))
      # clf = GridSearchCV(KNeighborsClassifier(), param_grid = tuned_params)
      clf.fit(X_train, y_train)

      y_true, y_pred = y_test, clf.predict(X_test)
      acc = clf.score(X_test, y_test)
      # print(classification_report(y_true, y_pred))
      # print acc
      # print clf.grid_scores_
      # print clf.best_params_
      # print confusion_matrix(y_true, y_pred)
      p = precision_score(y_true, y_pred, average=None)
      r = recall_score(y_true, y_pred, average=None)
      precision.append(p)
      recall.append(r)
      k.append(clf.best_params_['n_neighbors'])


      accuracy.append(acc)
      confusion.append(confusion_matrix(y_true, y_pred))

  c = confusion[0]
  for c_matrix in confusion[1:]:
    c = c + c_matrix
  c = c / float(NUM_EXPERIMENTS * 10)


  # saveArrayAsCsv(c, season + '_midfield_confusion_matrix.txt')

  return np.array(accuracy), c, np.array(precision), np.array(recall), np.array(k)
Example #20
0
def getTeamValues(event, team, season):
  """ Wraps a team's `event` entries for `season` in a Values object.

  Data comes from GameDataPoss.readTeamEventSplitValues; the passes / shot
  flags are set from the event name. """
  is_pass = event == 'pass'
  is_shot = event == 'shot'
  return Values(GameDataPoss.readTeamEventSplitValues(season, team, event), passes = is_pass, shot = is_shot)
Example #21
0
def getGameValues(event, game, season):
  """ Wraps the `event` entries of one game in a Values object.

  The game is loaded with GameDataPoss.getGameAsSplitValues and indexed by
  `event`; the passes / shot flags are set from the event name. """
  split_values = GameDataPoss.getGameAsSplitValues(season, game)
  event_values = split_values[event]
  is_pass = event == 'pass'
  is_shot = event == 'shot'
  return Values(event_values, passes = is_pass, shot = is_shot)
Example #22
0
def knn_classification_experiment(season, NUM_EXPERIMENTS=200):
    np.random.seed(0)

    data_teams = getAllTeamValues('pass', [season])
    teams = GameDataPoss.getSeasonTeamIDs(season)

    num_passes = {}
    total_passes = 0
    for t in teams:
        team_values = data_teams[t]
        team_values = team_values.getSlicedValues(['x', 'y'])
        num_passes[t] = team_values.shape[0]
        team_values = team_values[np.where(team_values[:, 0] >= 38.33333)]
        team_values = team_values[np.where(team_values[:, 0] <= 76.66667)]
        num_passes[t] = (num_passes[t] - team_values.shape[0]) / float(
            num_passes[t])
        # total_passes += team_values.shape[0]
    print num_passes
    return num_passes

    accuracy = []
    confusion = []
    precision = []
    recall = []
    k = []

    tuned_params = [{'n_neighbors': [2, 3, 4, 5, 6, 7]}]
    # tuned_params = [{'n_neighbors': [1, 2, 3]}]

    for i in range(10):

        X, y = get_team_histo2d_features(data_teams, teams)

        for i in range(NUM_EXPERIMENTS):
            # print '-------------Experiment #', i, '-----------------'
            X_train, X_test, y_train, y_test = create_test_split(X,
                                                                 y,
                                                                 test_size=0.3)

            clf = GridSearchCV(KNeighborsClassifier(weights='distance'),
                               param_grid=tuned_params,
                               cv=cross_validation.StratifiedKFold(y_train))
            # clf = GridSearchCV(KNeighborsClassifier(), param_grid = tuned_params)
            clf.fit(X_train, y_train)

            y_true, y_pred = y_test, clf.predict(X_test)
            acc = clf.score(X_test, y_test)
            # print(classification_report(y_true, y_pred))
            # print acc
            # print clf.grid_scores_
            # print clf.best_params_
            # print confusion_matrix(y_true, y_pred)
            p = precision_score(y_true, y_pred, average=None)
            r = recall_score(y_true, y_pred, average=None)
            precision.append(p)
            recall.append(r)
            k.append(clf.best_params_['n_neighbors'])

            accuracy.append(acc)
            confusion.append(confusion_matrix(y_true, y_pred))

    c = confusion[0]
    for c_matrix in confusion[1:]:
        c = c + c_matrix
    c = c / float(NUM_EXPERIMENTS * 10)

    # saveArrayAsCsv(c, season + '_midfield_confusion_matrix.txt')

    return np.array(accuracy), c, np.array(precision), np.array(
        recall), np.array(k)