Python GameDataPoss Examples, GameDataPoss Python Examples

Example #1

0

Show file

File: Clustering.py Project: mattyhk/soccer-meng

def getSeasonsValues(event, seasons):
  """Load one event type for one or several seasons and wrap it in Values.

  Args:
    event: Event name forwarded to GameDataPoss.readSeasonEventComparables.
      'pass' and 'shot' additionally switch on the matching Values flag.
    seasons: Either a single season name (a string) or an iterable of
      season names.

  Returns:
    A Values object built from the loaded data. With several seasons the
    per-season results are joined by np.concatenate (default axis); with an
    empty iterable Values receives an empty list, as before.
  """
  if isinstance(seasons, basestring):
    v = GameDataPoss.readSeasonEventComparables(seasons, event)
  else:
    per_season = [GameDataPoss.readSeasonEventComparables(s, event)
                  for s in seasons]
    if not per_season:
      v = []
    elif len(per_season) == 1:
      # A single season is handed over untouched, exactly as before.
      v = per_season[0]
    else:
      # np.concatenate takes ONE sequence of arrays. The original passed the
      # second array as a separate positional argument, which numpy reads as
      # the axis and rejects.
      v = np.concatenate(per_season)
  return Values(v, passes = event == 'pass', shot = event == 'shot')

Example #2

0

Show file

File: Clustering.py Project: mattyhk/soccer-meng

def getSeasonsValues(event, seasons):
    """Return a Values object holding `event` data for the given season(s).

    Args:
        event: Event name passed to GameDataPoss.readSeasonEventComparables.
            The Values flags `passes` / `shot` are set when it equals
            'pass' / 'shot'.
        seasons: A single season name (string) or an iterable of names.

    Returns:
        Values wrapping the loaded data. Several seasons are appended one
        after another with np.concatenate (default axis); an empty iterable
        yields Values built from an empty list.
    """
    if isinstance(seasons, basestring):
        v = GameDataPoss.readSeasonEventComparables(seasons, event)
    else:
        v = []
        for index, s in enumerate(seasons):
            loaded = GameDataPoss.readSeasonEventComparables(s, event)
            if index == 0:
                v = loaded
            else:
                # The arrays must be passed as one tuple: a second
                # positional argument is interpreted by numpy as `axis`,
                # which made the original call fail.
                v = np.concatenate((v, loaded))
    return Values(v, passes=event == 'pass', shot=event == 'shot')

Example #3

0

Show file

File: Clustering.py Project: mattyhk/soccer-meng

def getAllTeamValues(event, seasons):
  """Collect the Values of one event type for every team of the seasons.

  Team ids are looked up per season through GameDataPoss.getSeasonTeamIDs
  and loaded with getTeamValues. The result is keyed by team id only, so an
  id that occurs in several seasons keeps the entry of the last one.
  """
  return {
      team_id: getTeamValues(event, team_id, season_name)
      for season_name in seasons
      for team_id in GameDataPoss.getSeasonTeamIDs(season_name)
  }

Example #4

0

Show file

File: Clustering.py Project: mattyhk/soccer-meng

def getAllTeamValues(event, seasons):
    """Map each team id of the given seasons to its Values for `event`.

    Seasons are processed in order; a team id seen again in a later season
    replaces the entry stored for the earlier one.
    """
    values_by_team = {}
    for season_name in seasons:
        values_by_team.update(
            (team_id, getTeamValues(event, team_id, season_name))
            for team_id in GameDataPoss.getSeasonTeamIDs(season_name))
    return values_by_team

Example #5

0

Show file

File: Clustering.py Project: mattyhk/soccer-meng

def plot_histo2d_all_teams(event, season, goals = False, outcome = False):
  print 'plotting hist2d for teams'
  teams = GameDataPoss.getSeasonTeamIDs(season)
  for t in teams:
    print 'Getting team', t
    v = getTeamValues(event, t, season)
    data = (v.getValues('x'), v.getValues('y'))
    fname = t + '_' + event + '-hist2d.png'
    histo2d(data, ranged = False, title = event.capitalize() + ' frequencies for team ' + t, numBins = get_bins(), save = True, filename = fname)

Example #6

0

Show file

File: Clustering.py Project: mattyhk/soccer-meng

def plot_histo_passes_all_teams(season, save = False):
  print 'plotting hist for all teams'
  teams = GameDataPoss.getSeasonTeamIDs(season)
  for t in teams:
    print 'Team', t
    v = getTeamValues('pass', t, season)
    all_distances = v.getValues('distance')
    outcomes = v.getValues('outcome')
    success = all_distances[outcomes == 1]
    data = [all_distances, success]
    labels = ['All', 'Successful']
    multiHisto(data, labels, numBins = 20, title = 'Distances of Passes for ' + str(t), xlabel = 'Approximate Distance in yards')

Example #7

0

Show file

File: Graphing.py Project: mattyhk/soccer-meng

def draw_season_graph(season='liga12'):
    """Draw the team graph built from a season's saved average distances.

    Reads DATA_PATH + '<season>-average-distances.txt' as a comma-separated
    float matrix, builds the coloured-edge graph over the season's team ids,
    positions it with get_position and draws it.

    Returns:
        The graph object produced by coloured_edge_graph.
    """
    matrix_path = DATA_PATH + season + '-average-distances.txt'
    distances = np.loadtxt(matrix_path, delimiter=',', dtype='float')
    team_ids = GameDataPoss.getSeasonTeamIDs(season)
    team_lookup = get_team_dict(season)

    # Alternative construction left by the author:
    #   create_distance_graph(team_ids, distances)
    graph = coloured_edge_graph(team_ids, distances, team_lookup)

    layout = get_position(graph, team_lookup)
    draw_colored_graph(graph, layout)
    return graph

Example #8

0

Show file

File: Clustering.py Project: mattyhk/soccer-meng

def plot_histo2d_all_teams(event, season, goals=False, outcome=False):
    print 'plotting hist2d for teams'
    teams = GameDataPoss.getSeasonTeamIDs(season)
    for t in teams:
        print 'Getting team', t
        v = getTeamValues(event, t, season)
        data = (v.getValues('x'), v.getValues('y'))
        fname = t + '_' + event + '-hist2d.png'
        histo2d(data,
                ranged=False,
                title=event.capitalize() + ' frequencies for team ' + t,
                numBins=get_bins(),
                save=True,
                filename=fname)

Example #9

0

Show file

File: Graphing.py Project: mattyhk/soccer-meng

def draw_season_graph(season='liga12'):
  """Load a season's average-distance matrix and draw it as a team graph.

  The matrix is read from DATA_PATH + '<season>-average-distances.txt'
  (comma-separated floats). The graph is built by coloured_edge_graph,
  laid out by get_position and rendered by draw_colored_graph.

  Returns the graph that was drawn.
  """
  distances = np.loadtxt(
    DATA_PATH + season + '-average-distances.txt',
    delimiter=',',
    dtype='float'
  )
  teams = GameDataPoss.getSeasonTeamIDs(season)
  names_by_team = get_team_dict(season)

  # The author also kept create_distance_graph(teams, distances) around as
  # an alternative way to build the graph.
  season_graph = coloured_edge_graph(teams, distances, names_by_team)
  draw_colored_graph(season_graph, get_position(season_graph, names_by_team))
  return season_graph

Example #10

0

Show file

File: Clustering.py Project: mattyhk/soccer-meng

def plot_histo_passes_all_teams(season, save=False):
    print 'plotting hist for all teams'
    teams = GameDataPoss.getSeasonTeamIDs(season)
    for t in teams:
        print 'Team', t
        v = getTeamValues('pass', t, season)
        all_distances = v.getValues('distance')
        outcomes = v.getValues('outcome')
        success = all_distances[outcomes == 1]
        data = [all_distances, success]
        labels = ['All', 'Successful']
        multiHisto(data,
                   labels,
                   numBins=20,
                   title='Distances of Passes for ' + str(t),
                   xlabel='Approximate Distance in yards')

Example #11

0

Show file

File: Clustering.py Project: mattyhk/soccer-meng

def clustering_experiment(season, event, NUM_EXPERIMENTS=10, NUM_SAMPLES=10):
    """Cluster the per-team 2d-histogram features of a season with K-Means.

    The feature matrix X and the per-sample targets y come from
    get_team_histo2d_features; K-Means uses one cluster per team id of the
    season. NUM_EXPERIMENTS and NUM_SAMPLES are accepted but not used in
    this body.

    Returns:
        (labels, y, label_dict), where label_dict maps each cluster label to
        the list of y entries that were assigned to it.
    """
    data_teams = getAllTeamValues(event, [season])
    teams = GameDataPoss.getSeasonTeamIDs(season)
    cluster_count = len(teams)

    X, y = get_team_histo2d_features(data_teams, teams)
    labels = KMeans(n_clusters=cluster_count).fit_predict(X)

    # Group the targets by the cluster each sample landed in.
    label_dict = {}
    for idx, cluster in enumerate(labels):
        label_dict.setdefault(cluster, []).append(y[idx])

    return labels, y, label_dict

Example #12

0

Show file

File: Clustering.py Project: mattyhk/soccer-meng

def distances_experiment(season, event, NUM_EXPERIMENTS = 10, NUM_SAMPLES = 10):
  """Average the team-to-team feature distances over repeated runs.

  After seeding `random` with 0, each of the NUM_EXPERIMENTS runs builds the
  team features, turns them into a distance matrix and reduces that matrix
  to one average distance per pair of teams (NUM_SAMPLES is forwarded to
  that reduction). The per-run results are summed, divided by
  NUM_EXPERIMENTS, written through saveArrayAsCsv to
  '<season>-average-distances.txt' with precision 10, and returned.
  """
  random.seed(0)
  data_teams = getAllTeamValues(event, [season])
  teams = GameDataPoss.getSeasonTeamIDs(season)
  num_teams = len(teams)

  running_total = np.zeros((num_teams, num_teams))
  for _run in xrange(NUM_EXPERIMENTS):
    X, _targets = get_team_histo2d_features(data_teams, teams)
    pair_means = get_average_distances_between_teams(
      create_distance_matrix(X), num_teams, NUM_SAMPLES)
    running_total = running_total + pair_means

  averages = running_total / float(NUM_EXPERIMENTS)
  saveArrayAsCsv(averages, season + '-average-distances.txt', precision = 10)
  return averages

Example #13

0

Show file

File: Clustering.py Project: mattyhk/soccer-meng

def clustering_experiment(season, event, NUM_EXPERIMENTS = 10, NUM_SAMPLES = 10):
  """Run K-Means over the team features and group the targets by cluster.

  Features (X) and targets (y) are produced by get_team_histo2d_features for
  the teams of `season`; the number of clusters equals the number of team
  ids. NUM_EXPERIMENTS and NUM_SAMPLES are not used in this body.

  Returns a tuple (labels, y, label_dict); label_dict lists, per cluster
  label, the y entries of the samples placed in that cluster.
  """
  data_teams = getAllTeamValues(event, [season])
  teams = GameDataPoss.getSeasonTeamIDs(season)
  num_teams = len(teams)

  X, y = get_team_histo2d_features(data_teams, teams)
  estimator = KMeans(n_clusters = num_teams)
  labels = estimator.fit_predict(X)

  label_dict = {}
  for position in xrange(len(labels)):
    cluster = labels[position]
    members = label_dict.get(cluster)
    if members is None:
      # First sample seen for this cluster.
      label_dict[cluster] = [y[position]]
    else:
      members.append(y[position])

  return labels, y, label_dict

Example #14

0

Show file

File: Clustering.py Project: mattyhk/soccer-meng

def knn_classification(event, seasons):
  """Classify team feature vectors with k-nearest neighbours (k = 5).

  Loads the event values of every team in `seasons`, builds features with
  get_team_histo2d_features (per the author's note: 2d histograms of events
  resampled with replacement -- that happens inside the helper), holds out
  30% of the samples through create_test_split and runs knn_classify.

  Returns:
    (X, y, x_predict, X_test, y_test, X_train, y_train)
  """
  print('Getting data')
  data_teams = getAllTeamValues(event, seasons)
  teams = []
  for season_name in seasons:
    teams.extend(GameDataPoss.getSeasonTeamIDs(season_name))

  print('Splitting data')
  X, y = get_team_histo2d_features(data_teams, teams)
  print(X.shape)
  print(y.shape)
  X_train, X_test, y_train, y_test = create_test_split(X, y, test_size=0.3)

  print('Classifying')
  x_predict = knn_classify(X_train, X_test, y_train, y_test, k = 5)
  return X, y, x_predict, X_test, y_test, X_train, y_train

Example #15

0

Show file

File: Clustering.py Project: mattyhk/soccer-meng

def knn_classification(event, seasons):
    """Build team features for `seasons` and classify them with 5-NN.

    The team ids of all seasons are gathered in season order, features and
    targets come from get_team_histo2d_features, and create_test_split keeps
    30% of the samples for testing before knn_classify is run.

    Returns:
        (X, y, x_predict, X_test, y_test, X_train, y_train)
    """
    print('Getting data')
    data_teams = getAllTeamValues(event, seasons)
    teams = [team
             for season_name in seasons
             for team in GameDataPoss.getSeasonTeamIDs(season_name)]

    print('Splitting data')
    X, y = get_team_histo2d_features(data_teams, teams)
    print(X.shape)
    print(y.shape)
    split = create_test_split(X, y, test_size=0.3)
    X_train, X_test, y_train, y_test = split

    print('Classifying')
    x_predict = knn_classify(X_train, X_test, y_train, y_test, k=5)
    return X, y, x_predict, X_test, y_test, X_train, y_train

Example #16

0

Show file

File: Clustering.py Project: mattyhk/soccer-meng

def distances_experiment(season, event, NUM_EXPERIMENTS=10, NUM_SAMPLES=10):
    """Compute and store the mean pairwise team distances for a season.

    `random` is seeded with 0 first. Every run rebuilds the team features,
    derives their distance matrix and condenses it into average distances
    between teams (NUM_SAMPLES is passed to that step). The mean over
    NUM_EXPERIMENTS runs is saved with saveArrayAsCsv as
    '<season>-average-distances.txt' (precision 10) and returned.
    """
    random.seed(0)
    data_teams = getAllTeamValues(event, [season])
    teams = GameDataPoss.getSeasonTeamIDs(season)
    num_teams = len(teams)

    accumulated = np.zeros((num_teams, num_teams))
    runs_done = 0
    while runs_done < NUM_EXPERIMENTS:
        X, _targets = get_team_histo2d_features(data_teams, teams)
        dist_mat = create_distance_matrix(X)
        accumulated = accumulated + get_average_distances_between_teams(
            dist_mat, num_teams, NUM_SAMPLES)
        runs_done += 1

    averages = accumulated / float(NUM_EXPERIMENTS)
    out_name = season + '-average-distances.txt'
    saveArrayAsCsv(averages, out_name, precision=10)
    return averages

Example #17

0

Show file

File: Clustering.py Project: mattyhk/soccer-meng

def getGameValues(event, game, season):
    """Wrap one game's entries for `event` in a Values object.

    The game is loaded with GameDataPoss.getGameAsSplitValues and indexed by
    the event name; the passes / shot flags are set for 'pass' / 'shot'.
    """
    split_values = GameDataPoss.getGameAsSplitValues(season, game)
    selected = split_values[event]
    is_pass = event == 'pass'
    is_shot = event == 'shot'
    return Values(selected, passes=is_pass, shot=is_shot)

Example #18

0

Show file

File: Clustering.py Project: mattyhk/soccer-meng

def getTeamValues(event, team, season):
    """Wrap a team's `event` entries for one season in a Values object.

    Data comes from GameDataPoss.readTeamEventSplitValues; the passes / shot
    flags are set when the event is 'pass' / 'shot'.
    """
    flags = {'passes': event == 'pass', 'shot': event == 'shot'}
    return Values(
        GameDataPoss.readTeamEventSplitValues(season, team, event), **flags)

Example #19

0

Show file

File: Clustering.py Project: mattyhk/soccer-meng

def knn_classification_experiment(season, NUM_EXPERIMENTS = 200):
  """Report, per team, the share of passes whose x lies outside the mid band.

  The original body returned `num_passes` unconditionally, so the k-NN grid
  search written below that return could never run. The return value is
  unchanged; the experiment now lives in `_knn_grid_search_experiment`, where
  it can be called on purpose.

  Args:
    season: Season name used to load the pass data and the team ids.
    NUM_EXPERIMENTS: Not used by the statistics; kept so existing callers
      keep working.

  Returns:
    dict mapping each team id to the fraction (float) of its passes whose
    first sliced column (x) is below 38.33333 or above 76.66667.

  Raises:
    ZeroDivisionError: if a team has no passes at all.
  """
  np.random.seed(0)

  data_teams = getAllTeamValues('pass', [season])
  teams = GameDataPoss.getSeasonTeamIDs(season)

  num_passes = {}
  for t in teams:
    team_values = data_teams[t].getSlicedValues(['x', 'y'])
    total = team_values.shape[0]
    # Keep only the rows whose x (column 0) falls inside the band ...
    team_values = team_values[np.where(team_values[:,0] >= 38.33333)]
    team_values = team_values[np.where(team_values[:,0] <= 76.66667)]
    # ... so that the remainder is the share outside of it.
    num_passes[t] = (total - team_values.shape[0]) / float(total)
  print(num_passes)
  return num_passes


def _knn_grid_search_experiment(data_teams, teams, NUM_EXPERIMENTS = 200, NUM_RESAMPLES = 10):
  """Run the k-NN grid search that used to sit behind the early return.

  For each of NUM_RESAMPLES feature sets, NUM_EXPERIMENTS train/test splits
  (30% test) are drawn; n_neighbors is tuned over 2..7 for a
  distance-weighted KNeighborsClassifier and the test metrics are recorded.
  The caller is responsible for seeding.

  Returns:
    (accuracy, mean confusion matrix, precision, recall, chosen k), with the
    per-run lists converted to numpy arrays.
  """
  accuracy = []
  confusion = []
  precision = []
  recall = []
  k = []

  tuned_params = [{'n_neighbors': [2, 3, 4, 5, 6, 7]}]

  # Distinct loop names: the original reused `i` for both loops.
  for _resample in range(NUM_RESAMPLES):
    X, y = get_team_histo2d_features(data_teams, teams)

    for _trial in range(NUM_EXPERIMENTS):
      X_train, X_test, y_train, y_test = create_test_split(X, y, test_size=0.3)

      clf = GridSearchCV(KNeighborsClassifier(weights='distance'), param_grid = tuned_params, cv = cross_validation.StratifiedKFold(y_train))
      clf.fit(X_train, y_train)

      y_true, y_pred = y_test, clf.predict(X_test)
      acc = clf.score(X_test, y_test)
      precision.append(precision_score(y_true, y_pred, average=None))
      recall.append(recall_score(y_true, y_pred, average=None))
      k.append(clf.best_params_['n_neighbors'])
      accuracy.append(acc)
      confusion.append(confusion_matrix(y_true, y_pred))

  # Element-wise mean of all confusion matrices.
  c = confusion[0]
  for c_matrix in confusion[1:]:
    c = c + c_matrix
  c = c / float(NUM_EXPERIMENTS * NUM_RESAMPLES)

  return np.array(accuracy), c, np.array(precision), np.array(recall), np.array(k)

Example #20

0

Show file

File: Clustering.py Project: mattyhk/soccer-meng

def getTeamValues(event, team, season):
  """Return the Values of one event type for a team in a season.

  The raw entries are read with GameDataPoss.readTeamEventSplitValues; the
  passes and shot flags of Values mark whether the event is 'pass' or 'shot'.
  """
  team_events = GameDataPoss.readTeamEventSplitValues(season, team, event)
  is_pass = event == 'pass'
  is_shot = event == 'shot'
  return Values(team_events, passes = is_pass, shot = is_shot)

Example #21

0

Show file

File: Clustering.py Project: mattyhk/soccer-meng

def getGameValues(event, game, season):
  """Return the Values of one event type for a single game.

  GameDataPoss.getGameAsSplitValues supplies the game's data, which is then
  indexed by the event name; 'pass' and 'shot' set the matching Values flag.
  """
  by_event = GameDataPoss.getGameAsSplitValues(season, game)
  return Values(
    by_event[event],
    passes = (event == 'pass'),
    shot = (event == 'shot')
  )

Example #22

0

Show file

File: Clustering.py Project: mattyhk/soccer-meng

def knn_classification_experiment(season, NUM_EXPERIMENTS=200):
    """Return, per team, the fraction of passes outside the middle x band.

    The original returned `num_passes` before reaching its k-NN grid search,
    leaving that code unreachable. The returned value is the same as before;
    the experiment has been moved into `_knn_grid_search_experiment` so it
    is callable rather than dead.

    Args:
        season: Season name used to load pass data and team ids.
        NUM_EXPERIMENTS: Unused here; retained for interface compatibility.

    Returns:
        dict of team id -> float share of that team's passes whose first
        sliced column (x) is below 38.33333 or above 76.66667.

    Raises:
        ZeroDivisionError: if a team has no passes at all.
    """
    np.random.seed(0)

    data_teams = getAllTeamValues('pass', [season])
    teams = GameDataPoss.getSeasonTeamIDs(season)

    num_passes = {}
    for t in teams:
        team_values = data_teams[t].getSlicedValues(['x', 'y'])
        total = team_values.shape[0]
        # Two-step filter on column 0 (x): rows inside the band remain.
        team_values = team_values[np.where(team_values[:, 0] >= 38.33333)]
        team_values = team_values[np.where(team_values[:, 0] <= 76.66667)]
        outside = total - team_values.shape[0]
        num_passes[t] = outside / float(total)
    print(num_passes)
    return num_passes


def _knn_grid_search_experiment(data_teams,
                                teams,
                                NUM_EXPERIMENTS=200,
                                NUM_RESAMPLES=10):
    """Run the k-NN grid search that was unreachable in the original.

    Draws NUM_RESAMPLES feature sets; for each one, performs NUM_EXPERIMENTS
    train/test splits (30% test), tunes n_neighbors over 2..7 for a
    distance-weighted KNeighborsClassifier, and records the test metrics.
    Seeding is left to the caller.

    Returns:
        (accuracy, mean confusion matrix, precision, recall, chosen k); the
        per-run lists are converted to numpy arrays.
    """
    accuracy = []
    confusion = []
    precision = []
    recall = []
    k = []

    tuned_params = [{'n_neighbors': [2, 3, 4, 5, 6, 7]}]

    # The original used `i` for both loops; the names are now distinct.
    for _resample in range(NUM_RESAMPLES):
        X, y = get_team_histo2d_features(data_teams, teams)

        for _trial in range(NUM_EXPERIMENTS):
            X_train, X_test, y_train, y_test = create_test_split(
                X, y, test_size=0.3)

            clf = GridSearchCV(KNeighborsClassifier(weights='distance'),
                               param_grid=tuned_params,
                               cv=cross_validation.StratifiedKFold(y_train))
            clf.fit(X_train, y_train)

            y_true, y_pred = y_test, clf.predict(X_test)
            acc = clf.score(X_test, y_test)
            precision.append(precision_score(y_true, y_pred, average=None))
            recall.append(recall_score(y_true, y_pred, average=None))
            k.append(clf.best_params_['n_neighbors'])
            accuracy.append(acc)
            confusion.append(confusion_matrix(y_true, y_pred))

    # Element-wise mean over every recorded confusion matrix.
    c = confusion[0]
    for c_matrix in confusion[1:]:
        c = c + c_matrix
    c = c / float(NUM_EXPERIMENTS * NUM_RESAMPLES)

    return np.array(accuracy), c, np.array(precision), np.array(
        recall), np.array(k)