Esempio n. 1
0
def calc_features(regular_season_data, season, team1, team2, tourney_seeds=None, wp_data=None):
    """Build the feature vector for a team1-vs-team2 matchup in one season.

    Every game returned by ``get_relevant_data`` that involves team1 or team2
    contributes one row of "effective" stats to each side: the side the team
    of interest played on is credited to that team, and the opposing side is
    credited to the other team (even when the opponent is a third team).

    Parameters
    ----------
    regular_season_data
        Game table forwarded unchanged to ``get_relevant_data``.
    season
        Season identifier; also compared against column 0 of ``tourney_seeds``.
    team1, team2
        Team identifiers, compared against record columns 2 (winner) and
        4 (loser).
    tourney_seeds : array-like, optional
        2-D array whose columns are (season, seed string, team). When given,
        the two numeric seeds are appended to the features; a team without
        exactly one seed entry for the season is assigned seed 17.
    wp_data
        Currently unused; kept for interface compatibility.

    Returns
    -------
    numpy.ndarray
        1-D array laid out as: 14 differences of averaged effective stats
        (team1 minus team2), the Gaussian probability that team1 outscores
        team2, team1's winning percentage, team2's winning percentage and,
        if ``tourney_seeds`` was supplied, the seeds of team1 and team2.

    Raises
    ------
    IndexError
        If no game involving either team is found (the stats arrays are empty).
    ZeroDivisionError
        If one of the teams has no recorded game at all.
    """
    relevant_data = get_relevant_data(regular_season_data, season=season, team1=team1, team2=team2)
    # column 3 / 5 hold the winner's / loser's score, the following 13-column
    # ranges hold the remaining per-team stats in matching order
    columns_with_winning_team_data = np.concatenate((np.array([3]), np.arange(8, 21)))
    columns_with_losing_team_data = np.concatenate((np.array([5]), np.arange(21, 34)))

    effective_team1_stats = []
    effective_team2_stats = []
    team1_wins = 0
    team1_loses = 0
    team2_wins = 0
    team2_loses = 0

    for record in relevant_data:
        winner, loser = record[2], record[4]
        winning_stats = record[columns_with_winning_team_data]
        losing_stats = record[columns_with_losing_team_data]

        # Tally wins and losses for each team independently, so that a
        # head-to-head game counts for BOTH teams. (Sharing the elif chain
        # below would credit such a game to team1 only and make team2's
        # winning percentage depend on the argument order.)
        if winner == team1:
            team1_wins += 1.0
        elif loser == team1:
            team1_loses += 1.0
        if winner == team2:
            team2_wins += 1.0
        elif loser == team2:
            team2_loses += 1.0

        # Each game contributes exactly one row per side.
        if winner == team1:  # team1 was the winner
            team1_side, team2_side = winning_stats, losing_stats
        elif loser == team1:  # team1 was the loser
            team1_side, team2_side = losing_stats, winning_stats
        elif winner == team2:  # team2 was the winner
            team1_side, team2_side = losing_stats, winning_stats
        elif loser == team2:  # team2 was the loser
            team1_side, team2_side = winning_stats, losing_stats
        else:
            # game involves neither team: ignore it
            continue
        effective_team1_stats.append(team1_side)
        effective_team2_stats.append(team2_side)

    effective_team1_stats = np.array(effective_team1_stats).astype(float)
    effective_team2_stats = np.array(effective_team2_stats).astype(float)

    # features are the difference of the averaged effective stats
    features = np.average(effective_team1_stats, axis=0) - np.average(effective_team2_stats, axis=0)

    # add probability of team1 outscoring team2, assuming gaussian stats;
    # scores are in column 0 of the effective stats
    team1_scores = effective_team1_stats[:, 0]
    team2_scores = effective_team2_stats[:, 0]
    mean = np.mean(team1_scores) - np.mean(team2_scores)
    covariance_matrix = np.cov(team1_scores, team2_scores)
    # Var(X - Y) = Var(X) + Var(Y) - 2 Cov(X, Y)
    variance = covariance_matrix[0, 0] + covariance_matrix[1, 1] - 2.0*covariance_matrix[1, 0]
    # P(X - Y > 0) for a normal variable with the mean/variance above
    prob_team1_outscores = 0.5*(sps.erfc(-mean/np.sqrt(2.0*variance)))
    features = np.concatenate((features, np.array([prob_team1_outscores])))

    # add team winning percentages to the feature list
    # should change this so that home win and away wins are weighted differently
    features = np.concatenate(
        (
            features,
            np.array([team1_wins/(team1_loses+team1_wins),
                      team2_wins/(team2_loses+team2_wins)])
        )
    )

    # add team seeds to feature list
    if tourney_seeds is not None:
        keep_seeds = np.where(tourney_seeds[:, 0] == season)[0]
        tourney_seeds = tourney_seeds[keep_seeds]
        # get team1's seed, if it exists
        seed1 = tourney_seeds[np.where(tourney_seeds[:, 2] == team1)[0], 1]
        # get team2's seed, if it exists
        seed2 = tourney_seeds[np.where(tourney_seeds[:, 2] == team2)[0], 1]

        # if unseeded, set seed to C17 (the leading "C" is only a placeholder
        # for the conference letter and is stripped just below)
        seed1 = (seed1[0] if len(seed1) == 1 else 'C17')
        seed2 = (seed2[0] if len(seed2) == 1 else 'C17')

        # take out conference and convert to int
        seed1 = int(seed1[1:3])
        seed2 = int(seed2[1:3])

        features = np.concatenate(
            (
                features,
                np.array([seed1, seed2])
            )
        )

    return features
Esempio n. 2
0
def calc_new_features(regular_season_data, season, team1, team2, tuning_params):
    """Compute model-based matchup features for team1 vs team2 in one season.

    Per-game model parameters (from ``model_params_from_game_data``) are
    assigned to an "effective" team1 side and team2 side, then blended with
    Gaussian weights that favour games whose two participants have ERPI
    values close to those of team1 and team2.

    Parameters
    ----------
    regular_season_data
        Game table forwarded unchanged to ``get_relevant_data``.
    season
        Season identifier used for data selection and ERPI lookups.
    team1, team2
        Team identifiers, compared against record columns 2 (winner) and
        4 (loser).
    tuning_params : mapping
        Must provide ``'std'``, the width of the Gaussian ERPI kernel.

    Returns
    -------
    list
        ``[feature, feature_erpi_ratio, mean_score1, mean_score2]`` where
        ``feature`` combines the regulation and overtime scoring
        distributions, ``feature_erpi_ratio`` is team1's ERPI divided by
        team2's, and the mean scores are the expectations of the two
        regulation distributions.

    Notes
    -----
    ``scoring_distribution`` is assumed to return a probability mass function
    indexed by points scored -- TODO confirm against its definition. Under
    that assumption ``feature`` is the probability that team1 beats team2,
    with ties resolved by repeated overtime periods.
    """
    relevant_data = get_relevant_data(regular_season_data, season=season, team1=team1, team2=team2)
    model_params = model_params_from_game_data(relevant_data)
    # per-game parameter layout: columns 0-4 winner, columns 5-9 loser
    columns_with_winning_team_data = np.arange(0, 5)
    columns_with_losing_team_data = np.arange(5, 10)

    team1_erpi = RelativePowerIndexLookupTable.lookup(season, team1, 'erpi')
    team2_erpi = RelativePowerIndexLookupTable.lookup(season, team2, 'erpi')

    effective_team1_stats = []
    effective_team2_stats = []
    effective_team1_erpi = []
    effective_team2_erpi = []

    for i, record in enumerate(relevant_data):
        winner, loser = record[2], record[4]
        winning_team_erpi = RelativePowerIndexLookupTable.lookup(season, winner, 'erpi')
        losing_team_erpi = RelativePowerIndexLookupTable.lookup(season, loser, 'erpi')

        winning_stats = model_params[i, columns_with_winning_team_data]
        losing_stats = model_params[i, columns_with_losing_team_data]

        # Decide which side of this game stands in for team1.
        if winner == team1:  # team1 was the winner
            team1_is_winning_side = True
        elif loser == team1:  # team1 was the loser
            team1_is_winning_side = False
        elif winner == team2:  # team2 was the winner
            team1_is_winning_side = False
        elif loser == team2:  # team2 was the loser
            team1_is_winning_side = True
        else:
            # game involves neither team: ignore it
            continue

        if team1_is_winning_side:
            effective_team1_stats.append(winning_stats)
            effective_team2_stats.append(losing_stats)
            effective_team1_erpi.append(winning_team_erpi)
            effective_team2_erpi.append(losing_team_erpi)
        else:
            effective_team1_stats.append(losing_stats)
            effective_team2_stats.append(winning_stats)
            effective_team1_erpi.append(losing_team_erpi)
            effective_team2_erpi.append(winning_team_erpi)

    effective_team1_erpi = np.array(effective_team1_erpi)
    effective_team2_erpi = np.array(effective_team2_erpi)

    # Gaussian kernel on the ERPI distance between each game's participants
    # and the (team1, team2) pair.
    exponents = -0.5*(
        (effective_team1_erpi - team1_erpi)**2
        + (effective_team2_erpi - team2_erpi)**2
    )/tuning_params['std']**2
    if exponents.size:
        # Shifting by the maximum leaves the normalised weights unchanged but
        # keeps at least one weight equal to 1, so a small 'std' can no longer
        # underflow every weight to 0 and turn the normalisation into 0/0.
        exponents = exponents - exponents.max()
    weights = np.exp(exponents)
    weights /= np.sum(weights)

    effective_team1_stats = np.array(effective_team1_stats)
    effective_team2_stats = np.array(effective_team2_stats)

    # weighted average of the per-game parameters: entries 0-4 describe the
    # effective team1 side, entries 5-9 the effective team2 side
    blended_params = np.concatenate(
        (np.sum(weights[:, np.newaxis]*effective_team1_stats, axis=0),
         np.sum(weights[:, np.newaxis]*effective_team2_stats, axis=0)))

    # entry 0 of each side is its possession count; an overtime period is
    # modelled with one eighth of the regulation possessions
    mean_possessions = 0.5*(blended_params[0] + blended_params[5])
    possessions = int(mean_possessions)
    ot_possessions = int(mean_possessions/8.0)

    team1_dist = scoring_distribution(blended_params[1:5], possessions)
    team1_ot_dist = scoring_distribution(blended_params[1:5], ot_possessions)

    team2_dist = scoring_distribution(blended_params[6:10], possessions)
    team2_ot_dist = scoring_distribution(blended_params[6:10], ot_possessions)

    # sum over s >= 1 of P(team2 <= s - 1) * P(team1 = s), and the matching
    # equal-score terms, for regulation and for one overtime period
    regulation_win = np.sum(np.cumsum(team2_dist)[:-1]*team1_dist[1:])
    regulation_tie = np.sum(team2_dist*team1_dist)
    overtime_tie = np.sum(team2_ot_dist*team1_ot_dist)
    overtime_win = np.sum(np.cumsum(team2_ot_dist)[:-1]*team1_ot_dist[1:])
    # dividing by (1 - overtime_tie) sums the geometric series of repeated
    # overtime periods
    feature = regulation_win + regulation_tie/(1.0 - overtime_tie)*overtime_win

    feature_erpi_ratio = team1_erpi/team2_erpi

    # expected score = sum over s of s * dist[s]
    mean_score1 = np.sum(np.arange(len(team1_dist))*team1_dist)
    mean_score2 = np.sum(np.arange(len(team2_dist))*team2_dist)

    return [feature, feature_erpi_ratio, mean_score1, mean_score2]