def _read_data(self):
    """Load the match dataset and return its differential vector.

    If the file named by ``DATASET_NAME`` exists, it is unpickled and handed
    straight to ``differential_vector``. Otherwise every pickle matching
    ``matches/*/*`` is read, the frames are concatenated and de-duplicated,
    passed through ``filter_stats`` and ``dropna()``, and a
    ``points_difference`` column (home points minus away points) is added
    before the result is handed to ``differential_vector``.
    """
    if path.exists(DATASET_NAME):
        return differential_vector(pd.read_pickle(DATASET_NAME))

    match_files = glob('matches/*/*')
    dataset = pd.concat([pd.read_pickle(match_file)
                         for match_file in match_files])
    dataset.drop_duplicates(inplace=True)
    dataset = filter_stats(dataset).dropna()
    # NOTE(review): these fills run after dropna(), so there should be no
    # missing values left for them to replace -- confirm whether they were
    # meant to run before dropna().
    for column in ('home_free_throw_percentage',
                   'away_free_throw_percentage'):
        dataset[column].fillna(0, inplace=True)
    dataset['points_difference'] = \
        dataset['home_points'] - dataset['away_points']
    return differential_vector(dataset)
# Example no. 2
# 0
 def _read_data(self, data_directory):
     """Build the differential vector from pickles under ``data_directory``.

     Every file matching ``<data_directory>/*/*`` is unpickled; the frames
     are concatenated and de-duplicated, passed through ``filter_stats`` and
     ``dropna()``, and a ``points_difference`` column (home points minus
     away points) is added before the result is handed to
     ``differential_vector``.
     """
     match_files = glob('%s/*/*' % data_directory)
     dataset = pd.concat([pd.read_pickle(match_file)
                          for match_file in match_files])
     dataset.drop_duplicates(inplace=True)
     dataset = filter_stats(dataset).dropna()
     # NOTE(review): these fills run after dropna(), so there should be no
     # missing values left for them to replace -- confirm whether they were
     # meant to run before dropna().
     for column in ('home_free_throw_percentage',
                    'away_free_throw_percentage'):
         dataset[column].fillna(0, inplace=True)
     dataset['points_difference'] = \
         dataset['home_points'] - dataset['away_points']
     return differential_vector(dataset)
def make_predictions(prediction_stats, games_list, match_info, predictor):
    """Predict every simulated game and summarise the result per matchup.

    ``games_list`` and ``match_info`` are consumed in consecutive groups of
    ``NUM_SIMS`` entries. For each entry the predictor output row is read as
    ``[home_points, away_points]``: the larger value marks the winner, and a
    tie is awarded to the home team. Wins and points are accumulated per
    team across the group, then ``get_winner``, ``get_probability`` and
    ``get_points`` reduce them to a single outcome which is displayed and
    packaged with ``create_prediction_data``.

    Parameters
    ----------
    prediction_stats : iterable of pandas objects
        Per-game stats, concatenated with ``pd.concat`` before prediction.
    games_list : sequence
        Game objects exposing ``home.abbreviation``, ``away.abbreviation``
        and ``title``.
    match_info : sequence
        Indexed in parallel with ``games_list``; the first entry of each
        group is forwarded to ``create_prediction_data``.
    predictor : object
        Must provide ``simplify(data)`` and ``predict(data, int)``.

    Returns
    -------
    list
        One ``create_prediction_data`` result per group of ``NUM_SIMS``
        games.
    """
    prediction_list = []
    conferences = Conferences().team_conference

    prediction_data = differential_vector(pd.concat(prediction_stats))
    prediction_data['points_difference'] = \
        prediction_data['home_points'] - prediction_data['away_points']
    prediction_data = predictor.simplify(prediction_data)
    predictions = predictor.predict(prediction_data, int)
    # Floor division keeps the group count an integer on Python 3 as well
    # (true division would hand range() a float); a trailing partial group
    # is ignored, exactly as integer division did on Python 2.
    for sim in range(len(games_list) // NUM_SIMS):
        first_game = sim * NUM_SIMS
        total_points = {}
        num_wins = {}
        for x in range(first_game, first_game + NUM_SIMS):
            home = games_list[x].home.abbreviation
            away = games_list[x].away.abbreviation
            scores = list(predictions[x])
            winner_idx = scores.index(max(scores))
            loser_idx = scores.index(min(scores))
            # In the case of a tie, give precedence to the home team.
            if winner_idx == loser_idx:
                winner_idx, loser_idx = 0, 1
            if winner_idx == 0:
                winner, loser = home, away
            else:
                winner, loser = away, home
            total_points[winner] = \
                total_points.get(winner, 0) + scores[winner_idx]
            total_points[loser] = \
                total_points.get(loser, 0) + scores[loser_idx]
            num_wins[winner] = num_wins.get(winner, 0) + 1
        # home/away here are those of the last game in the group.
        winner, loser = get_winner(num_wins, home, away)
        winner_prob, loser_prob = get_probability(num_wins, winner, loser)
        winner_points, loser_points = get_points(total_points, winner, loser)
        display_prediction(games_list[first_game].title, winner)
        p = create_prediction_data(match_info[first_game], conferences,
                                   winner, loser, winner_prob, loser_prob,
                                   winner_points, loser_points)
        prediction_list.append(p)
    return prediction_list
# Example no. 4
# 0
def simulate_tournament(seeds, games_list, predictor):
    """Play through every game in name order and return the final winner.

    For each ``(game_name, game_data)`` pair of ``games_list``, taken in
    sorted order, the participating teams are resolved with
    ``include_teams``, their matchup stats are built, converted with
    ``differential_vector``, renamed, simplified and predicted, and the
    winner chosen by ``determine_winner`` is stored under
    ``game_data['winner']``.

    Parameters
    ----------
    seeds
        Forwarded to ``include_teams``.
    games_list : mapping
        Maps game names to per-game data.
    predictor : object
        Must provide ``simplify(data)`` and ``predict(data, int)``.

    Returns
    -------
    The winner of the last game processed, or ``None`` when ``games_list``
    is empty.
    """
    fields_to_rename = {'win_loss_pct': 'win_pct'}
    winner = None

    # items() exists on both Python 2 and 3 mappings (iteritems() is
    # Python 2 only); sorted() materialises the pairs either way.
    for game_name, game_data in sorted(games_list.items()):
        game_data = include_teams(game_data, games_list, seeds)
        top_team = game_data['top_team']
        bottom_team = game_data['bottom_team']
        match_stats = differential_vector(
            get_match_stats(top_team, bottom_team))
        match_stats.rename(columns=fields_to_rename, inplace=True)
        match_stats_simplified = predictor.simplify(match_stats)
        predictions = predictor.predict(match_stats_simplified, int)
        winner = determine_winner(predictions, [top_team, bottom_team])
        game_data['winner'] = winner
    # This winner is the last winner of the last game, AKA the champion
    return winner
# Example no. 5
# 0
def main():
    """Predict a single matchup from the command line and print the winner.

    Builds a ``Predictor`` from ``args.dataset``, creates the matchup stats
    for ``args.home`` versus ``args.away``, renames the win-percentage
    columns, converts the stats with ``differential_vector``, and prints
    ``args.home`` when the first prediction equals 1, otherwise
    ``args.away``.
    """
    fields_to_rename = {
        'win_loss_pct': 'win_pct',
        'opp_win_loss_pct': 'opp_win_pct'
    }

    args = arguments()
    predictor = Predictor(args.dataset)
    match_stats = create_matchup_stats(args.home, args.away)
    match_stats.rename(columns=fields_to_rename, inplace=True)
    match_stats = differential_vector(match_stats)
    match_stats_simplified = predictor.simplify(match_stats)
    prediction = predictor.predict(match_stats_simplified, int)
    # Single-argument print(...) behaves the same on Python 2 and 3.
    if prediction[0] == 1:
        print(args.home)
    else:
        print(args.away)
    # NOTE(review): bare attribute access kept from the original; presumably
    # a property with a reporting side effect -- confirm against Predictor.
    predictor.accuracy
def predict_all_matches(predictor, stats_dict, net_ratings, teams):
    """Rank teams by predicting every home/away pairing within each pool.

    ``split_datasets(net_ratings)`` yields pools of teams. Inside a pool
    every ordered pair of distinct teams is predicted as one game, the
    per-team totals are updated with ``get_totals`` and the pool is folded
    into the rankings with ``update_rankings``.

    Parameters
    ----------
    predictor : object
        Must provide ``simplify``, ``predict(data, int)`` and
        ``predict_probability``.
    stats_dict : mapping
        Home stats are looked up by team name and away stats under
        ``'<team>_away'``.
    net_ratings
        Forwarded to ``split_datasets``.
    teams
        Forwarded to ``initialize_team_wins``.

    Returns
    -------
    list
        The accumulated rankings.
    """
    fields_to_rename = {'win_loss_pct': 'win_pct',
                        'opp_win_loss_pct': 'opp_win_pct'}
    team_wins = initialize_team_wins(teams)
    rankings = []

    for dataset in split_datasets(net_ratings):
        games_list = []
        match_stats = []
        for home_team in dataset:
            team_wins[home_team] = 0
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print(home_team)
            for away_team in dataset:
                if home_team == away_team:
                    continue
                home_stats = stats_dict[home_team]
                away_stats = stats_dict['%s_away' % away_team]
                match_stats.append(pd.concat([away_stats, home_stats], axis=1))
                games_list.append([home_team, away_team])
        try:
            prediction_stats = pd.concat(match_stats)
        # Occurs when only one team is left in a pool. For example, if the teams
        # are divided into approximately 10 groups, there will be a single team
        # leftover which will automatically be the lowest-tier team, so add them
        # to the end of the rankings.
        except ValueError:
            rankings.append(home_team)
            continue
        match_vector = differential_vector(prediction_stats)
        match_vector.rename(columns=fields_to_rename, inplace=True)
        match_stats_simplified = predictor.simplify(match_vector)
        predictions = predictor.predict(match_stats_simplified, int)
        probabilities = predictor.predict_probability(match_stats_simplified)
        team_wins = get_totals(games_list, predictions, team_wins,
                               probabilities)
        rankings = update_rankings(rankings, team_wins)
    return rankings
def predict_all_matches(predictor, stats_dict, conference, schedule,
                        conference_wins):
    """Predict every scheduled matchup and return the per-team totals.

    Each ``(home, away)`` pair in ``schedule`` contributes one frame made of
    the away team's ``'<team>_away'`` stats next to the home team's stats.
    The combined frames are converted with ``differential_vector``, given a
    ``points_difference`` column (home points minus away points),
    simplified and predicted; ``get_totals`` then produces the totals,
    starting from a zero entry for every team in ``teams_list(conference)``.
    """
    win_totals = {team: 0 for team in teams_list(conference)}

    matchup_frames = []
    for home, away in schedule:
        home_stats = stats_dict[home]
        away_stats = stats_dict['%s_away' % away]
        matchup_frames.append(pd.concat([away_stats, home_stats], axis=1))

    match_vector = differential_vector(pd.concat(matchup_frames))
    home_points = match_vector['home_points']
    match_vector['points_difference'] = \
        home_points - match_vector['away_points']
    simplified = predictor.simplify(match_vector)
    predictions = predictor.predict(simplified, int)
    return get_totals(schedule, predictions, win_totals, conference_wins)