예제 #1
0
def _health_mods_by_name(health_data):
    """Map each name in ``health_data`` to the list of its ``health_mod`` values.

    Built once per call so the per-player lookup inside the game loop does
    not have to rescan ``health_data`` for every player of every game.
    """
    lookup = {}
    for name, mod in zip(health_data['name'], health_data['health_mod']):
        lookup.setdefault(name, []).append(mod)
    return lookup


def _team_ga_number(team, schedule_stats, weights, team_tables, player_tables,
                    health_lookup):
    """Return the weighted GA number of ``team`` for a single game.

    The value is the mean over the team's players of
    ``(player stats . player weights) * health modifier`` plus
    ``team stats . team weights`` plus ``schedule stats . schedule weights``.
    """
    player_weights, team_weights, schedule_weights = weights

    # Each team table must hold exactly one row for the team; the
    # single-element unpacking raises ValueError otherwise.
    team_parts = []
    for data, cols in team_tables:
        [row] = np.array(data[data['team'] == team])
        team_parts.append(row[cols])
    team_nums = np.hstack(team_parts)

    # One row per player, selected columns of both player tables side by side.
    player_nums = np.hstack([np.array(data[data['team'] == team])[:, cols]
                             for data, cols in player_tables])

    # The roster (and its order) comes from the first player table; players
    # without a health entry keep a neutral modifier of 1.0.
    roster_source = player_tables[0][0]
    roster = roster_source['name'][roster_source['team'] == team]
    health_mods = []
    for name in roster:
        health_mods.extend(health_lookup.get(name, (1.0,)))

    return (np.mean(np.array(player_nums).dot(player_weights) * health_mods)
            + np.array(team_nums).dot(team_weights)
            + np.array(schedule_stats).dot(schedule_weights))


def get_GA_nums(schedule_data, individual,
                   team_data1, team_data2, team_cols1, team_cols2,
                   player_data1, player_data2, player_cols1, player_cols2, health_data):
    """Compute the home and visitor GA numbers for every game in a schedule.

    Args:
        schedule_data: DataFrame with 'home', 'visitor', 'date', 'home_pts'
            and 'visitor_pts' columns; also passed (twice) to
            ``GA_Funs.get_schedule_stats``.
        individual: weight vector laid out as
            [player weights | team weights | schedule weights].
        team_data1, team_data2: DataFrames with a 'team' column holding
            exactly one row per team.
        team_cols1, team_cols2: indices selecting the entries of a team row
            (after conversion to a NumPy array) that are used as features.
        player_data1, player_data2: DataFrames with a 'team' column
            (``player_data1`` also needs 'name'), one row per player.
        player_cols1, player_cols2: indices selecting the player feature
            columns (after conversion to a NumPy array).
        health_data: DataFrame with 'name' and 'health_mod' columns.

    Returns:
        DataFrame with columns 'home_score', 'visitor_score', 'home_ga_num'
        and 'visitor_ga_num', one row per game in schedule order.

    Raises:
        ValueError: if a team does not have exactly one row in a team table.
    """
    num_games_loop = len(schedule_data)
    print('Num games to predict: ' + str(num_games_loop))

    # Everything below is identical for every game, so compute it once.
    n_player = len(player_cols1) + len(player_cols2)
    n_team = len(team_cols1) + len(team_cols2)
    genes = np.asarray(individual)
    weights = (genes[:n_player],
               genes[n_player:n_player + n_team],
               genes[n_player + n_team:])
    team_tables = ((team_data1, team_cols1), (team_data2, team_cols2))
    player_tables = ((player_data1, player_cols1), (player_data2, player_cols2))
    health_lookup = _health_mods_by_name(health_data)

    home_ga_nums = []
    visitor_ga_nums = []
    home_pts = []
    visitor_pts = []
    # Walk the rows positionally: the previous ``.loc[i, ...]`` lookup was
    # label-based and only worked when the index was exactly 0..n-1.
    games = zip(schedule_data['home'], schedule_data['visitor'],
                schedule_data['date'], schedule_data['home_pts'],
                schedule_data['visitor_pts'])
    for team_home, team_visitor, game_date, pts_home, pts_visitor in games:
        schedule_stats_home = GA_Funs.get_schedule_stats(
            schedule_data, schedule_data, game_date, team_home,
            future_logical=False)
        schedule_stats_visitor = GA_Funs.get_schedule_stats(
            schedule_data, schedule_data, game_date, team_visitor,
            future_logical=False)

        home_ga_nums.append(_team_ga_number(
            team_home, schedule_stats_home, weights,
            team_tables, player_tables, health_lookup))
        visitor_ga_nums.append(_team_ga_number(
            team_visitor, schedule_stats_visitor, weights,
            team_tables, player_tables, health_lookup))
        home_pts.append(pts_home)
        visitor_pts.append(pts_visitor)

    point_num_data = pd.DataFrame({'home_score': home_pts, 'visitor_score': visitor_pts,
                                   'home_ga_num': home_ga_nums, 'visitor_ga_num': visitor_ga_nums})

    return point_num_data
예제 #2
0
def _health_mods_by_name(health_data):
    """Map each name in ``health_data`` to the list of its ``health_mod`` values.

    Built once per call so the per-player lookup inside the game loop does
    not have to rescan ``health_data`` for every player of every game.
    """
    lookup = {}
    for name, mod in zip(health_data['name'], health_data['health_mod']):
        lookup.setdefault(name, []).append(mod)
    return lookup


def _team_ga_number(team, schedule_stats, weights, team_tables, player_tables,
                    health_lookup):
    """Return the weighted GA number of ``team`` for a single game.

    The value is the mean over the team's players of
    ``(player stats . player weights) * health modifier`` plus
    ``team stats . team weights`` plus ``schedule stats . schedule weights``.
    """
    player_weights, team_weights, schedule_weights = weights

    # Each team table must hold exactly one row for the team; the
    # single-element unpacking raises ValueError otherwise.
    team_parts = []
    for data, cols in team_tables:
        [row] = np.array(data[data['team'] == team])
        team_parts.append(row[cols])
    team_nums = np.hstack(team_parts)

    # One row per player, selected columns of both player tables side by side.
    player_nums = np.hstack([np.array(data[data['team'] == team])[:, cols]
                             for data, cols in player_tables])

    # The roster (and its order) comes from the first player table; players
    # without a health entry keep a neutral modifier of 1.0.
    roster_source = player_tables[0][0]
    roster = roster_source['name'][roster_source['team'] == team]
    health_mods = []
    for name in roster:
        health_mods.extend(health_lookup.get(name, (1.0,)))

    return (np.mean(np.array(player_nums).dot(player_weights) * health_mods)
            + np.array(team_nums).dot(team_weights)
            + np.array(schedule_stats).dot(schedule_weights))


def get_prediction(schedule_data, historic_schedule, individual, days_predict,
                   team_data1, team_data2, team_cols1, team_cols2,
                   player_data1, player_data2, player_cols1, player_cols2,
                   health_data, slope, intercept):
    """Predict winner and scores for games in the next ``days_predict`` days.

    Games are selected from ``schedule_data`` where
    ``-days_predict <= days_ago <= 0``. For each one the home and visitor GA
    numbers are computed; the visitor is predicted to win only when its
    number is strictly larger, and each score is ``slope * number + intercept``.

    Args:
        schedule_data: DataFrame with 'days_ago', 'home', 'visitor' and
            'date' columns.
        historic_schedule: passed through to ``GA_Funs.get_schedule_stats``.
        individual: weight vector laid out as
            [player weights | team weights | schedule weights].
        days_predict: size of the look-ahead window, in ``days_ago`` units.
        team_data1, team_data2: DataFrames with a 'team' column holding
            exactly one row per team.
        team_cols1, team_cols2: indices selecting the entries of a team row
            (after conversion to a NumPy array) that are used as features.
        player_data1, player_data2: DataFrames with a 'team' column
            (``player_data1`` also needs 'name'), one row per player.
        player_cols1, player_cols2: indices selecting the player feature
            columns (after conversion to a NumPy array).
        health_data: DataFrame with 'name' and 'health_mod' columns.
        slope, intercept: linear map from GA number to predicted score.

    Returns:
        DataFrame with columns 'winner', 'home_score_pred' and
        'visitor_score_pred', one row per selected game (empty when no game
        falls inside the window).

    Raises:
        ValueError: if a team does not have exactly one row in a team table.
    """
    # future_schedule
    predict_schedule = schedule_data[(schedule_data['days_ago'] <= 0) &
                                     (schedule_data['days_ago'] >= -days_predict)]
    num_games_loop = len(predict_schedule)
    print('Num games to predict: ' + str(num_games_loop))

    # Everything below is identical for every game, so compute it once.
    n_player = len(player_cols1) + len(player_cols2)
    n_team = len(team_cols1) + len(team_cols2)
    genes = np.asarray(individual)
    weights = (genes[:n_player],
               genes[n_player:n_player + n_team],
               genes[n_player + n_team:])
    team_tables = ((team_data1, team_cols1), (team_data2, team_cols2))
    player_tables = ((player_data1, player_cols1), (player_data2, player_cols2))
    health_lookup = _health_mods_by_name(health_data)

    pred_winner_list = []
    home_score_pred = []
    visitor_score_pred = []
    # The filtered frame keeps the index labels of ``schedule_data``, so a
    # label lookup with 0..n-1 would miss or hit the wrong rows. Iterate the
    # columns positionally instead.
    games = zip(predict_schedule['home'], predict_schedule['visitor'],
                predict_schedule['date'])
    for team_home, team_visitor, game_date in games:
        schedule_stats_home = GA_Funs.get_schedule_stats(
            predict_schedule, historic_schedule, game_date, team_home,
            future_logical=True)
        schedule_stats_visitor = GA_Funs.get_schedule_stats(
            predict_schedule, historic_schedule, game_date, team_visitor,
            future_logical=True)

        home_num = _team_ga_number(team_home, schedule_stats_home, weights,
                                   team_tables, player_tables, health_lookup)
        visitor_num = _team_ga_number(team_visitor, schedule_stats_visitor, weights,
                                      team_tables, player_tables, health_lookup)

        # Ties go to the home team.
        pred_winner_list.append(team_visitor if visitor_num > home_num else team_home)
        home_score_pred.append(slope * home_num + intercept)
        visitor_score_pred.append(slope * visitor_num + intercept)

    # Build the frame once, after the loop, so it also exists for zero games.
    pred_frame = pd.DataFrame({'winner': pred_winner_list, 'home_score_pred': home_score_pred,
                               'visitor_score_pred': visitor_score_pred})

    return pred_frame
예제 #3
0
# Apply get_winner to every row (axis=1) and keep the result in a 'winner' column.
schedule_data['winner'] = schedule_data.apply(get_winner, axis=1)

logging.info('Saving Game Data to Database')
saveFrameToTable(
    dataFrame=schedule_data,
    tableName='schedule_data',
    sqldbName='NBA_data',
    dbFolder=data_folder,
    e_option='replace',
)

######
# Create population parameters

# One entry per player, team and schedule characteristic; bounds are +1 / -1.
upper_bounds = np.full(player_char_num + team_char_num + schedule_char_num, 1)
lower_bounds = -upper_bounds

######
# Create initial population
logging.info('Initializing Population')
population = GA_Funs.create_population(
    lower_bounds,
    upper_bounds,
    n=N_pop,
    type_fill='random',
)


######
# Assess initial fitness

# Start from all-zero fitness and fill in one value per individual.
fitness = np.zeros(N_pop)

for n in range(N_pop):
    fitness[n] = GA_Funs.get_fitness(
        population[n, ], schedule_data,
        team_data1, team_data2, team_cols1, team_cols2,
        player_data1, player_data2, player_cols1, player_cols2,
        health_data,
    )


######
# Start Genetic Algorithm
예제 #4
0
def _health_mods_by_name(health_data):
    """Map each name in ``health_data`` to the list of its ``health_mod`` values.

    Built once per call so the per-player lookup inside the game loop does
    not have to rescan ``health_data`` for every player of every game.
    """
    lookup = {}
    for name, mod in zip(health_data['name'], health_data['health_mod']):
        lookup.setdefault(name, []).append(mod)
    return lookup


def _team_ga_number(team, schedule_stats, weights, team_tables, player_tables,
                    health_lookup):
    """Return the weighted GA number of ``team`` for a single game.

    The value is the mean over the team's players of
    ``(player stats . player weights) * health modifier`` plus
    ``team stats . team weights`` plus ``schedule stats . schedule weights``.
    """
    player_weights, team_weights, schedule_weights = weights

    # Each team table must hold exactly one row for the team; the
    # single-element unpacking raises ValueError otherwise.
    team_parts = []
    for data, cols in team_tables:
        [row] = np.array(data[data['team'] == team])
        team_parts.append(row[cols])
    team_nums = np.hstack(team_parts)

    # One row per player, selected columns of both player tables side by side.
    player_nums = np.hstack([np.array(data[data['team'] == team])[:, cols]
                             for data, cols in player_tables])

    # The roster (and its order) comes from the first player table; players
    # without a health entry keep a neutral modifier of 1.0.
    roster_source = player_tables[0][0]
    roster = roster_source['name'][roster_source['team'] == team]
    health_mods = []
    for name in roster:
        health_mods.extend(health_lookup.get(name, (1.0,)))

    return (np.mean(np.array(player_nums).dot(player_weights) * health_mods)
            + np.array(team_nums).dot(team_weights)
            + np.array(schedule_stats).dot(schedule_weights))


def get_GA_nums(schedule_data, individual, team_data1, team_data2, team_cols1,
                team_cols2, player_data1, player_data2, player_cols1,
                player_cols2, health_data):
    """Compute the home and visitor GA numbers for every game in a schedule.

    Args:
        schedule_data: DataFrame with 'home', 'visitor', 'date', 'home_pts'
            and 'visitor_pts' columns; also passed (twice) to
            ``GA_Funs.get_schedule_stats``.
        individual: weight vector laid out as
            [player weights | team weights | schedule weights].
        team_data1, team_data2: DataFrames with a 'team' column holding
            exactly one row per team.
        team_cols1, team_cols2: indices selecting the entries of a team row
            (after conversion to a NumPy array) that are used as features.
        player_data1, player_data2: DataFrames with a 'team' column
            (``player_data1`` also needs 'name'), one row per player.
        player_cols1, player_cols2: indices selecting the player feature
            columns (after conversion to a NumPy array).
        health_data: DataFrame with 'name' and 'health_mod' columns.

    Returns:
        DataFrame with columns 'home_score', 'visitor_score', 'home_ga_num'
        and 'visitor_ga_num', one row per game in schedule order.

    Raises:
        ValueError: if a team does not have exactly one row in a team table.
    """
    num_games_loop = len(schedule_data)
    print('Num games to predict: ' + str(num_games_loop))

    # Everything below is identical for every game, so compute it once.
    n_player = len(player_cols1) + len(player_cols2)
    n_team = len(team_cols1) + len(team_cols2)
    genes = np.asarray(individual)
    weights = (genes[:n_player],
               genes[n_player:n_player + n_team],
               genes[n_player + n_team:])
    team_tables = ((team_data1, team_cols1), (team_data2, team_cols2))
    player_tables = ((player_data1, player_cols1),
                     (player_data2, player_cols2))
    health_lookup = _health_mods_by_name(health_data)

    home_ga_nums = []
    visitor_ga_nums = []
    home_pts = []
    visitor_pts = []
    # Walk the rows positionally: the previous ``.loc[i, ...]`` lookup was
    # label-based and only worked when the index was exactly 0..n-1.
    games = zip(schedule_data['home'], schedule_data['visitor'],
                schedule_data['date'], schedule_data['home_pts'],
                schedule_data['visitor_pts'])
    for team_home, team_visitor, game_date, pts_home, pts_visitor in games:
        schedule_stats_home = GA_Funs.get_schedule_stats(
            schedule_data,
            schedule_data,
            game_date,
            team_home,
            future_logical=False)
        schedule_stats_visitor = GA_Funs.get_schedule_stats(
            schedule_data,
            schedule_data,
            game_date,
            team_visitor,
            future_logical=False)

        home_ga_nums.append(
            _team_ga_number(team_home, schedule_stats_home, weights,
                            team_tables, player_tables, health_lookup))
        visitor_ga_nums.append(
            _team_ga_number(team_visitor, schedule_stats_visitor, weights,
                            team_tables, player_tables, health_lookup))
        home_pts.append(pts_home)
        visitor_pts.append(pts_visitor)

    point_num_data = pd.DataFrame({
        'home_score': home_pts,
        'visitor_score': visitor_pts,
        'home_ga_num': home_ga_nums,
        'visitor_ga_num': visitor_ga_nums
    })

    return point_num_data
예제 #5
0
def _health_mods_by_name(health_data):
    """Map each name in ``health_data`` to the list of its ``health_mod`` values.

    Built once per call so the per-player lookup inside the game loop does
    not have to rescan ``health_data`` for every player of every game.
    """
    lookup = {}
    for name, mod in zip(health_data['name'], health_data['health_mod']):
        lookup.setdefault(name, []).append(mod)
    return lookup


def _team_ga_number(team, schedule_stats, weights, team_tables, player_tables,
                    health_lookup):
    """Return the weighted GA number of ``team`` for a single game.

    The value is the mean over the team's players of
    ``(player stats . player weights) * health modifier`` plus
    ``team stats . team weights`` plus ``schedule stats . schedule weights``.
    """
    player_weights, team_weights, schedule_weights = weights

    # Each team table must hold exactly one row for the team; the
    # single-element unpacking raises ValueError otherwise.
    team_parts = []
    for data, cols in team_tables:
        [row] = np.array(data[data['team'] == team])
        team_parts.append(row[cols])
    team_nums = np.hstack(team_parts)

    # One row per player, selected columns of both player tables side by side.
    player_nums = np.hstack([np.array(data[data['team'] == team])[:, cols]
                             for data, cols in player_tables])

    # The roster (and its order) comes from the first player table; players
    # without a health entry keep a neutral modifier of 1.0.
    roster_source = player_tables[0][0]
    roster = roster_source['name'][roster_source['team'] == team]
    health_mods = []
    for name in roster:
        health_mods.extend(health_lookup.get(name, (1.0,)))

    return (np.mean(np.array(player_nums).dot(player_weights) * health_mods)
            + np.array(team_nums).dot(team_weights)
            + np.array(schedule_stats).dot(schedule_weights))


def get_prediction(schedule_data, historic_schedule, individual, days_predict,
                   team_data1, team_data2, team_cols1, team_cols2,
                   player_data1, player_data2, player_cols1, player_cols2,
                   health_data, slope, intercept):
    """Predict winner and scores for games in the next ``days_predict`` days.

    Games are selected from ``schedule_data`` where
    ``-days_predict <= days_ago <= 0``. For each one the home and visitor GA
    numbers are computed; the visitor is predicted to win only when its
    number is strictly larger, and each score is ``slope * number + intercept``.

    Args:
        schedule_data: DataFrame with 'days_ago', 'home', 'visitor' and
            'date' columns.
        historic_schedule: passed through to ``GA_Funs.get_schedule_stats``.
        individual: weight vector laid out as
            [player weights | team weights | schedule weights].
        days_predict: size of the look-ahead window, in ``days_ago`` units.
        team_data1, team_data2: DataFrames with a 'team' column holding
            exactly one row per team.
        team_cols1, team_cols2: indices selecting the entries of a team row
            (after conversion to a NumPy array) that are used as features.
        player_data1, player_data2: DataFrames with a 'team' column
            (``player_data1`` also needs 'name'), one row per player.
        player_cols1, player_cols2: indices selecting the player feature
            columns (after conversion to a NumPy array).
        health_data: DataFrame with 'name' and 'health_mod' columns.
        slope, intercept: linear map from GA number to predicted score.

    Returns:
        DataFrame with columns 'winner', 'home_score_pred' and
        'visitor_score_pred', one row per selected game (empty when no game
        falls inside the window).

    Raises:
        ValueError: if a team does not have exactly one row in a team table.
    """
    # future_schedule
    predict_schedule = schedule_data[(schedule_data['days_ago'] <= 0) & (
        schedule_data['days_ago'] >= -days_predict)]
    num_games_loop = len(predict_schedule)
    print('Num games to predict: ' + str(num_games_loop))

    # Everything below is identical for every game, so compute it once.
    n_player = len(player_cols1) + len(player_cols2)
    n_team = len(team_cols1) + len(team_cols2)
    genes = np.asarray(individual)
    weights = (genes[:n_player],
               genes[n_player:n_player + n_team],
               genes[n_player + n_team:])
    team_tables = ((team_data1, team_cols1), (team_data2, team_cols2))
    player_tables = ((player_data1, player_cols1),
                     (player_data2, player_cols2))
    health_lookup = _health_mods_by_name(health_data)

    pred_winner_list = []
    home_score_pred = []
    visitor_score_pred = []
    # The filtered frame keeps the index labels of ``schedule_data``, so a
    # label lookup with 0..n-1 would miss or hit the wrong rows. Iterate the
    # columns positionally instead.
    games = zip(predict_schedule['home'], predict_schedule['visitor'],
                predict_schedule['date'])
    for team_home, team_visitor, game_date in games:
        schedule_stats_home = GA_Funs.get_schedule_stats(
            predict_schedule,
            historic_schedule,
            game_date,
            team_home,
            future_logical=True)
        schedule_stats_visitor = GA_Funs.get_schedule_stats(
            predict_schedule,
            historic_schedule,
            game_date,
            team_visitor,
            future_logical=True)

        home_num = _team_ga_number(team_home, schedule_stats_home, weights,
                                   team_tables, player_tables, health_lookup)
        visitor_num = _team_ga_number(team_visitor, schedule_stats_visitor,
                                      weights, team_tables, player_tables,
                                      health_lookup)

        # Ties go to the home team.
        pred_winner_list.append(
            team_visitor if visitor_num > home_num else team_home)
        home_score_pred.append(slope * home_num + intercept)
        visitor_score_pred.append(slope * visitor_num + intercept)

    # Build the frame once, after the loop, so it also exists for zero games.
    pred_frame = pd.DataFrame({
        'winner': pred_winner_list,
        'home_score_pred': home_score_pred,
        'visitor_score_pred': visitor_score_pred
    })

    return pred_frame
예제 #6
0
                 dbFolder=data_folder,
                 e_option='replace')

######
# Create population parameters

# One entry per player, team and schedule characteristic; bounds are +1 / -1.
upper_bounds = np.full(player_char_num + team_char_num + schedule_char_num, 1)
lower_bounds = -upper_bounds

######
# Create initial population
logging.info('Initializing Population')
population = GA_Funs.create_population(
    lower_bounds,
    upper_bounds,
    n=N_pop,
    type_fill='random',
)

######
# Assess initial fitness

# Start from all-zero fitness and fill in one value per individual.
fitness = np.zeros(N_pop)

for n in range(N_pop):
    fitness[n] = GA_Funs.get_fitness(
        population[n, ], schedule_data,
        team_data1, team_data2, team_cols1, team_cols2,
        player_data1, player_data2, player_cols1, player_cols2,
        health_data,
    )

######
# Start Genetic Algorithm