def get_GA_nums(schedule_data, individual, team_data1, team_data2, team_cols1,
                team_cols2, player_data1, player_data2, player_cols1,
                player_cols2, health_data):
    """Compute the GA number of both teams for every game in ``schedule_data``.

    A team's GA number is the sum of three terms:

    * the mean, over the team's players, of the player features dotted with
      the player genes and scaled by each player's health modifier;
    * the team features dotted with the team genes;
    * the schedule statistics returned by ``GA_Funs.get_schedule_stats``
      dotted with the schedule genes.

    Args:
        schedule_data: DataFrame with 'home', 'visitor', 'date', 'home_pts'
            and 'visitor_pts' columns. Rows are visited in positional order,
            whatever the index labels are.
        individual: gene vector laid out as [player | team | schedule]; the
            schedule genes are whatever remains after the first two groups.
        team_data1, team_data2: DataFrames with a 'team' column and exactly
            one row per team.
        team_cols1, team_cols2: positions picked from a team's row of
            ``team_data1`` / ``team_data2`` after conversion to an array.
        player_data1, player_data2: DataFrames with a 'team' column
            (``player_data1`` also needs 'name').
        player_cols1, player_cols2: column positions picked from the
            per-team player arrays.
        health_data: DataFrame with 'name' and 'health_mod' columns; players
            that are not listed get a modifier of 1.0.

    Returns:
        DataFrame with one row per game and the columns 'home_score',
        'visitor_score', 'home_ga_num' and 'visitor_ga_num'.

    Raises:
        ValueError: if a team does not have exactly one row in
            ``team_data1`` or ``team_data2``.
    """
    num_games_loop = len(schedule_data)
    print('Num games to predict: ' + str(num_games_loop))

    # The gene layout does not depend on the game, so slice it once.
    genes = np.asarray(individual)
    n_player = len(player_cols1) + len(player_cols2)
    n_team = len(team_cols1) + len(team_cols2)
    player_genes = genes[:n_player]
    team_genes = genes[n_player:n_player + n_team]
    schedule_genes = genes[n_player + n_team:]

    # name -> every health_mod listed for that name, in table order. Built
    # once instead of rescanning health_data for every player of every game.
    health_lookup = {}
    for name, mod in zip(health_data['name'], health_data['health_mod']):
        health_lookup.setdefault(name, []).append(mod)

    static_cache = {}

    def static_number(team):
        """Return the player + team part of the GA number (game independent)."""
        if team not in static_cache:
            team_rows1 = np.array(team_data1[team_data1['team'] == team])
            team_rows2 = np.array(team_data2[team_data2['team'] == team])
            if len(team_rows1) != 1 or len(team_rows2) != 1:
                raise ValueError(
                    'expected exactly one team row for %r, got %d and %d'
                    % (team, len(team_rows1), len(team_rows2)))
            team_nums = np.hstack((team_rows1[0][team_cols1],
                                   team_rows2[0][team_cols2]))

            on_team1 = player_data1['team'] == team
            on_team2 = player_data2['team'] == team
            player_nums = np.hstack((
                np.array(player_data1[on_team1])[:, player_cols1],
                np.array(player_data2[on_team2])[:, player_cols2]))

            health_mod = []
            for name in player_data1['name'][on_team1]:
                health_mod.extend(health_lookup.get(name, [1.0]))

            static_cache[team] = (
                np.mean(player_nums.dot(player_genes) * health_mod)
                + team_nums.dot(team_genes))
        return static_cache[team]

    home_ga_nums = []
    visitor_ga_nums = []
    # Iterate the columns positionally: label-based .loc[i] with a running
    # counter only works when the index happens to be 0..n-1.
    for team_home, team_visitor, game_date in zip(schedule_data['home'],
                                                  schedule_data['visitor'],
                                                  schedule_data['date']):
        home_static = static_number(team_home)
        visitor_static = static_number(team_visitor)

        schedule_stats_home = GA_Funs.get_schedule_stats(
            schedule_data, schedule_data, game_date, team_home,
            future_logical=False)
        schedule_stats_visitor = GA_Funs.get_schedule_stats(
            schedule_data, schedule_data, game_date, team_visitor,
            future_logical=False)

        home_ga_nums.append(
            home_static + np.array(schedule_stats_home).dot(schedule_genes))
        visitor_ga_nums.append(
            visitor_static
            + np.array(schedule_stats_visitor).dot(schedule_genes))

    point_num_data = pd.DataFrame({
        'home_score': list(schedule_data['home_pts']),
        'visitor_score': list(schedule_data['visitor_pts']),
        'home_ga_num': home_ga_nums,
        'visitor_ga_num': visitor_ga_nums,
    })
    return point_num_data
def get_prediction(schedule_data, historic_schedule, individual, days_predict,
                   team_data1, team_data2, team_cols1, team_cols2,
                   player_data1, player_data2, player_cols1, player_cols2,
                   health_data, slope, intercept):
    """Predict winner and scores for the games in the next ``days_predict`` days.

    The games to predict are the rows of ``schedule_data`` whose 'days_ago'
    lies in ``[-days_predict, 0]``. For each of them both teams get a GA
    number made of three terms:

    * the mean, over the team's players, of the player features dotted with
      the player genes and scaled by each player's health modifier;
    * the team features dotted with the team genes;
    * the schedule statistics returned by ``GA_Funs.get_schedule_stats``
      dotted with the schedule genes.

    The team with the larger GA number is the predicted winner (the home
    team on a tie) and each score is ``slope * ga_number + intercept``.

    Args:
        schedule_data: DataFrame with 'days_ago', 'home', 'visitor' and
            'date' columns.
        historic_schedule: passed through to ``GA_Funs.get_schedule_stats``.
        individual: gene vector laid out as [player | team | schedule]; the
            schedule genes are whatever remains after the first two groups.
        days_predict: size of the prediction window in days.
        team_data1, team_data2: DataFrames with a 'team' column and exactly
            one row per team.
        team_cols1, team_cols2: positions picked from a team's row of
            ``team_data1`` / ``team_data2`` after conversion to an array.
        player_data1, player_data2: DataFrames with a 'team' column
            (``player_data1`` also needs 'name').
        player_cols1, player_cols2: column positions picked from the
            per-team player arrays.
        health_data: DataFrame with 'name' and 'health_mod' columns; players
            that are not listed get a modifier of 1.0.
        slope, intercept: linear map from GA number to predicted score.

    Returns:
        DataFrame with one row per predicted game and the columns 'winner',
        'home_score_pred' and 'visitor_score_pred'.

    Raises:
        ValueError: if a team does not have exactly one row in
            ``team_data1`` or ``team_data2``.
    """
    # Future schedule: today (days_ago == 0) up to days_predict days ahead.
    in_window = ((schedule_data['days_ago'] <= 0)
                 & (schedule_data['days_ago'] >= -days_predict))
    predict_schedule = schedule_data[in_window]
    num_games_loop = len(predict_schedule)
    print('Num games to predict: ' + str(num_games_loop))

    # The gene layout does not depend on the game, so slice it once.
    genes = np.asarray(individual)
    n_player = len(player_cols1) + len(player_cols2)
    n_team = len(team_cols1) + len(team_cols2)
    player_genes = genes[:n_player]
    team_genes = genes[n_player:n_player + n_team]
    schedule_genes = genes[n_player + n_team:]

    # name -> every health_mod listed for that name, in table order. Built
    # once instead of rescanning health_data for every player of every game.
    health_lookup = {}
    for name, mod in zip(health_data['name'], health_data['health_mod']):
        health_lookup.setdefault(name, []).append(mod)

    static_cache = {}

    def static_number(team):
        """Return the player + team part of the GA number (game independent)."""
        if team not in static_cache:
            team_rows1 = np.array(team_data1[team_data1['team'] == team])
            team_rows2 = np.array(team_data2[team_data2['team'] == team])
            if len(team_rows1) != 1 or len(team_rows2) != 1:
                raise ValueError(
                    'expected exactly one team row for %r, got %d and %d'
                    % (team, len(team_rows1), len(team_rows2)))
            team_nums = np.hstack((team_rows1[0][team_cols1],
                                   team_rows2[0][team_cols2]))

            on_team1 = player_data1['team'] == team
            on_team2 = player_data2['team'] == team
            player_nums = np.hstack((
                np.array(player_data1[on_team1])[:, player_cols1],
                np.array(player_data2[on_team2])[:, player_cols2]))

            health_mod = []
            for name in player_data1['name'][on_team1]:
                health_mod.extend(health_lookup.get(name, [1.0]))

            static_cache[team] = (
                np.mean(player_nums.dot(player_genes) * health_mod)
                + team_nums.dot(team_genes))
        return static_cache[team]

    pred_winner_list = []
    home_score_pred = []
    visitor_score_pred = []
    # predict_schedule keeps the index labels of schedule_data, so the rows
    # must be walked positionally; .loc[i] with i = 0..n-1 would raise
    # KeyError (or read other games) for a filtered frame.
    for team_home, team_visitor, game_date in zip(predict_schedule['home'],
                                                  predict_schedule['visitor'],
                                                  predict_schedule['date']):
        home_static = static_number(team_home)
        visitor_static = static_number(team_visitor)

        schedule_stats_home = GA_Funs.get_schedule_stats(
            predict_schedule, historic_schedule, game_date, team_home,
            future_logical=True)
        schedule_stats_visitor = GA_Funs.get_schedule_stats(
            predict_schedule, historic_schedule, game_date, team_visitor,
            future_logical=True)

        home_num = (home_static
                    + np.array(schedule_stats_home).dot(schedule_genes))
        visitor_num = (visitor_static
                       + np.array(schedule_stats_visitor).dot(schedule_genes))

        # Ties go to the home team.
        pred_winner_list.append(
            team_visitor if visitor_num > home_num else team_home)
        home_score_pred.append(slope * home_num + intercept)
        visitor_score_pred.append(slope * visitor_num + intercept)

    pred_frame = pd.DataFrame({
        'winner': pred_winner_list,
        'home_score_pred': home_score_pred,
        'visitor_score_pred': visitor_score_pred,
    })
    return pred_frame
# Tag every game row with its winner, then persist the schedule table.
schedule_data['winner'] = schedule_data.apply(get_winner, axis=1)

logging.info('Saving Game Data to Database')
saveFrameToTable(
    dataFrame=schedule_data,
    tableName='schedule_data',
    sqldbName='NBA_data',
    dbFolder=data_folder,
    e_option='replace',
)

######
# Population parameters: one gene per player, team and schedule
# characteristic, each bounded to [-1, 1].
upper_bounds = np.repeat(
    1, sum((player_char_num, team_char_num, schedule_char_num)))
lower_bounds = -upper_bounds

######
# Initial population, filled at random between the bounds.
logging.info('Initializing Population')
population = GA_Funs.create_population(
    lower_bounds, upper_bounds, n=N_pop, type_fill='random')

######
# Fitness of every individual of the initial population.
fitness = np.zeros(N_pop)
for n in range(N_pop):
    fitness[n] = GA_Funs.get_fitness(
        population[n,], schedule_data,
        team_data1, team_data2, team_cols1, team_cols2,
        player_data1, player_data2, player_cols1, player_cols2,
        health_data)

######
# Start Genetic Algorithm
def get_GA_nums(schedule_data, individual, team_data1, team_data2, team_cols1,
                team_cols2, player_data1, player_data2, player_cols1,
                player_cols2, health_data):
    """Compute the GA number of both teams for every game in ``schedule_data``.

    A team's GA number is the sum of three terms:

    * the mean, over the team's players, of the player features dotted with
      the player genes and scaled by each player's health modifier;
    * the team features dotted with the team genes;
    * the schedule statistics returned by ``GA_Funs.get_schedule_stats``
      dotted with the schedule genes.

    Args:
        schedule_data: DataFrame with 'home', 'visitor', 'date', 'home_pts'
            and 'visitor_pts' columns. Rows are visited in positional order,
            whatever the index labels are.
        individual: gene vector laid out as [player | team | schedule]; the
            schedule genes are whatever remains after the first two groups.
        team_data1, team_data2: DataFrames with a 'team' column and exactly
            one row per team.
        team_cols1, team_cols2: positions picked from a team's row of
            ``team_data1`` / ``team_data2`` after conversion to an array.
        player_data1, player_data2: DataFrames with a 'team' column
            (``player_data1`` also needs 'name').
        player_cols1, player_cols2: column positions picked from the
            per-team player arrays.
        health_data: DataFrame with 'name' and 'health_mod' columns; players
            that are not listed get a modifier of 1.0.

    Returns:
        DataFrame with one row per game and the columns 'home_score',
        'visitor_score', 'home_ga_num' and 'visitor_ga_num'.

    Raises:
        ValueError: if a team does not have exactly one row in
            ``team_data1`` or ``team_data2``.
    """
    num_games_loop = len(schedule_data)
    print('Num games to predict: ' + str(num_games_loop))

    # The gene layout does not depend on the game, so slice it once.
    genes = np.asarray(individual)
    n_player = len(player_cols1) + len(player_cols2)
    n_team = len(team_cols1) + len(team_cols2)
    player_genes = genes[:n_player]
    team_genes = genes[n_player:n_player + n_team]
    schedule_genes = genes[n_player + n_team:]

    # name -> every health_mod listed for that name, in table order. Built
    # once instead of rescanning health_data for every player of every game.
    health_lookup = {}
    for name, mod in zip(health_data['name'], health_data['health_mod']):
        health_lookup.setdefault(name, []).append(mod)

    static_cache = {}

    def static_number(team):
        """Return the player + team part of the GA number (game independent)."""
        if team not in static_cache:
            team_rows1 = np.array(team_data1[team_data1['team'] == team])
            team_rows2 = np.array(team_data2[team_data2['team'] == team])
            if len(team_rows1) != 1 or len(team_rows2) != 1:
                raise ValueError(
                    'expected exactly one team row for %r, got %d and %d'
                    % (team, len(team_rows1), len(team_rows2)))
            team_nums = np.hstack((team_rows1[0][team_cols1],
                                   team_rows2[0][team_cols2]))

            on_team1 = player_data1['team'] == team
            on_team2 = player_data2['team'] == team
            player_nums = np.hstack((
                np.array(player_data1[on_team1])[:, player_cols1],
                np.array(player_data2[on_team2])[:, player_cols2]))

            health_mod = []
            for name in player_data1['name'][on_team1]:
                health_mod.extend(health_lookup.get(name, [1.0]))

            static_cache[team] = (
                np.mean(player_nums.dot(player_genes) * health_mod)
                + team_nums.dot(team_genes))
        return static_cache[team]

    home_ga_nums = []
    visitor_ga_nums = []
    # Iterate the columns positionally: label-based .loc[i] with a running
    # counter only works when the index happens to be 0..n-1.
    for team_home, team_visitor, game_date in zip(schedule_data['home'],
                                                  schedule_data['visitor'],
                                                  schedule_data['date']):
        home_static = static_number(team_home)
        visitor_static = static_number(team_visitor)

        schedule_stats_home = GA_Funs.get_schedule_stats(
            schedule_data, schedule_data, game_date, team_home,
            future_logical=False)
        schedule_stats_visitor = GA_Funs.get_schedule_stats(
            schedule_data, schedule_data, game_date, team_visitor,
            future_logical=False)

        home_ga_nums.append(
            home_static + np.array(schedule_stats_home).dot(schedule_genes))
        visitor_ga_nums.append(
            visitor_static
            + np.array(schedule_stats_visitor).dot(schedule_genes))

    point_num_data = pd.DataFrame({
        'home_score': list(schedule_data['home_pts']),
        'visitor_score': list(schedule_data['visitor_pts']),
        'home_ga_num': home_ga_nums,
        'visitor_ga_num': visitor_ga_nums,
    })
    return point_num_data
def get_prediction(schedule_data, historic_schedule, individual, days_predict,
                   team_data1, team_data2, team_cols1, team_cols2,
                   player_data1, player_data2, player_cols1, player_cols2,
                   health_data, slope, intercept):
    """Predict winner and scores for the games in the next ``days_predict`` days.

    The games to predict are the rows of ``schedule_data`` whose 'days_ago'
    lies in ``[-days_predict, 0]``. For each of them both teams get a GA
    number made of three terms:

    * the mean, over the team's players, of the player features dotted with
      the player genes and scaled by each player's health modifier;
    * the team features dotted with the team genes;
    * the schedule statistics returned by ``GA_Funs.get_schedule_stats``
      dotted with the schedule genes.

    The team with the larger GA number is the predicted winner (the home
    team on a tie) and each score is ``slope * ga_number + intercept``.

    Args:
        schedule_data: DataFrame with 'days_ago', 'home', 'visitor' and
            'date' columns.
        historic_schedule: passed through to ``GA_Funs.get_schedule_stats``.
        individual: gene vector laid out as [player | team | schedule]; the
            schedule genes are whatever remains after the first two groups.
        days_predict: size of the prediction window in days.
        team_data1, team_data2: DataFrames with a 'team' column and exactly
            one row per team.
        team_cols1, team_cols2: positions picked from a team's row of
            ``team_data1`` / ``team_data2`` after conversion to an array.
        player_data1, player_data2: DataFrames with a 'team' column
            (``player_data1`` also needs 'name').
        player_cols1, player_cols2: column positions picked from the
            per-team player arrays.
        health_data: DataFrame with 'name' and 'health_mod' columns; players
            that are not listed get a modifier of 1.0.
        slope, intercept: linear map from GA number to predicted score.

    Returns:
        DataFrame with one row per predicted game and the columns 'winner',
        'home_score_pred' and 'visitor_score_pred'.

    Raises:
        ValueError: if a team does not have exactly one row in
            ``team_data1`` or ``team_data2``.
    """
    # Future schedule: today (days_ago == 0) up to days_predict days ahead.
    in_window = ((schedule_data['days_ago'] <= 0)
                 & (schedule_data['days_ago'] >= -days_predict))
    predict_schedule = schedule_data[in_window]
    num_games_loop = len(predict_schedule)
    print('Num games to predict: ' + str(num_games_loop))

    # The gene layout does not depend on the game, so slice it once.
    genes = np.asarray(individual)
    n_player = len(player_cols1) + len(player_cols2)
    n_team = len(team_cols1) + len(team_cols2)
    player_genes = genes[:n_player]
    team_genes = genes[n_player:n_player + n_team]
    schedule_genes = genes[n_player + n_team:]

    # name -> every health_mod listed for that name, in table order. Built
    # once instead of rescanning health_data for every player of every game.
    health_lookup = {}
    for name, mod in zip(health_data['name'], health_data['health_mod']):
        health_lookup.setdefault(name, []).append(mod)

    static_cache = {}

    def static_number(team):
        """Return the player + team part of the GA number (game independent)."""
        if team not in static_cache:
            team_rows1 = np.array(team_data1[team_data1['team'] == team])
            team_rows2 = np.array(team_data2[team_data2['team'] == team])
            if len(team_rows1) != 1 or len(team_rows2) != 1:
                raise ValueError(
                    'expected exactly one team row for %r, got %d and %d'
                    % (team, len(team_rows1), len(team_rows2)))
            team_nums = np.hstack((team_rows1[0][team_cols1],
                                   team_rows2[0][team_cols2]))

            on_team1 = player_data1['team'] == team
            on_team2 = player_data2['team'] == team
            player_nums = np.hstack((
                np.array(player_data1[on_team1])[:, player_cols1],
                np.array(player_data2[on_team2])[:, player_cols2]))

            health_mod = []
            for name in player_data1['name'][on_team1]:
                health_mod.extend(health_lookup.get(name, [1.0]))

            static_cache[team] = (
                np.mean(player_nums.dot(player_genes) * health_mod)
                + team_nums.dot(team_genes))
        return static_cache[team]

    pred_winner_list = []
    home_score_pred = []
    visitor_score_pred = []
    # predict_schedule keeps the index labels of schedule_data, so the rows
    # must be walked positionally; .loc[i] with i = 0..n-1 would raise
    # KeyError (or read other games) for a filtered frame.
    for team_home, team_visitor, game_date in zip(predict_schedule['home'],
                                                  predict_schedule['visitor'],
                                                  predict_schedule['date']):
        home_static = static_number(team_home)
        visitor_static = static_number(team_visitor)

        schedule_stats_home = GA_Funs.get_schedule_stats(
            predict_schedule, historic_schedule, game_date, team_home,
            future_logical=True)
        schedule_stats_visitor = GA_Funs.get_schedule_stats(
            predict_schedule, historic_schedule, game_date, team_visitor,
            future_logical=True)

        home_num = (home_static
                    + np.array(schedule_stats_home).dot(schedule_genes))
        visitor_num = (visitor_static
                       + np.array(schedule_stats_visitor).dot(schedule_genes))

        # Ties go to the home team.
        pred_winner_list.append(
            team_visitor if visitor_num > home_num else team_home)
        home_score_pred.append(slope * home_num + intercept)
        visitor_score_pred.append(slope * visitor_num + intercept)

    pred_frame = pd.DataFrame({
        'winner': pred_winner_list,
        'home_score_pred': home_score_pred,
        'visitor_score_pred': visitor_score_pred,
    })
    return pred_frame
dbFolder=data_folder, e_option='replace') ###### # Create population parameters upper_bounds = np.repeat(1, player_char_num + team_char_num + schedule_char_num) lower_bounds = np.repeat(-1, player_char_num + team_char_num + schedule_char_num) ###### # Create initial population logging.info('Initializing Population') population = GA_Funs.create_population(lower_bounds, upper_bounds, n=N_pop, type_fill='random') ###### # Assess initial fitness fitness = np.repeat(0.0, N_pop) for n in range(N_pop): fitness[n] = GA_Funs.get_fitness(population[n, ], schedule_data, team_data1, team_data2, team_cols1, team_cols2, player_data1, player_data2, player_cols1, player_cols2, health_data) ###### # Start Genetic Algorithm