def normal_training():
    """Plot the reward curve of a token-based Q-learning training run.

    NOTE(review): the training loop itself was fully commented out in the
    original; only the parameter setup and the plotting step are live.
    NOTE(review): another function named ``normal_training`` is defined
    later in this file and shadows this one at import time -- confirm
    which definition is intended to win.
    """
    players = [LudoPlayerRandom() for _ in range(3)]

    # GA-optimized parameters (kept for reference):
    #   e = 0.40463712, d = 0.14343606, a = 0.10783296
    # Exhaustive-search parameters actually used for the plotted run:
    e = 0.05   # epsilon (exploration rate)
    d = 0.5    # discount factor
    a = 0.25   # learning rate

    # (Dead commented-out training loop removed: it built a
    # LudoPlayerQLearningToken, played n games while tracking win counts,
    # appended per-game rewards, then saved the Q-table and reward log.)

    Plot = PlotStatistics()
    Plot.plotReward(
        pathToCSV=f'Qlearning_token_logs/Reward_token_40000_e-{e}_d-{d}_a-{a}.csv',
        numMovAvg=1000)
def normal_play():
    """Plot the reward curve of an action-based Q-learning run.

    NOTE(review): the game-play loop was fully commented out in the
    original; only player setup and the plotting step are live.
    """
    # Dead ``players = []`` that was immediately overwritten removed.
    players = [LudoPlayerRandom() for _ in range(3)]

    # Exhaustive-search parameters (GA-optimized alternatives kept for
    # reference: e=0.40463712, d=0.14343606, a=0.10783296).
    epsilon = 0.05
    discount_factor = 0.5
    learning_rate = 0.25
    parameters = [epsilon, discount_factor, learning_rate]

    t1 = LudoPlayerQLearningAction(
        parameters,
        chosenPolicy="epsilon greedy",
        QtableName='Qlearning_action_logs/1QTable_action_r_win',
        RewardName='Qlearning_action_logs/1Reward_action_r_win')
    players.append(t1)

    for i, player in enumerate(players):
        player.id = i  # attach an id attribute not defined in the class

    score = [0, 0, 0, 0]

    # (Dead commented-out training loop removed: it played n games,
    # tracked win counts, appended per-game rewards, then saved the
    # Q-table and reward log.)

    Plot = PlotStatistics()
    Plot.plotReward(
        pathToCSV=f'Qlearning_action_logs/1Reward_action_r_win_e-{epsilon}_d-{discount_factor}_a-{learning_rate}.csv',
        numMovAvg=1000)
def main():
    """Train a LudoPlayerQLearningSimple against three random players.

    Plays 5000 games (win rates shown in the tqdm bar), appends the
    per-game reward after each game, saves the Q-table and reward log,
    prints the final win distribution, and plots the reward curve.
    """
    players = [LudoPlayerRandom() for _ in range(3)]

    epsilon = 0.1
    discount_factor = 0.5
    learning_rate = 0.1
    parameters = [epsilon, discount_factor, learning_rate]

    t1 = LudoPlayerQLearningSimple(
        parameters,
        chosenPolicy="greedy",
        QtableName='1_QTable_simple',
        RewardName='1_Reward_simple')
    players.append(t1)

    for i, player in enumerate(players):
        player.id = i  # attach an id attribute not defined in the class

    score = [0, 0, 0, 0]
    n = 5000
    start_time = time.time()

    tqdm_1 = tqdm(range(n), ascii=True)
    for i in tqdm_1:
        tqdm_1.set_description_str(
            f"win rates {np.around(score/np.sum(score),decimals=2)*100}")
        random.shuffle(players)
        ludoGame = LudoGame(players)
        winner = ludoGame.play_full_game()
        score[players[winner].id] += 1

        # Record this game's reward and reset accumulated update values.
        for player in players:
            if isinstance(player, LudoPlayerQLearningSimple):
                player.append_reward()
                player.reset_upd_val()

    for player in players:
        if isinstance(player, LudoPlayerQLearningSimple):
            player.saveQTable()
            player.saveReward()

    duration = time.time() - start_time
    print('win distribution:', score)
    # Consistent with the other training entry points in this file; the
    # original computed ``duration`` but never used it.
    print('games per second:', n / duration)

    Plot = PlotStatistics()
    # Path built from the parameters instead of hard-coding
    # '1_Reward_simple_e-0.1_d-0.5_a-0.1.csv' (same resulting string).
    Plot.plotReward(
        pathToCSV=f'1_Reward_simple_e-{epsilon}_d-{discount_factor}_a-{learning_rate}.csv',
        numMovAvg=1000)
def normal_training():
    """Train a LudoPlayerQLearningFullAction against three random players.

    Plays 15000 games, appends the per-game reward after each game, saves
    the Q-table and reward log, prints win statistics and throughput, and
    plots the reward curve.

    NOTE(review): a function with this same name is defined earlier in
    this file; this later definition shadows it -- confirm intended.
    """
    # Hyper-parameters hoisted into variables so the CSV path below stays
    # consistent with them (previously hard-coded inline and in the path).
    epsilon = 0.1
    discount_factor = 0.5
    learning_rate = 0.1

    players = [LudoPlayerRandom() for _ in range(3)]
    players.append(
        LudoPlayerQLearningFullAction(
            "epsilon greedy",
            QtableName='1_QTable_action_full_r_cum',
            RewardName='1_Reward_action_full_r_cum',
            epsilon=epsilon,
            discount_factor=discount_factor,
            learning_rate=learning_rate))

    for i, player in enumerate(players):
        player.id = i  # attach an id attribute not defined in the class

    score = [0, 0, 0, 0]
    n = 15000
    start_time = time.time()

    tqdm_1 = tqdm(range(n), ascii=True)
    for i in tqdm_1:
        tqdm_1.set_description_str(
            f"win rates {np.around(score/np.sum(score),decimals=2)*100}")
        random.shuffle(players)
        ludoGame = LudoGame(players)
        winner = ludoGame.play_full_game()
        score[players[winner].id] += 1

        # Record this game's reward for the Q-learning player.
        for player in players:
            if isinstance(player, LudoPlayerQLearningFullAction):
                player.append_reward()

    for player in players:
        if isinstance(player, LudoPlayerQLearningFullAction):
            player.saveQTable()  # only one player is Q-learning
            player.saveReward()

    duration = time.time() - start_time
    print('win distribution:', score)
    print('win distribution percentage', (score / np.sum(score)) * 100)
    print('games per second:', n / duration)

    Plot = PlotStatistics()
    # Same resulting path as the original hard-coded string.
    Plot.plotReward(
        pathToCSV=f'1_Reward_action_full_r_cum_e-{epsilon}_d-{discount_factor}_a-{learning_rate}.csv',
        numMovAvg=1000)
def __play_against_random(self, best_pop_player, game_iterations, p_id, win_rate):
    """Benchmark one GA candidate against three random players.

    Plays ``game_iterations`` games with ``best_pop_player`` in the last
    seat (id 3), stores its raw win count in ``win_rate[p_id]``, and
    prints its win percentage.
    """
    contestants = [LudoPlayerRandom() for _ in range(3)]
    contestants.append(best_pop_player)

    for seat, contestant in enumerate(contestants):
        contestant.id = seat  # attach an id attribute not defined in the class

    score = [0, 0, 0, 0]
    for _ in range(game_iterations):
        random.shuffle(contestants)
        match = LudoGame(contestants)
        winner_seat = match.play_full_game()
        score[contestants[winner_seat].id] += 1

    # The candidate was appended last, so its id is 3 regardless of shuffling.
    win_rate[p_id] = score[3]
    print(f'Chromosome {best_pop_player.chromosome} scored win percentage against random {np.around(score/np.sum(score),decimals=2)*100}')
def play_with_on_QLearning_thread(num_games, epsilon, discount_factor, learning_rate):
    """Train a token-based Q-learning player with the given hyper-parameters.

    Plays ``num_games`` games against three random players, appends the
    per-game reward after each game, saves the Q-table and reward log,
    and prints the final win percentages.
    """
    players = [LudoPlayerRandom() for _ in range(3)]
    players.append(
        LudoPlayerQLearningToken(
            "epsilon greedy",
            QtableName='Param_optimization/QTable',
            RewardName='Param_optimization/Reward',
            epsilon=epsilon,
            discount_factor=discount_factor,
            learning_rate=learning_rate))

    for i, player in enumerate(players):
        player.id = i  # attach an id attribute not defined in the class

    score = [0, 0, 0, 0]
    n = num_games
    for i in range(n):
        random.shuffle(players)
        ludoGame = LudoGame(players)
        winner = ludoGame.play_full_game()
        score[players[winner].id] += 1

        # BUGFIX: append the reward AFTER the game is played. The original
        # called append_reward() before play_full_game(), logging the
        # previous game's reward -- every other training loop in this file
        # appends after the game finishes.
        for player in players:
            if isinstance(player, LudoPlayerQLearningToken):
                player.append_reward()

        if i % 2500 == 0:
            print('Game ', i, ' done')

    for player in players:
        if isinstance(player, LudoPlayerQLearningToken):
            player.saveQTable()
            player.saveReward()

    print(
        f'Player with eps={epsilon}, discountfactor={discount_factor} and learningrate={learning_rate} won {np.around(score/np.sum(score),decimals=2)*100}'
    )
def eval(best_player):
    """Evaluate the performance of the best evolved player.

    ``best_player`` occupies two of the four seats against two random
    players for 10000 games; the running win rates are displayed in the
    tqdm progress bar. Nothing is returned or saved.

    NOTE(review): this function shadows the Python builtin ``eval`` --
    consider renaming.
    NOTE(review): the same object is appended twice, so the id loop below
    sets ``best_player.id`` to 2 and then overwrites it with 3; all of
    its wins (from either seat) are counted in ``score[3]`` while
    ``score[2]`` stays 0 -- confirm this aggregation is intended.
    """
    # Evaluting the performance of the best player
    players = [LudoPlayerRandom() for _ in range(2)]
    players.append(best_player)
    players.append(best_player)
    n_games = 10000
    for i, player in enumerate(players):
        player.id = i  # attach an id attribute not defined in the class
    score = [0, 0, 0, 0]
    tqdm_1 = tqdm(range(n_games), ascii=True)
    for i in tqdm_1:
        random.shuffle(players)
        ludoGame = LudoGame(players)
        winner = ludoGame.play_full_game()
        score[players[winner].id] += 1
        tqdm_1.set_description_str(
            f"Validating best player: win rates {np.around(score/np.sum(score),decimals=2)*100}"
        )
def _make_pair(requested_type):
    """Build two players of ``requested_type`` for play_on_thread.

    Returns a list of two freshly-constructed players, or None when the
    type is not recognised. Q-learning players are created with the
    exhaustive-search parameters (e=0.05, d=0.5, a=0.25) in greedy mode.
    """
    if requested_type is LudoPlayerRandom:
        return [LudoPlayerRandom() for _ in range(2)]
    if requested_type is LudoPlayerQLearningAction:
        # Exhaustive-search parameters: epsilon, discount_factor, learning_rate.
        parameters = [0.05, 0.5, 0.25]
        return [
            LudoPlayerQLearningAction(
                parameters,
                chosenPolicy="greedy",
                QtableName='Qlearning_action_logs/QTable_action_r_win',
                RewardName='Qlearning_action_logs/Reward_action_r_win')
            for _ in range(2)
        ]
    if requested_type is LudoPlayerQLearningToken:
        # Exhaustive-search parameters.
        return [
            LudoPlayerQLearningToken(
                "greedy",
                QtableName='Qlearning_token_logs/QTable_token_40000',
                RewardName='Qlearning_token_logs/Reward_token_40000',
                epsilon=0.05,
                discount_factor=0.5,
                learning_rate=0.25)
            for _ in range(2)
        ]
    if requested_type is simple_GA_player:
        chromosome = [0.800085, 2.05562201, 0.55735083, -0.9978861]
        return [simple_GA_player(chromosome) for _ in range(2)]
    if requested_type is MathiasPlayer:
        return [MathiasPlayer() for _ in range(2)]
    if requested_type is smart_player:
        return [smart_player() for _ in range(2)]
    return None


def play_on_thread(num_games, player_type, opponent_type):
    """Play ``num_games`` 2-vs-2 games of player_type against opponent_type.

    Seats 0-1 hold two players of ``player_type`` and seats 2-3 hold two
    of ``opponent_type``. Prints both sides' win percentage and the
    result of a binomial significance test.
    """
    # Player initilization (seats 0-1).
    players = _make_pair(player_type)
    if players is None:
        print("player not found please check correctly added")
        return

    # Opponent initilization (seats 2-3).
    # BUGFIX: the original smart_player branch of this chain tested
    # ``player_type`` instead of ``opponent_type`` and overwrote
    # ``players`` instead of extending it (copy-paste error).
    opponents = _make_pair(opponent_type)
    if opponents is None:
        print("player not found please check correctly added")
        return
    players += opponents

    for i, player in enumerate(players):
        player.id = i  # attach an id attribute not defined in the class

    score = [0, 0, 0, 0]
    n = num_games
    for i in range(n):
        random.shuffle(players)
        ludoGame = LudoGame(players)
        winner = ludoGame.play_full_game()
        score[players[winner].id] += 1

    win_rate_percent_player = np.sum((score / np.sum(score))[0:2]) * 100
    win_rate_percent_opponent = np.sum((score / np.sum(score))[2:]) * 100

    significant_player = binominal_test(num_games, np.sum(score[0:2]))
    significant_opponent = binominal_test(num_games, np.sum(score[2:]))

    print(
        f'Player: {player_type.name}, Won: {win_rate_percent_player:.2f}, Significant: {significant_player} VERSUS. Opponent: {opponent_type.name}, Won: {win_rate_percent_opponent:.2f}, Significant: {significant_opponent}'
    )