def param_optimization():
    """Exhaustive grid search over Q-learning hyper-parameters.

    Spawns one training process per (epsilon, discount factor, learning rate)
    combination — 2 * 2 * 4 = 16 processes, each playing 10000 games — waits
    for all of them to finish, then plots the aggregated results found in the
    ``Param_optimization`` folder.
    """
    starttime = time.time()
    epsilons = [0.1, 0.05]
    discount_factors = [0.9, 0.5]
    learning_rates = [0.5, 0.25, 0.1, 0.05]

    processes = []
    for eps in epsilons:
        for disc in discount_factors:
            for lr in learning_rates:
                p = multiprocessing.Process(
                    target=play_with_on_QLearning_thread,
                    args=(10000, eps, disc, lr))
                processes.append(p)
                p.start()

    # Block until every training run has completed.
    for process in processes:
        process.join()

    print('That took {} seconds'.format(time.time() - starttime))

    Plot = PlotStatistics()
    Plot.plotMultiple(pathToFolder="Param_optimization", numMovAvg=1000)
def plot():
    """Plot the 2-D chromosome evolution recorded by the simple GA run."""
    # Axis labels theta_1 .. theta_4, one per chromosome gene.
    theta_labels = [rf'$\theta_{{{i}}}$' for i in range(1, 5)]
    statistics = PlotStatistics()
    statistics.plot_chromosome_2D(
        path_to_csv='GA_simple_logs/simple_ga_chromosomes.csv',
        labels=theta_labels)
def normal_training():
    """Plot the reward curve of a trained token-level Q-learning agent.

    The training loop itself is currently disabled (kept below as commented
    code); only the reward plot for the exhaustive-search parameters is
    produced.  NOTE(review): another ``normal_training`` is defined later in
    this file — the later definition wins at import time.
    """
    players = [LudoPlayerRandom() for _ in range(3)]

    # GA optimized parameters
    # e = 0.40463712
    # d = 0.14343606
    # a = 0.10783296

    # Exhaustive search parameters
    e, d, a = 0.05, 0.5, 0.25

    # --- disabled training loop ---
    # players.append(LudoPlayerQLearningToken("epsilon greedy", QtableName='Qlearning_token_logs/QTable_token_40000', RewardName='Qlearning_token_logs/Reward_token_40000', epsilon=e, discount_factor=d, learning_rate=a))
    # for i, player in enumerate(players):
    #     player.id = i  # assign an id attribute not defined in the class
    # score = [0, 0, 0, 0]
    # n = 40000
    # start_time = time.time()
    # tqdm_1 = tqdm(range(n), ascii=True)
    # for i in tqdm_1:
    #     tqdm_1.set_description_str(f"win rates {np.around(score/np.sum(score),decimals=2)*100}")
    #     random.shuffle(players)
    #     ludoGame = LudoGame(players)
    #     winner = ludoGame.play_full_game()
    #     score[players[winner].id] += 1
    #     for player in players:  # saving reward for the Q-learning player
    #         if type(player) == LudoPlayerQLearningToken:
    #             player.append_reward()
    # for player in players:
    #     if type(player) == LudoPlayerQLearningToken:
    #         player.saveQTable()  # only one player is Q-learning
    #         player.saveReward()
    # duration = time.time() - start_time
    # print('win distribution:', score)
    # print('win distribution percentage', (score/np.sum(score))*100)
    # print('games per second:', n / duration)

    plot_statistics = PlotStatistics()
    plot_statistics.plotReward(
        pathToCSV=
        f'Qlearning_token_logs/Reward_token_40000_e-{e}_d-{d}_a-{a}.csv',
        numMovAvg=1000)
def normal_play():
    """Plot the reward curve for the action-level Q-learning agent.

    Builds the player roster (three random players plus the Q-learning
    agent); the game loop itself is disabled below, so only the reward
    plot is generated.
    """
    # Exhaustive-search parameters (GA alternatives in trailing comments).
    epsilon = 0.05  # 0.40463712 # 0.05 #
    discount_factor = 0.5  # 0.14343606 # 0.5 #
    learning_rate = 0.25  # 0.10783296 # 0.25 #
    parameters = [epsilon, discount_factor, learning_rate]

    players = [LudoPlayerRandom() for _ in range(3)]
    agent = LudoPlayerQLearningAction(
        parameters,
        chosenPolicy="epsilon greedy",
        QtableName='Qlearning_action_logs/1QTable_action_r_win',
        RewardName='Qlearning_action_logs/1Reward_action_r_win')
    players.append(agent)

    for player_id, player in enumerate(players):
        player.id = player_id  # assign an id attribute not defined in the class

    score = [0, 0, 0, 0]

    # --- disabled game loop ---
    # n = 40000
    # start_time = time.time()
    # tqdm_1 = tqdm(range(n), ascii=True)
    # for i in tqdm_1:
    #     tqdm_1.set_description_str(f"win rates {np.around(score/np.sum(score),decimals=2)*100}")
    #     random.shuffle(players)
    #     ludoGame = LudoGame(players)
    #     winner = ludoGame.play_full_game()
    #     score[players[winner].id] += 1
    #     for player in players:  # saving reward for the Q-learning player
    #         if type(player) == LudoPlayerQLearningAction:
    #             player.append_reward()
    # for player in players:
    #     if type(player) == LudoPlayerQLearningAction:
    #         player.saveQTable()
    #         player.saveReward()
    # duration = time.time() - start_time
    # print('win distribution percentage', (score/np.sum(score))*100)
    # print('win distribution:', score)

    plot_statistics = PlotStatistics()
    plot_statistics.plotReward(
        pathToCSV=
        f'Qlearning_action_logs/1Reward_action_r_win_e-{epsilon}_d-{discount_factor}_a-{learning_rate}.csv',
        numMovAvg=1000)
def main():
    """Train LudoPlayerQLearningSimple (greedy policy) against three random
    players for 5000 games, persist its Q-table and rewards, and plot the
    moving-average reward curve.

    Fixes: the progress-bar description divided by ``np.sum(score)`` before
    any game had been won (division by zero → nan); ``type(x) == T`` replaced
    with ``isinstance``; the plot CSV path is now derived from the parameter
    variables instead of hardcoded literals.
    """
    epsilon = 0.1
    discount_factor = 0.5
    learning_rate = 0.1
    parameters = [epsilon, discount_factor, learning_rate]

    players = [LudoPlayerRandom() for _ in range(3)]
    t1 = LudoPlayerQLearningSimple(parameters,
                                   chosenPolicy="greedy",
                                   QtableName='1_QTable_simple',
                                   RewardName='1_Reward_simple')
    players.append(t1)

    for i, player in enumerate(players):
        player.id = i  # assign an id attribute not defined in the class

    score = [0, 0, 0, 0]
    n = 5000
    start_time = time.time()

    tqdm_1 = tqdm(range(n), ascii=True)
    for i in tqdm_1:
        games_played = np.sum(score)
        if games_played:  # guard: score is all zeros on the first iteration
            tqdm_1.set_description_str(
                f"win rates {np.around(score/games_played,decimals=2)*100}")
        random.shuffle(players)
        ludoGame = LudoGame(players)
        winner = ludoGame.play_full_game()
        score[players[winner].id] += 1

        for player in players:  # saving reward for the Q-learning player
            if isinstance(player, LudoPlayerQLearningSimple):
                player.append_reward()
                player.reset_upd_val()

    for player in players:
        if isinstance(player, LudoPlayerQLearningSimple):
            player.saveQTable()
            player.saveReward()

    duration = time.time() - start_time
    print('win distribution:', score)
    print('games per second:', n / duration)

    Plot = PlotStatistics()
    Plot.plotReward(
        pathToCSV=
        f'1_Reward_simple_e-{epsilon}_d-{discount_factor}_a-{learning_rate}.csv',
        numMovAvg=1000)
def normal_training():
    """Train LudoPlayerQLearningFullAction against three random players for
    15000 games, save its Q-table and rewards, and plot the reward curve.

    Fixes: the progress-bar description divided by ``np.sum(score)`` before
    any game had been won (division by zero → nan); ``type(x) == T`` replaced
    with ``isinstance``; hyper-parameters are named once and reused in both
    the constructor and the CSV path.  NOTE(review): this shadows an earlier
    ``normal_training`` definition in this file.
    """
    epsilon, discount_factor, learning_rate = 0.1, 0.5, 0.1

    players = [LudoPlayerRandom() for _ in range(3)]
    players.append(
        LudoPlayerQLearningFullAction("epsilon greedy",
                                      QtableName='1_QTable_action_full_r_cum',
                                      RewardName='1_Reward_action_full_r_cum',
                                      epsilon=epsilon,
                                      discount_factor=discount_factor,
                                      learning_rate=learning_rate))
    for i, player in enumerate(players):
        player.id = i  # assign an id attribute not defined in the class

    score = [0, 0, 0, 0]
    n = 15000
    start_time = time.time()

    tqdm_1 = tqdm(range(n), ascii=True)
    for i in tqdm_1:
        games_played = np.sum(score)
        if games_played:  # guard: score is all zeros on the first iteration
            tqdm_1.set_description_str(
                f"win rates {np.around(score/games_played,decimals=2)*100}")
        random.shuffle(players)
        ludoGame = LudoGame(players)
        winner = ludoGame.play_full_game()
        score[players[winner].id] += 1

        for player in players:  # saving reward for the Q-learning player
            if isinstance(player, LudoPlayerQLearningFullAction):
                player.append_reward()

    for player in players:
        if isinstance(player, LudoPlayerQLearningFullAction):
            player.saveQTable()  # only one player is Q-learning
            player.saveReward()

    duration = time.time() - start_time
    print('win distribution:', score)
    print('win distribution percentage', (score / np.sum(score)) * 100)
    print('games per second:', n / duration)

    Plot = PlotStatistics()
    Plot.plotReward(
        pathToCSV=
        f'1_Reward_action_full_r_cum_e-{epsilon}_d-{discount_factor}_a-{learning_rate}.csv',
        numMovAvg=1000)
def plot():
    """Plot GA chromosome evolution for the Q-learning parameter search.

    NOTE(review): this shadows an earlier ``plot`` definition in this file.
    """
    chromosome_csv = (
        'GA_qlearning_param_opti_logs/qlean_param_opti_ga_chromosomes.csv')
    param_labels = [r'$\epsilon$', r'$\gamma$', r'$\alpha$']

    statistics = PlotStatistics()
    # statistics.plot_chromosome_2D(path_to_csv=chromosome_csv, labels=param_labels)
    statistics.plot_chromosome_3D(path_to_csv=chromosome_csv)
    statistics.plot_chromosome_2D_qlearning(path_to_csv=chromosome_csv,
                                            labels=param_labels)
# for i, player in enumerate(players): # player.id = i # selv tildele atributter uden defineret i klassen # score = [0, 0, 0, 0] # tqdm_1 = tqdm(range(n_games), ascii=True) # for i in tqdm_1: # random.shuffle(players) # ludoGame = LudoGame(players) # winner = ludoGame.play_full_game() # score[players[winner].id] += 1 # tqdm_1.set_description_str(f"Validating best player: win rates {np.around(score/np.sum(score),decimals=2)*100}") Plot = PlotStatistics() Plot.plot_chromosome_2D(path_to_csv='chromosomes_plot.csv') ##### TESTING FOR QLEARNING PARAMETERS ##### # basetournement_test = basetournement(LudoPlayerQLearningSimple, 20, type_player='qlearning') # 32 chromomes max out the thread count of this computer. # chromosomes = np.random.uniform(low=0, high=1, size=(20, 3)) # 32 chromomes max out the thread count of this computer. # print(chromosomes) # best_player = basetournement_test.play_for_generations(chromosomes, tournament_it=100, generations_it=20, validation_it=1) # problem gets the same 5 chromosome as the best # print(f'The best player chosen has chromosome {best_player.chromosome}') # Evaluting the performance of the best player # players = [LudoPlayerRandom() for _ in range(3)] # players.append(best_player)