def test():
    """Train two agents on a game, plot their convergence, then battle them.

    Steps performed:

    1. Build the game (``GridWorld``) and read the two player IDs from it.
    2. Train player 1 (``WoLF_PHC_Agent``, playing as the first player)
       against another ``WoLF_PHC_Agent``.
    3. Train player 2 (``Minimax_Q_Agent``, playing as the second player)
       against a ``Random_Agent``.
    4. For every policy recorded during training, compute its ``distance``
       to the two GridWorld reference (Nash) policies of the matching
       player and plot the four resulting curves.
    5. Make the last recorded policy of each player play ``nbplay`` games
       against each other via ``affrontement``.

    Returns:
        tuple: ``(policy1, policy2)``, the policy histories returned by
        ``Scheduler`` for player 1 and player 2 respectively.
    """
    #############################################
    ################ Choose Game ################
    #############################################
    # Alternative games (swap the assignment below to use them):
    # game = Soccer()
    # game = RockPaperScissors()
    game = GridWorld()
    playerA, playerB = game.players()[0], game.players()[1]  # player IDs

    #############################################
    ### Choose Player 1 and training opponent ###
    #############################################
    player1 = WoLF_PHC_Agent(game, playerA)
    opponent1 = WoLF_PHC_Agent(game, playerB)

    #############################################
    ### Choose Player 2 and training opponent ###
    #############################################
    player2 = Minimax_Q_Agent(game, playerB)
    opponent2 = Random_Agent(game, playerA)

    #############################################
    ############## Train Policies ###############
    #############################################
    nb_iterations = 500000
    # NOTE(review): presumably the interval (in iterations) at which Scheduler
    # records a policy snapshot -- confirm against Scheduler's definition.
    timestamp = 1000

    # Scheduler returns one result per agent, in the order the agents were
    # passed; only the result of the agent under study is kept.
    start_time = time.time()
    policy1, _ = Scheduler(game, nb_iterations, timestamp, player1, opponent1)
    print("Learning Time Player 1: ", time.time() - start_time)

    # Player 2 plays as the second player, so it is passed (and returned) last.
    start_time = time.time()
    _, policy2 = Scheduler(game, nb_iterations, timestamp, opponent2, player2)
    print("Learning Time Player 2: ", time.time() - start_time)

    # Policy 1: distance of each recorded policy to reference policies 1 and 2.
    optimal_Nash1_player0 = GridWorld_Nash1_Player0_Agent(game)
    optimal_Nash1_player0.compute_policy()
    optimal_Nash2_player0 = GridWorld_Nash2_Player0_Agent(game)
    optimal_Nash2_player0.compute_policy()
    d10 = []
    d20 = []
    for recorded_policy in policy1:
        d10.append(distance(recorded_policy, optimal_Nash1_player0.pi))
        d20.append(distance(recorded_policy, optimal_Nash2_player0.pi))

    # Policy 2: distance of each recorded policy to reference policies 1 and 2.
    optimal_Nash1_player1 = GridWorld_Nash1_Player1_Agent(game)
    optimal_Nash1_player1.compute_policy()
    optimal_Nash2_player1 = GridWorld_Nash2_Player1_Agent(game)
    optimal_Nash2_player1.compute_policy()
    d11 = []
    d21 = []
    for recorded_policy in policy2:
        d11.append(distance(recorded_policy, optimal_Nash1_player1.pi))
        d21.append(distance(recorded_policy, optimal_Nash2_player1.pi))

    # Plot: blue = player 1, red = player 2;
    # solid = distance to Nash 1, dashed = distance to Nash 2.
    plt.plot(d10, 'b')
    plt.plot(d20, 'b--')
    plt.plot(d11, 'r')
    plt.plot(d21, 'r--')
    plt.show()

    # Battle: the last recorded policy of each player face each other.
    print("Battle")
    nbplay = 1000
    affrontement(game, policy1[-1], policy2[-1], nbplay)

    return (policy1, policy2)

#test()