Example #1
0
def test(*, nb_iterations: int = 500000, timestamp: int = 1000,
         nbplay: int = 1000) -> tuple:
    """
    Train two agents on a game, plot distances to reference policies, then battle.

    Player 1 (a WoLF-PHC agent) is trained against another WoLF-PHC agent, and
    player 2 (a Minimax-Q agent) is trained against a random agent. Each entry
    of the two resulting policy sequences is compared, via ``distance``, with
    the ``pi`` of two reference "Nash" agents; the four distance curves are
    plotted, and the last policy of each player is passed to ``affrontement``.

    Args:
        nb_iterations: Forwarded to ``Scheduler`` as its iteration count.
        timestamp: Forwarded to ``Scheduler`` as its fourth-from-last argument
            (presumably the interval between recorded policies -- TODO confirm
            against ``Scheduler``).
        nbplay: Forwarded to ``affrontement`` (presumably the number of games
            played in the battle -- TODO confirm).

    Returns:
        The tuple ``(policy1, policy2)`` of policy sequences returned by
        ``Scheduler`` for player 1 and player 2.
    """

    #############################################
    ################ Choose Game ################
    #############################################
    #game = Soccer()
    #game = RockPaperScissors()
    game = GridWorld()
    # NOTE(review): the reference agents used further down are GridWorld-named;
    # presumably the distance section is only valid for GridWorld -- confirm
    # before switching to another game.

    players = game.players()
    playerA, playerB = players[0], players[1]  #player ID

    #############################################
    ### Choose Player 1 and training opponent ###
    #############################################
    player1 = WoLF_PHC_Agent(game, playerA)
    opponent1 = WoLF_PHC_Agent(game, playerB)

    #############################################
    ### Choose Player 2 and training opponent ###
    #############################################
    player2 = Minimax_Q_Agent(game, playerB)
    opponent2 = Random_Agent(game, playerA)

    #############################################
    ############## Train Policies ###############
    #############################################
    # perf_counter() is monotonic, so the elapsed value is not affected by
    # system clock adjustments during the (long) training runs.
    start_time = time.perf_counter()
    # Only the trained player's policies are kept; the opponent's are discarded.
    policy1, _ = Scheduler(game, nb_iterations, timestamp, player1, opponent1)
    print("Learning Time Player 1: ", time.perf_counter() - start_time)

    start_time = time.perf_counter()
    # Player 2 plays as playerB, so it is passed (and returned) second.
    _, policy2 = Scheduler(game, nb_iterations, timestamp, opponent2, player2)
    print("Learning Time Player 2: ", time.perf_counter() - start_time)

    #policy 1 : distances to Nash 1 and Nash 2
    optimal_Nash1_player0 = GridWorld_Nash1_Player0_Agent(game)
    optimal_Nash1_player0.compute_policy()
    optimal_Nash2_player0 = GridWorld_Nash2_Player0_Agent(game)
    optimal_Nash2_player0.compute_policy()
    d10 = [distance(policy, optimal_Nash1_player0.pi) for policy in policy1]
    d20 = [distance(policy, optimal_Nash2_player0.pi) for policy in policy1]

    #policy 2 : distances to Nash 1 and Nash 2
    optimal_Nash1_player1 = GridWorld_Nash1_Player1_Agent(game)
    optimal_Nash1_player1.compute_policy()
    optimal_Nash2_player1 = GridWorld_Nash2_Player1_Agent(game)
    optimal_Nash2_player1.compute_policy()
    d11 = [distance(policy, optimal_Nash1_player1.pi) for policy in policy2]
    d21 = [distance(policy, optimal_Nash2_player1.pi) for policy in policy2]

    #plot : blue = player 1, red = player 2; solid = Nash 1, dashed = Nash 2
    plt.plot(d10, 'b')
    plt.plot(d20, 'b--')
    plt.plot(d11, 'r')
    plt.plot(d21, 'r--')
    plt.show()

    #Battle : final policy of each player against the other
    print("Battle")
    affrontement(game, policy1[-1], policy2[-1], nbplay)
    return (policy1, policy2)


#test()