Ejemplo n.º 1
0
def normal_play():
    """Play 1000 Ludo games: three random players against one Q-learning player.

    Each player is tagged with a fixed ``id`` (its index in the initial list)
    so wins can be attributed after the seating order is shuffled before every
    game. After each game the Q-learning player's ``append_reward()`` is
    called; after all games its ``saveQTable()`` and ``saveReward()`` are
    called. The win counts and win percentages are printed at the end.

    Returns:
        None. Results are reported on stdout and via the tqdm progress bar.
    """
    players = [LudoPlayerRandom() for _ in range(3)]

    epsilon = 0.05
    discount_factor = 0.5
    learning_rate = 0.25
    parameters = [epsilon, discount_factor, learning_rate]

    t1 = LudoPlayerQLearningAction(parameters, chosenPolicy="greedy", QtableName='QTable', RewardName='Reward')
    players.append(t1)
    for i, player in enumerate(players):
        # Attach an id attribute dynamically; it is not declared in the player classes.
        player.id = i

    # One win counter per player, indexed by player.id.
    score = [0] * len(players)

    def win_rates():
        """Return each player's share of the games won so far (zeros before any win)."""
        total = sum(score)
        if total == 0:
            # Avoid 0/0 -> nan (and a NumPy RuntimeWarning) before the first game ends.
            return np.zeros(len(score))
        return np.asarray(score) / total

    n = 1000
    start_time = time.time()
    tqdm_1 = tqdm(range(n), ascii=True)
    for _ in tqdm_1:
        tqdm_1.set_description_str(f"win rates {np.around(win_rates(), decimals=2)*100}")
        random.shuffle(players)
        ludoGame = LudoGame(players)

        # play_full_game() returns an index into the (shuffled) players list.
        winner = ludoGame.play_full_game()
        score[players[winner].id] += 1

        for player in players:  # Saving reward for QLearning player
            if isinstance(player, LudoPlayerQLearningAction):
                player.append_reward()

    for player in players:
        if isinstance(player, LudoPlayerQLearningAction):
            player.saveQTable()
            player.saveReward()

    # Wall-clock time of the whole run; computed but not reported in this function.
    duration = time.time() - start_time

    print('win distribution percentage', win_rates() * 100)
    print('win distribution:', score)
Ejemplo n.º 2
0
from pyludo import LudoGame
from pyludo.StandardLudoPlayers import LudoPlayerRandom, LudoPlayerFast, LudoPlayerAggressive, LudoPlayerDefensive
import random
import time

# One instance of each built-in strategy competes in every game.
players = [LudoPlayerRandom(), LudoPlayerFast(), LudoPlayerAggressive(), LudoPlayerDefensive()]

# Win counter per strategy, keyed by the player's name.
scores = {player.name: 0 for player in players}

n = 1000

start_time = time.time()
for game_index in range(n):
    # Shuffle the list order in place before each game.
    random.shuffle(players)
    # The returned winner is an index into the shuffled players list.
    winner = LudoGame(players).play_full_game()
    scores[players[winner].name] += 1
    print('Game ', game_index, ' done')
duration = time.time() - start_time

print('win distribution:', scores)
print('games per second:', n / duration)
Ejemplo n.º 3
0
    # Number of games for this evaluation (used outside the visible excerpt).
    game_count = 2500

    # Load one saved chromosome array per GA player variant.
    # Presumably each file holds the winning individual of generation 1000
    # for that population setup -- inferred from the path names; confirm.
    sim_chromo = np.load(
        'genetic_algorithm/populations/Simple/Simple_Cellular-20-10_Blend-0.5_NStep/1000.pop.winner.npy'
    )
    adv_chromo = np.load(
        'genetic_algorithm/populations/Advanced/Advanced_Normal-100-10_Whole_Normal-0.1/1000.pop.winner.npy'
    )
    full_chromo = np.load(
        'genetic_algorithm/populations/Full/Full_Cellular-100-10_Whole_Normal-0.1/1000.pop.winner.npy'
    )

    # Build one player per loaded chromosome, plus a random player.
    sim_player = GAPlayerSimple(sim_chromo)
    adv_player = GAPlayerAdvanced(adv_chromo)
    full_player = GAPlayerFull(full_chromo)
    rand_player = LudoPlayerRandom()

    players = [sim_player, adv_player, full_player, rand_player]

    # NOTE(review): the file is opened without a context manager and is not
    # closed within the visible lines -- confirm it is closed further down.
    f = open('genetic_algorithm/agent_evaluations/prediction_times.txt', 'w')

    # Map each player object to a fixed id (its index in the list above) and
    # record the name/id pairing both on stdout and in the output file.
    tournament_player_ids = {}
    for i, player in enumerate(players):
        tournament_player_ids[player] = i
        print(f'{player.name} with id {i}')
        f.write(f'{player.name} with id {i}\n')

    # Per-variant accumulators; presumably filled with prediction timings
    # later in the function (not visible here).
    pred_times_sim = []
    pred_times_adv = []
    pred_times_full = []
Ejemplo n.º 4
0
#             for l in learning_rate:
#                 p = multiprocessing.Process(target=play_with_on_QLearning_thread, args=(10000, e, d, l))
#                 multiprocess.append(p)
#                 p.start()

#     for index, process in enumerate(multiprocess):

#         process.join()

#     print('That took {} seconds'.format(time.time() - starttime))

####################################################################################################################################################
###                                                              SINGLE GAME TEST                                                                ###
####################################################################################################################################################

# Single-game test setup: four random players.
players = [LudoPlayerRandom() for _ in range(4)]
# players.append(LudoPlayerQLearning("epsilon greedy", QtableName='QTable', RewardName='Reward', epsilon=0.1, discount_factor=0.5, learning_rate=0.1))
for i, player in enumerate(players):
    player.id = i  # attach an id attribute dynamically; it is not declared in the class

# One win counter per player id.
score = [0, 0, 0, 0]

# Number of games to play.
n = 100

start_time = time.time()
for i in range(n):
    # Shuffle the list order in place before each game.
    random.shuffle(players)
    ludoGame = LudoGame(players)

    # for player in players: # Saving reward for QLearning player
    #     if player.id == 3: