def param_optimization():
    starttime = time.time()
    epsilons = [0.1, 0.05]
    discount_factors = [0.9, 0.5]
    learning_rate = [0.5, 0.25, 0.1, 0.05]

    combination = []
    multiprocess = []

    for e in epsilons:
        for d in discount_factors:
            for l in learning_rate:
                p = multiprocessing.Process(
                    target=play_with_on_QLearning_thread,
                    args=(10000, e, d, l))
                multiprocess.append(p)
                p.start()

    for process in multiprocess:
        process.join()
    print('That took {} seconds'.format(time.time() - starttime))
    Plot = PlotStatistics()
    Plot.plotMultiple(pathToFolder="Param_optimization", numMovAvg=1000)
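# Hedged sketch (not part of the original example): the grid above starts all
# 2*2*4 = 16 worker processes at once. The same sweep could be bounded with a
# multiprocessing.Pool instead; play_with_on_QLearning_thread is assumed to take
# (n_games, epsilon, discount_factor, learning_rate), matching the args tuple above.
import itertools
import multiprocessing


def param_optimization_pooled(n_games=10000, max_workers=4):
    # Same hyperparameter grid as param_optimization() above.
    grid = itertools.product([0.1, 0.05], [0.9, 0.5], [0.5, 0.25, 0.1, 0.05])
    jobs = [(n_games, e, d, l) for e, d, l in grid]
    # Pool caps concurrency at max_workers instead of spawning every process up front.
    with multiprocessing.Pool(processes=max_workers) as pool:
        pool.starmap(play_with_on_QLearning_thread, jobs)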
Example #2
def plot():
    Plot = PlotStatistics()
    labels = [
        r'$\theta_{1}$', r'$\theta_{2}$', r'$\theta_{3}$', r'$\theta_{4}$'
    ]
    Plot.plot_chromosome_2D(
        path_to_csv='GA_simple_logs/simple_ga_chromosomes.csv', labels=labels)
def normal_training():

    players = [LudoPlayerRandom() for _ in range(3)]

    # GA optimized param
    # e = 0.40463712
    # d = 0.14343606
    # a = 0.10783296

    # Exhaustive search param
    e = 0.05
    d = 0.5
    a = 0.25

    # players.append(LudoPlayerQLearningToken("epsilon greedy", QtableName='Qlearning_token_logs/QTable_token_40000', RewardName='Qlearning_token_logs/Reward_token_40000', epsilon=e, discount_factor=d, learning_rate=a))
    # for i, player in enumerate(players):
    #     player.id = i  # assign an attribute that is not defined in the class

    # score = [0, 0, 0, 0]

    # n = 40000
    # start_time = time.time()
    # tqdm_1 = tqdm(range(n), ascii=True)
    # for i in tqdm_1:
    #     tqdm_1.set_description_str(f"win rates {np.around(score/np.sum(score),decimals=2)*100}")
    #     random.shuffle(players)
    #     ludoGame = LudoGame(players)

    #     winner = ludoGame.play_full_game()
    #     score[players[winner].id] += 1

    #     for player in players: # Saving reward for QLearning player
    #         if type(player)==LudoPlayerQLearningToken:
    #             player.append_reward()

    # for player in players:
    #     if type(player)==LudoPlayerQLearningToken:
    #         player.saveQTable() # only one player that is Qlearning
    #         player.saveReward()

    # duration = time.time() - start_time

    # print('win distribution:', score)

    # print('win distribution percentage', (score/np.sum(score))*100)
    # print('games per second:', n / duration)

    Plot = PlotStatistics()
    Plot.plotReward(
        pathToCSV=f'Qlearning_token_logs/Reward_token_40000_e-{e}_d-{d}_a-{a}.csv',
        numMovAvg=1000)
Example #4
def normal_play():
    players = [LudoPlayerRandom() for _ in range(3)]

    epsilon = 0.05  # exhaustive-search value; GA-optimized alternative: 0.40463712
    discount_factor = 0.5  # exhaustive-search value; GA-optimized alternative: 0.14343606
    learning_rate = 0.25  # exhaustive-search value; GA-optimized alternative: 0.10783296
    parameters = [epsilon, discount_factor, learning_rate]

    t1 = LudoPlayerQLearningAction(
        parameters,
        chosenPolicy="epsilon greedy",
        QtableName='Qlearning_action_logs/1QTable_action_r_win',
        RewardName='Qlearning_action_logs/1Reward_action_r_win')
    players.append(t1)
    for i, player in enumerate(players):
        player.id = i  # assign an attribute that is not defined in the class

    score = [0, 0, 0, 0]

    # n = 40000
    # start_time = time.time()
    # tqdm_1 = tqdm(range(n), ascii=True)
    # for i in tqdm_1:
    #     tqdm_1.set_description_str(f"win rates {np.around(score/np.sum(score),decimals=2)*100}")
    #     random.shuffle(players)
    #     ludoGame = LudoGame(players)

    #     winner = ludoGame.play_full_game()
    #     score[players[winner].id] += 1

    #     for player in players: # Saving reward for QLearning player
    #         if type(player)==LudoPlayerQLearningAction:
    #             player.append_reward()

    # for player in players:
    #     if type(player)==LudoPlayerQLearningAction:
    #         player.saveQTable()
    #         player.saveReward()

    # duration = time.time() - start_time

    # print('win distribution percentage', (score/np.sum(score))*100)
    # print('win distribution:', score)

    Plot = PlotStatistics()
    Plot.plotReward(
        pathToCSV=f'Qlearning_action_logs/1Reward_action_r_win_e-{epsilon}_d-{discount_factor}_a-{learning_rate}.csv',
        numMovAvg=1000)
Example #5
def main():
    players = [LudoPlayerRandom() for _ in range(3)]
    epsilon = 0.1
    discount_factor = 0.5
    learning_rate = 0.1
    parameters = [epsilon, discount_factor, learning_rate]
    t1 = LudoPlayerQLearningSimple(parameters,
                                   chosenPolicy="greedy",
                                   QtableName='1_QTable_simple',
                                   RewardName='1_Reward_simple')
    players.append(t1)
    for i, player in enumerate(players):
        player.id = i  # assign an attribute that is not defined in the class

    score = [0, 0, 0, 0]

    n = 5000
    start_time = time.time()
    tqdm_1 = tqdm(range(n), ascii=True)
    for i in tqdm_1:
        # max(..., 1) avoids a 0/0 warning before the first game is scored
        tqdm_1.set_description_str(
            f"win rates {np.around(np.array(score) / max(np.sum(score), 1), decimals=2) * 100}")
        random.shuffle(players)
        ludoGame = LudoGame(players)

        winner = ludoGame.play_full_game()
        score[players[winner].id] += 1

        for player in players:  # Saving reward for QLearning player
            if type(player) == LudoPlayerQLearningSimple:
                player.append_reward()
                player.reset_upd_val()
                # player.rewards.append(player.total_reward)
                # player.total_reward = 0

    for player in players:
        if type(player) == LudoPlayerQLearningSimple:
            player.saveQTable()
            player.saveReward()

    duration = time.time() - start_time

    print('win distribution:', score)

    Plot = PlotStatistics()
    Plot.plotReward(pathToCSV='1_Reward_simple_e-0.1_d-0.5_a-0.1.csv',
                    numMovAvg=1000)
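# Hedged stand-in (PlotStatistics.plotReward is project-specific and the CSV layout
# is an assumption: one reward value per game, no header). A minimal moving-average
# reward plot, analogous to numMovAvg=1000 above, could look like this:
import matplotlib.pyplot as plt
import pandas as pd


def plot_reward_moving_average(path_to_csv, num_mov_avg=1000):
    rewards = pd.read_csv(path_to_csv, header=None).iloc[:, 0]
    moving_avg = rewards.rolling(window=num_mov_avg).mean()  # smooth per-game rewards
    plt.plot(moving_avg)
    plt.xlabel('game')
    plt.ylabel(f'reward ({num_mov_avg}-game moving average)')
    plt.show()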
Example #6
def normal_training():

    players = [LudoPlayerRandom() for _ in range(3)]
    players.append(
        LudoPlayerQLearningFullAction("epsilon greedy",
                                      QtableName='1_QTable_action_full_r_cum',
                                      RewardName='1_Reward_action_full_r_cum',
                                      epsilon=0.1,
                                      discount_factor=0.5,
                                      learning_rate=0.1))
    for i, player in enumerate(players):
        player.id = i  # assign an attribute that is not defined in the class

    score = [0, 0, 0, 0]

    n = 15000
    start_time = time.time()
    tqdm_1 = tqdm(range(n), ascii=True)
    for i in tqdm_1:
        # max(..., 1) avoids a 0/0 warning before the first game is scored
        tqdm_1.set_description_str(
            f"win rates {np.around(np.array(score) / max(np.sum(score), 1), decimals=2) * 100}")
        random.shuffle(players)
        ludoGame = LudoGame(players)

        winner = ludoGame.play_full_game()
        score[players[winner].id] += 1

        for player in players:  # Saving reward for QLearning player
            if type(player) == LudoPlayerQLearningFullAction:
                player.append_reward()

    for player in players:
        if type(player) == LudoPlayerQLearningFullAction:
            player.saveQTable()  # only one player that is Qlearning
            player.saveReward()

    duration = time.time() - start_time

    print('win distribution:', score)

    print('win distribution percentage', (score / np.sum(score)) * 100)
    print('games per second:', n / duration)

    Plot = PlotStatistics()
    Plot.plotReward(
        pathToCSV='1_Reward_action_full_r_cum_e-0.1_d-0.5_a-0.1.csv',
        numMovAvg=1000)
Example #7
def plot():
    Plot = PlotStatistics()
    labels = [r'$\epsilon$', r'$\gamma$', r'$\alpha$']
    #Plot.plot_chromosome_2D(path_to_csv='GA_qlearning_param_opti_logs/qlean_param_opti_ga_chromosomes.csv', labels=labels)
    Plot.plot_chromosome_3D(
        path_to_csv='GA_qlearning_param_opti_logs/qlean_param_opti_ga_chromosomes.csv')
    Plot.plot_chromosome_2D_qlearning(
        path_to_csv='GA_qlearning_param_opti_logs/qlean_param_opti_ga_chromosomes.csv',
        labels=labels)
Example #8
# for i, player in enumerate(players):
#     player.id = i  # assign an attribute that is not defined in the class

# score = [0, 0, 0, 0]

# tqdm_1 = tqdm(range(n_games), ascii=True)
# for i in tqdm_1:
#     random.shuffle(players)
#     ludoGame = LudoGame(players)

#     winner = ludoGame.play_full_game()
#     score[players[winner].id] += 1

#     tqdm_1.set_description_str(f"Validating best player: win rates {np.around(score/np.sum(score),decimals=2)*100}")

Plot = PlotStatistics()
Plot.plot_chromosome_2D(path_to_csv='chromosomes_plot.csv')

##### TESTING FOR QLEARNING PARAMETERS #####

# basetournement_test = basetournement(LudoPlayerQLearningSimple, 20, type_player='qlearning') # 32 chromosomes max out the thread count of this computer.

# chromosomes = np.random.uniform(low=0, high=1, size=(20, 3)) # 32 chromosomes max out the thread count of this computer.
# print(chromosomes)

# best_player = basetournement_test.play_for_generations(chromosomes, tournament_it=100, generations_it=20, validation_it=1) # problem: gets the same 5 chromosomes as the best
# print(f'The best player chosen has chromosome {best_player.chromosome}')

# Evaluating the performance of the best player
# players = [LudoPlayerRandom() for _ in range(3)]
# players.append(best_player)
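# Hedged stand-in (plot_chromosome_2D belongs to the project's PlotStatistics class;
# the assumed CSV layout is one chromosome per row, one column per GA parameter).
# A basic version could plot each parameter against the row index:
import matplotlib.pyplot as plt
import pandas as pd


def plot_chromosome_2d(path_to_csv, labels=None):
    chromosomes = pd.read_csv(path_to_csv, header=None)
    for col in chromosomes.columns:
        label = labels[col] if labels else f'param {col}'
        plt.plot(chromosomes[col], label=label)  # parameter value per chromosome/row
    plt.xlabel('chromosome index')
    plt.ylabel('parameter value')
    plt.legend()
    plt.show()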