# NOTE: These snippets assume the following imports, plus the project's own
# Ludo classes (LudoGame, LudoPlayerRandom, the Q-learning and GA players,
# PlotStatistics, ...), whose module paths are not shown here.
import random
import time

import numpy as np
from tqdm import tqdm


def normal_training():
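    """Set up three random opponents and plot the reward curve for the
    token-based Q-learning player.

    The training loop is kept below for reference but commented out, so only
    the reward CSV logged by a previous run is plotted.
    """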

    players = [LudoPlayerRandom() for _ in range(3)]

    # GA-optimized parameters
    # e = 0.40463712
    # d = 0.14343606
    # a = 0.10783296

    # Exhaustive-search parameters
    e = 0.05
    d = 0.5
    a = 0.25

    # players.append(LudoPlayerQLearningToken("epsilon greedy", QtableName='Qlearning_token_logs/QTable_token_40000', RewardName='Qlearning_token_logs/Reward_token_40000', epsilon=e, discount_factor=d, learning_rate=a))
    # for i, player in enumerate(players):
    #     player.id = i  # manually assign an attribute not defined in the class

    # score = [0, 0, 0, 0]

    # n = 40000
    # start_time = time.time()
    # tqdm_1 = tqdm(range(n), ascii=True)
    # for i in tqdm_1:
    #     tqdm_1.set_description_str(f"win rates {np.around(score/np.sum(score),decimals=2)*100}")
    #     random.shuffle(players)
    #     ludoGame = LudoGame(players)

    #     winner = ludoGame.play_full_game()
    #     score[players[winner].id] += 1

    #     for player in players: # Saving reward for QLearning player
    #         if type(player)==LudoPlayerQLearningToken:
    #             player.append_reward()

    # for player in players:
    #     if type(player)==LudoPlayerQLearningToken:
    #         player.saveQTable() # only one player that is Qlearning
    #         player.saveReward()

    # duration = time.time() - start_time

    # print('win distribution:', score)

    # print('win distribution percentage', (score/np.sum(score))*100)
    # print('games per second:', n / duration)

    Plot = PlotStatistics()
    Plot.plotReward(
        pathToCSV=f'Qlearning_token_logs/Reward_token_40000_e-{e}_d-{d}_a-{a}.csv',
        numMovAvg=1000)
Example #2
def normal_play():
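    """Set up three random opponents plus a LudoPlayerQLearningAction player
    and plot its reward curve.

    The game loop is commented out, so only the previously logged reward CSV
    is plotted.
    """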
    players = [LudoPlayerRandom() for _ in range(3)]

    # Exhaustive-search parameters (GA-optimized values in the comments)
    epsilon = 0.05  # GA: 0.40463712
    discount_factor = 0.5  # GA: 0.14343606
    learning_rate = 0.25  # GA: 0.10783296
    parameters = [epsilon, discount_factor, learning_rate]

    t1 = LudoPlayerQLearningAction(
        parameters,
        chosenPolicy="epsilon greedy",
        QtableName='Qlearning_action_logs/1QTable_action_r_win',
        RewardName='Qlearning_action_logs/1Reward_action_r_win')
    players.append(t1)
    for i, player in enumerate(players):
        player.id = i  # manually assign an attribute not defined in the class

    score = [0, 0, 0, 0]

    # n = 40000
    # start_time = time.time()
    # tqdm_1 = tqdm(range(n), ascii=True)
    # for i in tqdm_1:
    #     tqdm_1.set_description_str(f"win rates {np.around(score/np.sum(score),decimals=2)*100}")
    #     random.shuffle(players)
    #     ludoGame = LudoGame(players)

    #     winner = ludoGame.play_full_game()
    #     score[players[winner].id] += 1

    #     for player in players: # Saving reward for QLearning player
    #         if type(player)==LudoPlayerQLearningAction:
    #             player.append_reward()

    # for player in players:
    #     if type(player)==LudoPlayerQLearningAction:
    #         player.saveQTable()
    #         player.saveReward()

    # duration = time.time() - start_time

    # print('win distribution percentage', (score/np.sum(score))*100)
    # print('win distribution:', score)

    Plot = PlotStatistics()
    Plot.plotReward(
        pathToCSV=f'Qlearning_action_logs/1Reward_action_r_win_e-{epsilon}_d-{discount_factor}_a-{learning_rate}.csv',
        numMovAvg=1000)
Example #3
def main():
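    """Train a LudoPlayerQLearningSimple player against three random players.

    Plays n games, logs per-game rewards, saves the Q-table and reward CSV,
    prints the win distribution and plots a moving-average reward curve.
    """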
    players = [LudoPlayerRandom() for _ in range(3)]
    epsilon = 0.1
    discount_factor = 0.5
    learning_rate = 0.1
    parameters = [epsilon, discount_factor, learning_rate]
    t1 = LudoPlayerQLearningSimple(parameters,
                                   chosenPolicy="greedy",
                                   QtableName='1_QTable_simple',
                                   RewardName='1_Reward_simple')
    players.append(t1)
    for i, player in enumerate(players):
        player.id = i  # manually assign an attribute not defined in the class

    score = [0, 0, 0, 0]

    n = 5000
    start_time = time.time()
    tqdm_1 = tqdm(range(n), ascii=True)
    for i in tqdm_1:
        tqdm_1.set_description_str(
            f"win rates {np.around(score/np.sum(score),decimals=2)*100}")
        random.shuffle(players)
        ludoGame = LudoGame(players)

        winner = ludoGame.play_full_game()
        score[players[winner].id] += 1

        for player in players:  # Saving reward for QLearning player
            if type(player) == LudoPlayerQLearningSimple:
                player.append_reward()
                player.reset_upd_val()
                # player.rewards.append(player.total_reward)
                # player.total_reward = 0

    for player in players:
        if type(player) == LudoPlayerQLearningSimple:
            player.saveQTable()
            player.saveReward()

    duration = time.time() - start_time

    print('win distribution:', score)

    Plot = PlotStatistics()
    Plot.plotReward(pathToCSV='1_Reward_simple_e-0.1_d-0.5_a-0.1.csv',
                    numMovAvg=1000)
Example #4
def normal_training():
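    """Train a LudoPlayerQLearningFullAction player against three random
    players, save its Q-table and rewards, print win statistics and plot the
    moving-average reward curve.
    """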

    players = [LudoPlayerRandom() for _ in range(3)]
    players.append(
        LudoPlayerQLearningFullAction("epsilon greedy",
                                      QtableName='1_QTable_action_full_r_cum',
                                      RewardName='1_Reward_action_full_r_cum',
                                      epsilon=0.1,
                                      discount_factor=0.5,
                                      learning_rate=0.1))
    for i, player in enumerate(players):
        player.id = i  # manually assign an attribute not defined in the class

    score = [0, 0, 0, 0]

    n = 15000
    start_time = time.time()
    tqdm_1 = tqdm(range(n), ascii=True)
    for i in tqdm_1:
        tqdm_1.set_description_str(
            f"win rates {np.around(score/np.sum(score),decimals=2)*100}")
        random.shuffle(players)
        ludoGame = LudoGame(players)

        winner = ludoGame.play_full_game()
        score[players[winner].id] += 1

        for player in players:  # Saving reward for QLearning player
            if type(player) == LudoPlayerQLearningFullAction:
                player.append_reward()

    for player in players:
        if type(player) == LudoPlayerQLearningFullAction:
            player.saveQTable()  # only one Q-learning player to save
            player.saveReward()

    duration = time.time() - start_time

    print('win distribution:', score)

    print('win distribution percentage', (score / np.sum(score)) * 100)
    print('games per second:', n / duration)

    Plot = PlotStatistics()
    Plot.plotReward(
        pathToCSV='1_Reward_action_full_r_cum_e-0.1_d-0.5_a-0.1.csv',
        numMovAvg=1000)
Example #5
    def __play_against_random(self, best_pop_player, game_iterations, p_id, win_rate):
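        """Play best_pop_player against three random players for
        game_iterations games, store its win count in win_rate[p_id] and
        print its win percentages."""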
        players = [LudoPlayerRandom() for _ in range(3)]
        players.append(best_pop_player)

        for i, player in enumerate(players):
            player.id = i  # manually assign an attribute not defined in the class

        score = [0, 0, 0, 0]

        for i in range(game_iterations):
            random.shuffle(players)
            ludoGame = LudoGame(players)

            winner = ludoGame.play_full_game()
            score[players[winner].id] += 1

        win_rate[p_id] = score[3]

        print(f'Chromosome {best_pop_player.chromosome} win percentage against random players: {np.around(score/np.sum(score),decimals=2)*100}')
Example #6
def play_with_on_QLearning_thread(num_games, epsilon, discount_factor,
                                  learning_rate):
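    """Train a LudoPlayerQLearningToken player with the given hyperparameters
    against three random players for num_games games, save its Q-table and
    rewards, and print the resulting win distribution."""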
    players = [LudoPlayerRandom() for _ in range(3)]
    players.append(
        LudoPlayerQLearningToken("epsilon greedy",
                                 QtableName='Param_optimization/QTable',
                                 RewardName='Param_optimization/Reward',
                                 epsilon=epsilon,
                                 discount_factor=discount_factor,
                                 learning_rate=learning_rate))
    for i, player in enumerate(players):
        player.id = i  # manually assign an attribute not defined in the class

    score = [0, 0, 0, 0]

    n = num_games

    for i in range(n):
        random.shuffle(players)
        ludoGame = LudoGame(players)

        winner = ludoGame.play_full_game()
        score[players[winner].id] += 1

        for player in players:  # Saving reward for the Q-learning player
            if type(player) == LudoPlayerQLearningToken:
                player.append_reward()

        if i % 2500 == 0:
            print('Game ', i, ' done')

    for player in players:
        if type(player) == LudoPlayerQLearningToken:
            player.saveQTable()
            player.saveReward()

    print(
        f'Player with eps={epsilon}, discount_factor={discount_factor} and learning_rate={learning_rate}: win distribution {np.around(score/np.sum(score),decimals=2)*100} %'
    )
Example #7
def eval(best_player):
    # Evaluating the performance of the best player against two random players.
    # Note: best_player fills two seats; the id loop below assigns id 2 and then
    # id 3 to the same object, so all of its wins accumulate in score[3].
    players = [LudoPlayerRandom() for _ in range(2)]
    players.append(best_player)
    players.append(best_player)

    n_games = 10000

    for i, player in enumerate(players):
        player.id = i  # manually assign an attribute not defined in the class

    score = [0, 0, 0, 0]

    tqdm_1 = tqdm(range(n_games), ascii=True)
    for i in tqdm_1:
        random.shuffle(players)
        ludoGame = LudoGame(players)

        winner = ludoGame.play_full_game()
        score[players[winner].id] += 1

        tqdm_1.set_description_str(
            f"Validating best player: win rates {np.around(score/np.sum(score),decimals=2)*100}"
        )
Example #8
def play_on_thread(num_games, player_type, opponent_type):
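    """Play num_games between two players of player_type and two players of
    opponent_type (Q-learning players use a greedy policy), then print each
    side's win percentage and binomial-test significance."""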

    # Player initialization
    if player_type is LudoPlayerRandom:
        players = [player_type() for _ in range(2)]
    elif player_type is LudoPlayerQLearningAction:
        # Exhaustive-search parameters
        epsilon = 0.05
        discount_factor = 0.5
        learning_rate = 0.25
        parameters = [epsilon, discount_factor, learning_rate]
        players = [
            LudoPlayerQLearningAction(
                parameters,
                chosenPolicy="greedy",
                QtableName='Qlearning_action_logs/QTable_action_r_win',
                RewardName='Qlearning_action_logs/Reward_action_r_win')
            for _ in range(2)
        ]
    elif player_type is LudoPlayerQLearningToken:
        # Exhaustive-search parameters
        epsilon = 0.05
        discount_factor = 0.5
        learning_rate = 0.25
        players = [
            LudoPlayerQLearningToken(
                "greedy",
                QtableName='Qlearning_token_logs/QTable_token_40000',
                RewardName='Qlearning_token_logs/Reward_token_40000',
                epsilon=epsilon,
                discount_factor=discount_factor,
                learning_rate=learning_rate) for _ in range(2)
        ]
    elif player_type is simple_GA_player:
        chromosome = [0.800085, 2.05562201, 0.55735083, -0.9978861]
        players = [simple_GA_player(chromosome) for _ in range(2)]
    elif player_type is MathiasPlayer:
        players = [MathiasPlayer() for _ in range(2)]
    elif player_type is smart_player:
        players = [smart_player() for _ in range(2)]
    else:
        print("player not found please check correctly added")
        return

    # Opponent initialization
    if opponent_type is LudoPlayerRandom:
        players += [LudoPlayerRandom() for _ in range(2)]
    elif opponent_type is LudoPlayerQLearningAction:
        # Exhaustive-search parameters
        epsilon = 0.05
        discount_factor = 0.5
        learning_rate = 0.25
        parameters = [epsilon, discount_factor, learning_rate]
        players += [
            LudoPlayerQLearningAction(
                parameters,
                chosenPolicy="greedy",
                QtableName='Qlearning_action_logs/QTable_action_r_win',
                RewardName='Qlearning_action_logs/Reward_action_r_win')
            for _ in range(2)
        ]
    elif opponent_type is LudoPlayerQLearningToken:
        # Exhaustive-search parameters
        epsilon = 0.05
        discount_factor = 0.5
        learning_rate = 0.25
        players += [
            LudoPlayerQLearningToken(
                "greedy",
                QtableName='Qlearning_token_logs/QTable_token_40000',
                RewardName='Qlearning_token_logs/Reward_token_40000',
                epsilon=epsilon,
                discount_factor=discount_factor,
                learning_rate=learning_rate) for _ in range(2)
        ]
    elif opponent_type is simple_GA_player:
        chromosome = [0.800085, 2.05562201, 0.55735083, -0.9978861]
        players += [simple_GA_player(chromosome) for _ in range(2)]
    elif opponent_type is MathiasPlayer:
        players += [MathiasPlayer() for _ in range(2)]
    elif opponent_type is smart_player:
        players += [smart_player() for _ in range(2)]
    else:
        print("Opponent type not found; please check that it is added correctly")
        return

    for i, player in enumerate(players):
        player.id = i  # manually assign an attribute not defined in the class

    score = [0, 0, 0, 0]

    n = num_games

    for i in range(n):
        random.shuffle(players)
        ludoGame = LudoGame(players)
        winner = ludoGame.play_full_game()
        score[players[winner].id] += 1
        # if i%2500 == 0:
        #     print('Game ', i, ' done')

    win_rate_percent_player = np.sum((score / np.sum(score))[0:2]) * 100
    win_rate_percent_opponent = np.sum((score / np.sum(score))[2:]) * 100

    significant_player = binominal_test(num_games, np.sum(score[0:2]))
    significant_opponent = binominal_test(num_games, np.sum(score[2:]))

    print(
        f'Player: {player_type.name}, Won: {win_rate_percent_player:.2f} %, Significant: {significant_player} vs. Opponent: {opponent_type.name}, Won: {win_rate_percent_opponent:.2f} %, Significant: {significant_opponent}'
    )
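

# The binominal_test helper used above is not defined in these snippets.
# A minimal sketch of what it might look like, assuming it checks whether a
# side's win count over num_games deviates significantly from the 50% that
# two evenly matched pairs of players would be expected to score
# (two-sided binomial test, alpha = 0.05). The name, signature, and threshold
# here are hypothetical; the project's real helper may differ.
from scipy.stats import binomtest


def binominal_test_sketch(num_games, num_wins, expected_rate=0.5, alpha=0.05):
    """Return True if num_wins differs significantly from expected_rate."""
    result = binomtest(int(num_wins), n=int(num_games), p=expected_rate)
    return result.pvalue < alpha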