Python q_learning Beispiele, src.q_learning Python Beispiele

Beispiel #1

0

Datei anzeigen

    def compare_max_first_random(list_maps):

        for name_map, number_episodes in list_maps:
            game_map = Map(name_map)
            _, _, _, _, q_values_max, number_visited_max = q_learning(
                game_map=game_map,
                epsilon_greedy=max_first,
                training_episode=number_episodes)
            _, _, _, _, q_values_random, number_visited_random = q_learning(
                game_map=game_map,
                epsilon_greedy=random_choice_action,
                training_episode=number_episodes)

            plt.clf()
            fig = plt.figure()
            plt.plot(range(1,
                           len(q_values_max) + 1),
                     q_values_max,
                     color='yellow',
                     label='Max Fist')
            plt.plot(range(1,
                           len(q_values_random) + 1),
                     q_values_random,
                     color='blue',
                     label="Random")
            plt.title("Evolution q values, Map : " +
                      name_map.split('/')[-1].split('.')[0])
            plt.xlabel("Number episodes")
            plt.ylabel("Mean Q Values")
            plt.legend()
            plt.savefig('plot/Evolution_q_values_Map_' +
                        name_map.split('/')[-1].split('.')[0] + '.png')

            plt.clf()
            fig = plt.figure()
            plt.plot(range(1,
                           len(number_visited_max) + 1),
                     number_visited_max,
                     color='yellow',
                     label='Max Fist')
            plt.plot(range(1,
                           len(number_visited_random) + 1),
                     number_visited_random,
                     color='blue',
                     label='Random')
            plt.title("Len Q, Map : " + name_map.split('/')[-1].split('.')[0])
            plt.xlabel("Number episodes")
            plt.ylabel("Len Q")
            plt.legend()
            plt.savefig('plot/Len_Q_Map_' +
                        name_map.split('/')[-1].split('.')[0] + '.png')

Beispiel #2

0

Datei anzeigen

    def compare_q_learning_sarsa(list_maps):
        param = [("max first", max_first, 'default'),
                 ("Explore : softmax", explore, 'softmax'),
                 ('Explore-exploitation : softmax', explore_exploitation,
                  'softmax')]

        for name_map, number_episodes in list_maps:
            game_map = Map(name_map)
            for p in param:
                _, _, _, _, q_values_q, number_q = q_learning(
                    game_map=game_map,
                    training_episode=number_episodes,
                    epsilon_greedy=p[1],
                    strategy=p[2])

                _, _, _, _, q_values_sarsa, number_sarsa = sarsa(
                    game_map=game_map,
                    training_episode=number_episodes,
                    epsilon_greedy=p[1],
                    strategy=p[2])

                plt.clf()
                fig = plt.figure()
                plt.plot(range(1,
                               len(q_values_q) + 1),
                         q_values_q,
                         color='green',
                         label='Q-Learning')
                plt.plot(range(1,
                               len(q_values_sarsa) + 1),
                         q_values_sarsa,
                         color='red',
                         label="SARSA")
                plt.title("Evolution q values : " + p[0] + " Map : " +
                          name_map.split('/')[-1].split('.')[0])
                plt.xlabel("Number episodes")
                plt.ylabel("Mean Q Values")
                plt.legend()
                plt.savefig('plot/Q_Learning_vs_SARSA_Value_' + p[0] + '_' +
                            name_map.split('/')[-1].split('.')[0] + '.png')

                plt.clf()
                fig = plt.figure()
                plt.plot(range(1,
                               len(number_q) + 1),
                         number_q,
                         color='yellow',
                         label='Q-Learning')
                plt.plot(range(1,
                               len(number_sarsa) + 1),
                         number_sarsa,
                         color='blue',
                         label='SARSA')
                plt.title("Len Q " + p[0] + " Map : " +
                          name_map.split('/')[-1].split('.')[0])
                plt.xlabel("Number episodes")
                plt.ylabel("Len Q")
                plt.legend()
                plt.savefig('plot/Q_Learning_vs_SARSA_Len_' + p[0] + '_' +
                            name_map.split('/')[-1].split('.')[0] + '.png')

Beispiel #3

0

Datei anzeigen

    def eval_score_max_first(list_maps):

        for name_map, number_episodes in list_maps:
            game_map = Map(name_map)
            _, _, _, _, q_values, _ = q_learning(
                game_map=game_map,
                epsilon_greedy=max_first,
                training_episode=number_episodes)
            plt.clf()
            fig = plt.figure()
            plt.plot(range(1, len(q_values) + 1), q_values)
            plt.title("Max Fist : evolution q values Map : " +
                      name_map.split('/')[-1].split('.')[0])
            plt.xlabel("Number episodes")
            plt.ylabel("Mean Q Values")
            plt.savefig('plot/Max_Fist_evolution_q_values_Map_' +
                        name_map.split('/')[-1].split('.')[0] + '.png')

Beispiel #4

0

Datei anzeigen

    def test_hyperparameter(args):
        name_map, number_episodes, name_epsilon_greedy, epsilon_greedy, strategy = args
        learning_rates = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7]
        discovering_factors = [0.9, 0.85, 0.8, 0.75, 0.7, 0.65, 0.5]
        game_map = Map(name_map)
        hearder_table = [
            "D.F. / L.R.", "0.1", "0.2", "0.3", "0.4", "0.5", "0.6", "0.7"
        ]

        win_ant = []
        win_eval = []

        for discovering_factor in discovering_factors:
            w_ant = [discovering_factor]
            w_eval = [discovering_factor]

            for learning_rate in learning_rates:
                Q = {}
                Q, _, _, win_games, _, _ = q_learning(
                    Q=Q,
                    game_map=game_map,
                    training_episode=number_episodes,
                    epsilon_greedy=epsilon_greedy,
                    strategy=strategy,
                    learning_rate=learning_rate,
                    discovering_factor=discovering_factor)
                w_ant.append(round(win_games, 4))
                w_eval.append(
                    round(Eval.run_games(Q, game_map, number_episodes), 2))

            win_ant.append(w_ant)
            win_eval.append(w_eval)

        df1 = pd.DataFrame(win_ant, columns=hearder_table)
        df1.to_excel("xlsx/Hyperparameters_{}_{}_Training.xlsx".format(
            name_epsilon_greedy,
            name_map.split('/')[-1].split('.')[0]))

        df2 = pd.DataFrame(win_eval, columns=hearder_table)
        df2.to_excel("xlsx/Hyperparameters_{}_{}_Evaluation.xlsx".format(
            name_epsilon_greedy,
            name_map.split('/')[-1].split('.')[0]))

Beispiel #5

0

Datei anzeigen

    def plot_score(list_maps):
        param = [("random", random_choice_action, "default"),
                 ("max first", max_first, 'default'),
                 ("Explore : default", explore, 'default'),
                 ("Explore : softmax", explore, 'softmax'),
                 ("Explore : uct", explore, 'uct'),
                 ('Explore-exploitation : default', explore_exploitation,
                  'default'),
                 ('Explore-exploitation : softmax', explore_exploitation,
                  'softmax')]

        for name_map, number_episodes in list_maps:
            game_map = Map(name_map)
            for p in param:
                _, train_scores, eval_scores, _, _, _ = q_learning(
                    game_map=game_map,
                    training_episode=number_episodes,
                    epsilon_greedy=p[1],
                    strategy=p[2])
                plt.clf()
                fig = plt.figure()
                plt.plot(np.linspace(1, len(train_scores), len(train_scores)),
                         np.convolve(train_scores, [0.2, 0.2, 0.2, 0.2, 0.2],
                                     "same"),
                         linewidth=1.0,
                         color="blue",
                         label='Training')
                plt.plot(np.linspace(10, len(train_scores), len(eval_scores)),
                         eval_scores,
                         linewidth=2.0,
                         color="red",
                         label='Evaluation')
                plt.title("Score evolution: " + p[0] + " Map " + name_map)
                plt.ylabel("Average score")
                plt.xlabel("Episode")
                plt.legend()
                plt.savefig("plot/" + "Score_evolution" + p[0] + "_Map_" +
                            name_map.split('/')[-1].split('.')[0] + ".png")

Beispiel #6

0

Datei anzeigen

from src.eval import *
from src.map import *
from src.display import *
from src.q_learning import *
from src.play import Game

if __name__ == '__main__':
    game_map = Map("config/mini_map.txt")

    Q, _, _, win, _, _ = q_learning(game_map, epsilon_greedy=max_first, strategy='default', epsilon=0.9,
                                    training_episode=10000, verbose=False)
    print(win)
    Game.run_simulation(game_map, Q, epsilon_greedy=explore_exploitation)