Code example #1
def sarsa_lambda_on_mountain_car_game():
    import q_learning as q
    import numpy as np
    from copy import deepcopy
    reload(q)
    game = q.MountainCarGame  # game factory: the class itself, not an instance

    state_adapter = q.mountain_car_game_tilings_state_adapter(tile_in_row=9,
                                                              n_tilings=5)

    q_algo1 = q.SARSALambda(game().get_actions(),
                            game().get_state(),
                            0,  # initial Q-value
                            memory_size=40,
                            state_adapter=state_adapter)
    q_algo1.epsilon = 0.2  # exploration rate
    q_algo1.lmbda = 0.9    # eligibility-trace decay
    q_algo1.gamma = 0.5    # discount factor

    visualizer = q.MountainCarGameVisualizer(q_algo1)
    teacher = q.Teacher(game, q_algo1, visualizer)

    teacher.teach(1)  # one visualized episode

    teacher = q.Teacher(game, q_algo1, q.GameNoVisualizer())
    teacher.teach(30)  # 30 unvisualized training episodes
Code example #2
def on_policy_is_more_about_safety():
    # On-policy SARSA tends to learn safer trajectories under an
    # epsilon-greedy policy than off-policy methods (cf. cliff walking).
    game = game_collect_all  # module-level game factory
    q_algo1 = q.SARSA(game().get_actions(), game().get_state(), 20)

    q_algo1.gamma = 0.5

    teacher = q.Teacher(game, q_algo1, q.GameNoVisualizer())

    q_algo1.alpha = 0.1
    q_algo1.epsilon = 0.1
    teacher.teach(1500)

    teacher = q.Teacher(game, q_algo1, q.CollectAllGameVisualizer())

    q_algo1.epsilon = 0  # act greedily for evaluation
    teacher.teach(1)
Code example #3
def random_on_mountain_car_game():
    import q_learning as q
    game = q.MountainCarGame()
    q_algo = q.RandomAlgo(game.get_actions())  # uniformly random baseline
    visualizer = q.MountainCarGameVisualizer()

    teacher = q.Teacher(game, q_algo, visualizer)

    teacher.teach(1)
Code example #4
def off_policy_example():
    game = game_collect_all  # module-level game factory
    q_algo1 = q.SARSRepeat(game().get_actions(), game().get_state())

    q_algo1.gamma = 0.5

    teacher = q.Teacher(game, q_algo1, q.GameNoVisualizer())

    q_algo1.alpha = 0.1
    q_algo1.epsilon = 0.1
    teacher.teach(1500)

    teacher = q.Teacher(game, q_algo1, q.CollectAllGameVisualizer())
    q_algo1.epsilon = 0  # act greedily for evaluation
    teacher.teach(1)
Code example #5
def random_on_space_invaders():
    import q_learning as q
    import numpy as np
    import ale_game as ag
    reload(q)
    reload(ag)
    ale = ag.init()
    game = ag.SpaceInvadersGame(ale)
    #game.show_vectorized(game.vectorized(ale.getScreen()))
    teacher = q.Teacher(game, q.RandomAlgo(game.get_actions()),
                        ag.SpaceInvadersGameVectorizedVisualizer())
    teacher.teach(1)
Code example #6
def sarsa_lambda_example2():
    game = game_big2  # module-level game factory
    q_algo1 = q.SARSALambda(game().get_actions(), game().get_state(), 20, 4)
    q_algo1.lmbda = 0.9999  # near-persistent eligibility traces

    q_algo1.gamma = 0.5

    teacher = q.Teacher(game, q_algo1, q.GameNoVisualizer())

    q_algo1.alpha = 0.1
    q_algo1.epsilon = 0.1
    teacher.teach(1500)

    teacher = q.Teacher(game, q_algo1, q.CollectAllGameVisualizer())
    q_algo1.epsilon = 0  # act greedily for evaluation
    teacher.teach(1)
Code example #7
def sarsa_lambda_example():
    game = game_collect_all  # module-level game factory
    q_algo1 = q.SARSALambda(game().get_actions(), game().get_state(), 20, 4)
    q_algo1.lmbda = 0.8  # eligibility-trace decay

    q_algo1.gamma = 0.5

    teacher = q.Teacher(game, q_algo1, q.GameNoVisualizer())

    q_algo1.alpha = 0.1
    q_algo1.epsilon = 0.1
    result_1 = teacher.teach(60)

    teacher = q.Teacher(game, q_algo1, q.GameNoVisualizer())
    q_algo1.epsilon = 0  # act greedily for evaluation
    result_2 = teacher.teach(1)
Code example #8
def sarsa_lambda_gradient_descent():
    import matplotlib.pyplot as plt
    plt.ion()
    import q_learning as q
    import numpy as np
    from copy import deepcopy
    reload(q)
    game = q.MountainCarGame  # game factory: the class itself, not an instance

    tile_in_row = 9
    n_tilings = 5

    state_adapter = q.mountain_car_game_tilings_state_adapter(
        tile_in_row=tile_in_row, n_tilings=n_tilings)

    state_adapter2 = lambda s: np.array(state_adapter(s))

    # float-valued theta so gradient updates are not truncated to integers
    initial_theta = np.ones(tile_in_row * tile_in_row * n_tilings)

    q_algo1 = q.SARSALambdaGradientDescent(game().get_actions(),
                                           game().get_state(),
                                           initial_q=0,
                                           initial_theta=initial_theta,
                                           state_adapter=state_adapter2)

    q_algo1.epsilon = 0.02  # exploration rate
    q_algo1.lmbda = 0.5     # eligibility-trace decay
    q_algo1.gamma = 0.9     # discount factor
    q_algo1.alpha = 0.1     # learning rate

    teacher = q.Teacher(game, q_algo1, q.MountainCarGameVisualizer(q_algo1))
    teacher.teach(1)

    teacher = q.Teacher(game, q_algo1, q.GameNoVisualizer())
    teacher.teach(30)
Code example #9
def sarsa_lambda_prioritized_memory_on_cliff():
    import q_learning as q
    import numpy as np
    from copy import deepcopy
    reload(q)
    game = game_collect_all  # module-level game factory (collect-all game, despite the function name)
    memory = q.PrioritizedMemory(100, 0)
    algo = q.SARSALambdaPrioritizedMemory(game().get_actions(), memory)

    algo.epsilon = 0.1
    algo.gamma = 0.7
    algo.alpha = 0.1

    teacher = q.Teacher(game, algo, q.GameNoVisualizer())

    result_0 = teacher.teach(60)
Code example #10
def sarsa_gd_on_space_invaders():
    import q_learning as q
    import numpy as np
    import ale_game as ag
    import matplotlib.pyplot as plt
    plt.ion()
    reload(q)
    reload(ag)
    ale = ag.init()
    run = '1'

    n_colors = 5

    def state_adapter(scr):
        # flatten the 14x20 vectorized screen (n_colors channels) and
        # return the indices of active features (sparse binary encoding)
        vect = np.reshape(ag.vectorized(scr, 14, 20), 14 * 20 * n_colors)
        return np.where(vect)[0]

    game = ag.SpaceInvadersGame(ale)
    q_algo1 = q.SARSALambdaGradientDescent(
        game.get_actions(),
        game.get_state(),
        initial_q=5,
        # float-valued theta so gradient updates are not truncated
        initial_theta=np.ones(14 * 20 * n_colors),
        be_positive=False,
        state_adapter=state_adapter)
    q_algo1.epsilon = 0.05
    q_algo1.lmbda = 0.99  # 0.9
    q_algo1.gamma = 0.999
    q_algo1.alpha = 0.5

    def new_game():
        # reset the shared ALE game so Teacher can use this as a factory
        game.ale.reset_game()
        game.finished = False
        game.cum_reward = 0
        return game

    teacher = q.Teacher(new_game,
                        q_algo1,
                        ag.SpaceInvadersGameVectorizedVisualizer(),
                        repeat_action=3)

    q_algo1.epsilon = 0
    q_algo1.log_freq = 1
    teacher.teach(1)  # one greedy, visualized episode as a sanity check

    initial_training = 1000   # fully exploratory warm-up episodes
    training_decay_from = 95  # number of epsilon-decay steps
    training_decay_ex = 50    # training episodes per decay step

    result_test = []
    result_1 = []
    result_2 = []

    teacher = q.Teacher(new_game,
                        q_algo1,
                        q.GameNoVisualizer(),
                        repeat_action=3)
    q_algo1.log_freq = 0.05
    q_algo1.epsilon = 1
    result_1 = teacher.teach(initial_training)

    q_algo1.epsilon = 0
    q_algo1.log_freq = 0.05
    result_test.append(teacher.teach(1))

    for i in range(training_decay_from):
        # decay epsilon from 1.0 toward 0.06; float division avoids the
        # Python 2 integer-division trap, where i / 100 would always be 0
        q_algo1.epsilon = 1 - i / 100.0
        teacher = q.Teacher(new_game,
                            q_algo1,
                            q.GameNoVisualizer(),
                            repeat_action=3)
        result_2.append(teacher.teach(training_decay_ex))
        q_algo1.epsilon = 0
        result_test.append(teacher.teach(1))

    # persist the learned parameters for later reuse (Python 2 cPickle)
    import cPickle as pickle
    with open('gradient_descent.theta' + run, 'wb') as handle:
        pickle.dump(q_algo1.theta, handle)

    with open('gradient_descent.gamma' + run, 'wb') as handle:
        pickle.dump(q_algo1.gamma, handle)

    with open('gradient_descent.lmbda' + run, 'wb') as handle:
        pickle.dump(q_algo1.lmbda, handle)

    with open('gradient_descent.alpha' + run, 'wb') as handle:
        pickle.dump(q_algo1.alpha, handle)

    # 200-episode moving average of per-episode reward, warm-up phase
    r1 = [a[1] for a in result_1]
    plt.plot(
        np.array(
            [x[1] - x[0] for x in zip(np.cumsum(r1),
                                      np.cumsum(r1)[200:])]) / 200)

    # 200-episode moving average over the epsilon-decay phase
    r2 = [a[1] for r in result_2 for a in r]
    plt.plot(
        np.array(
            [x[1] - x[0] for x in zip(np.cumsum(r2),
                                      np.cumsum(r2)[200:])]) / 200)

    # 50-episode moving average of the greedy test episodes
    r_test = [a[1] for r in result_test for a in r]
    plt.plot(
        np.array([
            x[1] - x[0] for x in zip(np.cumsum(r_test),
                                     np.cumsum(r_test)[50:])
        ]) / 50)

    # final run: 100 more training episodes with modest exploration,
    # followed by a 2-episode moving average of the rewards
    q_algo1.epsilon = 0.1
    teacher = q.Teacher(new_game,
                        q_algo1,
                        q.GameNoVisualizer(),
                        repeat_action=3)
    result_4 = teacher.teach(100)

    r_4 = [a[1] for a in result_4]
    plt.plot(
        np.array(
            [x[1] - x[0] for x in zip(np.cumsum(r_4),
                                      np.cumsum(r_4)[2:])]) / 2)
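
For completeness, a minimal sketch of restoring the parameters pickled above. The file names and the Python 2 cPickle format follow the training code; the helper name load_gradient_descent_params is hypothetical:

def load_gradient_descent_params(run='1'):
    import cPickle as pickle
    params = {}
    for name in ('theta', 'gamma', 'lmbda', 'alpha'):
        with open('gradient_descent.' + name + run, 'rb') as handle:
            params[name] = pickle.load(handle)
    return params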
Code example #11
def test(self, game_factory, algo_factory, teach_rounds=100, repeat=100):
    # average teach() results over `repeat` independently trained learners
    return np.array([
        q.Teacher(game_factory, algo_factory(),
                  q.GameNoVisualizer()).teach(teach_rounds)
        for i in range(repeat)
    ]).mean(axis=0)
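
A minimal usage sketch for this helper. Everything here is illustrative: it assumes `test` is defined on some experiment object (called `runner` below) and reuses `game_collect_all` and `q.SARSA` from the earlier examples; `sarsa_factory` is a hypothetical factory:

def sarsa_factory():
    # build a fresh SARSA learner for each repetition
    algo = q.SARSA(game_collect_all().get_actions(),
                   game_collect_all().get_state(), 20)
    algo.alpha = 0.1
    algo.epsilon = 0.1
    algo.gamma = 0.5
    return algo

mean_result = runner.test(game_collect_all, sarsa_factory,
                          teach_rounds=100, repeat=10)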