def sarsa_lambda_gradient_descent():
    """Train a ``SARSALambdaGradientDescent`` learner on ``MountainCarGame``.

    Steps performed:

    1. Switch matplotlib to interactive mode and reload ``q_learning`` so
       edits made to that module during an interactive session are picked up.
    2. Build the tile-coding state adapter (9 tiles per row, 5 tilings) and
       wrap it so every adapted state is returned as a NumPy array.
    3. Create the learner with an all-ones ``initial_theta`` of length
       ``tile_in_row * tile_in_row * n_tilings`` and set its ``epsilon``,
       ``lmbda``, ``gamma`` and ``alpha`` attributes.
    4. Call ``teach(1)`` on a teacher that uses ``MountainCarGameVisualizer``
       and then ``teach(30)`` on a teacher that uses ``GameNoVisualizer``
       (the argument is presumably an episode count -- confirm against
       ``q_learning.Teacher``).

    NOTE: another function with this same name is defined later in this
    file; at import time that later definition replaces this one.
    """
    # The bare ``reload`` builtin exists only on Python 2. Import a version
    # that works on both major versions instead of relying on the builtin.
    try:
        from importlib import reload
    except ImportError:  # Python 2: importlib has no reload()
        from imp import reload

    import matplotlib.pyplot as plt
    plt.ion()

    import numpy as np
    import q_learning as q
    reload(q)

    game = q.MountainCarGame
    tile_in_row = 9
    n_tilings = 5

    # Keyword arguments keep the mapping explicit and independent of the
    # adapter's parameter order (the sibling SARSA(lambda) script in this
    # file calls it with these same keyword names).
    state_adapter = q.mountain_car_game_tilings_state_adapter(
        tile_in_row=tile_in_row, n_tilings=n_tilings)

    def array_state_adapter(state):
        """Return the adapted state converted to a NumPy array."""
        return np.array(state_adapter(state))

    # NOTE(review): this yields an integer-dtype array; confirm the learner
    # does not update theta in place with float increments.
    initial_theta = np.array([1] * tile_in_row * tile_in_row * n_tilings)

    q_algo1 = q.SARSALambdaGradientDescent(
        game().get_actions(),
        game().get_state(),
        initial_q=0,
        initial_theta=initial_theta,
        state_adapter=array_state_adapter)
    q_algo1.epsilon = 0.02
    q_algo1.lmbda = 0.5
    q_algo1.gamma = 0.9
    q_algo1.alpha = 0.1

    # First run with the visualizer attached, then continue without it.
    teacher = q.Teacher(game, q_algo1, q.MountainCarGameVisualizer(q_algo1))
    teacher.teach(1)

    teacher = q.Teacher(game, q_algo1, q.GameNoVisualizer())
    teacher.teach(30)
def sarsa_lambda_on_mountain_car_game():
    """Train a ``SARSALambda`` learner on ``MountainCarGame``.

    Reloads ``q_learning`` (so edits made to that module during an
    interactive session are picked up), builds the tile-coding state adapter
    (9 tiles per row, 5 tilings), creates the learner with ``memory_size=40``
    and sets its ``epsilon``, ``lmbda`` and ``gamma`` attributes. It then
    calls ``teach(1)`` on a teacher that uses ``MountainCarGameVisualizer``
    followed by ``teach(30)`` on a teacher that uses ``GameNoVisualizer``
    (the argument is presumably an episode count -- confirm against
    ``q_learning.Teacher``).

    NOTE: another function with this same name is defined later in this
    file; at import time that later definition replaces this one.
    """
    # The bare ``reload`` builtin exists only on Python 2. Import a version
    # that works on both major versions instead of relying on the builtin.
    try:
        from importlib import reload
    except ImportError:  # Python 2: importlib has no reload()
        from imp import reload

    import q_learning as q
    reload(q)

    game = q.MountainCarGame
    state_adapter = q.mountain_car_game_tilings_state_adapter(
        tile_in_row=9, n_tilings=5)

    q_algo1 = q.SARSALambda(
        game().get_actions(),
        game().get_state(),
        0,
        memory_size=40,
        state_adapter=state_adapter)
    q_algo1.epsilon = 0.2
    q_algo1.lmbda = 0.9
    q_algo1.gamma = 0.5

    # First run with the visualizer attached, then continue without it.
    visualizer = q.MountainCarGameVisualizer(q_algo1)
    teacher = q.Teacher(game, q_algo1, visualizer)
    teacher.teach(1)

    teacher = q.Teacher(game, q_algo1, q.GameNoVisualizer())
    teacher.teach(30)
def sarsa_lambda_on_mountain_car_game():
    """Train a ``SARSALambda`` learner on ``MountainCarGame``.

    The script:

    * reloads ``q_learning`` so edits made to that module during an
      interactive session take effect;
    * builds the tile-coding state adapter with 9 tiles per row and
      5 tilings;
    * creates the learner with ``memory_size=40`` and assigns
      ``epsilon=0.2``, ``lmbda=0.9`` and ``gamma=0.5``;
    * calls ``teach(1)`` with ``MountainCarGameVisualizer`` attached and then
      ``teach(30)`` with ``GameNoVisualizer`` (the argument is presumably an
      episode count -- confirm against ``q_learning.Teacher``).

    NOTE: this redefines a function of the same name that appears earlier in
    this file; this later definition is the one left bound after import.
    """
    # The bare ``reload`` builtin exists only on Python 2. Import a version
    # that works on both major versions instead of relying on the builtin.
    try:
        from importlib import reload
    except ImportError:  # Python 2: importlib has no reload()
        from imp import reload

    import q_learning as q
    reload(q)

    game = q.MountainCarGame
    state_adapter = q.mountain_car_game_tilings_state_adapter(
        tile_in_row=9, n_tilings=5)

    learner = q.SARSALambda(
        game().get_actions(),
        game().get_state(),
        0,
        memory_size=40,
        state_adapter=state_adapter)
    learner.epsilon = 0.2
    learner.lmbda = 0.9
    learner.gamma = 0.5

    # Visualized pass first, then the silent passes on the same learner.
    visualizer = q.MountainCarGameVisualizer(learner)
    q.Teacher(game, learner, visualizer).teach(1)
    q.Teacher(game, learner, q.GameNoVisualizer()).teach(30)
def sarsa_lambda_gradient_descent():
    """Train a ``SARSALambdaGradientDescent`` learner on ``MountainCarGame``.

    The script:

    * turns on matplotlib interactive mode and reloads ``q_learning`` so
      edits made to that module during an interactive session take effect;
    * builds the tile-coding state adapter (9 tiles per row, 5 tilings) and
      wraps it so each adapted state is returned as a NumPy array;
    * creates the learner with an all-ones ``initial_theta`` of length
      ``tile_in_row * tile_in_row * n_tilings`` and assigns ``epsilon=0.02``,
      ``lmbda=0.5``, ``gamma=0.9`` and ``alpha=0.1``;
    * calls ``teach(1)`` with ``MountainCarGameVisualizer`` attached and then
      ``teach(30)`` with ``GameNoVisualizer`` (the argument is presumably an
      episode count -- confirm against ``q_learning.Teacher``).

    NOTE: this redefines a function of the same name that appears earlier in
    this file; this later definition is the one left bound after import.
    """
    # The bare ``reload`` builtin exists only on Python 2. Import a version
    # that works on both major versions instead of relying on the builtin.
    try:
        from importlib import reload
    except ImportError:  # Python 2: importlib has no reload()
        from imp import reload

    import matplotlib.pyplot as plt
    plt.ion()

    import numpy as np
    import q_learning as q
    reload(q)

    game = q.MountainCarGame
    tile_in_row = 9
    n_tilings = 5

    # Pass the sizes by keyword so the call does not depend on the adapter's
    # parameter order (the SARSA(lambda) script in this file uses these same
    # keyword names).
    tile_adapter = q.mountain_car_game_tilings_state_adapter(
        tile_in_row=tile_in_row, n_tilings=n_tilings)

    def state_as_array(state):
        """Return the adapted state converted to a NumPy array."""
        return np.array(tile_adapter(state))

    # NOTE(review): this yields an integer-dtype array; confirm the learner
    # does not update theta in place with float increments.
    initial_theta = np.array([1] * tile_in_row * tile_in_row * n_tilings)

    learner = q.SARSALambdaGradientDescent(
        game().get_actions(),
        game().get_state(),
        initial_q=0,
        initial_theta=initial_theta,
        state_adapter=state_as_array)
    learner.epsilon = 0.02
    learner.lmbda = 0.5
    learner.gamma = 0.9
    learner.alpha = 0.1

    # Visualized pass first, then the silent passes on the same learner.
    q.Teacher(game, learner, q.MountainCarGameVisualizer(learner)).teach(1)
    q.Teacher(game, learner, q.GameNoVisualizer()).teach(30)