def experiment(test_game, num_experiments, file_name, num_episodes=500, alpha=.99, gamma=.9, epsilon=.9, decay_rate=.99, num_eval_runs=100):
    """Run repeated Q-Learning experiments and plot averaged learning curves.

    Learns a model `num_experiments` times, averages the per-episode step
    counts across runs into a validation curve (written via
    `generate_validation_curves`), then executes the last learned policy
    `num_eval_runs` times to estimate the average number of steps to goal.

    Args:
        test_game: environment/game instance passed to `QLearning`.
        num_experiments: number of independent learning runs to average.
        file_name: output file name for the generated curve plot.
        num_episodes: episodes per learning run.
        alpha, gamma, epsilon, decay_rate: Q-Learning hyperparameters,
            forwarded unchanged to `QLearning`.
        num_eval_runs: number of policy executions used to estimate the
            average path length (default 100, matching prior behavior).

    Returns:
        (avg_num_steps, policy): the mean step count over `num_eval_runs`
        executions, and the Q-table/policy from the final experiment.
    """
    list_of_moves_per_experiment = []
    policies = []
    for _ in range(num_experiments):
        # Learn a fresh model for this experiment run.
        q_learning = QLearning(test_game, num_episodes=num_episodes, alpha=alpha,
                               gamma=gamma, epsilon=epsilon, decay_rate=decay_rate)
        q = q_learning.learn()
        policies.append(q)
        list_of_moves_per_experiment.append(q_learning.num_moves_per_episode)

    # Average step counts per episode index across all experiments.
    # (The original called np.array(...) and discarded the result — dead code.)
    moves_per_epoc_number = np.mean(list_of_moves_per_experiment, axis=0)

    # Estimate average steps-to-goal by executing the final learned policy.
    q_learning = QLearning(test_game, num_episodes=num_episodes, alpha=alpha,
                           gamma=gamma, epsilon=epsilon, decay_rate=decay_rate)
    avg_num_steps = 0
    for _ in range(num_eval_runs):
        # execute_policy returns a tuple; index 1 is the step count.
        num_steps = q_learning.execute_policy(policies[-1])
        avg_num_steps += num_steps[1]
    avg_num_steps /= float(num_eval_runs)

    generate_validation_curves(np.arange(num_episodes), moves_per_epoc_number, None,
                               "Number of steps", None,
                               x_axis_label="Epoc Number",
                               y_axis_label="Average Path Length",
                               file_name=file_name)

    return avg_num_steps, policies[-1]
from environment import Env
from QLearning import QLearning

if __name__ == "__main__":
    env = Env()
    agent = QLearning(list(range(env.n_actions)))

    # Train for a fixed number of episodes, rendering every step and
    # printing the current value estimates as the agent learns online.
    for episode in range(1000):
        state = env.reset()
        done = False
        while not done:
            env.render()

            # Choose an action for the current state and advance one step.
            action = agent.get_action(str(state))
            next_state, reward, done = env.step(action)

            # Update the Q-function with the sample <s, a, r, s'>.
            agent.learn(str(state), action, reward, str(next_state))

            state = next_state
            env.print_value_all(agent.q_table)