def main(argv):
    """Train a tabular agent on a 10x10 gridworld and plot its learning curve.

    Runs ``FLAGS.num_trials`` independent trials, each with a freshly built
    agent and ``FLAGS.num_episodes`` episodes. The number of steps taken in
    each episode is averaged across trials and shown in a matplotlib window.

    Args:
        argv: Command-line arguments handed over by the app runner; unused.
    """
    del argv  # Unused.

    # NOTE(review): arguments are presumably (width, height, start, goal) --
    # confirm against the Gridworld constructor.
    env = Gridworld(10, 10, 0, 99)
    # for i in range(7):
    #     env.grid[7][i] = 1

    # The agent implementation and the policy are both looked up by name
    # from the command-line flags.
    agent_impl = importlib.import_module("agents." + FLAGS.agent)
    total_steps = np.zeros(FLAGS.num_episodes)
    policy = getattr(policies.tabular_policies, FLAGS.policy)
    num_states = env.width * env.height

    for _ in range(FLAGS.num_trials):
        agent = agent_impl.Agent(
            FLAGS.agent, num_states, FLAGS.gamma, policy, FLAGS.alpha)
        episode_lengths = []

        for _ in range(FLAGS.num_episodes):
            current_state = env.start
            current_action = agent.select_action(current_state)
            steps_taken = 0
            reached_goal = False

            # An episode stops at the goal or after FLAGS.max_steps steps.
            while not (reached_goal or steps_taken >= FLAGS.max_steps):
                following_state = env.apply_action(
                    current_state, current_action)
                reached_goal, reward = env.is_goal(following_state)
                # The next action is chosen before updating because the
                # update consumes the full (s, a, r, s', a') tuple.
                following_action = agent.select_action(following_state)
                agent.update(current_state, current_action, reward,
                             following_state, following_action)
                current_state, current_action = (
                    following_state, following_action)
                steps_taken += 1

            episode_lengths.append(steps_taken)

        total_steps += np.array(episode_lengths)

    mean_steps = total_steps / FLAGS.num_trials
    plt.plot(mean_steps)
    plt.show()
def main(argv):
    """Train an n-step tabular agent on a gridworld and log its learning curve.

    Runs ``FLAGS.num_trials`` independent trials, each with a freshly built
    agent and ``FLAGS.num_episodes`` episodes. The length of every episode
    (number of environment steps) is averaged across trials, saved with
    ``np.save`` and plotted to a figure file; both outputs use the path
    ``FLAGS.log_path + "/" + FLAGS.log_file``.

    Args:
        argv: Command-line arguments handed over by the app runner; unused.
    """
    del argv  # Unused.

    # NOTE(review): arguments are presumably (width, height, start, goal) --
    # confirm against the Gridworld constructor.
    gw = Gridworld(10, 10, 0, 80)
    # NOTE(review): presumably marks cells 0..6 of row 7 as obstacles --
    # confirm how Gridworld interprets a grid value of 1.
    for i in range(7):
        gw.grid[7][i] = 1

    # The agent implementation and the policy are both looked up by name
    # from the command-line flags.
    agent_module = importlib.import_module("agents." + FLAGS.agent)
    avg_num_steps = np.zeros(FLAGS.num_episodes)
    policy = getattr(policies.tabular_policies, FLAGS.policy)

    for _ in range(FLAGS.num_trials):
        agent = agent_module.Agent(FLAGS.agent, gw.width * gw.height, FLAGS.n,
                                   FLAGS.gamma, policy, FLAGS.alpha)
        steps_per_episode = []

        for _ in range(FLAGS.num_episodes):
            # `horizon` is the episode length; unknown (infinite) until the
            # episode ends. `np.inf` is used because the `np.Inf` alias was
            # removed in NumPy 2.0.
            horizon = np.inf
            state = gw.start
            agent.reset_agent()
            agent.stored_states.append(state)
            action = agent.select_action(state)
            agent.stored_actions.append(action)
            step = 0
            tau = 0

            # Updates lag behind acting by agent.n - 1 steps, so the loop
            # keeps running after the episode has ended until the update
            # for the last time index (horizon - 1) has been issued.
            while tau != horizon - 1:
                if step < horizon:
                    next_state = gw.apply_action(state, action)
                    terminate, reward = gw.is_goal(next_state)
                    agent.stored_states.append(next_state)
                    agent.stored_rewards.append(reward)
                    if terminate or step == FLAGS.max_steps - 1:
                        # Goal reached or step budget exhausted.
                        horizon = step + 1
                    else:
                        next_action = agent.select_action(next_state)
                        agent.stored_actions.append(next_action)
                        state = next_state
                        action = next_action

                # Time index whose estimate is updated on this iteration.
                tau = step - agent.n + 1
                if tau >= 0:
                    agent.update(tau, horizon)
                step += 1

            # Record the true episode length. `step` also counts the
            # agent.n - 1 trailing iterations that only flush pending
            # updates, which would inflate the metric for n > 1.
            steps_per_episode.append(horizon)

        avg_num_steps += np.array(steps_per_episode)

    avg_num_steps = avg_num_steps / FLAGS.num_trials
    np.save(FLAGS.log_path + "/" + FLAGS.log_file, avg_num_steps)
    plt.plot(avg_num_steps)
    plt.savefig(FLAGS.log_path + "/" + FLAGS.log_file)