# These experiment runners assume that RLGlue, numpy (as np), sys, datetime,
# save_results, and the environment/agent modules they reference are imported
# at module level; the import lines are not shown here.


def main(agent_info, agent_class, steps, filename):
    """Run one agent on the floating horsetrack environment for a fixed number
    of steps, recording the step count at which each episode terminates."""
    env_class = floating_horsetrack_environment.Environment
    rl_glue = RLGlue(env_class, agent_class)

    max_steps = steps
    step = 0
    episode_end = []  # step counts at which episodes terminated
    cum_reward = 0

    agent_info.update({"actions": env_class.actions})
    rl_glue.rl_init(agent_info)

    while step < max_steps:
        rl_glue.rl_start()
        is_terminal = False

        while not is_terminal and step < max_steps:
            reward, state, action, is_terminal = rl_glue.rl_step()
            cum_reward += reward
            step += 1

        if is_terminal:
            episode_end.append(step)

    rl_glue.rl_cleanup()
    save_results(episode_end, len(episode_end), "data/{}".format(filename))
def main(agent_info, agent_class, env_info, env_class, steps, param_info):
    """Run one agent/environment pairing for up to max_episodes episodes or
    a fixed step budget, recording when each episode terminates."""
    rl_glue = RLGlue(env_class, agent_class)

    max_steps = steps
    max_episodes = 5
    step = 0
    episodes = 0
    # Episodes that never terminate keep the default value of max_steps.
    episode_end = np.ones(max_episodes) * max_steps
    cum_reward = 0

    agent_info.update({"actions": env_class.actions})
    rl_glue.rl_init(agent_info, env_info)

    while step < max_steps and episodes < max_episodes:
        rl_glue.rl_start()
        is_terminal = False

        while not is_terminal and step < max_steps:
            reward, state, action, is_terminal = rl_glue.rl_step()
            cum_reward += reward
            step += 1

        if is_terminal:
            episode_end[episodes] = step
            episodes += 1

    rl_glue.rl_cleanup()
    save_results(episode_end, "{}".format(param_info))
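
# Usage sketch (not part of the original code): one way the parameterised
# main() above might be driven from a small sweep. The module/class names
# `horsetrack_environment` and `random_agent`, the agent_info keys besides
# "actions", and the _example_sweep helper itself are assumptions for
# illustration only; substitute whatever your project actually provides.
def _example_sweep():
    # Hypothetical driver, shown only to illustrate the calling convention.
    for alpha in [0.5, 0.25, 0.125]:
        main(agent_info={"alpha": alpha, "epsilon": 0.1},
             agent_class=random_agent.Agent,
             env_info={},
             env_class=horsetrack_environment.Environment,
             steps=20000,
             param_info="alpha_{}".format(alpha))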
def main():
    env_class = horsetrack_environment.Environment
    agent_class = random_agent.Agent
    rl_glue = RLGlue(env_class, agent_class)

    num_episodes = 1000
    max_steps = 100000

    print("\tPrinting one dot for every run: {} total runs to complete.".format(
        num_episodes))

    # Number of steps each episode took (max_steps if it never terminated).
    total_steps = [0 for _ in range(num_episodes)]

    for i in range(num_episodes):
        rl_glue.rl_init(agent_info={"actions": env_class.actions})
        rl_glue.rl_start()

        is_terminal = False
        while rl_glue.num_steps < max_steps and not is_terminal:
            reward, state, action, is_terminal = rl_glue.rl_step()

        total_steps[i] = rl_glue.num_steps
        rl_glue.rl_cleanup()
        print(".", end='')
        sys.stdout.flush()

    save_results(total_steps, len(total_steps), "RL_EXP_OUT.dat")
    print("\nDone")
def main(data_output_location="new_data"):
    env_class = horsetrack_environment.Environment
    agent_class = random_agent.Agent

    # Derive readable names from the module paths, e.g. "pkg.random_agent" -> "random_agent".
    agent_name = agent_class.__module__[agent_class.__module__.find(".") + 1:]
    environment_name = env_class.__module__[env_class.__module__.find(".") + 1:]

    rl_glue = RLGlue(env_class, agent_class)

    max_total_steps = 100_000

    for epsilon in [0.0, 0.1]:
        for alpha in [2, 1, 0.5, 0.25, 0.125, 0.0625]:
            print("Running Agent: {} on Environment: {}.".format(
                agent_name, environment_name))

            agent_init_info = {
                "actions": [-1, 1],
                "world_size": 100,
                "epsilon": epsilon,
                "alpha": alpha
            }

            termination_times = []
            rl_glue.rl_init(agent_init_info=agent_init_info)

            step_counter = 0
            while step_counter < max_total_steps:
                rl_glue.rl_start()

                is_terminal = False
                while step_counter < max_total_steps and not is_terminal:
                    reward, state, action, is_terminal = rl_glue.rl_step()
                    step_counter += 1

                rl_glue.rl_cleanup()
                # print(".", end='')
                sys.stdout.flush()

                if is_terminal:
                    termination_times.append(step_counter)

            epoch_datetime = int(
                (datetime.datetime.now()
                 - datetime.datetime.utcfromtimestamp(0)).total_seconds())

            save_results(
                termination_times, len(termination_times),
                "{}/{}_{}__{}__epsilon{}__alpha{}.dat".format(
                    data_output_location, epoch_datetime, agent_name,
                    environment_name, epsilon, alpha))

    print("\nDone")
def run_experiment(env_info, agent_info,
                   num_episodes=5000,
                   value_error_threshold=1e-8,
                   plot_freq=10):
    env = GridEnvironment
    agent = TDAgent

    rl_glue = RLGlue(env, agent)
    rl_glue.rl_init(agent_info, env_info)

    steps = []
    for episode in range(1, num_episodes + 1):
        rl_glue.rl_episode(0)  # no step limit
        steps.append(rl_glue.agent.agent_message("get_steps"))

        if episode % plot_freq == 0:
            values = rl_glue.agent.agent_message("get_values")
            print(rl_glue.environment.env_message("get_grid_state"))

    rl_glue.rl_cleanup()
    values = rl_glue.agent.agent_message("get_values")

    return [values, steps]
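
# Usage sketch (assumption, not part of the original code): run_experiment()
# above takes env_info / agent_info dictionaries for GridEnvironment and
# TDAgent and returns [values, steps]. The specific keys shown below (grid
# shape, step size, discount) are illustrative guesses, not the classes'
# documented interface.
if __name__ == "__main__":
    example_env_info = {"shape": (4, 12)}       # assumed key
    example_agent_info = {"step_size": 0.1,     # assumed keys
                          "discount": 1.0,
                          "policy": "random"}

    values, steps = run_experiment(example_env_info,
                                   example_agent_info,
                                   num_episodes=500,
                                   plot_freq=100)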