def main(agent_info, agent_class, steps, filename):
    env_class = floating_horsetrack_environment.Environment
    rl_glue = RLGlue(env_class, agent_class)

    max_steps = steps
    step = 0
    episode_end = []
    cum_reward = 0

    agent_info.update({"actions": env_class.actions})
    rl_glue.rl_init(agent_info)

    # Run episodes until the total step budget is exhausted.
    while step < max_steps:
        rl_glue.rl_start()

        is_terminal = False

        while not is_terminal and step < max_steps:
            reward, state, action, is_terminal = rl_glue.rl_step()
            cum_reward += reward

            step += 1

        # Record the step at which each episode terminated.
        if is_terminal:
            episode_end.append(step)
        rl_glue.rl_cleanup()

    save_results(episode_end, len(episode_end), "data/{}".format(filename))
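All of these snippets drive the experiment through an RLGlue object, but none of them include the class itself. The sketch below is a minimal stand-in for the interface the loops above rely on (rl_init, rl_start, rl_step, rl_cleanup and the num_steps counter); the method and parameter names follow the usual RLGlue convention, and the real class behind each snippet may differ slightly (note, for instance, that the snippets disagree on whether rl_init takes agent_info or agent_init_info).

class RLGlueSketch:
    """Minimal stand-in for the RLGlue interface assumed by these examples."""

    def __init__(self, env_class, agent_class):
        self.environment = env_class()
        self.agent = agent_class()
        self.last_action = None
        self.num_steps = 0

    def rl_init(self, agent_init_info=None, env_init_info=None):
        # Reset agent and environment before a new run.
        self.environment.env_init(env_init_info or {})
        self.agent.agent_init(agent_init_info or {})
        self.num_steps = 0

    def rl_start(self):
        # Begin an episode: first observation and first action.
        state = self.environment.env_start()
        self.last_action = self.agent.agent_start(state)
        self.num_steps = 0
        return state, self.last_action

    def rl_step(self):
        # One environment transition followed by one agent update.
        reward, state, is_terminal = self.environment.env_step(self.last_action)
        self.num_steps += 1
        if is_terminal:
            self.agent.agent_end(reward)
        else:
            self.last_action = self.agent.agent_step(reward, state)
        return reward, state, self.last_action, is_terminal

    def rl_cleanup(self):
        # Per-episode cleanup hook.
        self.environment.env_cleanup()
        self.agent.agent_cleanup()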
Example #2
def main(agent_info, agent_class, env_info, env_class, steps, param_info):
    # env_class = horsetrack_environment.Environment
    rl_glue = RLGlue(env_class, agent_class)

    max_steps = steps
    max_episodes = 5
    step = 0
    episodes = 0
    episode_end = np.ones(max_episodes) * max_steps
    cum_reward = 0

    # max_steps = 20000

    agent_info.update({"actions": env_class.actions})
    rl_glue.rl_init(agent_info, env_info)

    while step < max_steps and episodes < max_episodes:
        rl_glue.rl_start()

        is_terminal = False

        while not is_terminal and step < max_steps:
            reward, state, action, is_terminal = rl_glue.rl_step()
            cum_reward += reward

            step += 1

        if is_terminal:
            episode_end[episodes] = step
            episodes += 1
        rl_glue.rl_cleanup()

    save_results(episode_end, "{}".format(param_info))
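A detail specific to this variant: episode_end is pre-filled with max_steps, so any episode that fails to terminate within the step budget is reported as taking the full budget. A small self-contained sketch of that sentinel pattern:

import numpy as np

max_episodes, max_steps = 5, 1000
episode_end = np.ones(max_episodes) * max_steps  # sentinel: "did not finish in budget"
episode_end[0] = 375                             # an episode that terminated at step 375
unfinished = int(np.sum(episode_end == max_steps))
print(episode_end)   # [ 375. 1000. 1000. 1000. 1000.]
print(unfinished)    # 4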
Example #3
def main():
    env_class = horsetrack_environment.Environment
    agent_class = random_agent.Agent
    rl_glue = RLGlue(env_class, agent_class)

    num_episodes = 1000
    max_steps = 100000

    print("\tPrinting one dot for every run: {}".format(num_episodes),
          end=' ')
    print("total runs to complete.")

    total_steps = [0 for _ in range(num_episodes)]  # one entry per episode

    for i in range(num_episodes):
        rl_glue.rl_init(agent_info={"actions": env_class.actions})
        rl_glue.rl_start()

        is_terminal = False
        while rl_glue.num_steps < max_steps and not is_terminal:
            reward, state, action, is_terminal = rl_glue.rl_step()
            # optimal_action[num_steps] += 1 if "action is optimal" else 0

        total_steps[i] = rl_glue.num_steps

        rl_glue.rl_cleanup()
        print(".", end='')
        sys.stdout.flush()

    # prop_optimal = [num_optimal / num_episodes for num_optimal in optimal_action]
    save_results(total_steps, len(total_steps), "RL_EXP_OUT.dat")
    print("\nDone")
Example #4
def main(data_output_location="new_data"):

    env_class = horsetrack_environment.Environment
    agent_class = random_agent.Agent

    agent_name = agent_class.__module__[agent_class.__module__.find(".") + 1:]
    environment_name = env_class.__module__[env_class.__module__.find(".") +
                                            1:]

    rl_glue = RLGlue(env_class, agent_class)

    # num_episodes = 2000
    # max_steps = 1000
    max_total_steps = 100_000

    for epsilon in [0.0, 0.1]:
        for alpha in [2, 1, 0.5, 0.25, 0.125, 0.0625]:
            print("Running Agent: {} on Environment: {}.".format(
                agent_name, environment_name))
            agent_init_info = {
                "actions": [-1, 1],
                "world_size": 100,
                "epsilon": epsilon,
                "alpha": alpha
            }
            termination_times = []

            rl_glue.rl_init(agent_init_info=agent_init_info)

            step_counter = 0

            while step_counter < max_total_steps:
                rl_glue.rl_start()
                is_terminal = False

                while step_counter < max_total_steps and not is_terminal:
                    reward, state, action, is_terminal = rl_glue.rl_step()
                    step_counter += 1

                rl_glue.rl_cleanup()
                # print(".", end='')
                sys.stdout.flush()

                if is_terminal:
                    termination_times.append(step_counter)

            # Whole seconds since the Unix epoch; keeps output filenames unique.
            epoch_datetime = int(
                (datetime.datetime.now() -
                 datetime.datetime.utcfromtimestamp(0)).total_seconds())

            save_results(
                termination_times, len(termination_times),
                "{}/{}_{}__{}__epsilon{}__alpha{}.dat".format(
                    data_output_location, epoch_datetime, agent_name,
                    environment_name, epsilon, alpha))

    print("\nDone")
Example #5
File: main.py Project: healqq/lumia
def run_experiment(env_info, agent_info, 
                   num_episodes=5000,
                   value_error_threshold=1e-8,
                   plot_freq=10):
    env = GridEnvironment
    agent = TDAgent
    rl_glue = RLGlue(env, agent)

    rl_glue.rl_init(agent_info, env_info)
    steps = []
    for episode in range(1, num_episodes + 1):
        rl_glue.rl_episode(0) # no step limit
        steps.append(rl_glue.agent.agent_message("get_steps"))
        if episode % plot_freq == 0:
            # Snapshot of the value estimates at this plotting frequency;
            # in the full project these are presumably plotted or logged.
            values = rl_glue.agent.agent_message("get_values")
            print(rl_glue.environment.env_message("get_grid_state"))
        rl_glue.rl_cleanup()
    values = rl_glue.agent.agent_message("get_values")
    
    return [values, steps]
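A hedged usage sketch for run_experiment: GridEnvironment and TDAgent are not shown in this excerpt, so the env_info and agent_info keys below (grid dimensions, discount, step size) are illustrative guesses rather than the project's actual configuration.

# Hypothetical call; the dictionary keys are assumptions for illustration.
env_info = {"grid_height": 4, "grid_width": 12}
agent_info = {"discount": 1.0, "step_size": 0.1}
values, steps = run_experiment(env_info, agent_info,
                               num_episodes=500, plot_freq=50)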