Example 1
def main(local_dir):
    """Main loop based on `rllib.examples.saving_experiences`."""
    # pylint: disable=too-many-locals
    batch_builder = SampleBatchBuilder()
    writer = JsonWriter(local_dir)
    env = _industrial_benchmark_maker({"max_episode_steps": 1000})
    policy = IBBehaviorPolicy(env.observation_space, env.action_space, {})

    for eps_id in trange(100):
        obs = env.reset()
        prev_action = np.zeros_like(env.action_space.sample())
        prev_reward = 0
        done = False
        time = 0
        while not done:
            action, _, _ = policy.compute_single_action(obs, [])
            new_obs, rew, done, info = env.step(action)
            batch_builder.add_values(
                t=time,
                eps_id=eps_id,
                agent_index=0,
                obs=obs,
                actions=action,
                action_prob=1.0,  # put the true action probability here
                rewards=rew,
                prev_actions=prev_action,
                prev_rewards=prev_reward,
                dones=done,
                infos=info,
                new_obs=new_obs,
            )
            obs = new_obs
            prev_action = action
            prev_reward = rew
            time += 1
        writer.write(batch_builder.build_and_reset())
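One way to sanity-check the files produced above is to read a batch back with RLlib's JsonReader, which lives in the same ray.rllib.offline package as JsonWriter. A minimal sketch, assuming local_dir is the same directory that was passed to JsonWriter and that it contains at least one written episode:

from ray.rllib.offline.json_reader import JsonReader

def check_output(local_dir):
    """Read one SampleBatch back from the JSON output directory."""
    reader = JsonReader(local_dir)
    batch = reader.next()  # a SampleBatch with the columns added via add_values()
    print("columns:", list(batch.keys()))
    print("timesteps in first batch:", batch.count)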
Example 2
    def action_space_sample(self):
        dist = np.random.randint(10)
        change_topics = np.random.choice(N_TOPICS, dist, replace=False)
        action = self.observation[len(CONTEXT_ATTRIBUTES):].copy()
        for i in change_topics:
            action[i] = 0 if action[i] else 1
        return action


if __name__ == "__main__":
    args = parser.parse_args()

    batch_builder = SampleBatchBuilder()

    output_path = os.path.join(ray.utils.get_user_temp_dir(), "demo-out")
    writer = JsonWriter(output_path)
    print("OUTPUT IN {}".format(output_path))

    env = NewsWorld()

    prep = get_preprocessor(env.observation_space)(env.observation_space)
    print("The preprocessor is", prep)

    for eps_id in range(args.stop):
        obs = env.reset()
        prev_action = np.zeros_like(env.action_space_sample())
        prev_reward = 0
        done = False
        t = 0
        while not done:
            action = env.action_space_sample()
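The snippet is cut off at this point. As a rough standalone sketch (not taken from the original source), the rest of the rollout loop typically mirrors the other examples on this page: step the env, record the transition with add_values, then write the episode once it is done. The prep.transform calls and the fixed action_prob=1.0 are assumptions here.

import numpy as np  # already imported in the original script

def run_episode(env, prep, batch_builder, writer, eps_id):
    """Sketch of one NewsWorld rollout being recorded and written out."""
    obs = env.reset()
    prev_action = np.zeros_like(env.action_space_sample())
    prev_reward = 0
    done = False
    t = 0
    while not done:
        action = env.action_space_sample()
        new_obs, rew, done, info = env.step(action)
        batch_builder.add_values(
            t=t,
            eps_id=eps_id,
            agent_index=0,
            obs=prep.transform(obs),
            actions=action,
            action_prob=1.0,  # placeholder; use the true behaviour probability if known
            rewards=rew,
            prev_actions=prev_action,
            prev_rewards=prev_reward,
            dones=done,
            infos=info,
            new_obs=prep.transform(new_obs))
        obs = new_obs
        prev_action = action
        prev_reward = rew
        t += 1
    writer.write(batch_builder.build_and_reset())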
Example 3
import gym
import numpy as np

from ray.rllib.models.preprocessors import get_preprocessor
from ray.rllib.evaluation.sample_batch_builder import SampleBatchBuilder
from ray.rllib.offline.json_writer import JsonWriter

if __name__ == "__main__":
    batch_builder = SampleBatchBuilder()  # or MultiAgentSampleBatchBuilder
    writer = JsonWriter("/tmp/demo-out")

    # You normally wouldn't want to manually create sample batches if a
    # simulator is available, but let's do it anyways for example purposes:
    env = gym.make("Acrobot-v1")

    # RLlib uses preprocessors to implement transforms such as one-hot encoding
    # and flattening of tuple and dict observations. For CartPole a no-op
    # preprocessor is used, but this may be relevant for more complex envs.
    prep = get_preprocessor(env.observation_space)(env.observation_space)
    print("The preprocessor is", prep)

    for eps_id in range(20):
        obs = env.reset()
        prev_action = np.zeros_like(env.action_space.sample())
        prev_reward = 0
        done = False
        t = 0
        while not done:
            action = env.action_space.sample()
Example 4
import pickle
import numpy as np

from ray.rllib.evaluation.sample_batch_builder import SampleBatchBuilder
from ray.rllib.offline.json_writer import JsonWriter

batch_builder = SampleBatchBuilder()  # or MultiAgentSampleBatchBuilder
writer = JsonWriter("")

actions = [
    'strength_1', 'strength_2', 'flexibility_1', 'flexibility_2', 'rest'
]
action_to_int_converter = {actions[i]: i for i in range(len(actions))}
#
with open('episode_actions.pkl', 'rb') as file:
    episode_actions = pickle.load(file)

with open('episode_states.pkl', 'rb') as file:
    episode_states = pickle.load(file)
n_episodes = len(episode_actions)

states0 = episode_states[0]
actions0 = episode_actions[0]
# import ipdb; ipdb.set_trace()
# save in batches
for episode_idx in range(n_episodes):
    obs = np.array([0, 0, 0])
    prev_action = None
    prev_reward = None
    done = False
    n_actions = len(episode_actions[episode_idx])
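The loop body is missing from this excerpt. Below is a standalone sketch (restating the outer loop for completeness) of how the pickled episodes could be turned into SampleBatches, under the assumptions that episode_states[i] holds one observation per step (possibly plus a terminal one), that rewards are not stored in the pickles (a 0 placeholder is written), and that actions are the string labels converted with action_to_int_converter:

import numpy as np  # already imported at the top of the original script

for episode_idx in range(n_episodes):
    states = episode_states[episode_idx]
    acts = episode_actions[episode_idx]
    prev_action = 0      # placeholder: no real "previous action" before the first step
    prev_reward = 0      # placeholder: rewards are not in the pickled data
    for t in range(len(acts)):
        action_int = action_to_int_converter[acts[t]]
        done = t == len(acts) - 1
        # If the episode stores one more state than actions, states[t + 1] is the
        # resulting observation; otherwise the last observation is repeated.
        new_obs = states[t + 1] if t + 1 < len(states) else states[t]
        batch_builder.add_values(
            t=t,
            eps_id=episode_idx,
            agent_index=0,
            obs=np.asarray(states[t]),
            actions=action_int,
            action_prob=1.0,  # placeholder; true behaviour probability unknown
            rewards=0,        # placeholder; no rewards in the pickled data
            prev_actions=prev_action,
            prev_rewards=prev_reward,
            dones=done,
            infos={},
            new_obs=np.asarray(new_obs))
        prev_action = action_int
    writer.write(batch_builder.build_and_reset())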
"""Simple example of writing experiences to a file using JsonWriter."""

# __sphinx_doc_begin__
import gym
import numpy as np
import os

import ray.utils

from ray.rllib.models.preprocessors import get_preprocessor
from ray.rllib.evaluation.sample_batch_builder import SampleBatchBuilder
from ray.rllib.offline.json_writer import JsonWriter

if __name__ == "__main__":
    batch_builder = SampleBatchBuilder()  # or MultiAgentSampleBatchBuilder
    writer = JsonWriter(os.path.join(ray.utils.get_user_temp_dir(),
                                     "demo-out"))

    # You normally wouldn't want to manually create sample batches if a
    # simulator is available, but let's do it anyways for example purposes:
    env = gym.make("CartPole-v0")

    # RLlib uses preprocessors to implement transforms such as one-hot encoding
    # and flattening of tuple and dict observations. For CartPole a no-op
    # preprocessor is used, but this may be relevant for more complex envs.
    prep = get_preprocessor(env.observation_space)(env.observation_space)
    print("The preprocessor is", prep)

    for eps_id in range(100):
        obs = env.reset()
        prev_action = np.zeros_like(env.action_space.sample())
        prev_reward = 0
Example 6
    'R': np.diag(([arcsec2rad**2] * 2 + [1e3**2])),
    'alpha': 0.0001,
    'beta': 2.,
    'kappa': 3 - 6,
    'fx': fx,
    'hx': hx,
    'mean_z': mean_z,
    'residual_z': residual_z,
    'msqrt': robust_cholesky,
    'orbits': sample_orbits,
}

if __name__ == "__main__":
    batch_builder = SampleBatchBuilder()  # or MultiAgentSampleBatchBuilder
    writer = JsonWriter(
        os.path.join(ray.utils.get_user_temp_dir(),
                     "agent_visible_random_10RSOs-out"))

    # You normally wouldn't want to manually create sample batches if a
    # simulator is available, but let's do it anyways for example purposes:
    env = gym.make('ssa_tasker_simple-v2', **{'config': env_config})

    # RLlib uses preprocessors to implement transforms such as one-hot encoding
    # and flattening of tuple and dict observations. For CartPole a no-op
    # preprocessor is used, but this may be relevant for more complex envs.
    prep = get_preprocessor(env.observation_space)(env.observation_space)
    print("The preprocessor is", prep)

    for eps_id in tqdm(range(2084)):  #
        obs = env.reset()
        prev_action = np.zeros_like(env.action_space.sample())
Example 7
    ray.init()
    cls = get_agent_class(args.run)
    agent = cls(env=args.env)
    agent.restore(args.checkpoint)

    # TODO: Refactor this...
    env = agent.workers.local_worker().env
    if args.target_env:
        env_creator = _global_registry.get(ENV_CREATOR, args.target_env)
        target_env = env_creator(env.config)
    else:
        target_env = None

    batch_builder = SampleBatchBuilder()
    writer = JsonWriter(args.out)
    for states_path in args.states:
        with open(states_path, 'r') as states_file:
            states = parse_states_file(states_file, env, args.skip_count)
            trajectory = annotated_trajectory(states, agent, env, target_env)
            prev_action = None
            for (t, (obs, action, new_obs)) in enumerate(trajectory):
                batch_builder.add_values(
                    t=t,
                    eps_id=0,
                    agent_index=0,
                    obs=obs,
                    actions=action,
                    action_prob=1.0,  # TODO: put the true action probability here
                    rewards=0,
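The add_values call above is cut off mid-argument. As a rough, self-contained sketch (not the original code), the per-trajectory loop can be completed along these lines; the zero rewards and placeholder action_prob follow the visible part of the snippet, while the dones and infos values are assumptions:

import numpy as np  # already available in the original script

def write_trajectory(trajectory, batch_builder, writer):
    """Sketch: write one annotated trajectory (a list of (obs, action, new_obs)
    tuples) as a single SampleBatch."""
    prev_action = None
    for t, (obs, action, new_obs) in enumerate(trajectory):
        if prev_action is None:
            prev_action = np.zeros_like(action)
        batch_builder.add_values(
            t=t,
            eps_id=0,
            agent_index=0,
            obs=obs,
            actions=action,
            action_prob=1.0,  # TODO: put the true action probability here
            rewards=0,        # placeholder, as in the visible part of the snippet
            prev_actions=prev_action,
            prev_rewards=0,
            dones=(t == len(trajectory) - 1),
            infos={},
            new_obs=new_obs)
        prev_action = action
    writer.write(batch_builder.build_and_reset())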
Example 8
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
"""Simple example of writing experiences to a file using JsonWriter."""

# __sphinx_doc_begin__
import gym
import numpy as np

from ray.rllib.evaluation.sample_batch_builder import SampleBatchBuilder
from ray.rllib.offline.json_writer import JsonWriter

if __name__ == "__main__":
    batch_builder = SampleBatchBuilder()  # or MultiAgentSampleBatchBuilder
    writer = JsonWriter("/tmp/demo-out")

    # You normally wouldn't want to manually create sample batches if a
    # simulator is available, but let's do it anyways for example purposes:
    env = gym.make("CartPole-v0")

    for eps_id in range(100):
        obs = env.reset()
        prev_action = np.zeros_like(env.action_space.sample())
        prev_reward = 0
        done = False
        t = 0
        while not done:
            action = env.action_space.sample()
            new_obs, rew, done, info = env.step(action)
            batch_builder.add_values(
                t=t,
Example 9
def main():
    args = parser.parse_args()

    if args.save_path is None:
        save_path = os.path.join(args.data_path, 'rllib')
    else:
        save_path = args.save_path

    if args.env is None:
        env_list = []
        for env_spec in minerl.herobraine.envs.obfuscated_envs:
            env_list.append(env_spec.name)
    else:
        env_list = [args.env]

    register()

    for env_name in env_list:
        env = gym.make(env_name)
        env = MineRLObservationWrapper(MineRLActionWrapper(env))  # assumes the wrappers are imported at module level

        batch_builder = SampleBatchBuilder()
        writer = JsonWriter(os.path.join(save_path, env_name))
        prep = get_preprocessor(env.observation_space)(env.observation_space)

        env.close()

        data = minerl.data.make(env_name, data_dir=args.data_path)

        for trajectory_name in data.get_trajectory_names():
            t = 0
            prev_action = None
            prev_reward = 0
            done = False
            obs = None
            info = None
            for obs, action, reward, next_obs, done in data.load_data(
                    trajectory_name):
                obs = (obs['pov'], obs['vector'])
                next_obs = (next_obs['pov'], next_obs['vector'])
                action = action['vector']
                if prev_action is None:
                    prev_action = np.zeros_like(action)

                batch_builder.add_values(
                    t=t,
                    eps_id=trajectory_name,
                    agent_index=0,
                    obs=prep.transform(obs),
                    actions=action,
                    action_prob=1.0,  # put the true action probability here
                    rewards=reward,
                    prev_actions=prev_action,
                    prev_rewards=prev_reward,
                    dones=done,
                    infos=info,
                    new_obs=prep.transform(next_obs))
                prev_action = action
                prev_reward = reward
                t += 1
            writer.write(batch_builder.build_and_reset())
Example 10
def write_jsons(
    environment,
    data_dir,
    env_config,
    save_path,
    overwrite=False,
    fail_safe=True,
    **kwargs,
):
    data_pipeline = minerl.data.make(environment, data_dir, **kwargs)
    env = MinerRLDataEnv(data_pipeline)
    env = wrap_env(env, env_config)

    if not os.path.exists(save_path):
        os.makedirs(save_path)
    if len(os.listdir(save_path)) != 0:
        abs_save_path = os.path.abspath(save_path)
        if overwrite:
            print(f"Overwriting! {abs_save_path}")
            shutil.rmtree(abs_save_path)
        else:
            if fail_safe:
                print(f"Json data already exists at {abs_save_path}")
                return
            else:
                raise ValueError(
                    f"Directory {abs_save_path} is not empty! "
                    f"Cannot overwrite existing data automatically; please delete the old data if it is unused."
                )

    batch_builder = SampleBatchBuilder()
    writer = JsonWriter(save_path)
    prep = get_preprocessor(env.observation_space)(env.observation_space)

    for eps_id, trajectory_name in enumerate(env.trajectory_names):
        t = 0
        prev_action = None
        prev_reward = 0
        done = False
        try:
            obs = env.reset()
        except TypeError:
            continue
        while not done:
            new_obs, reward, done, info = env.step(env.action_space.sample())
            action = info["action"]
            action = env.reverse_action(action)
            if prev_action is None:
                prev_action = np.zeros_like(action)

            batch_builder.add_values(
                t=t,
                eps_id=eps_id,
                agent_index=0,
                obs=prep.transform(obs),
                actions=action,
                action_prob=1.0,  # put the true action probability here
                rewards=reward,
                prev_actions=prev_action,
                prev_rewards=prev_reward,
                dones=done,
                infos={"trajectory_name": trajectory_name},
                new_obs=prep.transform(new_obs),
            )
            obs = new_obs
            prev_action = action
            prev_reward = reward
            t += 1
        writer.write(batch_builder.build_and_reset())
Example 11
            return False
        if done:
            break
        if human_wants_restart:
            break
        while human_sets_pause:
            env.wrapped.render()
            time.sleep(0.1)
        time.sleep(0.1)
    print("timesteps %i reward %0.2f" % (total_timesteps, total_reward))
    writer.write(batch_builder.build_and_reset())


if __name__ == "__main__":
    batch_builder = SampleBatchBuilder()  # or MultiAgentSampleBatchBuilder
    writer = JsonWriter(DEMO_DATA_DIR)

    env = MountainCar()

    # RLlib uses preprocessors to implement transforms such as one-hot encoding
    # and flattening of tuple and dict observations. For CartPole a no-op
    # preprocessor is used, but this may be relevant for more complex envs.
    prep = get_preprocessor(env.observation_space)(env.observation_space)
    print("The preprocessor is", prep)

    if not hasattr(env.action_space, "n"):
        raise Exception("Keyboard agent only supports discrete action spaces")
    ACTIONS = env.action_space.n
    SKIP_CONTROL = 0  # Use previous control decision SKIP_CONTROL times, that's how you
    # can test what skip is still usable.
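The fragment above uses human_wants_restart, human_sets_pause, and a human-chosen action without showing where they are set. In gym's classic keyboard_agent example these are globals driven by window key handlers; a rough sketch along those lines is shown below. The key codes and the digit-to-action mapping are assumptions, and the handlers still need to be registered on the render window after the first env render call.

human_agent_action = 0
human_wants_restart = False
human_sets_pause = False

def key_press(key, mod):
    # Sketch of a pyglet-style key handler, in the spirit of gym's keyboard_agent:
    # Enter restarts, Space toggles pause, digit keys 1..ACTIONS pick an action.
    global human_agent_action, human_wants_restart, human_sets_pause
    if key == 0xff0d:  # Enter
        human_wants_restart = True
    if key == 32:  # Space
        human_sets_pause = not human_sets_pause
    a = int(key - ord('0'))
    if 1 <= a <= ACTIONS:
        human_agent_action = a - 1

def key_release(key, mod):
    global human_agent_action
    a = int(key - ord('0'))
    if 1 <= a <= ACTIONS and human_agent_action == a - 1:
        human_agent_action = 0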
Example 12
agents = [
    agent_visible_greedy, agent_visible_greedy_spoiled, agent_naive_random
]
rso_count = [10, 20, 40]
env_config['obs_returned'] = 'flatten'
env_config['reward_type'] = 'jones'
episodes = 10000

#!------------ Save experiences generated by the agent
for agent in agents:
    for m in rso_count:
        env_config['rso_count'] = m
        batch_builder = SampleBatchBuilder()  # or MultiAgentSampleBatchBuilder
        writer = JsonWriter('/home/ash/ray_results/ssa_experiences/' +
                            agent.__name__ + '/' +
                            str(env_config['rso_count']) + 'RSOs_' +
                            env_config['reward_type'] + '_' +
                            env_config['obs_returned'] + '_' + str(episodes) +
                            'episodes')

        # You normally wouldn't want to manually create sample batches if a
        # simulator is available, but let's do it anyways for example purposes:
        env = gym.make('ssa_tasker_simple-v2', **{'config': env_config})

        # RLlib uses preprocessors to implement transforms such as one-hot encoding
        # and flattening of tuple and dict observations. For CartPole a no-op
        # preprocessor is used, but this may be relevant for more complex envs.
        prep = get_preprocessor(env.observation_space)(env.observation_space)
        print("The preprocessor is", prep)
        for eps_id in tqdm(range(episodes)):
            obs = env.reset()
            prev_action = np.zeros_like(env.action_space.sample())
Example 13
def main(args):
    try:
        opts, args = getopt.getopt(args, "", ["sleep-for-animation=", ""])
    except getopt.GetoptError as err:
        print(str(err))  # will print something like "option -a not recognized"
        sys.exit(2)
    sleep_for_animation = True
    for o, a in opts:
        if o == "--sleep-for-animation":
            sleep_for_animation = str2bool(a)
        else:
            assert False, "unhandled option"

    batch_builder = SampleBatchBuilder()  # or MultiAgentSampleBatchBuilder
    writer = JsonWriter("./out/")

    #  Setting these 2 parameters to True can slow down training
    visuals = False
    sleep_for_animation = False

    if visuals:
        from flatland.utils.rendertools import RenderTool

    max_depth = 30
    tree_depth = 2
    trial_start = 100
    n_trials = 999
    start = 0

    columns = [
        'Agents', 'X_DIM', 'Y_DIM', 'TRIAL_NO', 'REWARD', 'NORMALIZED_REWARD',
        'DONE_RATIO', 'STEPS', 'ACTION_PROB'
    ]
    df_all_results = pd.DataFrame(columns=columns)

    for trials in range(trial_start, n_trials + 1):

        env_file = f"envs-100-999/envs/Level_{trials}.pkl"
        # env_file = f"../env_configs/round_1-small/Test_0/Level_{trials}.mpk"

        # file = f"../env_configs/actions-small/Test_0/Level_{trials}.mpk"
        file = f"envs-100-999/actions/envs/Level_{trials}.json"

        if not os.path.isfile(env_file) or not os.path.isfile(file):
            print("Missing file!", env_file, file)
            continue

        step = 0

        obs_builder_object = TreeObsForRailEnv(
            max_depth=tree_depth,
            predictor=ShortestPathPredictorForRailEnv(max_depth))

        env = RailEnv(
            width=1,
            height=1,
            rail_generator=rail_from_file(env_file),
            schedule_generator=schedule_from_file(env_file),
            malfunction_generator_and_process_data=malfunction_from_file(
                env_file),
            obs_builder_object=obs_builder_object)

        obs, info = env.reset(regenerate_rail=True,
                              regenerate_schedule=True,
                              activate_agents=False,
                              random_seed=1001)

        with open(file, "r") as files:
            expert_actions = json.load(files)

        n_agents = env.get_num_agents()
        x_dim, y_dim = env.width, env.height

        agent_obs = [None] * n_agents
        agent_obs_buffer = [None] * n_agents
        done = dict()
        done["__all__"] = False

        if imitate:
            agent_action_buffer = list(expert_actions[step].values())
        else:
            # , p=[0.2, 0, 0.5])  # [0] * n_agents
            agent_action_buffer = np.random.choice(5, n_agents, replace=True)
        update_values = [False] * n_agents

        max_steps = int(4 * 2 * (20 + env.height + env.width))

        action_size = 5  # 3

        # And some variables to keep track of the progress
        action_dict = dict()
        scores_window = deque(maxlen=100)
        reward_window = deque(maxlen=100)
        done_window = deque(maxlen=100)
        action_prob = [0] * action_size

        # agent = Agent(state_size, action_size)

        if visuals:
            env_renderer = RenderTool(env, gl="PILSVG")
            env_renderer.render_env(show=True,
                                    frames=True,
                                    show_observations=True)

        for a in range(n_agents):
            if obs[a]:
                agent_obs[a] = normalize_observation(obs[a],
                                                     tree_depth,
                                                     observation_radius=10)
                agent_obs_buffer[a] = agent_obs[a].copy()

        # Reset score and done
        score = 0
        agent_action_buffer = np.zeros(n_agents)
        # prev_action = np.zeros_like(envs.action_space.sample())
        prev_reward = np.zeros(n_agents)
        for step in range(max_steps):
            for a in range(n_agents):
                if info['action_required'][a]:
                    if imitate:
                        if step < len(expert_actions):
                            action = expert_actions[step][str(a)]
                        else:
                            action = 0
                    else:
                        action = 0

                    action_prob[action] += 1
                    update_values[a] = True

                else:
                    update_values[a] = False
                    action = 0

                action_dict.update({a: action})

            next_obs, all_rewards, done, info = env.step(action_dict)

            for a in range(n_agents):

                if next_obs[a] is not None:
                    agent_obs[a] = normalize_observation(next_obs[a],
                                                         tree_depth,
                                                         observation_radius=10)

                # Only update the values when we are done or when an action
                # was taken and thus relevant information is present
                if update_values[a] or done[a]:
                    start += 1

                    batch_builder.add_values(
                        t=step,
                        eps_id=trials,
                        agent_index=0,
                        obs=agent_obs_buffer[a],
                        actions=action_dict[a],
                        action_prob=1.0,  # put the true action probability
                        rewards=all_rewards[a],
                        prev_actions=agent_action_buffer[a],
                        prev_rewards=prev_reward[a],
                        dones=done[a],
                        infos=info['action_required'][a],
                        new_obs=agent_obs[a])

                agent_obs_buffer[a] = agent_obs[a].copy()
                agent_action_buffer[a] = action_dict[a]
                prev_reward[a] = all_rewards[a]

                score += all_rewards[a]  # / envs.get_num_agents()

            if visuals:
                env_renderer.render_env(show=True,
                                        frames=True,
                                        show_observations=True)
                if sleep_for_animation:
                    time.sleep(0.5)

            if done["__all__"] or step > max_steps:
                writer.write(batch_builder.build_and_reset())
                break

            # Collection information about training
            if step % 100 == 0:
                tasks_finished = 0
                for current_agent in env.agents:
                    if current_agent.status == RailAgentStatus.DONE_REMOVED:
                        tasks_finished += 1
                print(
                    '\rTrial No {} Training {} Agents on ({},{}).\t Steps {}\t Reward: {:.3f}\t Normalized Reward: {:.3f}\tDones: {:.2f}%\t'
                    .format(
                        trials, env.get_num_agents(), x_dim, y_dim, step,
                        score, score / (max_steps + n_agents), 100 * np.mean(
                            tasks_finished / max(1, env.get_num_agents()))),
                    end=" ")

        tasks_finished = 0
        for current_agent in env.agents:
            if current_agent.status == RailAgentStatus.DONE_REMOVED:
                tasks_finished += 1
        done_window.append(tasks_finished / max(1, env.get_num_agents()))
        reward_window.append(score)
        scores_window.append(score / (max_steps + n_agents))

        data = [[
            n_agents, x_dim, y_dim, trials,
            np.mean(reward_window),
            np.mean(scores_window), 100 * np.mean(done_window), step,
            action_prob / np.sum(action_prob)
        ]]

        df_cur = pd.DataFrame(data, columns=columns)
        df_all_results = pd.concat([df_all_results, df_cur])

        if imitate:
            df_all_results.to_csv(
                f'TreeImitationLearning_DQN_TrainingResults.csv', index=False)

        print(
            '\rTrial No {} Training {} Agents on ({},{}).\t Total Steps {}\t Reward: {:.3f}\t Normalized Reward: {:.3f}\tDones: {:.2f}%\t'
            .format(trials, env.get_num_agents(), x_dim, y_dim, step,
                    np.mean(reward_window), np.mean(scores_window),
                    100 * np.mean(done_window)))

        if visuals:
            env_renderer.close_window()

        gc.collect()