Example #1
    def testGymPreprocessors(self):
        p1 = ModelCatalog.get_preprocessor(
            get_registry(), gym.make("CartPole-v0"))
        assert type(p1) == NoPreprocessor

        p2 = ModelCatalog.get_preprocessor(
            get_registry(), gym.make("FrozenLake-v0"))
        assert type(p2) == OneHotPreprocessor
Example #2
    def testGymPreprocessors(self):
        p1 = ModelCatalog.get_preprocessor(
            get_registry(), gym.make("CartPole-v0"))
        self.assertEqual(type(p1), NoPreprocessor)

        p2 = ModelCatalog.get_preprocessor(
            get_registry(), gym.make("FrozenLake-v0"))
        self.assertEqual(type(p2), OneHotPreprocessor)
Example #3
 def test_custom_preprocessor(self):
     ray.init(object_store_memory=1000 * 1024 * 1024)
     ModelCatalog.register_custom_preprocessor("foo", CustomPreprocessor)
     ModelCatalog.register_custom_preprocessor("bar", CustomPreprocessor2)
     env = gym.make("CartPole-v0")
     p1 = ModelCatalog.get_preprocessor(env, {"custom_preprocessor": "foo"})
     self.assertEqual(str(type(p1)), str(CustomPreprocessor))
     p2 = ModelCatalog.get_preprocessor(env, {"custom_preprocessor": "bar"})
     self.assertEqual(str(type(p2)), str(CustomPreprocessor2))
     p3 = ModelCatalog.get_preprocessor(env)
     self.assertEqual(type(p3), NoPreprocessor)
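Note: `CustomPreprocessor` and `CustomPreprocessor2` are test helpers defined elsewhere in the test file and are not shown on this page. A minimal sketch of what such a class can look like, assuming the `Preprocessor` hooks of more recent Ray versions (older releases used an `_init` method that sets `self.shape` instead of `_init_shape`):

import numpy as np
from ray.rllib.models.preprocessors import Preprocessor


class CustomPreprocessor(Preprocessor):
    """Illustrative preprocessor: flattens observations to float32."""

    def _init_shape(self, obs_space, options):
        # Shape of the transformed observation: a flat vector.
        return (int(np.prod(obs_space.shape)), )

    def transform(self, observation):
        return np.asarray(observation, dtype=np.float32).flatten()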
Example #4
 def testCustomPreprocessor(self):
     ray.init()
     ModelCatalog.register_custom_preprocessor("foo", CustomPreprocessor)
     ModelCatalog.register_custom_preprocessor("bar", CustomPreprocessor2)
     env = gym.make("CartPole-v0")
     p1 = ModelCatalog.get_preprocessor(env, {"custom_preprocessor": "foo"})
     self.assertEqual(str(type(p1)), str(CustomPreprocessor))
     p2 = ModelCatalog.get_preprocessor(env, {"custom_preprocessor": "bar"})
     self.assertEqual(str(type(p2)), str(CustomPreprocessor2))
     p3 = ModelCatalog.get_preprocessor(env)
     self.assertEqual(type(p3), NoPreprocessor)
Example #5
 def testCustomPreprocessor(self):
     ray.init()
     ModelCatalog.register_custom_preprocessor("foo", CustomPreprocessor)
     ModelCatalog.register_custom_preprocessor("bar", CustomPreprocessor2)
     env = gym.make("CartPole-v0")
     p1 = ModelCatalog.get_preprocessor(
         get_registry(), env, {"custom_preprocessor": "foo"})
     self.assertEqual(str(type(p1)), str(CustomPreprocessor))
     p2 = ModelCatalog.get_preprocessor(
         get_registry(), env, {"custom_preprocessor": "bar"})
     self.assertEqual(str(type(p2)), str(CustomPreprocessor2))
     p3 = ModelCatalog.get_preprocessor(get_registry(), env)
     self.assertEqual(type(p3), NoPreprocessor)
Example #6
 def testCustomPreprocessor(self):
     ray.init()
     ModelCatalog.register_custom_preprocessor("foo", CustomPreprocessor)
     ModelCatalog.register_custom_preprocessor("bar", CustomPreprocessor2)
     env = gym.make("CartPole-v0")
     p1 = ModelCatalog.get_preprocessor(
         get_registry(), env, {"custom_preprocessor": "foo"})
     assert type(p1) == CustomPreprocessor
     p2 = ModelCatalog.get_preprocessor(
         get_registry(), env, {"custom_preprocessor": "bar"})
     assert type(p2) == CustomPreprocessor2
     p3 = ModelCatalog.get_preprocessor(get_registry(), env)
     assert type(p3) == NoPreprocessor
Example #7
    def _init(self):
        policy_params = {"ac_noise_std": 0.01}

        env = self.env_creator()
        preprocessor = ModelCatalog.get_preprocessor(
            env.spec.id, env.observation_space.shape)
        preprocessor_shape = preprocessor.transform_shape(
            env.observation_space.shape)

        self.sess = utils.make_session(single_threaded=False)
        self.policy = policies.GenericPolicy(env.observation_space,
                                             env.action_space, preprocessor,
                                             **policy_params)
        tf_util.initialize()
        self.optimizer = optimizers.Adam(self.policy, self.config["stepsize"])
        self.ob_stat = utils.RunningStat(preprocessor_shape, eps=1e-2)

        # Create the shared noise table.
        print("Creating shared noise table.")
        noise_id = create_shared_noise.remote()
        self.noise = SharedNoiseTable(ray.get(noise_id))

        # Create the actors.
        print("Creating actors.")
        self.workers = [
            Worker.remote(self.config, policy_params, self.env_creator,
                          noise_id) for _ in range(self.config["num_workers"])
        ]

        self.episodes_so_far = 0
        self.timesteps_so_far = 0
        self.tstart = time.time()
Example #8
File: es.py Project: ml-squad/ray
    def _init(self):
        policy_params = {"action_noise_std": 0.01}

        env = self.env_creator(self.config["env_config"])
        preprocessor = ModelCatalog.get_preprocessor(self.registry, env)

        self.sess = utils.make_session(single_threaded=False)
        self.policy = policies.GenericPolicy(self.registry, self.sess,
                                             env.action_space, preprocessor,
                                             self.config["observation_filter"],
                                             **policy_params)
        self.optimizer = optimizers.Adam(self.policy, self.config["stepsize"])

        # Create the shared noise table.
        print("Creating shared noise table.")
        noise_id = create_shared_noise.remote(self.config["noise_size"])
        self.noise = SharedNoiseTable(ray.get(noise_id))

        # Create the actors.
        print("Creating actors.")
        self.workers = [
            Worker.remote(self.registry, self.config, policy_params,
                          self.env_creator, noise_id)
            for _ in range(self.config["num_workers"])
        ]

        self.episodes_so_far = 0
        self.timesteps_so_far = 0
        self.tstart = time.time()
Example #9
 def __init__(self, env_id, env=None, options=dict()):
     super(RLLibPreprocessing, self).__init__(env)
     self.preprocessor = ModelCatalog.get_preprocessor(
         env_id, env.observation_space.shape, options)
     self._process_shape = self.preprocessor.transform_shape(
         env.observation_space.shape)
     self.observation_space = Box(-1.0, 1.0, self._process_shape)
Example #10
    def __init__(self,
                 config,
                 policy_params,
                 env_creator,
                 noise,
                 min_task_runtime=0.2):
        self.min_task_runtime = min_task_runtime
        self.config = config
        self.policy_params = policy_params
        self.noise = SharedNoiseTable(noise)

        self.env = env_creator()
        self.preprocessor = ModelCatalog.get_preprocessor(
            self.env.spec.id, self.env.observation_space.shape)
        self.preprocessor_shape = self.preprocessor.transform_shape(
            self.env.observation_space.shape)

        self.sess = utils.make_session(single_threaded=True)
        self.policy = policies.GenericPolicy(self.env.observation_space,
                                             self.env.action_space,
                                             self.preprocessor,
                                             **policy_params)
        tf_util.initialize()

        self.rs = np.random.RandomState()

        assert self.policy.needs_ob_stat == (
            self.config["calc_obstat_prob"] != 0)
Example #11
File: env.py Project: the-sea/ray
 def __init__(self, name, batchsize):
     self.envs = [gym.make(name) for _ in range(batchsize)]
     self.observation_space = self.envs[0].observation_space
     self.action_space = self.envs[0].action_space
     self.batchsize = batchsize
     self.preprocessor = ModelCatalog.get_preprocessor(
         name, self.envs[0].observation_space.shape)
Example #12
 def __init__(self, env_creator, batchsize, options):
     self.envs = [env_creator() for _ in range(batchsize)]
     self.observation_space = self.envs[0].observation_space
     self.action_space = self.envs[0].action_space
     self.batchsize = batchsize
     self.preprocessor = ModelCatalog.get_preprocessor(
         self.envs[0], options["model"])
     self.extra_frameskip = options.get("extra_frameskip", 1)
     assert self.extra_frameskip >= 1
Example #13
 def __init__(self, name, batchsize, options):
     self.envs = [gym.make(name) for _ in range(batchsize)]
     self.observation_space = self.envs[0].observation_space
     self.action_space = self.envs[0].action_space
     self.batchsize = batchsize
     self.preprocessor = ModelCatalog.get_preprocessor(
         name, self.envs[0].observation_space.shape, options["model"])
     self.extra_frameskip = options.get("extra_frameskip", 1)
     assert self.extra_frameskip >= 1
Example #14
def get_preprocessor_as_wrapper(env, options={}):
    """Returns a preprocessor as a gym observation wrapper.
    Args:
        env (gym.Env): The gym environment to wrap.
        options (dict): Options to pass to the preprocessor.
    Returns:
        wrapper (gym.ObservationWrapper): Preprocessor in wrapper form.
    """

    preprocessor = ModelCatalog.get_preprocessor(env, options)
    return _RLlibPreprocessorWrapper(env, preprocessor)
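Note: Example #20 below performs these same two steps by hand (`get_preprocessor` followed by `_RLlibPreprocessorWrapper`). A hedged call-site sketch, assuming this helper is exposed as a static method on `ModelCatalog`, as it was in the Ray versions these examples target:

import gym
from ray.rllib.models import ModelCatalog

# Wrap the env so reset()/step() return preprocessed observations.
env = ModelCatalog.get_preprocessor_as_wrapper(gym.make("CartPole-v0"))
obs = env.reset()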
Example #15
    def testTuplePreprocessor(self):
        ray.init()

        class TupleEnv(object):
            def __init__(self):
                self.observation_space = Tuple(
                    [Discrete(5), Box(0, 1, shape=(3,), dtype=np.float32)])

        p1 = ModelCatalog.get_preprocessor(TupleEnv())
        self.assertEqual(p1.shape, (8,))
        self.assertEqual(
            list(p1.transform((0, [1, 2, 3]))),
            [float(x) for x in [1, 0, 0, 0, 0, 1, 2, 3]])
Example #16
    def test_tuple_preprocessor(self):
        ray.init(object_store_memory=1000 * 1024 * 1024)

        class TupleEnv:
            def __init__(self):
                self.observation_space = Tuple(
                    [Discrete(5),
                     Box(0, 5, shape=(3, ), dtype=np.float32)])

        p1 = ModelCatalog.get_preprocessor(TupleEnv())
        self.assertEqual(p1.shape, (8, ))
        self.assertEqual(list(p1.transform((0, np.array([1, 2, 3])))),
                         [float(x) for x in [1, 0, 0, 0, 0, 1, 2, 3]])
Example #17
    def testTuplePreprocessor(self):
        ray.init()

        class TupleEnv(object):
            def __init__(self):
                self.observation_space = Tuple(
                    [Discrete(5), Box(0, 1, shape=(3,), dtype=np.float32)])

        p1 = ModelCatalog.get_preprocessor(
            get_registry(), TupleEnv())
        self.assertEqual(p1.shape, (8,))
        self.assertEqual(
            list(p1.transform((0, [1, 2, 3]))),
            [float(x) for x in [1, 0, 0, 0, 0, 1, 2, 3]])
Example #18
    def __init__(self, registry, config, policy_params, env_creator, noise,
                 min_task_runtime=0.2):
        self.min_task_runtime = min_task_runtime
        self.config = config
        self.policy_params = policy_params
        self.noise = SharedNoiseTable(noise)

        self.env = env_creator(config["env_config"])
        self.preprocessor = ModelCatalog.get_preprocessor(registry, self.env)

        self.sess = utils.make_session(single_threaded=True)
        self.policy = policies.GenericPolicy(
            registry, self.sess, self.env.action_space, self.preprocessor,
            config["observation_filter"], **policy_params)
Example #19
    def __init__(self, env_name, config, upload_dir=None):
        config.update({"alg": "EvolutionStrategies"})

        Algorithm.__init__(self, env_name, config, upload_dir=upload_dir)

        policy_params = {"ac_noise_std": 0.01}

        env = gym.make(env_name)
        preprocessor = ModelCatalog.get_preprocessor(
            env_name, env.observation_space.shape)
        preprocessor_shape = preprocessor.transform_shape(
            env.observation_space.shape)

        utils.make_session(single_threaded=False)
        self.policy = policies.GenericPolicy(env.observation_space,
                                             env.action_space, preprocessor,
                                             **policy_params)
        tf_util.initialize()
        self.optimizer = optimizers.Adam(self.policy, config["stepsize"])
        self.ob_stat = utils.RunningStat(preprocessor_shape, eps=1e-2)

        # Create the shared noise table.
        print("Creating shared noise table.")
        noise_id = create_shared_noise.remote()
        self.noise = SharedNoiseTable(ray.get(noise_id))

        # Create the actors.
        print("Creating actors.")
        self.workers = [
            Worker.remote(config, policy_params, env_name, noise_id)
            for _ in range(config["num_workers"])
        ]

        self.episodes_so_far = 0
        self.timesteps_so_far = 0
        self.tstart = time.time()
        self.iteration = 0
Example #20
def visualizer_rllib(args):
    result_dir = args.result_dir if args.result_dir[-1] != '/' \
        else args.result_dir[:-1]

    # config = get_rllib_config(result_dir + '/..')
    # pkl = get_rllib_pkl(result_dir + '/..')
    config = get_rllib_config(result_dir)
    # TODO(ev) backwards compatibility hack
    try:
        pkl = get_rllib_pkl(result_dir)
    except Exception:
        pkl = None

    # check if we have a multiagent scenario but in a
    # backwards compatible way
    if pkl is not None and \
            config.get('multiagent', {}).get('policy_graphs', {}):
        multiagent = True
        config['multiagent'] = pkl['multiagent']
    else:
        multiagent = False

    # Run on only one cpu for rendering purposes
    config['num_workers'] = 0

    flow_params = get_flow_params(config)

    # hack for old pkl files
    # TODO(ev) remove eventually
    sim_params = flow_params['sim']
    setattr(sim_params, 'num_clients', 1)

    # Create and register a gym+rllib env
    create_env, env_name = make_create_env(
        params=flow_params, version=0, render=False)
    register_env(env_name, create_env)

    # Determine agent and checkpoint
    config_run = config['env_config']['run'] if 'run' in config['env_config'] \
        else None
    if args.run and config_run:
        if args.run != config_run:
            print('visualizer_rllib.py: error: run argument '
                  + '\'{}\' passed in '.format(args.run)
                  + 'differs from the one stored in params.json '
                  + '\'{}\''.format(config_run))
            sys.exit(1)
    if args.run:
        agent_cls = get_agent_class(args.run)
    elif config_run:
        agent_cls = get_agent_class(config_run)
    else:
        print('visualizer_rllib.py: error: could not find flow parameter '
              '\'run\' in params.json, '
              'add argument --run to provide the algorithm or model used '
              'to train the results\n e.g. '
              'python ./visualizer_rllib.py /tmp/ray/result_dir 1 --run PPO')
        sys.exit(1)

    sim_params.restart_instance = False
    sim_params.emission_path = './test_time_rollout/'

    # pick your rendering mode
    if args.render_mode == 'sumo_web3d':
        sim_params.num_clients = 2
        sim_params.render = False
    elif args.render_mode == 'drgb':
        sim_params.render = 'drgb'
        sim_params.pxpm = 4
    elif args.render_mode == 'sumo_gui':
        sim_params.render = True
    elif args.render_mode == 'no_render':
        sim_params.render = False
    if args.save_render:
        sim_params.render = 'drgb'
        sim_params.pxpm = 4
        sim_params.save_render = True

    # Recreate the scenario from the pickled parameters
    exp_tag = flow_params['exp_tag']
    net_params = flow_params['net']
    vehicles = flow_params['veh']
    initial_config = flow_params['initial']
    module = __import__('flow.scenarios', fromlist=[flow_params['scenario']])
    scenario_class = getattr(module, flow_params['scenario'])

    scenario = scenario_class(
        name=exp_tag,
        vehicles=vehicles,
        net_params=net_params,
        initial_config=initial_config)

    # check if the environment is a single or multiagent environment, and
    # get the right address accordingly
    single_agent_envs = [env for env in dir(flow.envs)
                         if not env.startswith('__')]

    if flow_params['env_name'] in single_agent_envs:
        env_loc = 'flow.envs'
    else:
        env_loc = 'flow.multiagent_envs'

    # Start the environment with the gui turned on and a path for the
    # emission file
    module = __import__(env_loc, fromlist=[flow_params['env_name']])
    env_class = getattr(module, flow_params['env_name'])
    env_params = flow_params['env']
    env_params.restart_instance = False
    if args.evaluate:
        env_params.evaluate = True

    # lower the horizon if testing
    if args.horizon:
        config['horizon'] = args.horizon
        env_params.horizon = args.horizon

    # create the agent that will be used to compute the actions
    agent = agent_cls(env=env_name, config=config)
    checkpoint = result_dir + '/checkpoint_' + args.checkpoint_num
    checkpoint = checkpoint + '/checkpoint-' + args.checkpoint_num
    agent.restore(checkpoint)

    _env = env_class(
        env_params=env_params,
        sim_params=sim_params,
        scenario=scenario,
        simulator=flow_params['simulator']
    )
    _prep = ModelCatalog.get_preprocessor(_env, options={})
    env = _RLlibPreprocessorWrapper(_env, _prep)

    if multiagent:
        rets = {}
        # map the agent id to its policy
        policy_map_fn = config['multiagent']['policy_mapping_fn'].func
        for key in config['multiagent']['policy_graphs'].keys():
            rets[key] = []
    else:
        rets = []
    final_outflows = []
    mean_speed = []
    for i in range(args.num_rollouts):
        vel = []
        state = env.reset()
        if multiagent:
            ret = {key: [0] for key in rets.keys()}
        else:
            ret = 0
        for _ in range(env_params.horizon):
            vehicles = env.unwrapped.k.vehicle
            vel.append(np.mean(vehicles.get_speed(vehicles.get_ids())))
            if multiagent:
                action = {}
                for agent_id in state.keys():
                    action[agent_id] = agent.compute_action(
                        state[agent_id], policy_id=policy_map_fn(agent_id))
            else:
                action = agent.compute_action(state)
            state, reward, done, _ = env.step(action)
            if multiagent:
                for actor, rew in reward.items():
                    ret[policy_map_fn(actor)][0] += rew
            else:
                ret += reward
            if multiagent and done['__all__']:
                break
            if not multiagent and done:
                break

        if multiagent:
            for key in rets.keys():
                rets[key].append(ret[key])
        else:
            rets.append(ret)
        outflow = vehicles.get_outflow_rate(500)
        final_outflows.append(outflow)
        mean_speed.append(np.mean(vel))
        if multiagent:
            for agent_id in rets.keys():
                print('Round {}, Return: {} for agent {}'.format(
                    i, ret[agent_id], agent_id))
        else:
            print('Round {}, Return: {}'.format(i, ret))
    if multiagent:
        for agent_id, rew in rets.items():
            print('Average, std return: {}, {} for agent {}'.format(
                np.mean(rew), np.std(rew), agent_id))
    else:
        print('Average, std return: {}, {}'.format(
            np.mean(rets), np.std(rets)))
    print('Average, std speed: {}, {}'.format(
        np.mean(mean_speed), np.std(mean_speed)))
    print('Average, std outflow: {}, {}'.format(
        np.mean(final_outflows), np.std(final_outflows)))

    # terminate the environment
    env.unwrapped.terminate()

    # if prompted, convert the emission file into a csv file
    if args.emission_to_csv:
        dir_path = os.path.dirname(os.path.realpath(__file__))
        emission_filename = '{0}-emission.xml'.format(scenario.name)

        emission_path = \
            '{0}/test_time_rollout/{1}'.format(dir_path, emission_filename)

        emission_to_csv(emission_path)

    # if we wanted to save the render, here we create the movie
    if args.save_render:
        dirs = os.listdir(os.path.expanduser('~')+'/flow_rendering')
        dirs.sort(key=lambda date: datetime.strptime(date, "%Y-%m-%d-%H%M%S"))
        recent_dir = dirs[-1]
        # create the movie
        movie_dir = os.path.expanduser('~') + '/flow_rendering/' + recent_dir
        save_dir = os.path.expanduser('~') + '/flow_movies'
        if not os.path.exists(save_dir):
            os.mkdir(save_dir)
        os_cmd = "cd " + movie_dir + " && ffmpeg -i frame_%06d.png"
        os_cmd += " -pix_fmt yuv420p " + dirs[-1] + ".mp4"
        os_cmd += "&& cp " + dirs[-1] + ".mp4 " + save_dir + "/"
        os.system(os_cmd)
Example #21
def test_preprocessor():
    ModelCatalog.register_preprocessor("FakeEnv-v0", FakePreprocessor)
    env = FakeEnv()
    preprocessor = ModelCatalog.get_preprocessor(env)
    assert type(preprocessor) == FakePreprocessor
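Note: `FakeEnv` and `FakePreprocessor` are test fixtures defined elsewhere in that test module. The bodies below are assumptions for illustration only (an identity preprocessor using the same hooks sketched after Example #3), not code from the page:

import gym
import numpy as np
from gym.spaces import Box
from ray.rllib.models.preprocessors import Preprocessor


class FakeEnv(gym.Env):
    """Minimal stand-in env; assumed to be registered as 'FakeEnv-v0'."""
    observation_space = Box(0.0, 1.0, shape=(4, ), dtype=np.float32)
    action_space = Box(0.0, 1.0, shape=(2, ), dtype=np.float32)


class FakePreprocessor(Preprocessor):
    """Identity preprocessor: passes observations through unchanged."""

    def _init_shape(self, obs_space, options):
        return obs_space.shape

    def transform(self, observation):
        return observation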