Example 1
    def _setup_runner(self):
        self.status = Trial.RUNNING
        trainable_cls = get_registry().get(
            TRAINABLE_CLASS, self.trainable_name)
        cls = ray.remote(
            num_cpus=self.resources.driver_cpu_limit,
            num_gpus=self.resources.driver_gpu_limit)(trainable_cls)
        if not self.result_logger:
            if not os.path.exists(self.local_dir):
                os.makedirs(self.local_dir)
            self.logdir = tempfile.mkdtemp(
                prefix="{}_{}".format(
                    str(self)[:MAX_LEN_IDENTIFIER], date_str()),
                dir=self.local_dir)
            self.result_logger = UnifiedLogger(
                self.config, self.logdir, self.upload_dir)
        remote_logdir = self.logdir

        def logger_creator(config):
            # Set the working dir in the remote process, for user file writes
            if not os.path.exists(remote_logdir):
                os.makedirs(remote_logdir)
            os.chdir(remote_logdir)
            return NoopLogger(config, remote_logdir)

        # Logging for trials is handled centrally by TrialRunner, so
        # configure the remote runner to use a noop-logger.
        self.runner = cls.remote(
            config=self.config, registry=get_registry(),
            logger_creator=logger_creator)
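
The lookup `get_registry().get(TRAINABLE_CLASS, self.trainable_name)` above only resolves if the trainable class was registered beforehand. A minimal sketch of that registration step, assuming a user-defined `MyTrainable` class (hypothetical name) and the `ray.tune.registry` helpers used throughout these examples:

from ray.tune.registry import TRAINABLE_CLASS, get_registry, register_trainable

# MyTrainable is a hypothetical ray.tune Trainable subclass defined elsewhere.
register_trainable("my_trainable", MyTrainable)

# The lookup performed by _setup_runner now succeeds:
trainable_cls = get_registry().get(TRAINABLE_CLASS, "my_trainable")
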
Example 2
    def testGymPreprocessors(self):
        p1 = ModelCatalog.get_preprocessor(
            get_registry(), gym.make("CartPole-v0"))
        assert type(p1) == NoPreprocessor

        p2 = ModelCatalog.get_preprocessor(
            get_registry(), gym.make("FrozenLake-v0"))
        assert type(p2) == OneHotPreprocessor
Example 3
    def testGymPreprocessors(self):
        p1 = ModelCatalog.get_preprocessor(
            get_registry(), gym.make("CartPole-v0"))
        self.assertEqual(type(p1), NoPreprocessor)

        p2 = ModelCatalog.get_preprocessor(
            get_registry(), gym.make("FrozenLake-v0"))
        self.assertEqual(type(p2), OneHotPreprocessor)
Example 4
    def testDefaultModels(self):
        ray.init()

        with tf.variable_scope("test1"):
            p1 = ModelCatalog.get_model(
                get_registry(), np.zeros((10, 3), dtype=np.float32), 5)
            self.assertEqual(type(p1), FullyConnectedNetwork)

        with tf.variable_scope("test2"):
            p2 = ModelCatalog.get_model(
                get_registry(), np.zeros((10, 80, 80, 3), dtype=np.float32), 5)
            self.assertEqual(type(p2), VisionNetwork)
Example 5
    def testDefaultModels(self):
        ray.init()

        with tf.variable_scope("test1"):
            p1 = ModelCatalog.get_model(
                get_registry(), np.zeros((10, 3), dtype=np.float32), 5)
            assert type(p1) == FullyConnectedNetwork

        with tf.variable_scope("test2"):
            p2 = ModelCatalog.get_model(
                get_registry(), np.zeros((10, 80, 80, 3), dtype=np.float32), 5)
            assert type(p2) == VisionNetwork
Example 6
    def testCustomPreprocessor(self):
        ray.init()
        ModelCatalog.register_custom_preprocessor("foo", CustomPreprocessor)
        ModelCatalog.register_custom_preprocessor("bar", CustomPreprocessor2)
        env = gym.make("CartPole-v0")
        p1 = ModelCatalog.get_preprocessor(
            get_registry(), env, {"custom_preprocessor": "foo"})
        self.assertEqual(str(type(p1)), str(CustomPreprocessor))
        p2 = ModelCatalog.get_preprocessor(
            get_registry(), env, {"custom_preprocessor": "bar"})
        self.assertEqual(str(type(p2)), str(CustomPreprocessor2))
        p3 = ModelCatalog.get_preprocessor(get_registry(), env)
        self.assertEqual(type(p3), NoPreprocessor)
Example 7
    def testCustomPreprocessor(self):
        ray.init()
        ModelCatalog.register_custom_preprocessor("foo", CustomPreprocessor)
        ModelCatalog.register_custom_preprocessor("bar", CustomPreprocessor2)
        env = gym.make("CartPole-v0")
        p1 = ModelCatalog.get_preprocessor(
            get_registry(), env, {"custom_preprocessor": "foo"})
        assert type(p1) == CustomPreprocessor
        p2 = ModelCatalog.get_preprocessor(
            get_registry(), env, {"custom_preprocessor": "bar"})
        assert type(p2) == CustomPreprocessor2
        p3 = ModelCatalog.get_preprocessor(get_registry(), env)
        assert type(p3) == NoPreprocessor
Example 8
    def __init__(self,
                 config={},
                 env=None,
                 registry=get_registry(),
                 logger_creator=None):
        """Initialize an RLlib agent.

        Args:
            config (dict): Algorithm-specific configuration data.
            env (str): Name of the environment to use. Note that this can also
                be specified as the `env` key in config.
            registry (obj): Object registry for user-defined envs, models, etc.
                If unspecified, it will be assumed empty.
            logger_creator (func): Function that creates a ray.tune.Logger
                object. If unspecified, a default logger is created.
        """
        self._initialize_ok = False
        self._experiment_id = uuid.uuid4().hex
        env = env or config.get("env")
        if env:
            config["env"] = env
            if registry and registry.contains(ENV_CREATOR, env):
                self.env_creator = registry.get(ENV_CREATOR, env)
            else:
                import gym  # soft dependency
                self.env_creator = lambda env_config: gym.make(env)
        else:
            self.env_creator = lambda env_config: None
        self.config = self._default_config.copy()
        self.registry = registry

        self.config = _deep_update(self.config, config,
                                   self._allow_unknown_configs,
                                   self._allow_unknown_subkeys)

        if logger_creator:
            self._result_logger = logger_creator(self.config)
            self.logdir = self._result_logger.logdir
        else:
            logdir_suffix = "{}_{}_{}".format(
                env, self._agent_name,
                datetime.today().strftime("%Y-%m-%d_%H-%M-%S"))
            if not os.path.exists(DEFAULT_RESULTS_DIR):
                os.makedirs(DEFAULT_RESULTS_DIR)
            self.logdir = tempfile.mkdtemp(prefix=logdir_suffix,
                                           dir=DEFAULT_RESULTS_DIR)
            self._result_logger = UnifiedLogger(self.config, self.logdir, None)

        self._iteration = 0
        self._time_total = 0.0
        self._timesteps_total = 0

        with tf.Graph().as_default():
            self._init()

        self._initialize_ok = True
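
The `registry.contains(ENV_CREATOR, env)` branch above is only taken for environments registered ahead of time. A minimal sketch, assuming a hypothetical `MyEnv` gym environment and the same Ray release as the snippets in this collection:

import ray
import ray.rllib.ppo as ppo
from ray.tune.registry import register_env, get_registry

# MyEnv is a hypothetical gym.Env subclass; env_config is the dict RLlib passes in.
register_env("my_env", lambda env_config: MyEnv(env_config))

ray.init()
agent = ppo.PPOAgent(env="my_env", registry=get_registry())
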
Example 9
    def setUp(self):
        ray.init(num_cpus=1)
        config = DEFAULT_CONFIG.copy()
        config["num_workers"] = 1
        config["observation_filter"] = "ConcurrentMeanStdFilter"
        config["reward_filter"] = "MeanStdFilter"
        config["batch_size"] = 2
        self._temp_dir = tempfile.mkdtemp("a3c_evaluator_test")
        self.e = A3CEvaluator(get_registry(),
                              lambda config: gym.make("CartPole-v0"),
                              config,
                              logdir=self._temp_dir)
Example 10
    def _setup_runner(self):
        self.status = Trial.RUNNING
        trainable_cls = get_registry().get(TRAINABLE_CLASS,
                                           self.trainable_name)
        cls = ray.remote(
            num_cpus=self.resources.driver_cpu_limit,
            num_gpus=self.resources.driver_gpu_limit)(trainable_cls)
        if not self.result_logger:
            if not os.path.exists(self.local_dir):
                os.makedirs(self.local_dir)
            self.logdir = tempfile.mkdtemp(prefix=str(self),
                                           dir=self.local_dir)
            self.result_logger = UnifiedLogger(self.config, self.logdir,
                                               self.upload_dir)
        remote_logdir = self.logdir
        # Logging for trials is handled centrally by TrialRunner, so
        # configure the remote runner to use a noop-logger.
        self.runner = cls.remote(
            config=self.config,
            registry=get_registry(),
            logger_creator=lambda config: NoopLogger(config, remote_logdir))
Example 11
    def testTuplePreprocessor(self):
        ray.init()

        class TupleEnv(object):
            def __init__(self):
                self.observation_space = Tuple(
                    [Discrete(5), Box(0, 1, shape=(3,), dtype=np.float32)])
        p1 = ModelCatalog.get_preprocessor(
            get_registry(), TupleEnv())
        self.assertEqual(p1.shape, (8,))
        self.assertEqual(
            list(p1.transform((0, [1, 2, 3]))),
            [float(x) for x in [1, 0, 0, 0, 0, 1, 2, 3]])
Example 12
    def setUp(self):
        ray.init(num_cpus=1)
        config = DEFAULT_CONFIG.copy()
        config["num_workers"] = 1
        config["observation_filter"] = "ConcurrentMeanStdFilter"
        config["reward_filter"] = "MeanStdFilter"
        config["batch_size"] = 2
        self._temp_dir = tempfile.mkdtemp("a3c_evaluator_test")
        self.e = A3CEvaluator(
            get_registry(),
            lambda config: gym.make("CartPole-v0"),
            config,
            logdir=self._temp_dir)
Example 13
    def __init__(
            self, config={}, env=None, registry=get_registry(),
            logger_creator=None):
        """Initialize an RLlib agent.

        Args:
            config (dict): Algorithm-specific configuration data.
            env (str): Name of the environment to use. Note that this can also
                be specified as the `env` key in config.
            registry (obj): Object registry for user-defined envs, models, etc.
                If unspecified, the default registry will be used.
            logger_creator (func): Function that creates a ray.tune.Logger
                object. If unspecified, a default logger is created.
        """

        # Agents allow env ids to be passed directly to the constructor.
        self._env_id = env or config.get("env")
        Trainable.__init__(self, config, registry, logger_creator)
Example 14
def get_compute_action_rllib(path_to_dir, checkpoint_num, alg):
    """Collect the compute_action method from RLlib's serialized files.

    Parameters
    ----------
    path_to_dir : str
        RLlib directory containing training results
    checkpoint_num : int
        checkpoint number / training iteration of the learned policy
    alg : str
        name of the RLlib algorithm that was used during the training
        procedure

    Returns
    -------
    method
        the compute_action method from the algorithm along with the trained
        parameters
    """
    # collect the configuration information from the RLlib checkpoint
    result_dir = path_to_dir if path_to_dir[-1] != '/' else path_to_dir[:-1]
    config = get_rllib_config(result_dir)

    # run on only one cpu for rendering purposes
    ray.init(num_cpus=1)
    config["num_workers"] = 1

    # create and register a gym+rllib env
    flow_params = get_flow_params(config)
    create_env, env_name = make_create_env(params=flow_params,
                                           version=9999,
                                           render=False)
    register_env(env_name, create_env)

    # recreate the agent
    agent_cls = get_agent_class(alg)
    agent = agent_cls(env=env_name, registry=get_registry(), config=config)

    # restore the trained parameters into the policy
    checkpoint = result_dir + '/checkpoint-{}'.format(checkpoint_num)
    agent._restore(checkpoint)

    return agent.compute_action
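
A hedged usage sketch of the helper above; the result directory, checkpoint number, and environment id below are placeholders, not values from the source:

import gym

# Hypothetical training output directory and checkpoint number.
compute_action = get_compute_action_rllib(
    "/tmp/ray_results/my_flow_experiment", checkpoint_num=50, alg="PPO")

# Roll the trained policy out in a matching env (placeholder env id).
env = gym.make("MyFlowEnv-v0")
obs, done = env.reset(), False
while not done:
    obs, reward, done, _ = env.step(compute_action(obs))
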
Example 15
    def __init__(self, config=None, registry=None, logger_creator=None):
        """Initialize a Trainable.

        Subclasses should prefer defining ``_setup()`` instead of overriding
        ``__init__()`` directly.

        Args:
            config (dict): Trainable-specific configuration data.
            registry (obj): Object registry for user-defined envs, models, etc.
                If unspecified, the default registry will be used.
            logger_creator (func): Function that creates a ray.tune.Logger
                object. If unspecified, a default logger is created.
        """

        if registry is None:
            from ray.tune.registry import get_registry
            registry = get_registry()

        self._initialize_ok = False
        self._experiment_id = uuid.uuid4().hex
        self.config = config or {}
        self.registry = registry

        if logger_creator:
            self._result_logger = logger_creator(self.config)
            self.logdir = self._result_logger.logdir
        else:
            logdir_prefix = datetime.today().strftime("%Y-%m-%d_%H-%M-%S")
            if not os.path.exists(DEFAULT_RESULTS_DIR):
                os.makedirs(DEFAULT_RESULTS_DIR)
            self.logdir = tempfile.mkdtemp(
                prefix=logdir_prefix, dir=DEFAULT_RESULTS_DIR)
            self._result_logger = UnifiedLogger(self.config, self.logdir, None)

        self._iteration = 0
        self._time_total = 0.0
        self._timesteps_total = 0
        self._setup()
        self._initialize_ok = True
        self._local_ip = ray.services.get_node_ip_address()
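
As the docstring says, subclasses define `_setup()` rather than overriding `__init__()`. A minimal sketch of such a subclass; the class name and counter are illustrative, the import path is assumed for this Ray release, and the exact return type of `_train()` (a TrainingResult here, a plain dict in later releases) is left as a comment rather than assumed:

from ray.tune.trainable import Trainable  # import path assumed for this release


class MyTrainable(Trainable):
    def _setup(self):
        # Called once by Trainable.__init__ after config and logger setup.
        self.iteration_count = 0

    def _train(self):
        # One unit of training work per call; the base class logs whatever
        # this returns (TrainingResult / result dict, depending on version).
        self.iteration_count += 1
        ...
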
Example 16
    def __init__(self, config=None, registry=None, logger_creator=None):
        """Initialize a Trainable.

        Subclasses should prefer defining ``_setup()`` instead of overriding
        ``__init__()`` directly.

        Args:
            config (dict): Trainable-specific configuration data.
            registry (obj): Object registry for user-defined envs, models, etc.
                If unspecified, the default registry will be used.
            logger_creator (func): Function that creates a ray.tune.Logger
                object. If unspecified, a default logger is created.
        """

        if registry is None:
            from ray.tune.registry import get_registry
            registry = get_registry()

        self._initialize_ok = False
        self._experiment_id = uuid.uuid4().hex
        self.config = config or {}
        self.registry = registry

        if logger_creator:
            self._result_logger = logger_creator(self.config)
            self.logdir = self._result_logger.logdir
        else:
            logdir_prefix = datetime.today().strftime("%Y-%m-%d_%H-%M-%S")
            if not os.path.exists(DEFAULT_RESULTS_DIR):
                os.makedirs(DEFAULT_RESULTS_DIR)
            self.logdir = tempfile.mkdtemp(prefix=logdir_prefix,
                                           dir=DEFAULT_RESULTS_DIR)
            self._result_logger = UnifiedLogger(self.config, self.logdir, None)

        self._iteration = 0
        self._time_total = 0.0
        self._timesteps_total = 0
        self._setup()
        self._initialize_ok = True
        self._local_ip = ray.services.get_node_ip_address()
Example 17
    def test_ray(self):
        """
        Integration test for ray/rllib + flow
        """

        # Test 1: test_two_level_ray
        config = ppo.DEFAULT_CONFIG.copy()
        num_workers = 1
        ray.init(num_cpus=num_workers, redirect_output=False)
        config["num_workers"] = num_workers
        config["timesteps_per_batch"] = min(HORIZON * num_workers, 128)
        config["num_sgd_iter"] = 1
        config["model"].update({"fcnet_hiddens": [3, 3]})
        config["gamma"] = 0.999
        config["min_steps_per_task"] = HORIZON
        config["horizon"] = HORIZON
        config["sgd_batchsize"] = 4

        additional_env_params = {
            "target_velocity": 8,
            "scenario_type": LoopScenario
        }
        additional_net_params = {
            "length": 260,
            "lanes": 1,
            "speed_limit": 30,
            "resolution": 40
        }
        vehicle_params = [
            dict(veh_id="rl",
                 num_vehicles=1,
                 acceleration_controller=(RLController, {}),
                 routing_controller=(ContinuousRouter, {})),
            dict(veh_id="idm",
                 num_vehicles=21,
                 acceleration_controller=(IDMController, {}),
                 routing_controller=(ContinuousRouter, {}))
        ]

        flow_params = dict(sumo=dict(sim_step=0.1, no_step_log=False),
                           env=dict(horizon=HORIZON,
                                    additional_params=additional_env_params),
                           net=dict(no_internal_links=False,
                                    additional_params=additional_net_params),
                           veh=vehicle_params,
                           initial=dict(spacing="uniform",
                                        bunching=30,
                                        min_gap=0))

        flow_env_name = "WaveAttenuationPOEnv"
        create_env, env_name = make_create_env(flow_env_name, flow_params, 0)

        # Register as rllib env
        registry.register_env(env_name, create_env)

        alg = ppo.PPOAgent(env=env_name,
                           registry=registry.get_registry(),
                           config=config)
        for i in range(1):
            alg.train()
            checkpoint_path = alg.save()
            self.assertTrue(os.path.exists("%s.index" % checkpoint_path))

        # Test 2: test_two_level_ray
        # Integration test for two-level fcnet policy
        # FIXME(cathywu) ray restart currently not supported, so need to tie
        # integration tests together for the time being.
        # reload(ppo)
        # reload(registry)
        config = ppo.DEFAULT_CONFIG.copy()
        num_workers = 1
        # ray.init(num_cpus=num_workers, redirect_output=True)
        config["num_workers"] = num_workers
        config["timesteps_per_batch"] = min(HORIZON * num_workers, 128)
        config["num_sgd_iter"] = 1
        config["model"].update({"fcnet_hiddens": [3, 3]})
        config["gamma"] = 0.999
        config["min_steps_per_task"] = HORIZON
        config["horizon"] = HORIZON
        config["sgd_batchsize"] = 4

        config["model"].update({"fcnet_hiddens": [5, 3]}, )
        options = {
            "num_subpolicies": 2,
            "fn_choose_subpolicy": fn_choose_subpolicy,
            "hierarchical_fcnet_hiddens": [[3, 3]] * 2
        }
        config["model"].update({"custom_options": options})
Example 18
    if not args.env:
        if not args.config.get("env"):
            parser.error("the following arguments are required: --env")
        args.env = args.config.get("env")

    ray.init()

    cls = get_agent_class(args.run)
    agent = cls(env=args.env, config=args.config)
    agent.restore(args.checkpoint)
    num_steps = int(args.steps)

    if args.run == "DQN":
        env = gym.make(args.env)
        env = wrap_dqn(get_registry(), env, args.config.get("model", {}))
    else:
        env = ModelCatalog.get_preprocessor_as_wrapper(get_registry(),
                                                       gym.make(args.env))
    if args.out is not None:
        rollouts = []
    steps = 0
    while steps < (num_steps or steps + 1):
        if args.out is not None:
            rollout = []
        state = env.reset()
        done = False
        reward_total = 0.0
        while not done and steps < (num_steps or steps + 1):
            action = agent.compute_action(state)
            next_state, reward, done, _ = env.step(action)
Example 19
    def testCustomModel(self):
        ray.init()
        ModelCatalog.register_custom_model("foo", CustomModel)
        p1 = ModelCatalog.get_model(
            get_registry(), 1, 5, {"custom_model": "foo"})
        self.assertEqual(str(type(p1)), str(CustomModel))
Example 20
    config = get_rllib_config(result_dir)

    # Run on only one cpu for rendering purposes
    ray.init(num_cpus=1)
    config["num_workers"] = 1

    flow_params = get_flow_params(config)

    # Create and register a gym+rllib env
    create_env, env_name = make_create_env(params=flow_params,
                                           version=0,
                                           sumo_binary="sumo")
    register_env(env_name, create_env)

    agent_cls = get_agent_class(args.run)
    agent = agent_cls(env=env_name, registry=get_registry(), config=config)
    checkpoint = result_dir + '/checkpoint-' + args.checkpoint_num
    agent._restore(checkpoint)

    # Recreate the scenario from the pickled parameters
    exp_tag = flow_params["exp_tag"]
    net_params = flow_params['net']
    vehicles = flow_params['veh']
    initial_config = flow_params['initial']
    module = __import__("flow.scenarios", fromlist=[flow_params["scenario"]])
    scenario_class = getattr(module, flow_params["scenario"])
    module = __import__("flow.scenarios", fromlist=[flow_params["generator"]])
    generator_class = getattr(module, flow_params["generator"])

    scenario = scenario_class(name=exp_tag,
                              generator_class=generator_class,
Example 21
        "num_subpolicies": 2,
        "fn_choose_subpolicy": fn_choose_subpolicy,
        "hierarchical_fcnet_hiddens": [[32, 32]] * 2
    }
    config["model"].update({"custom_options": options})

    flow_env_name = "TwoLoopsMergePOEnv"
    exp_tag = "merge_two_level_policy_example"
    this_file = os.path.basename(__file__)[:-3]  # filename without '.py'
    flow_params["flowenv"] = flow_env_name
    flow_params["exp_tag"] = exp_tag
    flow_params["module"] = os.path.basename(__file__)[:-3]
    config['model']['custom_options'].update({
        'flowenv': flow_env_name,
        'exp_tag': exp_tag,
        'module': this_file
    })
    create_env, env_name = make_create_env(flow_env_name,
                                           flow_params,
                                           version=0,
                                           exp_tag=exp_tag)

    # Register as rllib env
    register_rllib_env(env_name, create_env)

    alg = ppo.PPOAgent(env=env_name, registry=get_registry(), config=config)
    for i in range(2):
        alg.train()
        if i % 20 == 0:
            alg.save()  # save checkpoint
Example 22
    def __init__(self,
                 env_creator,
                 policy_graph,
                 tf_session_creator=None,
                 batch_steps=100,
                 batch_mode="truncate_episodes",
                 preprocessor_pref="rllib",
                 sample_async=False,
                 compress_observations=False,
                 observation_filter="NoFilter",
                 registry=None,
                 env_config=None,
                 model_config=None,
                 policy_config=None):
        """Initialize a policy evaluator.

        Arguments:
            env_creator (func): Function that returns a gym.Env given an
                env config dict.
            policy_graph (class): A class implementing rllib.PolicyGraph or
                rllib.TFPolicyGraph.
            tf_session_creator (func): A function that returns a TF session.
                This is optional and only useful with TFPolicyGraph.
            batch_steps (int): The target number of env transitions to include
                in each sample batch returned from this evaluator.
            batch_mode (str): One of the following choices:
                complete_episodes: each batch will be at least batch_steps
                    in size, and will include one or more complete episodes.
                truncate_episodes: each batch will be around batch_steps
                    in size, and include transitions from one episode only.
                pack_episodes: each batch will be exactly batch_steps in
                    size, and may include transitions from multiple episodes.
            preprocessor_pref (str): Whether to prefer RLlib preprocessors
                ("rllib") or deepmind ("deepmind") when applicable.
            sample_async (bool): Whether to compute samples asynchronously in
                the background, which improves throughput but can cause samples
                to be slightly off-policy.
            compress_observations (bool): If true, compress the observations
                returned.
            observation_filter (str): Name of observation filter to use.
            registry (tune.Registry): User-registered objects. Pass in the
                value from tune.registry.get_registry() if you're having
                trouble resolving things like custom envs.
            env_config (dict): Config to pass to the env creator.
            model_config (dict): Config to use when creating the policy model.
            policy_config (dict): Config to pass to the policy.
        """

        registry = registry or get_registry()
        env_config = env_config or {}
        policy_config = policy_config or {}
        model_config = model_config or {}

        assert batch_mode in [
            "complete_episodes", "truncate_episodes", "pack_episodes"
        ]
        self.env_creator = env_creator
        self.policy_graph = policy_graph
        self.batch_steps = batch_steps
        self.batch_mode = batch_mode
        self.compress_observations = compress_observations

        self.env = env_creator(env_config)
        is_atari = hasattr(self.env.unwrapped, "ale")
        if is_atari and "custom_preprocessor" not in model_config and \
                preprocessor_pref == "deepmind":
            self.env = wrap_deepmind(self.env, dim=model_config.get("dim", 80))
        else:
            self.env = ModelCatalog.get_preprocessor_as_wrapper(
                registry, self.env, model_config)

        self.vectorized = hasattr(self.env, "vector_reset")
        self.policy_map = {}

        if issubclass(policy_graph, TFPolicyGraph):
            with tf.Graph().as_default():
                if tf_session_creator:
                    self.sess = tf_session_creator()
                else:
                    self.sess = tf.Session(config=tf.ConfigProto(
                        gpu_options=tf.GPUOptions(allow_growth=True)))
                with self.sess.as_default():
                    policy = policy_graph(self.env.observation_space,
                                          self.env.action_space, registry,
                                          policy_config)
        else:
            policy = policy_graph(self.env.observation_space,
                                  self.env.action_space, registry,
                                  policy_config)
        self.policy_map = {"default": policy}

        self.obs_filter = get_filter(observation_filter,
                                     self.env.observation_space.shape)
        self.filters = {"obs_filter": self.obs_filter}

        if self.vectorized:
            raise NotImplementedError("Vector envs not yet supported")
        else:
            if batch_mode not in [
                    "pack_episodes", "truncate_episodes", "complete_episodes"
            ]:
                raise NotImplementedError("Batch mode not yet supported")
            pack = batch_mode == "pack_episodes"
            if batch_mode == "complete_episodes":
                batch_steps = 999999
            if sample_async:
                self.sampler = AsyncSampler(self.env,
                                            self.policy_map["default"],
                                            self.obs_filter,
                                            batch_steps,
                                            pack=pack)
                self.sampler.start()
            else:
                self.sampler = SyncSampler(self.env,
                                           self.policy_map["default"],
                                           self.obs_filter,
                                           batch_steps,
                                           pack=pack)
Example 23
    )


def create_env(env_config):
    pass_params_to_gym(env_name)
    env = gym.envs.make(env_name)
    return env


if __name__ == '__main__':
    register_env(env_name, lambda env_config: create_env(env_config))
    config = ppo.DEFAULT_CONFIG.copy()
    horizon = 10
    num_cpus = 4
    ray.init(num_cpus=num_cpus, redirect_output=True)
    config["num_workers"] = num_cpus
    config["timesteps_per_batch"] = 10
    config["num_sgd_iter"] = 10
    config["gamma"] = 0.999
    config["horizon"] = horizon
    config["use_gae"] = False
    config["model"].update({"fcnet_hiddens": [256, 256]})
    options = {"multiagent_obs_shapes": [2, 2],
               "multiagent_act_shapes": [1, 1],
               "multiagent_shared_model": False,
               "multiagent_fcnet_hiddens": [[32, 32]] * 2}
    config["model"].update({"custom_options": options})
    alg = ppo.PPOAgent(env=env_name, registry=get_registry(), config=config)
    for i in range(1):
        alg.train()
Example 24
        with open(config_path) as f:
            args.config = json.load(f)

    if not args.env:
        if not args.config.get("env"):
            parser.error("the following arguments are required: --env")
        args.env = args.config.get("env")

    ray.init()

    cls = get_agent_class(args.run)
    agent = cls(env=args.env)
    agent.restore(args.checkpoint)
    num_steps = int(args.steps)

    env = ModelCatalog.get_preprocessor_as_wrapper(get_registry(),
                                                   gym.make(args.env))
    if args.out is not None:
        rollouts = []
    steps = 0
    while steps < (num_steps or steps + 1):
        if args.out is not None:
            rollout = []
        state = env.reset()
        done = False
        while not done and steps < (num_steps or steps + 1):
            action = agent.compute_action(state)
            next_state, reward, done, _ = env.step(action)
            if not args.no_render:
                env.render()
            if args.out is not None:
Example 25
output_path = join(args.output, datetime.today().strftime('%Y-%m-%d-%H-%M-%S'))
logging_path = join(output_path, 'log.txt')
if not isdir(output_path):
    makedirs(output_path)

if isfile('usernames'):
    remove('usernames')

env_creator_name = "PokeBattleEnv-v0"
register_env(
    env_creator_name,
    lambda config: PokeBattleEnv(
        ShowdownSimulator(self_play=False, logging_file=logging_path)))

ray.init()
config = ppo.DEFAULT_CONFIG.copy()
config['num_workers'] = args.workers
config['timesteps_per_batch'] = args.batch_steps
config['horizon'] = 500
config['min_steps_per_task'] = 1
config['gamma'] = 1
config['model']['fcnet_hiddens'] = [2000, 500, 100]
agent = ppo.PPOAgent(config=config, env=env_creator_name, registry=get_registry())

if args.restore is not None:
    agent.restore(args.restore)

for i in range(args.iterations):
    result = agent.train()
    print(f"result: {result}")
    if i % args.save_iterations == 0:
        agent.save(checkpoint_dir=output_path)
Example 26
    def __init__(self,
                 env_creator,
                 policy_graph,
                 tf_session_creator=None,
                 batch_steps=100,
                 batch_mode="truncate_episodes",
                 episode_horizon=None,
                 preprocessor_pref="rllib",
                 sample_async=False,
                 compress_observations=False,
                 num_envs=1,
                 observation_filter="NoFilter",
                 registry=None,
                 env_config=None,
                 model_config=None,
                 policy_config=None):
        """Initialize a policy evaluator.

        Arguments:
            env_creator (func): Function that returns a gym.Env given an
                env config dict.
            policy_graph (class): A class implementing rllib.PolicyGraph or
                rllib.TFPolicyGraph.
            tf_session_creator (func): A function that returns a TF session.
                This is optional and only useful with TFPolicyGraph.
            batch_steps (int): The target number of env transitions to include
                in each sample batch returned from this evaluator.
            batch_mode (str): One of the following batch modes:
                "truncate_episodes": Each call to sample() will return a batch
                    of exactly `batch_steps` in size. Episodes may be truncated
                    in order to meet this size requirement. When
                    `num_envs > 1`, episodes will be truncated to sequences of
                    `batch_steps / num_envs` in length.
                "complete_episodes": Each call to sample() will return a batch
                    of at least `batch_steps` in size. Episodes will not be
                    truncated, but multiple episodes may be packed within one
                    batch to meet the batch size. Note that when
                    `num_envs > 1`, episode steps will be buffered until the
                    episode completes, and hence batches may contain
                    significant amounts of off-policy data.
            episode_horizon (int): Horizon at which to stop episodes, if given.
            preprocessor_pref (str): Whether to prefer RLlib preprocessors
                ("rllib") or deepmind ("deepmind") when applicable.
            sample_async (bool): Whether to compute samples asynchronously in
                the background, which improves throughput but can cause samples
                to be slightly off-policy.
            compress_observations (bool): If true, compress the observations
                returned.
            num_envs (int): If more than one, will create multiple envs
                and vectorize the computation of actions. This has no effect
                if the env already implements VectorEnv.
            observation_filter (str): Name of observation filter to use.
            registry (tune.Registry): User-registered objects. Pass in the
                value from tune.registry.get_registry() if you're having
                trouble resolving things like custom envs.
            env_config (dict): Config to pass to the env creator.
            model_config (dict): Config to use when creating the policy model.
            policy_config (dict): Config to pass to the policy.
        """

        registry = registry or get_registry()
        env_config = env_config or {}
        policy_config = policy_config or {}
        model_config = model_config or {}
        self.env_creator = env_creator
        self.policy_graph = policy_graph
        self.batch_steps = batch_steps
        self.batch_mode = batch_mode
        self.compress_observations = compress_observations

        self.env = env_creator(env_config)
        if isinstance(self.env, VectorEnv) or \
                isinstance(self.env, ServingEnv) or \
                isinstance(self.env, AsyncVectorEnv):

            def wrap(env):
                return env  # we can't auto-wrap these env types
        elif is_atari(self.env) and \
                "custom_preprocessor" not in model_config and \
                preprocessor_pref == "deepmind":

            def wrap(env):
                return wrap_deepmind(env, dim=model_config.get("dim", 80))
        else:

            def wrap(env):
                return ModelCatalog.get_preprocessor_as_wrapper(
                    registry, env, model_config)

        self.env = wrap(self.env)

        def make_env():
            return wrap(env_creator(env_config))

        self.policy_map = {}

        if issubclass(policy_graph, TFPolicyGraph):
            with tf.Graph().as_default():
                if tf_session_creator:
                    self.sess = tf_session_creator()
                else:
                    self.sess = tf.Session(config=tf.ConfigProto(
                        gpu_options=tf.GPUOptions(allow_growth=True)))
                with self.sess.as_default():
                    policy = policy_graph(self.env.observation_space,
                                          self.env.action_space, registry,
                                          policy_config)
        else:
            policy = policy_graph(self.env.observation_space,
                                  self.env.action_space, registry,
                                  policy_config)
        self.policy_map = {"default": policy}

        self.obs_filter = get_filter(observation_filter,
                                     self.env.observation_space.shape)
        self.filters = {"obs_filter": self.obs_filter}

        # Always use vector env for consistency even if num_envs = 1
        if not isinstance(self.env, AsyncVectorEnv):
            if isinstance(self.env, ServingEnv):
                self.vector_env = _ServingEnvToAsync(self.env)
            else:
                if not isinstance(self.env, VectorEnv):
                    self.env = VectorEnv.wrap(make_env, [self.env],
                                              num_envs=num_envs)
                self.vector_env = _VectorEnvToAsync(self.env)
        else:
            self.vector_env = self.env

        if self.batch_mode == "truncate_episodes":
            if batch_steps % num_envs != 0:
                raise ValueError(
                    "In 'truncate_episodes' batch mode, `batch_steps` must be "
                    "evenly divisible by `num_envs`. Got {} and {}.".format(
                        batch_steps, num_envs))
            batch_steps = batch_steps // num_envs
            pack_episodes = True
        elif self.batch_mode == "complete_episodes":
            batch_steps = float("inf")  # never cut episodes
            pack_episodes = False  # sampler will return 1 episode per poll
        else:
            raise ValueError("Unsupported batch mode: {}".format(
                self.batch_mode))
        if sample_async:
            self.sampler = AsyncSampler(self.vector_env,
                                        self.policy_map["default"],
                                        self.obs_filter,
                                        batch_steps,
                                        horizon=episode_horizon,
                                        pack=pack_episodes)
            self.sampler.start()
        else:
            self.sampler = SyncSampler(self.vector_env,
                                       self.policy_map["default"],
                                       self.obs_filter,
                                       batch_steps,
                                       horizon=episode_horizon,
                                       pack=pack_episodes)
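
A hedged construction sketch for the evaluator above; `MyPolicyGraph` is a hypothetical PolicyGraph/TFPolicyGraph implementation and the settings are illustrative only:

import gym
from ray.tune.registry import get_registry

# MyPolicyGraph is a hypothetical rllib PolicyGraph subclass defined elsewhere.
evaluator = PolicyEvaluator(
    env_creator=lambda env_config: gym.make("CartPole-v0"),
    policy_graph=MyPolicyGraph,
    batch_steps=100,
    batch_mode="truncate_episodes",
    num_envs=1,
    registry=get_registry())

batch = evaluator.sample()  # a SampleBatch of ~batch_steps transitions
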
Example 27
    if not args.env:
        if not args.config.get("env"):
            parser.error("the following arguments are required: --env")
        args.env = args.config.get("env")

    ray.init()

    cls = get_agent_class(args.run)
    agent = cls(env=args.env, config=args.config)
    agent.restore(args.checkpoint)
    num_steps = int(args.steps)

    if args.run == "DQN":
        env = gym.make(args.env)
        env = wrap_dqn(get_registry(), env, args.config.get("model", {}))
    else:
        env = ModelCatalog.get_preprocessor_as_wrapper(get_registry(),
                                                       gym.make(args.env))
    if args.out is not None:
        rollouts = []
    steps = 0
    while steps < (num_steps or steps + 1):
        if args.out is not None:
            rollout = []
        state = env.reset()
        done = False
        reward_total = 0.0
        while not done and steps < (num_steps or steps + 1):
            action = agent.compute_action(state)
            next_state, reward, done, _ = env.step(action)
Example 28
    def testCustomModel(self):
        ray.init()
        ModelCatalog.register_custom_model("foo", CustomModel)
        p1 = ModelCatalog.get_model(
            get_registry(), 1, 5, {"custom_model": "foo"})
        assert type(p1) == CustomModel
Example 29
parser.add_argument('-b',
                    '--battles',
                    type=int,
                    default=1000,
                    help='Amount of battles to test the model')
args = parser.parse_args()

env = PokeBattleEnv(ShowdownSimulator(self_play=False))
env_creator_name = "PokeBattleEnv-v0"
register_env(env_creator_name, lambda config: env)

ray.init()
config = ppo.DEFAULT_CONFIG.copy()
config['num_workers'] = 1
config['timesteps_per_batch'] = 200
config['horizon'] = 500
config['min_steps_per_task'] = 1
agent = ppo.PPOAgent(config=config,
                     env=env_creator_name,
                     registry=get_registry())

agent.restore(args.load)

for battle in range(args.battles):
    observation = env.reset()
    env.render()
    done = False
    while not done:
        action = agent.compute_action(observation)
        observation, _, done, _ = env.step(action)