Ejemplo n.º 1
0
    def test_dqn_apex_cpu_spawn_full_train(self, tmpdir):
        # With only CPUs available, pytorch lightning replaces ddp with
        # ddp-spawn mode by default.
        os.environ["WORLD_SIZE"] = "3"
        config = generate_env_config("CartPole-v0", {})
        config = generate_training_config(root_dir=tmpdir.make_numbered_dir(),
                                          config=config)
        config = generate_algorithm_config("DQNApex", config)
        # use ddp_cpu
        config["gpus"] = None
        config["num_processes"] = 3
        # this testing process corresponds to this node
        config["num_nodes"] = 1
        config["early_stopping_patience"] = 100
        # Use class instead of string name since algorithms is distributed.
        config["frame_config"]["models"] = [QNet, QNet]
        # Two identical (but distinct) kwargs dicts, one per model.
        config["frame_config"]["model_kwargs"] = [
            {"state_dim": 4, "action_num": 2} for _ in range(2)
        ]

        # Sub-processes started by spawn cannot expose max_total_reward
        # directly, so a special callback reports it through a queue.
        queue = SimpleQueue(ctx=mp.get_context("spawn"))
        cb = [SpawnInspectCallback(queue)]
        t = Thread(target=launch, args=(config, ), kwargs={"pl_callbacks": cb})
        t.start()

        default_logger.info("Start tracking")
        subproc_max_total_reward = [0] * 3
        while True:
            try:
                result = queue.quick_get(timeout=60)
            except TimeoutError:
                # no more results
                default_logger.info("No more results.")
                break
            default_logger.info(
                f"Result from process [{result[0]}]: {result[1]}")
            subproc_max_total_reward[result[0]] = result[1]
        t.join()

        avg_reward = sum(subproc_max_total_reward) / 3
        assert avg_reward >= 150, \
            f"Max total reward {avg_reward} below threshold 150."
Ejemplo n.º 2
0
    def __init__(self, env_creators: List[Callable[[int], gym.Env]]) -> None:
        """
        Args:
            env_creators: List of gym environment creators, used to create
                environments on sub process workers. Each creator accepts an
                index as its environment id.
        """
        super().__init__()
        self.workers = []

        # Some environments will hang or collapse when using fork context.
        # E.g.: in "CarRacing-v0". pyglet used by gym will have render problems.

        # In case users want to pass tensors to environments,
        # always copy all tensors to avoid errors.
        ctx = get_context("spawn")
        self.cmd_queues = [
            SimpleQueue(ctx=ctx, copy_tensor=True) for _ in range(len(env_creators))
        ]
        self.result_queue = SimpleQueue(ctx=ctx, copy_tensor=True)
        for env_idx, (cmd_queue, ec) in enumerate(
            zip(self.cmd_queues, env_creators)
        ):
            # enable recursive serialization to support
            # lambda & local function creators.
            self.workers.append(
                ctx.Process(
                    target=self._worker,
                    args=(
                        cmd_queue,
                        self.result_queue,
                        dumps(ec, recurse=True, copy_tensor=True),
                        env_idx,
                    ),
                )
            )

        for worker in self.workers:
            # Daemonize workers so they are killed with the parent process.
            worker.daemon = True
            worker.start()

        self.env_size = env_size = len(env_creators)
        self._cmd_lock = Lock()
        self._closed = False
        # Probe one temporary environment for the space metadata.
        tmp_env = env_creators[0](0)
        self._action_space = tmp_env.action_space
        self._obsrv_space = tmp_env.observation_space
        tmp_env.close()
        # The np.bool alias was removed in NumPy 1.24; the builtin ``bool``
        # is the equivalent dtype and works on all NumPy versions.
        self._terminal = np.zeros([env_size], dtype=bool)
Ejemplo n.º 3
0
    def __init__(self, env_creators: List[Callable[[int], gym.Env]]) -> None:
        """
        Args:
            env_creators: List of gym environment creators, used to create
                environments on sub process workers. Each creator accepts an
                index as its environment id.
        """
        super().__init__()
        self.workers = []

        # Some environments will hang or collapse when using fork context.
        # E.g.: in "CarRacing-v0". pyglet used by gym will have render problems.
        ctx = get_context("spawn")
        self.cmd_queues = [
            SimpleQueue(ctx=ctx) for _ in range(len(env_creators))
        ]
        self.result_queue = SimpleQueue(ctx=ctx)
        for env_idx, (cmd_queue, ec) in enumerate(
            zip(self.cmd_queues, env_creators)
        ):
            # lambda & local function creators must be serialized by dill,
            # the default pickler in spawn context doesn't work.
            self.workers.append(
                ctx.Process(target=self._worker,
                            args=(cmd_queue, self.result_queue, dill.dumps(ec),
                                  env_idx)))

        for worker in self.workers:
            # Daemonize workers so they are killed with the parent process.
            worker.daemon = True
            worker.start()

        self.env_size = env_size = len(env_creators)
        self._cmd_lock = Lock()
        self._closed = False
        # Probe one temporary environment for the space metadata.
        tmp_env = env_creators[0](0)
        self._action_space = tmp_env.action_space
        self._obsrv_space = tmp_env.observation_space
        tmp_env.close()
        # The np.bool alias was removed in NumPy 1.24; the builtin ``bool``
        # is the equivalent dtype and works on all NumPy versions.
        self._terminal = np.zeros([env_size], dtype=bool)
Ejemplo n.º 4
0
    def _worker(cmd_queue: SimpleQueue, result_queue: SimpleQueue, env_creator,
                env_idx):
        """Worker entry point: deserialize the creator, build the
        environment, then serve commands from the master until a ``None``
        sentinel or a keyboard interrupt arrives."""
        env = None
        try:
            env = dill.loads(env_creator)(env_idx)
        except Exception:
            # Environment creation failed; report via a dedicated exit code.
            exit(2)
        try:
            while True:
                try:
                    command = cmd_queue.quick_get(timeout=1e-3)
                except TimeoutError:
                    # Nothing queued yet; poll again.
                    continue

                try:
                    if command is None:
                        # End of all tasks signal received
                        cmd_queue.close()
                        result_queue.close()
                        break
                    method, args, kwargs = command
                    result_queue.put(
                        (env_idx, True,
                         getattr(env, method)(*args, **kwargs)))
                except Exception as e:
                    # Something has gone wrong during execution, serialize
                    # the exception and send it back to master.
                    result_queue.put(
                        (env_idx, False, ExceptionWithTraceback(e)))
        except KeyboardInterrupt:
            cmd_queue.close()
            result_queue.close()
Ejemplo n.º 5
0
class ParallelWrapperSubProc(ParallelWrapperBase):
    """
    Parallel wrapper based on sub processes.

    Each environment lives in its own spawned worker process. Commands are
    dispatched through per-worker command queues, and results are collected
    from a single shared result queue.
    """
    def __init__(self, env_creators: List[Callable[[int], gym.Env]]) -> None:
        """
        Args:
            env_creators: List of gym environment creators, used to create
                environments on sub process workers. Each creator accepts an
                index as its environment id.
        """
        super().__init__()
        self.workers = []

        # Some environments will hang or collapse when using fork context.
        # E.g.: in "CarRacing-v0". pyglet used by gym will have render problems.
        ctx = get_context("spawn")
        self.cmd_queues = [
            SimpleQueue(ctx=ctx) for _ in range(len(env_creators))
        ]
        self.result_queue = SimpleQueue(ctx=ctx)
        for env_idx, (cmd_queue, ec) in enumerate(
            zip(self.cmd_queues, env_creators)
        ):
            # lambda & local function creators must be serialized by dill,
            # the default pickler in spawn context doesn't work.
            self.workers.append(
                ctx.Process(target=self._worker,
                            args=(cmd_queue, self.result_queue, dill.dumps(ec),
                                  env_idx)))

        for worker in self.workers:
            # Daemonize workers so they are killed with the parent process.
            worker.daemon = True
            worker.start()

        self.env_size = env_size = len(env_creators)
        self._cmd_lock = Lock()
        self._closed = False
        # Probe one temporary environment for the space metadata.
        tmp_env = env_creators[0](0)
        self._action_space = tmp_env.action_space
        self._obsrv_space = tmp_env.observation_space
        tmp_env.close()
        # The np.bool alias was removed in NumPy 1.24; the builtin ``bool``
        # is the equivalent dtype and works on all NumPy versions.
        self._terminal = np.zeros([env_size], dtype=bool)

    def reset(self, idx: Union[int, List[int]] = None) -> List[object]:
        """
        Reset all/specified environments and clear their terminal flags.

        Args:
            idx: Indexes of selected environments, default is all.

        Returns:
            A list of gym states.
        """
        env_idxs = self._select_envs(idx)
        self._terminal[env_idxs] = False
        with self._cmd_lock:
            return self._call_gym_env_method(env_idxs, "reset")

    def step(self,
             action: Union[np.ndarray, List[Any]],
             idx: Union[int, List[int]] = None) \
            -> Tuple[List[object], np.ndarray, np.ndarray, List[dict]]:
        """
        Let specified environment(s) run one time step. Specified environments
        must be active and have not reached terminal states before.

        Args:
            action: Actions sent to each specified environment, the size of the
                first dimension must match the number of selected environments.
            idx: Indexes of selected environments, default is all.

        Returns:
            Observation, reward, terminal, and diagnostic info.

        Raises:
            ValueError: If the number of actions does not match the number
                of selected environments.
        """
        env_idxs = self._select_envs(idx)
        if len(action) != len(env_idxs):
            raise ValueError("Action number must match environment number!")

        with self._cmd_lock:
            result = self._call_gym_env_method(env_idxs, "step",
                                               [(act, ) for act in action])

        # Split the per-env (obsrv, reward, terminal, info) tuples into
        # column-wise results.
        obsrv = [r[0] for r in result]
        reward = np.stack([r[1] for r in result])
        terminal = np.stack([r[2] for r in result])
        info = [r[3] for r in result]

        # Once terminal, an environment stays terminal until reset.
        self._terminal[env_idxs] |= terminal

        return obsrv, reward, terminal, info

    def seed(self, seed: Union[int, List[int]] = None) -> List[int]:
        """
        Set seeds for all environments.

        Args:
            seed: If seed is ``int``, the same seed will be used for all
                environments.
                If seed is ``List[int]``, it must have the same size as
                the number of all environments.
                If seed is ``None``, all environments will use the default
                seed.

        Returns:
            Actual used seed returned by all environments.
        """
        if np.isscalar(seed) or seed is None:
            # Broadcast a single seed (or None) to every environment.
            seed = [seed] * self.size()
        env_idxs = self._select_envs()
        with self._cmd_lock:
            return self._call_gym_env_method(env_idxs, "seed",
                                             [(sd, ) for sd in seed])

    def render(self,
               idx: Union[int, List[int]] = None,
               *args,
               **kwargs) -> List[np.ndarray]:
        """
        Render all/specified environments.

        Args:
            idx: Indexes of selected environments, default is all.

        Returns:
            A list of rendered frames, of type ``np.ndarray`` and size
            (H, W, 3).
        """
        env_idxs = self._select_envs(idx)
        with self._cmd_lock:
            # Force "rgb_array" mode so workers return frames instead of
            # opening windows in their own processes.
            return self._call_gym_env_method(env_idxs,
                                             "render",
                                             kwargs=list(
                                                 repeat({"mode": "rgb_array"},
                                                        len(env_idxs))))

    def close(self) -> None:
        """
        Close all environments, including the wrapper. Idempotent: calling
        it more than once is a no-op after the first call.
        """
        with self._cmd_lock:
            if self._closed:
                return
            self._closed = True
            env_idxs = self._select_envs()
            self._call_gym_env_method(env_idxs, "close")
            # A None sentinel tells each worker to shut down.
            for cmd_queue in self.cmd_queues:
                cmd_queue.quick_put(None)
            for worker in self.workers:
                worker.join()

    def active(self) -> List[int]:
        """
        Returns: Indexes of current active (non-terminal) environments.
        """
        return np.arange(self.size())[~self._terminal]

    def size(self) -> int:
        """
        Returns: Number of environments.
        """
        return self.env_size

    @property
    def action_space(self) -> Any:
        # DOC INHERITED
        return self._action_space

    @property
    def observation_space(self) -> Any:
        # DOC INHERITED
        return self._obsrv_space

    def _select_envs(self, idx=None):
        # Normalize idx to a list of environment indexes; None selects all.
        if idx is None:
            idx = list(range(self.env_size))
        elif np.isscalar(idx):
            idx = [idx]
        return idx

    def _call_gym_env_method(self, env_idxs, method, args=None, kwargs=None):
        # Dispatch ``method`` with per-env args/kwargs to the selected
        # workers and gather their results in ``env_idxs`` order.
        if args is None:
            args = [() for _ in range(len(env_idxs))]
        if kwargs is None:
            kwargs = [{} for _ in range(len(env_idxs))]

        result = {}
        # Check whether any process has exited with error code:
        for worker_id, worker in enumerate(self.workers):
            if worker.exitcode is None:
                continue
            if worker.exitcode == 2:
                raise RuntimeError(
                    "Worker {} failed to create environment.".format(
                        worker_id))
            elif worker.exitcode != 0:
                raise RuntimeError("Worker {} exited with code {}.".format(
                    worker_id, worker.exitcode))

        for i, env_idx in enumerate(env_idxs):
            self.cmd_queues[env_idx].quick_put((method, args[i], kwargs[i]))
        while len(result) < len(env_idxs):
            e_idx, success, res = self.result_queue.get()
            if success:
                result[e_idx] = res
            else:
                # Worker shipped back a serialized exception; re-raise here.
                raise res
        return [result[e_idx] for e_idx in env_idxs]

    @staticmethod
    def _worker(cmd_queue: SimpleQueue, result_queue: SimpleQueue, env_creator,
                env_idx):
        # Worker entry point: build the environment, then serve commands
        # until a None sentinel or keyboard interrupt arrives.
        env = None
        try:
            env = dill.loads(env_creator)(env_idx)
        except Exception:
            # Something has gone wrong during environment creation,
            # exit with error.
            exit(2)
        try:
            while True:
                try:
                    command = cmd_queue.quick_get(timeout=1e-3)
                except TimeoutError:
                    continue

                try:
                    if command is not None:
                        method, args, kwargs = command
                    else:
                        # End of all tasks signal received
                        cmd_queue.close()
                        result_queue.close()
                        break
                    result = getattr(env, method)(*args, **kwargs)
                    result_queue.put((env_idx, True, result))
                except Exception as e:
                    # Something has gone wrong during execution, serialize
                    # the exception and send it back to master.
                    result_queue.put(
                        (env_idx, False, ExceptionWithTraceback(e)))
        except KeyboardInterrupt:
            cmd_queue.close()
            result_queue.close()