class RemoteEnv(Process):
    """
    INTERNAL

     .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\

    This class represents the environment that is executed on a remote process.

    Note that the environment is only created in the subprocess, and is not available in the main process. Once created
    it is not possible to access anything directly from it in the main process, where the BaseAgent lives. Only the
    :class:`grid2op.Observation.BaseObservation` are forwarded to the agent.

    The process is driven through ``remote``: :func:`RemoteEnv.run` reads ``(cmd, data)`` tuples from it and,
    depending on the command, sends an answer back on the same object.

    """
    def __init__(self,
                 env_params,
                 remote,
                 parent_remote,
                 seed,
                 name=None,
                 return_info=True,
                 _obs_to_vect=True):
        """
        Store the configuration of the worker. The environment itself is NOT built here
        (see :func:`RemoteEnv.init_env`).

        Parameters
        ----------
        env_params: ``dict``
            Keyword arguments forwarded to ``Environment``. It must contain the key
            ``"_raw_backend_class"``, which is called without argument to build the backend.
        remote:
            Object exposing ``recv()``, ``send()`` and ``close()``; used by :func:`RemoteEnv.run`.
        parent_remote:
            Only stored on the instance, not used by this class.
        seed:
            Seed of the random generator of this process (see :func:`RemoteEnv.init_env`).
        name: ``str``, optional
            Name given to the underlying ``Process``.
        return_info: ``bool``
            If ``False``, the ``info`` returned by ``env.step`` is replaced by ``None`` before being sent.
        _obs_to_vect: ``bool``
            If ``True`` observations are sent as vectors (``obs.to_vect()``), otherwise the observation
            objects are sent after having been stripped by :func:`RemoteEnv._clean_observation`.
        """
        super().__init__(group=None, target=None, name=name)
        self.backend = None
        self.env = None
        self.env_params = env_params
        self.remote = remote
        self.parent_remote = parent_remote
        self.seed_used = seed
        self.space_prng = None
        self.fast_forward = 0
        self.all_seeds = []

        # internal flags, do not modify (original note: changing them "does not work", in the
        # sense that it is less efficient)
        self.return_info = return_info
        self._obs_to_vect = _obs_to_vect
        # cumulated wall-clock time (seconds) spent handling the 's' (step) command
        self._comp_time = 0.

    def init_env(self):
        """
        INTERNAL

        .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\

        Initialize the environment  that will perform all the computation of this process.
        Remember the environment only lives in this process. It cannot
        be transfer to / from the main process.

        This function also makes sure the chronics are read in different order accross all processes. This is done
        by calling the :func:`grid2op.Chronics.GridValue.shuffle` method. An example of how to use this function
        is provided in :func:`grid2op.Chronics.Multifolder.shuffle`.

        """
        self.space_prng = np.random.RandomState()
        self.space_prng.seed(seed=self.seed_used)
        self.backend = self.env_params["_raw_backend_class"]()
        with warnings.catch_warnings():
            # warnings have been already sent in the main process, no need to resend them
            warnings.filterwarnings("ignore")
            self.env = Environment(**self.env_params, backend=self.backend)
        # the seed of the environment is drawn from the (seeded) generator of this process
        env_seed = self.space_prng.randint(np.iinfo(dt_int).max)
        self.all_seeds = self.env.seed(env_seed)
        # random permutation (drawn without replacement) of the chronics
        self.env.chronics_handler.shuffle(shuffler=lambda x: x[
            self.space_prng.choice(len(x), size=len(x), replace=False)])

    def _clean_observation(self, obs):
        """
        Reset the attributes ``_forecasted_grid``, ``_forecasted_inj``, ``_obs_env`` and ``action_helper``
        of ``obs`` (in place) and return the same object.

        This is applied to every observation object sent through ``remote``.
        """
        obs._forecasted_grid = []
        obs._forecasted_inj = []
        obs._obs_env = None
        obs.action_helper = None
        return obs

    def get_obs_ifnotconv(self):
        """
        Reset the environment until an observation containing only finite values is obtained.

        If ``self.fast_forward > 0`` the chronics are fast-forwarded, after each reset, by a number of steps
        drawn in ``[0, self.fast_forward)``.

        Any ``Exception`` raised during an attempt is reported with a warning and the reset is tried again:
        this loop only stops when a valid observation is found.

        Returns
        -------
        res:
            The vector representation of the observation if ``self._obs_to_vect`` is ``True``, otherwise the
            observation itself, stripped by :func:`RemoteEnv._clean_observation` (like every other observation
            sent by this process).
        """
        # TODO dirty hack because of wrong chronics
        # need to check!!!
        conv = False
        obs_v = None
        obs = None
        while not conv:
            try:
                self.env.reset()
                if self.fast_forward > 0:
                    self.env.fast_forward_chronics(
                        self.space_prng.randint(0, self.fast_forward))
                obs = self.env.get_obs()
                obs_v = obs.to_vect()
                if np.all(np.isfinite(obs_v)):
                    # i make sure that everything is not Nan
                    # other i consider it's "divergence" so "game over"
                    conv = True
            except Exception as exc_:
                # deliberate best effort: keep retrying, but do not hide the failure
                warnings.warn(f"RemoteEnv: resetting the environment failed with {exc_!r}, "
                              f"a new reset is attempted.")
        if self._obs_to_vect:
            return obs_v
        return self._clean_observation(obs)

    def _perform_step(self, data):
        """
        Handle the 's' command: perform one step of the environment.

        Parameters
        ----------
        data:
            ``None`` (in this case ``self.env.action_space()`` is used as action) or the vector passed to
            ``self.env.action_space.from_vect`` to build the action.

        Returns
        -------
        res: ``tuple``
            ``(observation, reward, done, info)``. If the step is ``done`` or if the observation contains non
            finite values, the observation is the one obtained after a reset
            (see :func:`RemoteEnv.get_obs_ifnotconv`). ``info`` is ``None`` when ``self.return_info`` is ``False``.
        """
        beg_ = time.time()
        if data is None:
            action = self.env.action_space()
        else:
            action = self.env.action_space.from_vect(data)
        obs, reward, done, info = self.env.step(action)
        obs_v = obs.to_vect()
        if done or np.any(~np.isfinite(obs_v)):
            # if done do a reset
            res_obs = self.get_obs_ifnotconv()
        elif self._obs_to_vect:
            # the vector has just been computed: no need to compute it a second time
            res_obs = obs_v
        else:
            res_obs = self._clean_observation(obs)

        if not self.return_info:
            info = None
        self._comp_time += time.time() - beg_
        return res_obs, reward, done, info

    def run(self):
        """
        Main loop of the process: build the environment if needed, then serve the commands received from
        ``self.remote`` until the 'c' (close) command is received.

        Each message is a ``(cmd, data)`` tuple. Supported commands:

        - ``'get_spaces'``: send ``(observation_space, action_space)``
        - ``'s'``: perform a step (see :func:`RemoteEnv._perform_step`) and send its result
        - ``'r'``: reset the environment and send the observation
        - ``'c'``: close the environment and ``self.remote``, then leave the loop (nothing is sent)
        - ``'z'``: call ``env.set_chunk_size(data)`` (nothing is sent)
        - ``'o'``: send the current observation
        - ``'f'``: set ``self.fast_forward`` to ``int(data)`` (nothing is sent)
        - ``'seed'``: send ``(self.seed_used, self.all_seeds)``
        - ``'params'``: send ``env.parameters``
        - ``'comp_time'``: send the time spent in the 's' command
        - ``'powerflow_time'``: send ``env.backend.comp_time``
        - ``'step_time'``: send ``env._time_step``
        - ``'set_filter'``: call ``env.chronics_handler.set_filter(data)`` and send ``None``
        - ``'set_id'``: call ``env.set_id(data)`` and send ``None``
        - any other name that is an attribute of the environment: send this attribute

        Raises
        ------
        NotImplementedError
            If the command is none of the above.
        """
        if self.env is None:
            self.init_env()

        while True:
            cmd, data = self.remote.recv()
            if cmd == 'get_spaces':
                self.remote.send(
                    (self.env.observation_space, self.env.action_space))
            elif cmd == 's':
                # perform a step
                self.remote.send(self._perform_step(data))
            elif cmd == 'r':
                # perform a reset
                self.remote.send(self.get_obs_ifnotconv())
            elif cmd == 'c':
                # close everything
                self.env.close()
                self.remote.close()
                break
            elif cmd == 'z':
                # adapt the chunk size
                self.env.set_chunk_size(data)
            elif cmd == 'o':
                # get_obs
                current_obs = self.env.get_obs()
                if self._obs_to_vect:
                    res_obs = current_obs.to_vect()
                else:
                    res_obs = self._clean_observation(current_obs)
                self.remote.send(res_obs)
            elif cmd == "f":
                # fast forward the chronics when restart
                self.fast_forward = int(data)
            elif cmd == "seed":
                self.remote.send((self.seed_used, self.all_seeds))
            elif cmd == "params":
                self.remote.send(self.env.parameters)
            elif cmd == "comp_time":
                self.remote.send(self._comp_time)
            elif cmd == "powerflow_time":
                self.remote.send(self.env.backend.comp_time)
            elif cmd == "step_time":
                self.remote.send(self.env._time_step)
            elif cmd == "set_filter":
                self.env.chronics_handler.set_filter(data)
                self.remote.send(None)
            elif cmd == "set_id":
                self.env.set_id(data)
                self.remote.send(None)
            elif hasattr(self.env, cmd):
                # generic fallback: forward the attribute of the environment
                self.remote.send(getattr(self.env, cmd))
            else:
                raise NotImplementedError(
                    f"Unknown command \"{cmd}\" received by RemoteEnv")
# Example #2
class RemoteEnv(Process):
    """
    This class represents the environment that is executed on a remote process.

    Note that the environment is only created in the subprocess, and is not available in the main process. Once created
    it is not possible to access anything directly from it in the main process, where the BaseAgent lives. Only the
    :class:`grid2op.Observation.BaseObservation` are forwarded to the agent.

    The process is driven through ``remote``: :func:`RemoteEnv.run` reads ``(cmd, data)`` tuples from it and,
    depending on the command, sends an answer back on the same object.

    """
    def __init__(self, env_params, remote, parent_remote, seed, name=None):
        """
        Store the configuration of the worker. The environment itself is NOT built here
        (see :func:`RemoteEnv.init_env`).

        Parameters
        ----------
        env_params: ``dict``
            Keyword arguments forwarded to ``Environment``. It must contain the key
            ``"_raw_backend_class"``, which is called without argument to build the backend.
        remote:
            Object exposing ``recv()``, ``send()`` and ``close()``; used by :func:`RemoteEnv.run`.
        parent_remote:
            Only stored on the instance, not used by this class.
        seed:
            Seed of the random generator of this process (see :func:`RemoteEnv.init_env`).
        name: ``str``, optional
            Name given to the underlying ``Process``.
        """
        super().__init__(group=None, target=None, name=name)
        self.backend = None
        self.env = None
        self.env_params = env_params
        self.remote = remote
        self.parent_remote = parent_remote
        self.seed_used = seed
        self.space_prng = None
        self.fast_forward = 0
        self.all_seeds = []

    def init_env(self):
        """
        Initialize the environment  that will perform all the computation of this process.
        Remember the environment only lives in this process. It cannot
        be transfer to / from the main process.

        This function also makes sure the chronics are read in different order accross all processes. This is done
        by calling the :func:`grid2op.Chronics.GridValue.shuffle` method. An example of how to use this function
        is provided in :func:`grid2op.Chronics.Multifolder.shuffle`.

        """
        self.space_prng = np.random.RandomState()
        self.space_prng.seed(seed=self.seed_used)
        self.backend = self.env_params["_raw_backend_class"]()
        self.env = Environment(**self.env_params, backend=self.backend)
        # the seed of the environment is drawn from the (seeded) generator of this process
        env_seed = self.space_prng.randint(np.iinfo(dt_int).max)
        self.all_seeds = self.env.seed(env_seed)
        # random permutation (drawn without replacement) of the chronics
        self.env.chronics_handler.shuffle(shuffler=lambda x: x[
            self.space_prng.choice(len(x), size=len(x), replace=False)])

    def _clean_observation(self, obs):
        """
        Reset the attributes ``_forecasted_grid``, ``_forecasted_inj``, ``_obs_env`` and ``action_helper``
        of ``obs`` (in place) and return the same object.

        This helper is not called by the other methods of this class.
        """
        obs._forecasted_grid = []
        obs._forecasted_inj = []
        obs._obs_env = None
        obs.action_helper = None
        return obs

    def get_obs_ifnotconv(self):
        """
        Reset the environment until an observation containing only finite values is obtained.

        If ``self.fast_forward > 0`` the chronics are fast-forwarded, after each reset, by a number of steps
        drawn in ``[0, self.fast_forward)``.

        Any ``Exception`` raised during an attempt is ignored and the reset is tried again: this loop only stops
        when a valid observation is found. ``KeyboardInterrupt`` and ``SystemExit`` are NOT caught, so the
        process can still be stopped while it is retrying.

        Returns
        -------
        obs_v:
            The vector representation (``obs.to_vect()``) of the observation.
        """
        # TODO dirty hack because of wrong chronics
        # need to check!!!
        conv = False
        obs_v = None
        while not conv:
            try:
                self.env.reset()
                if self.fast_forward > 0:
                    self.env.fast_forward_chronics(
                        self.space_prng.randint(0, self.fast_forward))
                obs = self.env.get_obs()
                obs_v = obs.to_vect()
                if np.all(np.isfinite(obs_v)):
                    # i make sure that everything is not Nan
                    # other i consider it's "divergence" so "game over"
                    conv = True
            except Exception:
                # deliberate best effort: a failed attempt simply triggers a new reset
                pass
        return obs_v

    def run(self):
        """
        Main loop of the process: build the environment if needed, then serve the commands received from
        ``self.remote`` until the 'c' (close) command is received.

        Each message is a ``(cmd, data)`` tuple. Supported commands:

        - ``'get_spaces'``: send ``(observation_space, action_space)``
        - ``'s'``: build the action with ``env.action_space.from_vect(data)``, perform a step and send
          ``(obs_vect, reward, done, info)``. If the step is ``done`` or if the observation contains non finite
          values, the observation sent is the one obtained after a reset
          (see :func:`RemoteEnv.get_obs_ifnotconv`)
        - ``'r'``: reset the environment and send the observation (as a vector)
        - ``'c'``: close the environment and ``self.remote``, then leave the loop (nothing is sent)
        - ``'z'``: call ``env.set_chunk_size(data)`` (nothing is sent)
        - ``'f'``: set ``self.fast_forward`` to ``int(data)`` (nothing is sent)
        - ``'seed'``: send ``(self.seed_used, self.all_seeds)``
        - ``'params'``: send ``env.parameters``
        - any other name that is an attribute of the environment: send this attribute

        Raises
        ------
        NotImplementedError
            If the command is none of the above.
        """
        if self.env is None:
            self.init_env()

        while True:
            cmd, data = self.remote.recv()
            if cmd == 'get_spaces':
                self.remote.send(
                    (self.env.observation_space, self.env.action_space))
            elif cmd == 's':
                # perform a step
                action = self.env.action_space.from_vect(data)
                obs, reward, done, info = self.env.step(action)
                obs_v = obs.to_vect()
                if done or np.any(~np.isfinite(obs_v)):
                    # if done do a reset
                    obs_v = self.get_obs_ifnotconv()
                self.remote.send((obs_v, reward, done, info))
            elif cmd == 'r':
                # perform a reset
                self.remote.send(self.get_obs_ifnotconv())
            elif cmd == 'c':
                # close everything
                self.env.close()
                self.remote.close()
                break
            elif cmd == 'z':
                # adapt the chunk size
                self.env.set_chunk_size(data)
            elif cmd == "f":
                # fast forward the chronics when restart
                self.fast_forward = int(data)
            elif cmd == "seed":
                self.remote.send((self.seed_used, self.all_seeds))
            elif cmd == "params":
                self.remote.send(self.env.parameters)
            elif hasattr(self.env, cmd):
                # generic fallback: forward the attribute of the environment
                self.remote.send(getattr(self.env, cmd))
            else:
                raise NotImplementedError(
                    f"Unknown command \"{cmd}\" received by RemoteEnv")