Example #1
    def __init__(self,
                 gridobj,
                 env,
                 rewardClass=None,
                 observationClass=CompleteObservation,
                 with_forecast=True):
        """
        Env: requires :attr:`grid2op.Environment.parameters` and :attr:`grid2op.Environment.backend` to be valid
        """

        SerializableObservationSpace.__init__(
            self, gridobj, observationClass=observationClass)

        # TODO DOCUMENTATION !!!

        self.with_forecast = with_forecast
        # print("ObservationSpace init with rewardClass: {}".format(rewardClass))
        self.parameters = copy.deepcopy(env.parameters)
        # for the simulation done inside the observation (obs.simulate), run the
        # power flow with the forecast setting rather than the environment's own
        self.parameters.ENV_DC = self.parameters.FORECAST_DC

        if rewardClass is None:
            self.rewardClass = env.rewardClass
        else:
            self.rewardClass = rewardClass

        # helpers
        self.action_helper_env = env.helper_action_env
        self.reward_helper = RewardHelper(rewardClass=self.rewardClass)
        self.reward_helper.initialize(env)

        other_rewards = {
            k: v.rewardClass
            for k, v in env.other_rewards.items()
        }

        # TODO here: have another backend maybe
        self._backend_obs = env.backend.copy()

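        # build a dedicated simulation environment on a copy of the backend;
        # obs.simulate() steps through it without touching the real environment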
        _ObsEnv_class = _ObsEnv.init_grid(self._backend_obs)
        self.obs_env = _ObsEnv_class(
            backend_instanciated=self._backend_obs,
            obsClass=self.observationClass,
            parameters=env.parameters,
            reward_helper=self.reward_helper,
            action_helper=self.action_helper_env,
            thermal_limit_a=env._thermal_limit_a,
            legalActClass=env.legalActClass,
            donothing_act=env.helper_action_player(),
            other_rewards=other_rewards,
            completeActionClass=env.helper_action_env.actionClass,
            helper_action_class=env.helper_action_class,
            helper_action_env=env.helper_action_env)

        for v in self.obs_env.other_rewards.values():
            v.initialize(env)

        self._empty_obs = self.observationClass(
            obs_env=self.obs_env, action_helper=self.action_helper_env)
        self._update_env_time = 0.
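
For context, a minimal sketch of how the observation space built above is typically exercised through grid2op's public API (the dataset name is one of grid2op's bundled test grids; any available one works):

import grid2op

# create an environment; this wires up an ObservationSpace like the one above
env = grid2op.make("l2rpn_case14_sandbox")
obs = env.reset()

# obs.simulate() is what the internal obs_env exists to support: it forecasts
# the effect of an action one step ahead without advancing the real environment
do_nothing = env.action_space()
sim_obs, sim_reward, sim_done, sim_info = obs.simulate(do_nothing)
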
Example #2
    def __init__(self,
                 gridobj,
                 env,
                 rewardClass=None,
                 observationClass=CompleteObservation,
                 actionClass=None,
                 with_forecast=True):
        """
        INTERNAL

        .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\

        Env: requires :attr:`grid2op.Environment.parameters` and :attr:`grid2op.Environment.backend` to be valid
        """

        if actionClass is None:
            from grid2op.Action import CompleteAction
            actionClass = CompleteAction

        SerializableObservationSpace.__init__(self, gridobj, observationClass=observationClass)

        self.with_forecast = with_forecast
        self._simulate_parameters = copy.deepcopy(env.parameters)

        if rewardClass is None:
            self._reward_func = env._reward_helper.template_reward
        else:
            self._reward_func = rewardClass

        # helpers
        self.action_helper_env = env._helper_action_env
        self.reward_helper = RewardHelper(reward_func=self._reward_func)
        self.reward_helper.initialize(env)

        other_rewards = {k: v.rewardClass for k, v in env.other_rewards.items()}

        # TODO here: have another backend maybe
        self._backend_obs = env.backend.copy()
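        # specialize the generic _ObsEnv class to this grid's layout, then register
        # the generated class in _ObsEnv's module so copies/pickles can resolve it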
        _ObsEnv_class = _ObsEnv.init_grid(type(env.backend), force_module=_ObsEnv.__module__)
        setattr(sys.modules[_ObsEnv.__module__], _ObsEnv_class.__name__, _ObsEnv_class)
        self.obs_env = _ObsEnv_class(backend_instanciated=self._backend_obs,
                                     obsClass=observationClass,  # do not put self.observationClass otherwise it's initialized twice
                                     parameters=self._simulate_parameters,
                                     reward_helper=self.reward_helper,
                                     action_helper=self.action_helper_env,
                                     thermal_limit_a=env.get_thermal_limit(),
                                     legalActClass=copy.deepcopy(env._legalActClass),
                                     other_rewards=other_rewards,
                                     helper_action_class=env._helper_action_class,
                                     helper_action_env=env._helper_action_env,
                                     epsilon_poly=env._epsilon_poly,
                                     tol_poly=env._tol_poly,
                                     )
        for v in self.obs_env.other_rewards.values():
            v.initialize(env)

        self._empty_obs = self._template_obj
        self._update_env_time = 0.
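
A hedged sketch of how the reward hooks above surface to users: grid2op.make() accepts reward_class for the main reward and other_rewards for extra named ones, and this constructor mirrors both into the simulation environment (the "flat" key below is an arbitrary label chosen for illustration):

import grid2op
from grid2op.Reward import L2RPNReward, FlatReward

env = grid2op.make("l2rpn_case14_sandbox",
                   reward_class=L2RPNReward,            # main reward, also evaluated by simulate()
                   other_rewards={"flat": FlatReward})  # extra rewards, keyed by name
obs = env.reset()
sim_obs, sim_reward, sim_done, sim_info = obs.simulate(env.action_space())
# extra rewards are reported in the info dict under "rewards"
print(sim_info["rewards"]["flat"])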