Esempi in Python per RewardSpace, esempi in Python per rl_coach.spaces.RewardSpace

Esempio n. 1

0

Mostra file

File: test_reward_clipping_filter.py Progetto: bigdatasciencegroup/intel-ailab-reinforcement-learning-coach

def test_get_filtered_reward_space(clip_filter):
    """Check the bounds the clipping filter reports for several reward spaces.

    Every case is ``(input low, input high, expected low, expected high)``.
    The expected numbers are the ones pinned for the ``clip_filter`` fixture.
    """
    cases = (
        # wide finite bounds: both ends are expected to be clipped
        (-100, 100, 2, 10),
        # bounds that are expected to come back unchanged
        (5, 7, 5, 7),
        # infinite bounds: both ends are expected to be clipped
        (-np.inf, np.inf, 2, 10),
    )

    for low, high, expected_low, expected_high in cases:
        filtered = clip_filter.get_filtered_reward_space(
            RewardSpace(1, low, high))

        # the filtered space keeps its shape and reports the expected bounds
        assert filtered.shape == 1
        assert filtered.low == expected_low
        assert filtered.high == expected_high

Esempio n. 2

0

Mostra file

File: reward_adversarial_inversion_filter.py Progetto: MaslinuPoimal/coach

 def get_filtered_reward_space(
         self, input_reward_space: RewardSpace) -> RewardSpace:
     """
     Widen the reward bounds so that they also cover the negated bounds.

     The space passed in is modified in place and the same object is returned.

     :param input_reward_space: reward space whose ``low`` / ``high`` are updated
     :return: ``input_reward_space`` itself, with the widened bounds
     """
     space = input_reward_space
     # the upper bound is settled first; the lower bound is derived from it
     upper = max(space.high, -space.low)
     space.high = upper
     space.low = min(space.low, -upper)
     return space

Esempio n. 3

0

Mostra file

    def __init__(self, level: LevelSelection, seed: int, frame_skip: int,
                 human_control: bool, custom_reward_threshold: Union[int,
                                                                     float],
                 visualization_parameters: VisualizationParameters, **kwargs):
        """
        Initialise the bookkeeping attributes of an environment.

        :param level: The environment level. Each environment can have multiple levels.
                      Stored as a string in ``env_id``
        :param seed: a seed for the random number generator of the environment
        :param frame_skip: number of frames to skip (while repeating the same action) between each two agent directives
        :param human_control: human should control the environment
        :param custom_reward_threshold: stored as ``reward_success_threshold`` and handed to the reward space
        :param visualization_parameters: a blob of parameters used for visualization of the environment
        :param **kwargs: as the class is instantiated by EnvironmentParameters, this is used to support having
                         additional arguments which will be ignored by this class, but might be used by others
        """
        super().__init__()

        # per-episode / per-step state
        self.game = list()

        self.state = dict()
        self.observation = None
        self.goal = None
        self.reward = 0
        self.done = False
        self.info = dict()
        self._last_env_response = None
        self.last_action = 0
        self.episode_idx = 0
        self.total_steps_counter = 0
        self.current_episode_steps_counter = 0
        self.last_episode_time = time.time()
        self.key_to_action = dict()
        self.last_episode_images = list()

        # reward tracking
        self.total_reward_in_current_episode = 0
        self.max_reward_achieved = -np.inf
        self.reward_success_threshold = custom_reward_threshold

        # spaces: the public and the underscore-prefixed names both start empty
        self.state_space = None
        self._state_space = None
        self.goal_space = None
        self._goal_space = None
        self.action_space = None
        self._action_space = None
        # TODO: add a getter and setter
        self.reward_space = RewardSpace(
            1, reward_success_threshold=custom_reward_threshold)

        self.env_id = str(level)
        self.seed = seed
        self.frame_skip = frame_skip

        # human interaction and visualization
        self.human_control = human_control
        self.wait_for_explicit_human_action = False
        # rendering is on when requested, or whenever a human is playing
        self.is_rendered = visualization_parameters.render or human_control
        # native rendering is never used together with human control
        self.native_rendering = (visualization_parameters.native_rendering
                                 and not human_control)
        self.visualization_parameters = visualization_parameters
        if not self.native_rendering:
            self.renderer = Renderer()

Esempio n. 4

0

Mostra file

File: multi_agent_environment.py Progetto: wfrei020/DeepRacer-Freire

    def __init__(self,
                 level: LevelSelection,
                 seed: int,
                 frame_skip: int,
                 custom_reward_threshold: Union[int, float],
                 visualization_parameters: VisualizationParameters,
                 target_success_rate: float = 1.0,
                 num_agents: int = 1,
                 **kwargs):
        """
        :param level: The environment level. Each environment can have multiple levels
        :param seed: a seed for the random number generator of the environment
        :param frame_skip: number of frames to skip (while repeating the same action) between each two agent directives
        :param custom_reward_threshold: stored as ``reward_success_threshold`` and handed to the reward space
        :param visualization_parameters: a blob of parameters used for visualization of the environment
        :param target_success_rate: stored on the instance as ``target_success_rate``
        :param num_agents: number of agents; sizes every per-agent list created here
        :param **kwargs: as the class is instantiated by MultiAgentEnvironmentParameters, this is used to support having
                         additional arguments which will be ignored by this class, but might be used by others
        """
        super().__init__()

        # env initialization
        self.num_agents = num_agents
        # one independent dict per agent: `[{}] * n` would put the SAME dict in every slot,
        # so an in-place update of one agent's state would show up for all agents
        self.state = [{} for _ in range(num_agents)]
        # the remaining per-agent lists hold immutable values, so repetition is safe for them
        self.reward = [0.0] * num_agents
        self.done = [False] * num_agents
        self.goal = None
        self.info = {}
        self._last_env_response = [None] * num_agents
        self.last_action = [0] * num_agents
        self.episode_idx = 0
        self.total_steps_counter = 0
        self.current_episode_steps_counter = 0
        self.last_episode_time = time.time()

        # rewards
        self.total_reward_in_current_episode = [0.0] * num_agents
        self.max_reward_achieved = [-np.inf] * num_agents
        self.reward_success_threshold = custom_reward_threshold

        # spaces
        # note: each public name and its underscore-prefixed twin are bound to the same object here
        self.state_space = self._state_space = [None] * num_agents
        self.goal_space = self._goal_space = None
        self.action_space = self._action_space = [None] * num_agents
        self.reward_space = RewardSpace(
            1, reward_success_threshold=self.reward_success_threshold
        )  # TODO: add a getter and setter

        self.env_id = str(level)
        self.seed = seed
        self.frame_skip = frame_skip

        # visualization
        self.visualization_parameters = visualization_parameters

        # Set target reward and target_success if present
        self.target_success_rate = target_success_rate

Esempio n. 5

0

Mostra file

def test_get_filtered_reward_space():
    """Check the bounds reported after rescaling the reward space by 1/10.

    Every case is ``(input low, input high, expected low, expected high)``.
    """
    rescale_filter = InputFilter(
        reward_filters=OrderedDict([('rescale', RewardRescaleFilter(1/10.))]))

    cases = (
        # finite bounds are scaled down by the factor
        (-100, 100, -10, 10),
        # unbounded rewards stay unbounded
        (-np.inf, np.inf, -np.inf, np.inf),
    )

    for low, high, expected_low, expected_high in cases:
        filtered = rescale_filter.get_filtered_reward_space(
            RewardSpace(1, low, high))

        # the filtered space keeps its shape and reports the expected bounds
        assert filtered.shape == 1
        assert filtered.low == expected_low
        assert filtered.high == expected_high

Esempio n. 6

0

Mostra file

File: batch_rl.py Progetto: guyk1971/coach

def train_on_csv_file(csv_file, n_epochs, dataset_size, obs_dim, act_dim):
    """Build a Batch-RL graph manager around a CSV dataset and run ``improve()``.

    :param csv_file: path handed to ``CsvDataset`` (loaded with ``is_episodic=True``)
    :param n_epochs: forwarded to ``set_schedule_params``
    :param dataset_size: forwarded to ``set_schedule_params``
    :param obs_dim: shape given to the 'observation' vector space
    :param act_dim: value given to ``DiscreteActionSpace``
    :return: None

    NOTE(review): ``task_parameters`` is not defined inside this function; it is
    presumably a module-level name -- confirm before reusing this elsewhere.
    """
    # just to clean things up; only needed for the tutorial
    tf.reset_default_graph()

    schedule = set_schedule_params(n_epochs, dataset_size)

    # --- agent ---
    # note that we have moved to BCQ, which will help the training to converge better and faster
    agent = set_agent_params(DDQNBCQAgentParameters)
    # BCQ-specific setting: either a kNN (KNNParameters) or a NN based model can be used for
    # predicting which actions not to max over in the bellman equation; the NN one is used here
    agent.algorithm.action_drop_method_parameters = NNImitationModelParameters()

    # the transitions come from the CSV file rather than from an environment
    agent.memory.load_memory_from_file_path = CsvDataset(csv_file,
                                                         is_episodic=True)

    # --- spaces (there is no environment to infer them from) ---
    observation = VectorObservationSpace(shape=obs_dim)
    state = StateSpace({'observation': observation})
    action = DiscreteActionSpace(act_dim)
    reward = RewardSpace(1)
    spaces = SpacesDefinition(state=state,
                              goal=None,
                              action=action,
                              reward=reward)

    # --- graph manager ---
    vis = VisualizationParameters(dump_signals_to_csv_every_x_episodes=1)
    manager = BatchRLGraphManager(agent_params=agent,
                                  env_params=None,
                                  spaces_definition=spaces,
                                  schedule_params=schedule,
                                  vis_params=vis,
                                  reward_model_num_epochs=30,
                                  train_to_eval_ratio=0.4)
    manager.create_graph(task_parameters)
    manager.improve()

Esempio n. 7

0

Mostra file

    def __init__(self,
                 level: LevelSelection,
                 frame_skip: int,
                 visualization_parameters: VisualizationParameters,
                 target_success_rate: float = 1.0,
                 additional_simulator_parameters: Dict[str, Any] = None,
                 seed: Union[None, int] = None,
                 human_control: bool = False,
                 custom_reward_threshold: Union[int, float] = None,
                 random_initialization_steps: int = 1,
                 max_over_num_frames: int = 1,
                 observation_space_type: ObservationSpaceType = None,
                 **kwargs):
        """
        :param level: (str)
            A string representing the gym level to run. This can also be a LevelSelection object.
            For example, BreakoutDeterministic-v0

        :param frame_skip: (int)
            The number of frames to skip between any two actions given by the agent. The action will be repeated
            for all the skipped frames.

        :param visualization_parameters: (VisualizationParameters)
            The parameters used for visualizing the environment, such as the render flag, storing videos etc.

        :param target_success_rate: (float)
            Forwarded to the parent constructor and stored on the instance as ``target_success_rate``.

        :param additional_simulator_parameters: (Dict[str, Any])
            Any additional parameters that the user can pass to the Gym environment. These parameters should be
            accepted by the __init__ function of the implemented Gym environment.
            ``None`` (the default) is treated as an empty dict.

        :param seed: (int)
            A seed to use for the random number generator when running the environment.

        :param human_control: (bool)
            A flag that allows controlling the environment using the keyboard keys.

        :param custom_reward_threshold: (float)
            Allows defining a custom reward that will be used to decide when the agent succeeded in passing the environment.
            If not set, this value will be taken from the Gym environment definition.

        :param random_initialization_steps: (int)
            The number of random steps that will be taken in the environment after each reset.
            This is a feature presented in the DQN paper, which improves the variability of the episodes the agent sees.

        :param max_over_num_frames: (int)
            This value will be used for merging multiple frames into a single frame by taking the maximum value for each
            of the pixels in the frame. This is particularly used in Atari games, where the frames flicker, and objects
            can be seen in one frame but disappear in the next.

        :param observation_space_type:
            This value will be used for generating observation space. Allows a custom space. Should be one of
            ObservationSpaceType. If not specified, observation space is inferred from the number of dimensions
            of the observation: 1D: Vector space, 3D: Image space if 1 or 3 channels, PlanarMaps space otherwise.
        """
        super().__init__(level, seed, frame_skip, human_control,
                         custom_reward_threshold, visualization_parameters,
                         target_success_rate)

        self.random_initialization_steps = random_initialization_steps
        self.max_over_num_frames = max_over_num_frames
        # use a fresh dict per instance: a `{}` default in the signature would be a single
        # object shared by every call that relies on the default
        self.additional_simulator_parameters = (
            {} if additional_simulator_parameters is None
            else additional_simulator_parameters)

        # hide warnings
        gym.logger.set_level(40)

        # load and initialize environment
        # environment ids can be defined in 3 ways:
        # 1. Native gym environments like BreakoutDeterministic-v0 for example
        # 2. Custom gym environments written and installed as python packages.
        #    This environments should have a python module with a class inheriting gym.Env, implementing the
        #    relevant functions (_reset, _step, _render) and defining the observation and action space
        #    For example: my_environment_package:MyEnvironmentClass will run an environment defined in the
        #    MyEnvironmentClass class
        # 3. Custom gym environments written as an independent module which is not installed.
        #    This environments should have a python module with a class inheriting gym.Env, implementing the
        #    relevant functions (_reset, _step, _render) and defining the observation and action space.
        #    For example: path_to_my_environment.sub_directory.my_module:MyEnvironmentClass will run an
        #    environment defined in the MyEnvironmentClass class which is located in the module in the relative path
        #    path_to_my_environment.sub_directory.my_module
        if ':' in self.env_id:
            # custom environments
            if '/' in self.env_id or '.' in self.env_id:
                # environment in a an absolute path module written as a unix path or in a relative path module
                # written as a python import path
                env_class = short_dynamic_import(self.env_id)
            else:
                # environment in a python package
                env_class = gym.envs.registration.load(self.env_id)

            # instantiate the environment
            try:
                self.env = env_class(**self.additional_simulator_parameters)
            except Exception:
                # report which class / arguments failed, then let the original error propagate
                screen.error(
                    "Failed to instantiate Gym environment class %s with arguments %s"
                    % (env_class, self.additional_simulator_parameters),
                    crash=False)
                raise
        else:
            self.env = gym.make(self.env_id)
            # keep env_class bound on this path as well: the error messages further down format it,
            # and an unbound name there would raise NameError instead of reporting the real problem
            env_class = self.env.unwrapped.__class__

        # for classic control we want to use the native renderer because otherwise we will get 2 renderer windows
        environment_to_always_use_with_native_rendering = [
            'classic_control', 'mujoco', 'robotics'
        ]
        self.native_rendering = self.native_rendering or \
            any(name in str(self.env.unwrapped.__class__)
                for name in environment_to_always_use_with_native_rendering)
        # a renderer created earlier is not needed once native rendering is used
        if self.native_rendering and hasattr(self, 'renderer'):
            self.renderer.close()

        # seed
        if self.seed is not None:
            self.env.seed(self.seed)
            np.random.seed(self.seed)
            random.seed(self.seed)

        # frame skip and max between consecutive frames
        self.is_mujoco_env = 'mujoco' in str(self.env.unwrapped.__class__)
        self.is_roboschool_env = 'roboschool' in str(
            self.env.unwrapped.__class__)
        self.is_atari_env = 'Atari' in str(self.env.unwrapped.__class__)
        if self.is_atari_env:
            self.env.unwrapped.frameskip = 1  # this accesses the atari env that is wrapped with a timelimit wrapper env
            if self.env_id == "SpaceInvadersDeterministic-v4" and self.frame_skip == 4:
                screen.warning(
                    "Warning: The frame-skip for Space Invaders was automatically updated from 4 to 3. "
                    "This is following the DQN paper where it was noticed that a frame-skip of 3 makes the "
                    "laser rays disappear. To force frame-skip of 4, please use SpaceInvadersNoFrameskip-v4."
                )
                self.frame_skip = 3
            self.env = MaxOverFramesAndFrameskipEnvWrapper(
                self.env,
                frameskip=self.frame_skip,
                max_over_num_frames=self.max_over_num_frames)
        else:
            self.env.unwrapped.frameskip = self.frame_skip

        self.state_space = StateSpace({})

        # observations
        if not isinstance(self.env.observation_space, gym.spaces.dict.Dict):
            state_space = {'observation': self.env.observation_space}
        else:
            state_space = self.env.observation_space.spaces

        for observation_space_name, observation_space in state_space.items():
            if observation_space_type == ObservationSpaceType.Tensor:
                # we consider arbitrary input tensor which does not necessarily represent images
                self.state_space[
                    observation_space_name] = TensorObservationSpace(
                        shape=np.array(observation_space.shape),
                        low=observation_space.low,
                        high=observation_space.high)
            elif observation_space_type == ObservationSpaceType.Image or len(
                    observation_space.shape) == 3:
                # we assume gym has image observations (with arbitrary number of channels) where their values are
                # within 0-255, and where the channel dimension is the last dimension
                if observation_space.shape[-1] in [1, 3]:
                    self.state_space[
                        observation_space_name] = ImageObservationSpace(
                            shape=np.array(observation_space.shape),
                            high=255,
                            channels_axis=-1)
                else:
                    # For any number of channels other than 1 or 3, use the generic PlanarMaps space
                    self.state_space[
                        observation_space_name] = PlanarMapsObservationSpace(
                            shape=np.array(observation_space.shape),
                            low=0,
                            high=255,
                            channels_axis=-1)
            elif observation_space_type == ObservationSpaceType.Vector or len(
                    observation_space.shape) == 1:
                self.state_space[
                    observation_space_name] = VectorObservationSpace(
                        shape=observation_space.shape[0],
                        low=observation_space.low,
                        high=observation_space.high)
            else:
                # NOTE(review): presumably screen.error(crash=True) does not return; the `raise` is kept as written
                raise screen.error(
                    "Failed to instantiate Gym environment class %s with observation space type %s"
                    % (env_class, observation_space_type),
                    crash=True)

        if 'desired_goal' in state_space:
            self.goal_space = self.state_space['desired_goal']

        # actions
        if type(self.env.action_space) == gym.spaces.box.Box:
            self.action_space = BoxActionSpace(
                shape=self.env.action_space.shape,
                low=self.env.action_space.low,
                high=self.env.action_space.high)
        elif type(self.env.action_space) == gym.spaces.discrete.Discrete:
            actions_description = []
            if hasattr(self.env.unwrapped, 'get_action_meanings'):
                actions_description = self.env.unwrapped.get_action_meanings()
            self.action_space = DiscreteActionSpace(
                num_actions=self.env.action_space.n,
                descriptions=actions_description)
        else:
            # NOTE(review): presumably screen.error(crash=True) does not return; the `raise` is kept as written
            raise screen.error((
                "Failed to instantiate gym environment class {} due to unsupported "
                "action space {}. Expected BoxActionSpace or DiscreteActionSpace."
            ).format(env_class, self.env.action_space),
                               crash=True)

        if self.human_control:
            # TODO: add this to the action space
            # map keyboard keys to actions
            self.key_to_action = {}
            if hasattr(self.env.unwrapped, 'get_keys_to_action'):
                self.key_to_action = self.env.unwrapped.get_keys_to_action()
            else:
                screen.error(
                    "Error: Environment {} does not support human control.".
                    format(self.env),
                    crash=True)

        # initialize the state by getting a new state from the environment
        self.reset_internal_state(True)

        # render
        if self.is_rendered:
            image = self.get_rendered_image()
            # the window is doubled in size when a human is playing
            scale = 2 if self.human_control else 1
            if not self.native_rendering:
                self.renderer.create_screen(image.shape[1] * scale,
                                            image.shape[0] * scale)

        # the info is only updated after the first step
        self.state = self.step(self.action_space.default_action).next_state
        self.state_space['measurements'] = VectorObservationSpace(
            shape=len(self.info.keys()))

        # without a custom threshold, fall back to the one declared in the gym spec
        if self.env.spec and custom_reward_threshold is None:
            self.reward_success_threshold = self.env.spec.reward_threshold
            self.reward_space = RewardSpace(
                1, reward_success_threshold=self.reward_success_threshold)

        self.target_success_rate = target_success_rate

Esempio n. 8

0

Mostra file

File: reward_clipping_filter.py Progetto: bigdatasciencegroup/intel-ailab-reinforcement-learning-coach

 def get_filtered_reward_space(
         self, input_reward_space: RewardSpace) -> RewardSpace:
     """
     Restrict the reward bounds to the filter's clipping range.

     The space passed in is modified in place and the same object is returned.

     :param input_reward_space: reward space whose ``low`` / ``high`` are updated
     :return: ``input_reward_space`` itself, with the clipped bounds
     """
     # the upper bound may only shrink, the lower bound may only grow
     upper = min(self.clipping_high, input_reward_space.high)
     lower = max(self.clipping_low, input_reward_space.low)
     input_reward_space.high = upper
     input_reward_space.low = lower
     return input_reward_space

Esempio n. 9

0

Mostra file

# Tutorial fragment: configures an already-created `agent_params` for Batch RL on a CSV dataset,
# builds the graph manager and runs `improve()`.
# NOTE(review): `agent_params`, `schedule_params` and `task_parameters` are not defined in this
# fragment; they are presumably created earlier in the tutorial -- confirm.

# ER - we'll be needing an episodic replay buffer for off-policy evaluation
agent_params.memory = EpisodicExperienceReplayParameters()

# E-Greedy schedule - there is no exploration in Batch RL. Disabling E-Greedy.
agent_params.exploration.epsilon_schedule = LinearSchedule(initial_value=0, final_value=0, decay_steps=1)
agent_params.exploration.evaluation_epsilon = 0

# can use either a kNN or a NN based model for predicting which actions not to max over in the bellman equation
#agent_params.algorithm.action_drop_method_parameters = KNNParameters()


DATATSET_PATH = 'acrobot_dataset.csv'
# NOTE(review): the memory parameters are assigned a second time here, replacing the object
# created above; this looks redundant -- confirm before removing.
agent_params.memory = EpisodicExperienceReplayParameters()
# the replay memory is loaded from the CSV file
agent_params.memory.load_memory_from_file_path = CsvDataset(DATATSET_PATH, is_episodic = True)

# the spaces are declared by hand because no environment is passed to the graph manager (env_params=None)
spaces = SpacesDefinition(state=StateSpace({'observation': VectorObservationSpace(shape=6)}),
                          goal=None,
                          action=DiscreteActionSpace(3),
                          reward=RewardSpace(1))

graph_manager = BatchRLGraphManager(agent_params=agent_params,
                                    env_params=None,
                                    spaces_definition=spaces,
                                    schedule_params=schedule_params,
                                    vis_params=VisualizationParameters(dump_signals_to_csv_every_x_episodes=1),
                                    reward_model_num_epochs=30,
                                    train_to_eval_ratio=0.4)
graph_manager.create_graph(task_parameters)
graph_manager.improve()

Esempio n. 10

0

Mostra file

File: gym_environment.py Progetto: mdavala/coach

    def __init__(self,
                 level: LevelSelection,
                 frame_skip: int,
                 visualization_parameters: VisualizationParameters,
                 additional_simulator_parameters: Dict[str, Any] = None,
                 seed: Union[None, int] = None,
                 human_control: bool = False,
                 custom_reward_threshold: Union[int, float] = None,
                 random_initialization_steps: int = 1,
                 max_over_num_frames: int = 1,
                 **kwargs):
        """
        Create the underlying Gym environment and derive the state, goal, action and reward spaces from it.

        :param level: forwarded to the parent constructor; this method then reads ``self.env_id``
                      (presumably set by the parent from ``level`` -- confirm)
        :param frame_skip: forwarded to the parent constructor; ``self.frame_skip`` is applied to the environment
        :param visualization_parameters: forwarded to the parent constructor
        :param additional_simulator_parameters: keyword arguments passed to a custom environment class;
                                                when empty or None the class is instantiated without arguments
        :param seed: forwarded to the parent constructor; when ``self.seed`` is not None it seeds the
                     environment, numpy and the ``random`` module
        :param human_control: forwarded to the parent constructor; when ``self.human_control`` is set,
                              the key-to-action mapping is read from the environment if it offers one
        :param custom_reward_threshold: forwarded to the parent constructor; when None and the environment
                                        has a spec, the spec's reward threshold is used instead
        :param random_initialization_steps: stored on the instance
        :param max_over_num_frames: stored on the instance and passed to the Atari frame wrapper
        :param **kwargs: not used in this method
        """
        super().__init__(level, seed, frame_skip, human_control,
                         custom_reward_threshold, visualization_parameters)

        self.random_initialization_steps = random_initialization_steps
        self.max_over_num_frames = max_over_num_frames
        self.additional_simulator_parameters = additional_simulator_parameters

        # hide warnings
        gym.logger.set_level(40)
        """
        load and initialize environment
        environment ids can be defined in 3 ways:
        1. Native gym environments like BreakoutDeterministic-v0 for example
        2. Custom gym environments written and installed as python packages.
           This environments should have a python module with a class inheriting gym.Env, implementing the
           relevant functions (_reset, _step, _render) and defining the observation and action space
           For example: my_environment_package:MyEnvironmentClass will run an environment defined in the
           MyEnvironmentClass class
        3. Custom gym environments written as an independent module which is not installed.
           This environments should have a python module with a class inheriting gym.Env, implementing the
           relevant functions (_reset, _step, _render) and defining the observation and action space.
           For example: path_to_my_environment.sub_directory.my_module:MyEnvironmentClass will run an
           environment defined in the MyEnvironmentClass class which is located in the module in the relative path
           path_to_my_environment.sub_directory.my_module
        """
        if ':' in self.env_id:
            # custom environments
            if '/' in self.env_id or '.' in self.env_id:
                # environment in a an absolute path module written as a unix path or in a relative path module
                # written as a python import path
                env_class = short_dynamic_import(self.env_id)
            else:
                # environment in a python package
                env_class = gym.envs.registration.load(self.env_id)

            # instantiate the environment
            if self.additional_simulator_parameters:
                self.env = env_class(**self.additional_simulator_parameters)
            else:
                self.env = env_class()
        else:
            self.env = gym.make(self.env_id)

        # for classic control we want to use the native renderer because otherwise we will get 2 renderer windows
        environment_to_always_use_with_native_rendering = [
            'classic_control', 'mujoco', 'robotics'
        ]
        self.native_rendering = self.native_rendering or \
                                any([env in str(self.env.unwrapped.__class__)
                                     for env in environment_to_always_use_with_native_rendering])
        # a renderer that already exists is closed once native rendering is selected
        if self.native_rendering:
            if hasattr(self, 'renderer'):
                self.renderer.close()

        # seed
        if self.seed is not None:
            self.env.seed(self.seed)
            np.random.seed(self.seed)
            random.seed(self.seed)

        # frame skip and max between consecutive frames
        self.is_robotics_env = 'robotics' in str(self.env.unwrapped.__class__)
        self.is_mujoco_env = 'mujoco' in str(self.env.unwrapped.__class__)
        self.is_atari_env = 'Atari' in str(self.env.unwrapped.__class__)
        # keep a reference to the environment as it is before the Atari wrapper below replaces self.env
        self.timelimit_env_wrapper = self.env
        if self.is_atari_env:
            self.env.unwrapped.frameskip = 1  # this accesses the atari env that is wrapped with a timelimit wrapper env
            if self.env_id == "SpaceInvadersDeterministic-v4" and self.frame_skip == 4:
                screen.warning(
                    "Warning: The frame-skip for Space Invaders was automatically updated from 4 to 3. "
                    "This is following the DQN paper where it was noticed that a frame-skip of 3 makes the "
                    "laser rays disappear. To force frame-skip of 4, please use SpaceInvadersNoFrameskip-v4."
                )
                self.frame_skip = 3
            self.env = MaxOverFramesAndFrameskipEnvWrapper(
                self.env,
                frameskip=self.frame_skip,
                max_over_num_frames=self.max_over_num_frames)
        else:
            self.env.unwrapped.frameskip = self.frame_skip

        self.state_space = StateSpace({})

        # observations
        # a non-dict observation space is exposed under the single key 'observation'
        if not isinstance(self.env.observation_space,
                          gym.spaces.dict_space.Dict):
            state_space = {'observation': self.env.observation_space}
        else:
            state_space = self.env.observation_space.spaces

        for observation_space_name, observation_space in state_space.items():
            if len(observation_space.shape
                   ) == 3 and observation_space.shape[-1] == 3:
                # we assume gym has image observations which are RGB and where their values are within 0-255
                self.state_space[
                    observation_space_name] = ImageObservationSpace(
                        shape=np.array(observation_space.shape),
                        high=255,
                        channels_axis=-1)
            else:
                # everything else is treated as a vector whose length is the first shape entry
                self.state_space[
                    observation_space_name] = VectorObservationSpace(
                        shape=observation_space.shape[0],
                        low=observation_space.low,
                        high=observation_space.high)
        if 'desired_goal' in state_space.keys():
            self.goal_space = self.state_space['desired_goal']

        # actions
        # NOTE(review): an action space that is neither Box nor Discrete leaves self.action_space
        # as the parent set it, yet it is dereferenced further down -- confirm this cannot happen
        if type(self.env.action_space) == gym.spaces.box.Box:
            self.action_space = BoxActionSpace(
                shape=self.env.action_space.shape,
                low=self.env.action_space.low,
                high=self.env.action_space.high)
        elif type(self.env.action_space) == gym.spaces.discrete.Discrete:
            actions_description = []
            if hasattr(self.env.unwrapped, 'get_action_meanings'):
                actions_description = self.env.unwrapped.get_action_meanings()
            self.action_space = DiscreteActionSpace(
                num_actions=self.env.action_space.n,
                descriptions=actions_description)

        if self.human_control:
            # TODO: add this to the action space
            # map keyboard keys to actions
            self.key_to_action = {}
            if hasattr(self.env.unwrapped, 'get_keys_to_action'):
                self.key_to_action = self.env.unwrapped.get_keys_to_action()

        # initialize the state by getting a new state from the environment
        self.reset_internal_state(True)

        # render
        if self.is_rendered:
            image = self.get_rendered_image()
            # the window is doubled in size when a human is playing
            scale = 1
            if self.human_control:
                scale = 2
            if not self.native_rendering:
                self.renderer.create_screen(image.shape[1] * scale,
                                            image.shape[0] * scale)

        # measurements
        # the step limit comes from the environment spec when there is one
        if self.env.spec is not None:
            self.timestep_limit = self.env.spec.timestep_limit
        else:
            self.timestep_limit = None

        # the info is only updated after the first step
        self.state = self.step(self.action_space.default_action).next_state
        self.state_space['measurements'] = VectorObservationSpace(
            shape=len(self.info.keys()))

        # without a custom threshold, fall back to the one declared in the gym spec
        if self.env.spec and custom_reward_threshold is None:
            self.reward_success_threshold = self.env.spec.reward_threshold
            self.reward_space = RewardSpace(
                1, reward_success_threshold=self.reward_success_threshold)

Esempio n. 11

0

Mostra file

File: reward_rescale_filter.py Progetto: bigdatasciencegroup/intel-ailab-reinforcement-learning-coach

 def get_filtered_reward_space(self, input_reward_space: RewardSpace) -> RewardSpace:
     """
     Multiply both reward bounds by the filter's rescale factor.

     The space passed in is modified in place and the same object is returned.

     :param input_reward_space: reward space whose ``low`` / ``high`` are updated
     :return: ``input_reward_space`` itself, with the rescaled bounds
     """
     factor = self.rescale_factor
     scaled_high = input_reward_space.high * factor
     scaled_low = input_reward_space.low * factor
     input_reward_space.high = scaled_high
     input_reward_space.low = scaled_low
     return input_reward_space