Example #1
def test_measurements_observation_space():
    # empty measurements space
    measurements_space = VectorObservationSpace(0)

    # vector space
    measurements_space = VectorObservationSpace(
        3, measurements_names=['a', 'b', 'c'])
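A rough sketch of what the constructed space exposes, assuming the Coach import path rl_coach.spaces (the snippet above does not show its imports); the expected values mirror the assertions in the tests further down:

# Illustrative sketch only; the import path is an assumption based on the Coach library layout.
from rl_coach.spaces import VectorObservationSpace

measurements_space = VectorObservationSpace(3, measurements_names=['a', 'b', 'c'])
print(measurements_space.shape)               # compares equal to np.array([3]) in the tests below
print(measurements_space.measurements_names)  # ['a', 'b', 'c']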
Example #2
def get_observation_space(sensor):
    '''Creates the observation space for the given sensor
       sensor - String with the desired sensor to add to the
                observation space
    '''
    obs = StateSpace({})

    if not isinstance(sensor, str):
        raise GenericError("Non-string type for sensor: {}".format(
            type(sensor)))

    if sensor == Input.CAMERA.value or sensor == Input.OBSERVATION.value or \
    sensor == Input.LEFT_CAMERA.value:
        obs[sensor] = ImageObservationSpace(shape=np.array(
            (TRAINING_IMAGE_SIZE[1], TRAINING_IMAGE_SIZE[0], 3)),
                                            high=255,
                                            channels_axis=-1)
    elif sensor == Input.STEREO.value:
        obs[sensor] = PlanarMapsObservationSpace(shape=np.array(
            (TRAINING_IMAGE_SIZE[1], TRAINING_IMAGE_SIZE[0], 2)),
                                                 low=0,
                                                 high=255,
                                                 channels_axis=-1)
    elif sensor == Input.LIDAR.value:
        obs[sensor] = VectorObservationSpace(shape=TRAINING_LIDAR_SIZE,
                                             low=0.15,
                                             high=1.0)
    elif sensor == Input.SECTOR_LIDAR.value:
        obs[sensor] = VectorObservationSpace(shape=TRAINING_LIDAR_SIZE,
                                             low=0.15,
                                             high=SECTOR_LIDAR_CLIPPING_DIST)
    else:
        raise Exception(
            "Unable to set observation space for sensor {}".format(sensor))
    return obs
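A hedged usage sketch; it assumes Input and the TRAINING_* constants used above are importable from the same DeepRacer module (not shown in the example):

# Illustrative calls only; each returns a StateSpace keyed by the sensor string that was passed in.
camera_space = get_observation_space(Input.CAMERA.value)   # ImageObservationSpace, 3 channels
stereo_space = get_observation_space(Input.STEREO.value)   # PlanarMapsObservationSpace, 2 channels
lidar_space = get_observation_space(Input.LIDAR.value)     # VectorObservationSpace bounded to [0.15, 1.0]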
Example #3
 def __init__(self, level: LevelSelection, seed: int, frame_skip: int,
              human_control: bool, custom_reward_threshold: Union[int,
                                                                  float],
              visualization_parameters: VisualizationParameters, **kwargs):
     super().__init__(level, seed, frame_skip, human_control,
                      custom_reward_threshold, visualization_parameters,
                      **kwargs)
     self.state_space = StateSpace({
         "observation":
         VectorObservationSpace(shape=4)
         # "observation": PlanarMapsObservationSpace(shape=np.array([84, 84, 2]), low=0, high=1)
         # "observation": TensorObservationSpace(shape=np.array([2, 2]), low=0, high=1)
         # "player": VectorObservationSpace(shape=2),
         # "map": PlanarMapsObservationSpace(shape=np.array([84, 84, 1]), low=0, high=2),
     })
     self.goal_space = VectorObservationSpace(shape=4)
     # self.state_space = PlanarMapsObservationSpace(shape=np.array([84, 84, 2]), low=0, high=1)
     self.action_space = DiscreteActionSpace(num_actions=4,
                                             descriptions={
                                                 "0": "up",
                                                 "1": "down",
                                                 "2": "left",
                                                 "3": "right"
                                             })
     self.env = FacEnv()
     self.reward_limit = -np.power(
         np.sum(
             np.arange(1 + np.abs(self.env.target['x']) +
                       np.abs(self.env.target['y']))) / 2, 2)
Example #4
    def get_filtered_observation_space(
            self, input_observation_space: VectorObservationSpace
    ) -> ObservationSpace:
        self.measurement_names = copy.copy(
            input_observation_space.measurements_names)

        if self.reduction_method == self.ReductionMethod.Keep:
            input_observation_space.shape[-1] = len(self.part_names)
            self.indices_to_keep = [
                idx for idx, val in enumerate(self.measurement_names)
                if val in self.part_names
            ]
            input_observation_space.measurements_names = copy.copy(
                self.part_names)
        elif self.reduction_method == self.ReductionMethod.Discard:
            input_observation_space.shape[-1] -= len(self.part_names)
            self.indices_to_keep = [
                idx for idx, val in enumerate(self.measurement_names)
                if val not in self.part_names
            ]
            input_observation_space.measurements_names = [
                val for val in input_observation_space.measurements_names
                if val not in self.part_names
            ]
        else:
            raise ValueError("The given reduction method is not supported")

        return input_observation_space
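A short sketch of the filter's effect on a three-measurement space; the constructor signature and import paths are assumed from the usage shown in the tests further down, and the expected results mirror their assertions:

# Assumed Coach import paths; adjust to the local module layout if they differ.
from rl_coach.spaces import VectorObservationSpace
from rl_coach.filters.observation.observation_reduction_by_sub_parts_name_filter import \
    ObservationReductionBySubPartsNameFilter

# Keep only 'a': the space shrinks to a single measurement.
space = VectorObservationSpace(3, measurements_names=['a', 'b', 'c'])
keep_filter = ObservationReductionBySubPartsNameFilter(
    ["a"], ObservationReductionBySubPartsNameFilter.ReductionMethod.Keep)
kept = keep_filter.get_filtered_observation_space(space)
# kept.shape == [1], kept.measurements_names == ['a']

# Discard 'a': the remaining measurements 'b' and 'c' survive.
space = VectorObservationSpace(3, measurements_names=['a', 'b', 'c'])
discard_filter = ObservationReductionBySubPartsNameFilter(
    ["a"], ObservationReductionBySubPartsNameFilter.ReductionMethod.Discard)
discarded = discard_filter.get_filtered_observation_space(space)
# discarded.shape == [2], discarded.measurements_names == ['b', 'c']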
Example #5
    def set_environment_parameters(self, spaces: SpacesDefinition):
        """
        Sets the parameters that are environment dependent. As a side effect, initializes all the components that are
        dependent on those values, by calling init_environment_dependent_modules

        :param spaces: the environment spaces definition
        :return: None
        """
        self.spaces = copy.deepcopy(spaces)

        if self.ap.algorithm.use_accumulated_reward_as_measurement:
            if 'measurements' in self.spaces.state.sub_spaces:
                self.spaces.state['measurements'].shape += 1
                self.spaces.state['measurements'].measurements_names += ['accumulated_reward']
            else:
                self.spaces.state['measurements'] = VectorObservationSpace(1, measurements_names=['accumulated_reward'])

        for observation_name in self.spaces.state.sub_spaces.keys():
            self.spaces.state[observation_name] = \
                self.pre_network_filter.get_filtered_observation_space(observation_name,
                    self.input_filter.get_filtered_observation_space(observation_name,
                                                                     self.spaces.state[observation_name]))

        self.spaces.reward = self.pre_network_filter.get_filtered_reward_space(
            self.input_filter.get_filtered_reward_space(self.spaces.reward))

        self.spaces.action = self.output_filter.get_unfiltered_action_space(self.spaces.action)

        if isinstance(self.in_action_space, GoalsSpace):
            # TODO: what if the goal type is an embedding / embedding change?
            self.spaces.goal = self.in_action_space
            self.spaces.goal.set_target_space(self.spaces.state[self.spaces.goal.goal_name])

        self.init_environment_dependent_modules()
Example #6
    def set_environment_parameters(self, spaces: SpacesDefinition):
        self.spaces = copy.deepcopy(spaces)
        self.spaces.goal = VectorObservationSpace(
            shape=self.spaces.state['measurements'].shape,
            measurements_names=self.spaces.state['measurements'].measurements_names)

        # if the user has filled in some scale values, check that the names are correct
        if set(self.spaces.state['measurements'].measurements_names).intersection(
                self.ap.algorithm.scale_measurements_targets.keys()) !=\
                set(self.ap.algorithm.scale_measurements_targets.keys()):
            raise ValueError(
                "Some of the keys in parameter scale_measurements_targets ({})  are not defined in "
                "the measurements space {}".format(
                    self.ap.algorithm.scale_measurements_targets.keys(),
                    self.spaces.state['measurements'].measurements_names))

        super().set_environment_parameters(self.spaces)

        # the below is done after calling the base class method, as it might add accumulated reward as a measurement

        # fill out the missing measurements scale factors
        for measurement_name in self.spaces.state[
                'measurements'].measurements_names:
            if measurement_name not in self.ap.algorithm.scale_measurements_targets:
                self.ap.algorithm.scale_measurements_targets[
                    measurement_name] = 1

        self.target_measurements_scale_factors = \
            np.array([self.ap.algorithm.scale_measurements_targets[measurement_name] for measurement_name in
                      self.spaces.state['measurements'].measurements_names])
Example #7
    def _setup_state_space(self):
        state_space = StateSpace({})
        dummy_obs = self._process_observation(self.env.observation_spec())

        state_space['measurements'] = VectorObservationSpace(dummy_obs['measurements'].shape[0])

        if self.base_parameters.use_camera_obs:
            state_space['camera'] = PlanarMapsObservationSpace(dummy_obs['camera'].shape, 0, 255)

        return state_space
Example #8
 def get_filtered_observation_space(self, input_observation_space: ObservationSpace) -> ObservationSpace:
     if isinstance(input_observation_space, VectorObservationSpace):
         self.input_observation_space = input_observation_space = VectorObservationSpace(input_observation_space.shape * self.stack_size)
     else:
         if self.stacking_axis == -1:
             input_observation_space.shape = np.append(input_observation_space.shape, values=[self.stack_size], axis=0)
         else:
             input_observation_space.shape = np.insert(input_observation_space.shape, obj=self.stacking_axis,
                                                      values=[self.stack_size], axis=0)
     return input_observation_space
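A small sketch of the shape arithmetic performed by the stacking filter above, using plain numpy and an assumed stack_size of 4:

import numpy as np

stack_size = 4

# Vector branch: the whole vector is repeated, so a length-4 vector becomes a flat vector of length 16.
stacked_vector_length = 4 * stack_size                                        # 16

planar_shape = np.array([84, 84])
# stacking_axis == -1: the stack size is appended as a new trailing dimension.
stacked_last = np.append(planar_shape, values=[stack_size], axis=0)           # [84, 84, 4]
# any other stacking_axis: the stack size is inserted at that axis instead.
stacked_first = np.insert(planar_shape, obj=0, values=[stack_size], axis=0)   # [4, 84, 84]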
Example #9
def get_observation_space(sensor, model_metadata=None):
    '''Creates the observation space for the given sensor
       sensor - String with the desired sensor to add to the
                observation space
        model_metadata - model metadata information
    '''
    obs = StateSpace({})

    if not isinstance(sensor, str):
        raise GenericError("Non-string type for sensor: {}".format(
            type(sensor)))

    if sensor == Input.CAMERA.value or sensor == Input.OBSERVATION.value or \
    sensor == Input.LEFT_CAMERA.value:
        obs[sensor] = ImageObservationSpace(shape=np.array(
            (TRAINING_IMAGE_SIZE[1], TRAINING_IMAGE_SIZE[0], 3)),
                                            high=255,
                                            channels_axis=-1)
    elif sensor == Input.STEREO.value:
        obs[sensor] = PlanarMapsObservationSpace(shape=np.array(
            (TRAINING_IMAGE_SIZE[1], TRAINING_IMAGE_SIZE[0], 2)),
                                                 low=0,
                                                 high=255,
                                                 channels_axis=-1)
    elif sensor == Input.LIDAR.value:
        obs[sensor] = VectorObservationSpace(shape=TRAINING_LIDAR_SIZE,
                                             low=0.15,
                                             high=1.0)
    elif sensor == Input.SECTOR_LIDAR.value:
        obs[sensor] = VectorObservationSpace(shape=NUMBER_OF_LIDAR_SECTORS,
                                             low=0.0,
                                             high=1.0)
    elif sensor == Input.DISCRETIZED_SECTOR_LIDAR.value:
        lidar_config = model_metadata[ModelMetadataKeys.LIDAR_CONFIG.value]
        shape = lidar_config[ModelMetadataKeys.NUM_SECTORS.value] * \
                lidar_config[ModelMetadataKeys.NUM_VALUES_PER_SECTOR.value]
        obs[sensor] = VectorObservationSpace(shape=shape, low=0.0, high=1.0)
    else:
        raise Exception(
            "Unable to set observation space for sensor {}".format(sensor))
    return obs
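A hedged sketch of the DISCRETIZED_SECTOR_LIDAR branch; the dictionary below reuses the ModelMetadataKeys names from the example, but the numeric values are illustrative only:

# Hypothetical metadata: 8 sectors with 3 values each gives a flat vector of length 24.
model_metadata = {
    ModelMetadataKeys.LIDAR_CONFIG.value: {
        ModelMetadataKeys.NUM_SECTORS.value: 8,
        ModelMetadataKeys.NUM_VALUES_PER_SECTOR.value: 3,
    }
}
obs = get_observation_space(Input.DISCRETIZED_SECTOR_LIDAR.value, model_metadata)
# obs[Input.DISCRETIZED_SECTOR_LIDAR.value] is a VectorObservationSpace(shape=24, low=0.0, high=1.0)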
Example #10
def test_get_filtered_observation_space():
    # Keep
    observation_space = VectorObservationSpace(
        3, measurements_names=['a', 'b', 'c'])
    env_response = EnvResponse(next_state={'observation': np.ones([3])},
                               reward=0,
                               game_over=False)
    reduction_filter = InputFilter()
    reduction_filter.add_observation_filter(
        'observation', 'reduce',
        ObservationReductionBySubPartsNameFilter(
            ["a"],
            ObservationReductionBySubPartsNameFilter.ReductionMethod.Keep))

    filtered_observation_space = reduction_filter.get_filtered_observation_space(
        'observation', observation_space)
    assert np.all(filtered_observation_space.shape == np.array([1]))
    assert filtered_observation_space.measurements_names == ['a']

    # Discard
    observation_space = VectorObservationSpace(
        3, measurements_names=['a', 'b', 'c'])
    env_response = EnvResponse(next_state={'observation': np.ones([3])},
                               reward=0,
                               game_over=False)
    reduction_filter = InputFilter()
    reduction_filter.add_observation_filter(
        'observation', 'reduce',
        ObservationReductionBySubPartsNameFilter(
            ["a"],
            ObservationReductionBySubPartsNameFilter.ReductionMethod.Discard))

    filtered_observation_space = reduction_filter.get_filtered_observation_space(
        'observation', observation_space)
    assert np.all(filtered_observation_space.shape == np.array([2]))
    assert filtered_observation_space.measurements_names == ['b', 'c']
Example #11
def test_filter():
    # Keep
    observation_space = VectorObservationSpace(
        3, measurements_names=['a', 'b', 'c'])
    env_response = EnvResponse(next_state={'observation': np.ones([3])},
                               reward=0,
                               game_over=False)
    reduction_filter = InputFilter()
    reduction_filter.add_observation_filter(
        'observation', 'reduce',
        ObservationReductionBySubPartsNameFilter(
            ["a"],
            ObservationReductionBySubPartsNameFilter.ReductionMethod.Keep))

    reduction_filter.get_filtered_observation_space('observation',
                                                    observation_space)
    result = reduction_filter.filter(env_response)[0]
    unfiltered_observation = env_response.next_state['observation']
    filtered_observation = result.next_state['observation']

    # make sure the original observation is unchanged
    assert unfiltered_observation.shape == (3, )

    # validate the shape of the filtered observation
    assert filtered_observation.shape == (1, )

    # Discard
    reduction_filter = InputFilter()
    reduction_filter.add_observation_filter(
        'observation', 'reduce',
        ObservationReductionBySubPartsNameFilter(
            ["a"],
            ObservationReductionBySubPartsNameFilter.ReductionMethod.Discard))
    reduction_filter.get_filtered_observation_space('observation',
                                                    observation_space)
    result = reduction_filter.filter(env_response)[0]
    unfiltered_observation = env_response.next_state['observation']
    filtered_observation = result.next_state['observation']

    # make sure the original observation is unchanged
    assert unfiltered_observation.shape == (3, )

    # validate the shape of the filtered observation
    assert filtered_observation.shape == (2, )
Example #12
def train_on_csv_file(csv_file, n_epochs, dataset_size, obs_dim, act_dim):
    tf.reset_default_graph()  # just to clean things up; only needed for the tutorial

    schedule_params = set_schedule_params(n_epochs, dataset_size)

    #########
    # Agent #
    #########
    # note that we have moved to BCQ, which will help the training to converge better and faster
    agent_params = set_agent_params(DDQNBCQAgentParameters)
    # additional setting for DDQNBCQAgentParameters agent parameters
    # can use either a kNN or a NN based model for predicting which actions not to max over in the bellman equation
    # agent_params.algorithm.action_drop_method_parameters = KNNParameters()
    agent_params.algorithm.action_drop_method_parameters = NNImitationModelParameters()

    DATATSET_PATH = csv_file
    agent_params.memory.load_memory_from_file_path = CsvDataset(
        DATATSET_PATH, is_episodic=True)

    spaces = SpacesDefinition(state=StateSpace(
        {'observation': VectorObservationSpace(shape=obs_dim)}),
                              goal=None,
                              action=DiscreteActionSpace(act_dim),
                              reward=RewardSpace(1))

    graph_manager = BatchRLGraphManager(
        agent_params=agent_params,
        env_params=None,
        spaces_definition=spaces,
        schedule_params=schedule_params,
        vis_params=VisualizationParameters(
            dump_signals_to_csv_every_x_episodes=1),
        reward_model_num_epochs=30,
        train_to_eval_ratio=0.4)
    graph_manager.create_graph(task_parameters)
    graph_manager.improve()
    return
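A hedged usage sketch. The function relies on task_parameters, set_schedule_params and set_agent_params being defined elsewhere in the surrounding tutorial, so the call below assumes that setup already exists; the dimensions match the Acrobot dataset used in Example #14, and the epoch and dataset-size values are illustrative only:

# Illustrative call only; adjust the arguments to the actual dataset.
train_on_csv_file('acrobot_dataset.csv',
                  n_epochs=100,
                  dataset_size=50000,
                  obs_dim=6,
                  act_dim=3)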
Example #13
    def __init__(
            self,
            level: LevelSelection,
            seed: int,
            frame_skip: int,
            human_control: bool,
            custom_reward_threshold: Union[int, float],
            visualization_parameters: VisualizationParameters,
            host: str,
            port: int,
            timeout: float,
            number_of_vehicles: int,
            number_of_walkers: int,
            weather_id: int,  #rendering_mode: bool,
            ego_vehicle_filter: str,
            display_size: int,
            sensors: List[SensorTypes],
            camera_height: int,
            camera_width: int,
            lidar_bin: float,
            obs_range: float,
            display_route: bool,
            render_pygame: bool,
            d_behind: float,
            out_lane_thres: float,
            desired_speed: float,
            max_past_step: int,
            dt: float,
            discrete: bool,
            discrete_acc: List[float],
            discrete_steer: List[float],
            continuous_accel_range: List[float],
            continuous_steer_range: List[float],
            max_ego_spawn_times: int,
            max_time_episode: int,
            max_waypt: int,
            **kwargs):
        super().__init__(level, seed, frame_skip, human_control,
                         custom_reward_threshold, visualization_parameters)

        self.level = level
        # self.frame_skip = frame_skip
        # self.seed = seed
        # self.human_control = human_control
        # self.custom_reward_threshold = custom_reward_threshold
        # self.visualization_paramters = visualization_parameters

        self.host = host
        self.port = port
        self.timeout = timeout
        self.number_of_vehicles = number_of_vehicles
        self.number_of_walkers = number_of_walkers
        self.weather_id = weather_id

        self.ego_vehicle_filter = ego_vehicle_filter
        self.display_size = display_size
        self.sensors = sensors
        self.camera_height = camera_height
        self.camera_width = camera_width
        self.obs_range = obs_range
        self.lidar_bin = lidar_bin
        self.obs_size = int(self.obs_range / self.lidar_bin)
        self.display_route = display_route
        self.render_pygame = render_pygame
        self.d_behind = d_behind
        self.out_lane_thres = out_lane_thres
        self.desired_speed = desired_speed

        self.max_past_step = max_past_step
        self.dt = dt
        self.discrete = discrete
        self.discrete_acc = discrete_acc
        self.discrete_steer = discrete_steer
        self.continuous_accel_range = continuous_accel_range
        self.continuous_steer_range = continuous_steer_range
        self.max_ego_spawn_times = max_ego_spawn_times
        self.max_time_episode = max_time_episode
        self.max_waypt = max_waypt

        # Connect to carla server and get world object
        print('connecting to Carla server...')
        self.client = carla.Client(self.host, self.port)
        self.client.set_timeout(self.timeout)
        self.traffic_manager = self.client.get_trafficmanager()
        self.world = self.client.load_world(level)
        print('Carla server connected!')

        # Set weather
        self.world.set_weather(CARLA_WEATHER_PRESETS[self.weather_id])

        # Get spawn points
        self._get_spawn_points()

        # Create the ego vehicle blueprint
        self.ego_bp = self._create_vehicle_bluepprint(self.ego_vehicle_filter,
                                                      color='49,8,8')

        # Collision sensor
        self.collision_hist = []  # The collision history
        self.collision_hist_l = 1  # collision history length
        self.collision_bp = self.world.get_blueprint_library().find(
            'sensor.other.collision')

        # Lidar sensor
        self.lidar_data = None
        self.lidar_height = 2.1
        self.lidar_trans = carla.Transform(
            carla.Location(x=0.0, z=self.lidar_height))
        self.lidar_bp = self.world.get_blueprint_library().find(
            'sensor.lidar.ray_cast')
        self.lidar_bp.set_attribute('channels', '32')
        self.lidar_bp.set_attribute('range', '5000')

        # Camera sensor
        self.camera_img = np.zeros((self.camera_height, self.camera_width, 3),
                                   dtype=np.uint8)
        self.camera_trans = carla.Transform(carla.Location(x=0.8, z=1.7))
        self.camera_bp = self.world.get_blueprint_library().find(
            'sensor.camera.rgb')
        # Modify the attributes of the blueprint to set image resolution and field of view.
        self.camera_bp.set_attribute('image_size_x', str(self.camera_width))
        self.camera_bp.set_attribute('image_size_y', str(self.camera_height))
        self.camera_bp.set_attribute('fov', '110')
        # Set the time in seconds between sensor captures
        self.camera_bp.set_attribute('sensor_tick', '0.02')

        # Set fixed simulation step for synchronous mode
        self.settings = self.world.get_settings()
        self.settings.fixed_delta_seconds = self.dt
        #self.settings.no_rendering_mode = rendering_mode
        self._set_synchronous_mode(True)

        # Record the time of total steps and resetting steps
        self.reset_step = 0
        self.total_step = 0

        # Action space
        if self.discrete:
            self.discrete_act = [discrete_acc, discrete_steer]
            self.n_acc = len(self.discrete_act[0])
            self.n_steer = len(self.discrete_act[1])
            self.action_space = DiscreteActionSpace(
                num_actions=self.n_acc * self.n_steer,
                descriptions=["acceleration", "steering"])
        else:
            self.action_space = BoxActionSpace(
                shape=2,
                low=np.array(
                    [continuous_accel_range[0], continuous_steer_range[0]]),
                high=np.array(
                    [continuous_accel_range[1], continuous_steer_range[1]]),
                descriptions=["acceleration", "steering"])

        # Observation space
        self.state_space = StateSpace({
            "measurements":
            VectorObservationSpace(shape=4,
                                   low=np.array([-2, -1, -5, 0]),
                                   high=np.array([2, 1, 30, 1]),
                                   measurements_names=[
                                       "lat_dist", "heading_error",
                                       "ego_speed", "safety_margin"
                                   ])
        })

        if SensorTypes.FRONT_CAMERA in self.sensors:
            self.state_space[
                SensorTypes.FRONT_CAMERA.value] = ImageObservationSpace(
                    shape=np.array([self.camera_height, self.camera_width, 3]),
                    high=255)
        if SensorTypes.LIDAR in self.sensors:
            self.state_space[
                SensorTypes.LIDAR.value] = PlanarMapsObservationSpace(
                    shape=np.array([self.obs_size, self.obs_size, 3]),
                    low=0,
                    high=255)
        if SensorTypes.BIRDEYE in self.sensors:
            self.state_space[
                SensorTypes.BIRDEYE.value] = ImageObservationSpace(
                    shape=np.array([self.obs_size, self.obs_size, 3]),
                    high=255)

        # Initialize the renderer
        self._init_renderer()

        self.reset_internal_state(True)
Example #14
# ER - we'll be needing an episodic replay buffer for off-policy evaluation
agent_params.memory = EpisodicExperienceReplayParameters()

# E-Greedy schedule - there is no exploration in Batch RL. Disabling E-Greedy. 
agent_params.exploration.epsilon_schedule = LinearSchedule(initial_value=0, final_value=0, decay_steps=1)
agent_params.exploration.evaluation_epsilon = 0

# can use either a kNN or a NN based model for predicting which actions not to max over in the bellman equation
#agent_params.algorithm.action_drop_method_parameters = KNNParameters()


DATATSET_PATH = 'acrobot_dataset.csv'
agent_params.memory = EpisodicExperienceReplayParameters()
agent_params.memory.load_memory_from_file_path = CsvDataset(DATATSET_PATH, is_episodic = True)

spaces = SpacesDefinition(state=StateSpace({'observation': VectorObservationSpace(shape=6)}),
                          goal=None,
                          action=DiscreteActionSpace(3),
                          reward=RewardSpace(1))

graph_manager = BatchRLGraphManager(agent_params=agent_params,
                                    env_params=None,
                                    spaces_definition=spaces,
                                    schedule_params=schedule_params,
                                    vis_params=VisualizationParameters(dump_signals_to_csv_every_x_episodes=1),
                                    reward_model_num_epochs=30,
                                    train_to_eval_ratio=0.4)
graph_manager.create_graph(task_parameters)
graph_manager.improve()
Example #15
    def __init__(self,
                 level: LevelSelection,
                 frame_skip: int,
                 visualization_parameters: VisualizationParameters,
                 target_success_rate: float = 1.0,
                 additional_simulator_parameters: Dict[str, Any] = {},
                 seed: Union[None, int] = None,
                 human_control: bool = False,
                 custom_reward_threshold: Union[int, float] = None,
                 random_initialization_steps: int = 1,
                 max_over_num_frames: int = 1,
                 observation_space_type: ObservationSpaceType = None,
                 **kwargs):
        """
        :param level: (str)
            A string representing the gym level to run. This can also be a LevelSelection object.
            For example, BreakoutDeterministic-v0

        :param frame_skip: (int)
            The number of frames to skip between any two actions given by the agent. The action will be repeated
            for all the skipped frames.

        :param visualization_parameters: (VisualizationParameters)
            The parameters used for visualizing the environment, such as the render flag, storing videos etc.

        :param additional_simulator_parameters: (Dict[str, Any])
            Any additional parameters that the user can pass to the Gym environment. These parameters should be
            accepted by the __init__ function of the implemented Gym environment.

        :param seed: (int)
            A seed to use for the random number generator when running the environment.

        :param human_control: (bool)
            A flag that allows controlling the environment using the keyboard keys.

        :param custom_reward_threshold: (float)
            Allows defining a custom reward that will be used to decide when the agent succeeded in passing the environment.
            If not set, this value will be taken from the Gym environment definition.

        :param random_initialization_steps: (int)
            The number of random steps that will be taken in the environment after each reset.
            This is a feature presented in the DQN paper, which improves the variability of the episodes the agent sees.

        :param max_over_num_frames: (int)
            This value will be used for merging multiple frames into a single frame by taking the maximum value for each
            of the pixels in the frame. This is particularly used in Atari games, where the frames flicker, and objects
            can be seen in one frame but disappear in the next.

        :param observation_space_type:
            This value will be used for generating observation space. Allows a custom space. Should be one of
            ObservationSpaceType. If not specified, observation space is inferred from the number of dimensions
            of the observation: 1D: Vector space, 3D: Image space if 1 or 3 channels, PlanarMaps space otherwise.
        """
        super().__init__(level, seed, frame_skip, human_control,
                         custom_reward_threshold, visualization_parameters,
                         target_success_rate)

        self.random_initialization_steps = random_initialization_steps
        self.max_over_num_frames = max_over_num_frames
        self.additional_simulator_parameters = additional_simulator_parameters

        # hide warnings
        gym.logger.set_level(40)
        """
        load and initialize environment
        environment ids can be defined in 3 ways:
        1. Native gym environments like BreakoutDeterministic-v0 for example
        2. Custom gym environments written and installed as python packages.
           These environments should have a python module with a class inheriting gym.Env, implementing the
           relevant functions (_reset, _step, _render) and defining the observation and action space
           For example: my_environment_package:MyEnvironmentClass will run an environment defined in the
           MyEnvironmentClass class
        3. Custom gym environments written as an independent module which is not installed.
           These environments should have a python module with a class inheriting gym.Env, implementing the
           relevant functions (_reset, _step, _render) and defining the observation and action space.
           For example: path_to_my_environment.sub_directory.my_module:MyEnvironmentClass will run an
           environment defined in the MyEnvironmentClass class which is located in the module in the relative path
           path_to_my_environment.sub_directory.my_module
        """
        if ':' in self.env_id:
            # custom environments
            if '/' in self.env_id or '.' in self.env_id:
                # environment in an absolute path module written as a unix path, or in a relative path module
                # written as a python import path
                env_class = short_dynamic_import(self.env_id)
            else:
                # environment in a python package
                env_class = gym.envs.registration.load(self.env_id)

            # instantiate the environment
            try:
                self.env = env_class(**self.additional_simulator_parameters)
            except:
                screen.error(
                    "Failed to instantiate Gym environment class %s with arguments %s"
                    % (env_class, self.additional_simulator_parameters),
                    crash=False)
                raise
        else:
            self.env = gym.make(self.env_id)

        # for classic control we want to use the native renderer because otherwise we will get 2 renderer windows
        environment_to_always_use_with_native_rendering = [
            'classic_control', 'mujoco', 'robotics'
        ]
        self.native_rendering = self.native_rendering or \
                                any([env in str(self.env.unwrapped.__class__)
                                     for env in environment_to_always_use_with_native_rendering])
        if self.native_rendering:
            if hasattr(self, 'renderer'):
                self.renderer.close()

        # seed
        if self.seed is not None:
            self.env.seed(self.seed)
            np.random.seed(self.seed)
            random.seed(self.seed)

        # frame skip and max between consecutive frames
        self.is_mujoco_env = 'mujoco' in str(self.env.unwrapped.__class__)
        self.is_roboschool_env = 'roboschool' in str(
            self.env.unwrapped.__class__)
        self.is_atari_env = 'Atari' in str(self.env.unwrapped.__class__)
        if self.is_atari_env:
            self.env.unwrapped.frameskip = 1  # this accesses the atari env that is wrapped with a timelimit wrapper env
            if self.env_id == "SpaceInvadersDeterministic-v4" and self.frame_skip == 4:
                screen.warning(
                    "Warning: The frame-skip for Space Invaders was automatically updated from 4 to 3. "
                    "This is following the DQN paper where it was noticed that a frame-skip of 3 makes the "
                    "laser rays disappear. To force frame-skip of 4, please use SpaceInvadersNoFrameskip-v4."
                )
                self.frame_skip = 3
            self.env = MaxOverFramesAndFrameskipEnvWrapper(
                self.env,
                frameskip=self.frame_skip,
                max_over_num_frames=self.max_over_num_frames)
        else:
            self.env.unwrapped.frameskip = self.frame_skip

        self.state_space = StateSpace({})

        # observations
        if not isinstance(self.env.observation_space, gym.spaces.dict.Dict):
            state_space = {'observation': self.env.observation_space}
        else:
            state_space = self.env.observation_space.spaces

        for observation_space_name, observation_space in state_space.items():
            if observation_space_type == ObservationSpaceType.Tensor:
                # we consider arbitrary input tensor which does not necessarily represent images
                self.state_space[
                    observation_space_name] = TensorObservationSpace(
                        shape=np.array(observation_space.shape),
                        low=observation_space.low,
                        high=observation_space.high)
            elif observation_space_type == ObservationSpaceType.Image or len(
                    observation_space.shape) == 3:
                # we assume gym has image observations (with arbitrary number of channels) where their values are
                # within 0-255, and where the channel dimension is the last dimension
                if observation_space.shape[-1] in [1, 3]:
                    self.state_space[
                        observation_space_name] = ImageObservationSpace(
                            shape=np.array(observation_space.shape),
                            high=255,
                            channels_axis=-1)
                else:
                    # For any number of channels other than 1 or 3, use the generic PlanarMaps space
                    self.state_space[
                        observation_space_name] = PlanarMapsObservationSpace(
                            shape=np.array(observation_space.shape),
                            low=0,
                            high=255,
                            channels_axis=-1)
            elif observation_space_type == ObservationSpaceType.Vector or len(
                    observation_space.shape) == 1:
                self.state_space[
                    observation_space_name] = VectorObservationSpace(
                        shape=observation_space.shape[0],
                        low=observation_space.low,
                        high=observation_space.high)
            else:
                raise screen.error(
                    "Failed to instantiate Gym environment class %s with observation space type %s"
                    % (env_class, observation_space_type),
                    crash=True)

        if 'desired_goal' in state_space.keys():
            self.goal_space = self.state_space['desired_goal']

        # actions
        if type(self.env.action_space) == gym.spaces.box.Box:
            self.action_space = BoxActionSpace(
                shape=self.env.action_space.shape,
                low=self.env.action_space.low,
                high=self.env.action_space.high)
        elif type(self.env.action_space) == gym.spaces.discrete.Discrete:
            actions_description = []
            if hasattr(self.env.unwrapped, 'get_action_meanings'):
                actions_description = self.env.unwrapped.get_action_meanings()
            self.action_space = DiscreteActionSpace(
                num_actions=self.env.action_space.n,
                descriptions=actions_description)
        else:
            raise screen.error((
                "Failed to instantiate gym environment class {} due to unsupported "
                "action space {}. Expected BoxActionSpace or DiscreteActionSpace."
            ).format(env_class, self.env.action_space),
                               crash=True)

        if self.human_control:
            # TODO: add this to the action space
            # map keyboard keys to actions
            self.key_to_action = {}
            if hasattr(self.env.unwrapped, 'get_keys_to_action'):
                self.key_to_action = self.env.unwrapped.get_keys_to_action()
            else:
                screen.error(
                    "Error: Environment {} does not support human control.".
                    format(self.env),
                    crash=True)

        # initialize the state by getting a new state from the environment
        self.reset_internal_state(True)

        # render
        if self.is_rendered:
            image = self.get_rendered_image()
            scale = 1
            if self.human_control:
                scale = 2
            if not self.native_rendering:
                self.renderer.create_screen(image.shape[1] * scale,
                                            image.shape[0] * scale)

        # the info is only updated after the first step
        self.state = self.step(self.action_space.default_action).next_state
        self.state_space['measurements'] = VectorObservationSpace(
            shape=len(self.info.keys()))

        if self.env.spec and custom_reward_threshold is None:
            self.reward_success_threshold = self.env.spec.reward_threshold
            self.reward_space = RewardSpace(
                1, reward_success_threshold=self.reward_success_threshold)

        self.target_success_rate = target_success_rate
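A condensed sketch of the observation-space mapping used above, applied to a standalone gym space rather than a full environment; gym and the Coach space classes are assumed to be importable as in the example:

import gym
import numpy as np
# Assumed Coach import path; adjust to the local module layout if it differs.
from rl_coach.spaces import VectorObservationSpace, ImageObservationSpace, PlanarMapsObservationSpace

box = gym.spaces.Box(low=-1.0, high=1.0, shape=(4,))

if len(box.shape) == 1:
    # 1-D branch above: a flat gym Box becomes a VectorObservationSpace.
    coach_space = VectorObservationSpace(shape=box.shape[0], low=box.low, high=box.high)
elif len(box.shape) == 3:
    if box.shape[-1] in [1, 3]:
        # image-like observations with 1 or 3 channels.
        coach_space = ImageObservationSpace(shape=np.array(box.shape), high=255, channels_axis=-1)
    else:
        # any other channel count falls back to the generic planar-maps space.
        coach_space = PlanarMapsObservationSpace(shape=np.array(box.shape), low=0, high=255, channels_axis=-1)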
Example #16
    def __init__(
            self,
            level: LevelSelection,
            frame_skip: int,
            visualization_parameters: VisualizationParameters,
            seed: Union[None, int] = None,
            human_control: bool = False,
            observation_type: ObservationType = ObservationType.Measurements,
            custom_reward_threshold: Union[int, float] = None,
            **kwargs):
        super().__init__(level, seed, frame_skip, human_control,
                         custom_reward_threshold, visualization_parameters)

        self.observation_type = observation_type

        # load and initialize environment
        domain_name, task_name = self.env_id.split(":")
        self.env = suite.load(domain_name=domain_name,
                              task_name=task_name,
                              task_kwargs={'random': seed})

        if observation_type != ObservationType.Measurements:
            self.env = pixels.Wrapper(
                self.env,
                pixels_only=observation_type == ObservationType.Image)

        # seed
        if self.seed is not None:
            np.random.seed(self.seed)
            random.seed(self.seed)

        self.state_space = StateSpace({})

        # image observations
        if observation_type != ObservationType.Measurements:
            self.state_space['pixels'] = ImageObservationSpace(
                shape=self.env.observation_spec()['pixels'].shape, high=255)

        # measurements observations
        if observation_type != ObservationType.Image:
            measurements_space_size = 0
            measurements_names = []
            for observation_space_name, observation_space in self.env.observation_spec(
            ).items():
                if len(observation_space.shape) == 0:
                    measurements_space_size += 1
                    measurements_names.append(observation_space_name)
                elif len(observation_space.shape) == 1:
                    measurements_space_size += observation_space.shape[0]
                    measurements_names.extend([
                        "{}_{}".format(observation_space_name, i)
                        for i in range(observation_space.shape[0])
                    ])
            self.state_space['measurements'] = VectorObservationSpace(
                shape=measurements_space_size,
                measurements_names=measurements_names)

        # actions
        self.action_space = BoxActionSpace(
            shape=self.env.action_spec().shape[0],
            low=self.env.action_spec().minimum,
            high=self.env.action_spec().maximum)

        # initialize the state by getting a new state from the environment
        self.reset_internal_state(True)

        # render
        if self.is_rendered:
            image = self.get_rendered_image()
            scale = 1
            if self.human_control:
                scale = 2
            if not self.native_rendering:
                self.renderer.create_screen(image.shape[1] * scale,
                                            image.shape[0] * scale)
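A small sketch of the measurement flattening performed above, using a hypothetical observation spec shaped like dm_control's (one scalar entry and one 1-D entry); the spec contents are made up for illustration:

import numpy as np

# Hypothetical spec: 'height' is a scalar, 'velocity' is a 3-vector.
observation_spec = {'height': np.zeros(()), 'velocity': np.zeros((3,))}

measurements_space_size = 0
measurements_names = []
for name, spec in observation_spec.items():
    if len(spec.shape) == 0:
        measurements_space_size += 1
        measurements_names.append(name)
    elif len(spec.shape) == 1:
        measurements_space_size += spec.shape[0]
        measurements_names.extend(["{}_{}".format(name, i) for i in range(spec.shape[0])])

# measurements_space_size == 4
# measurements_names == ['height', 'velocity_0', 'velocity_1', 'velocity_2']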
Example #17
    def __init__(self, level: LevelSelection, seed: int, frame_skip: int,
                 human_control: bool, custom_reward_threshold: Union[int,
                                                                     float],
                 visualization_parameters: VisualizationParameters,
                 server_height: int, server_width: int, camera_height: int,
                 camera_width: int, verbose: bool,
                 experiment_suite: ExperimentSuite, config: str,
                 episode_max_time: int, allow_braking: bool,
                 quality: CarlaEnvironmentParameters.Quality,
                 cameras: List[CameraTypes], weather_id: List[int],
                 experiment_path: str,
                 separate_actions_for_throttle_and_brake: bool,
                 num_speedup_steps: int, max_speed: float, **kwargs):
        super().__init__(level, seed, frame_skip, human_control,
                         custom_reward_threshold, visualization_parameters)

        # server configuration
        self.server_height = server_height
        self.server_width = server_width
        self.port = get_open_port()
        self.host = 'localhost'
        self.map_name = CarlaLevel[level.upper()].value['map_name']
        self.map_path = CarlaLevel[level.upper()].value['map_path']
        self.experiment_path = experiment_path

        # client configuration
        self.verbose = verbose
        self.quality = quality
        self.cameras = cameras
        self.weather_id = weather_id
        self.episode_max_time = episode_max_time
        self.allow_braking = allow_braking
        self.separate_actions_for_throttle_and_brake = separate_actions_for_throttle_and_brake
        self.camera_width = camera_width
        self.camera_height = camera_height

        # setup server settings
        self.experiment_suite = experiment_suite
        self.config = config
        if self.config:
            # load settings from file
            with open(self.config, 'r') as fp:
                self.settings = fp.read()
        else:
            # hard coded settings
            self.settings = CarlaSettings()
            self.settings.set(SynchronousMode=True,
                              SendNonPlayerAgentsInfo=False,
                              NumberOfVehicles=15,
                              NumberOfPedestrians=30,
                              WeatherId=random.choice(
                                  force_list(self.weather_id)),
                              QualityLevel=self.quality.value,
                              SeedVehicles=seed,
                              SeedPedestrians=seed)
            if seed is None:
                self.settings.randomize_seeds()

            self.settings = self._add_cameras(self.settings, self.cameras,
                                              self.camera_width,
                                              self.camera_height)

        # open the server
        self.server = self._open_server()

        logging.disable(40)

        # open the client
        self.game = CarlaClient(self.host, self.port, timeout=99999999)
        self.game.connect()
        if self.experiment_suite:
            self.current_experiment_idx = 0
            self.current_experiment = self.experiment_suite.get_experiments()[
                self.current_experiment_idx]
            self.scene = self.game.load_settings(
                self.current_experiment.conditions)
        else:
            self.scene = self.game.load_settings(self.settings)

        # get available start positions
        self.positions = self.scene.player_start_spots
        self.num_positions = len(self.positions)
        self.current_start_position_idx = 0
        self.current_pose = 0

        # state space
        self.state_space = StateSpace({
            "measurements":
            VectorObservationSpace(
                4, measurements_names=["forward_speed", "x", "y", "z"])
        })
        for camera in self.scene.sensors:
            self.state_space[camera.name] = ImageObservationSpace(
                shape=np.array([self.camera_height, self.camera_width, 3]),
                high=255)

        # action space
        if self.separate_actions_for_throttle_and_brake:
            self.action_space = BoxActionSpace(
                shape=3,
                low=np.array([-1, 0, 0]),
                high=np.array([1, 1, 1]),
                descriptions=["steer", "gas", "brake"])
        else:
            self.action_space = BoxActionSpace(
                shape=2,
                low=np.array([-1, -1]),
                high=np.array([1, 1]),
                descriptions=["steer", "gas_and_brake"])

        # human control
        if self.human_control:
            # convert continuous action space to discrete
            self.steering_strength = 0.5
            self.gas_strength = 1.0
            self.brake_strength = 0.5
            # TODO: reverse order of actions
            self.action_space = PartialDiscreteActionSpaceMap(
                target_actions=[[0., 0.], [0., -self.steering_strength],
                                [0., self.steering_strength],
                                [self.gas_strength, 0.],
                                [-self.brake_strength, 0],
                                [self.gas_strength, -self.steering_strength],
                                [self.gas_strength, self.steering_strength],
                                [self.brake_strength, -self.steering_strength],
                                [self.brake_strength, self.steering_strength]],
                descriptions=[
                    'NO-OP', 'TURN_LEFT', 'TURN_RIGHT', 'GAS', 'BRAKE',
                    'GAS_AND_TURN_LEFT', 'GAS_AND_TURN_RIGHT',
                    'BRAKE_AND_TURN_LEFT', 'BRAKE_AND_TURN_RIGHT'
                ])

            # map keyboard keys to actions
            for idx, action in enumerate(self.action_space.descriptions):
                for key in key_map.keys():
                    if action == key:
                        self.key_to_action[key_map[key]] = idx

        self.num_speedup_steps = num_speedup_steps
        self.max_speed = max_speed

        # measurements
        self.autopilot = None
        self.planner = Planner(self.map_name)

        # env initialization
        self.reset_internal_state(True)

        # render
        if self.is_rendered:
            image = self.get_rendered_image()
            self.renderer.create_screen(image.shape[1], image.shape[0])
Example #18
    def __init__(self,
                 level: LevelSelection,
                 frame_skip: int,
                 visualization_parameters: VisualizationParameters,
                 seed: Union[None, int] = None,
                 human_control: bool = False,
                 custom_reward_threshold: Union[int, float] = None,
                 width: int = 84,
                 height: int = 84,
                 num_rotations: int = 24,
                 episode_length: int = 50000,
                 **kwargs):
        super().__init__(level, seed, frame_skip, human_control,
                         custom_reward_threshold, visualization_parameters)

        # seed
        if self.seed is not None:
            np.random.seed(self.seed)
            random.seed(self.seed)

        self.object = Object2D(width, height, num_rotations)
        self.last_result = self.object.reset()

        self.state_space = StateSpace({})

        # image observations
        self.state_space['observation'] = PlanarMapsObservationSpace(
            shape=np.array([width, height, 3]), low=0, high=255)

        # measurements observations
        measurements_space_size = 4
        measurements_names = [
            'x-position', 'y-position', 'rotation', 'num_covered_states'
        ]
        self.state_space['measurements'] = VectorObservationSpace(
            shape=measurements_space_size,
            measurements_names=measurements_names)

        # actions
        self.num_actions = 6
        self.action_space = DiscreteActionSpace(self.num_actions)

        self.steps = 0
        self.episode_length = episode_length

        self.width = width
        self.height = height
        self.num_rotations = num_rotations
        self.bin_size = 1
        self.covered_states = np.zeros(
            (int(self.width / self.bin_size), int(self.height / self.bin_size),
             self.num_rotations))
        self.num_covered_states = 0

        # render
        if self.is_rendered:
            image = np.squeeze(self.object.render())
            self.renderer.create_screen(image.shape[1], image.shape[0])

        # initialize the state by getting a new state from the environment
        self.reset_internal_state(True)
Example #19
# NN configuration
agent_params.network_wrappers['main'].learning_rate = 0.0001
agent_params.network_wrappers['main'].replace_mse_with_huber_loss = False
agent_params.network_wrappers['main'].softmax_temperature = 0.2

# ER size
agent_params.memory = EpisodicExperienceReplayParameters()
DATATSET_PATH = os.path.join(os.path.expanduser('~'),
                             'share/Data/MLA/L2P/L2_data_rnd_RL.csv')
agent_params.memory.load_memory_from_file_path = CsvDataset(
    DATATSET_PATH, True)

# E-Greedy schedule
agent_params.exploration.epsilon_schedule = LinearSchedule(0, 0, 10000)
agent_params.exploration.evaluation_epsilon = 0

spaces = SpacesDefinition(state=StateSpace(
    {'observation': VectorObservationSpace(shape=7)}),
                          goal=None,
                          action=DiscreteActionSpace(4),
                          reward=RewardSpace(1))

graph_manager = BatchRLGraphManager(
    agent_params=agent_params,
    env_params=None,
    spaces_definition=spaces,
    schedule_params=schedule_params,
    vis_params=VisualizationParameters(dump_signals_to_csv_every_x_episodes=1),
    reward_model_num_epochs=30,
    train_to_eval_ratio=0.4)
Example #20
    def __init__(self,
                 level: LevelSelection,
                 frame_skip: int,
                 visualization_parameters: VisualizationParameters,
                 target_success_rate: float = 1.0,
                 seed: Union[None, int] = None,
                 human_control: bool = False,
                 custom_reward_threshold: Union[int, float] = None,
                 screen_size: int = 84,
                 minimap_size: int = 64,
                 feature_minimap_maps_to_use: List = range(7),
                 feature_screen_maps_to_use: List = range(17),
                 observation_type:
                 StarcraftObservationType = StarcraftObservationType.Features,
                 disable_fog: bool = False,
                 auto_select_all_army: bool = True,
                 use_full_action_space: bool = False,
                 **kwargs):
        super().__init__(level, seed, frame_skip, human_control,
                         custom_reward_threshold, visualization_parameters,
                         target_success_rate)

        self.screen_size = screen_size
        self.minimap_size = minimap_size
        self.feature_minimap_maps_to_use = feature_minimap_maps_to_use
        self.feature_screen_maps_to_use = feature_screen_maps_to_use
        self.observation_type = observation_type
        self.features_screen_size = None
        self.feature_minimap_size = None
        self.rgb_screen_size = None
        self.rgb_minimap_size = None
        if self.observation_type == StarcraftObservationType.Features:
            self.features_screen_size = screen_size
            self.feature_minimap_size = minimap_size
        elif self.observation_type == StarcraftObservationType.RGB:
            self.rgb_screen_size = screen_size
            self.rgb_minimap_size = minimap_size
        self.disable_fog = disable_fog
        self.auto_select_all_army = auto_select_all_army
        self.use_full_action_space = use_full_action_space

        # step_mul is the equivalent of frame skipping. Not sure if it repeats actions in between or not though.
        self.env = sc2_env.SC2Env(
            map_name=self.env_id,
            step_mul=frame_skip,
            visualize=self.is_rendered,
            agent_interface_format=sc2_env.AgentInterfaceFormat(
                feature_dimensions=sc2_env.Dimensions(
                    screen=self.features_screen_size,
                    minimap=self.feature_minimap_size)
                # rgb_dimensions=sc2_env.Dimensions(
                #     screen=self.rgb_screen_size,
                #     minimap=self.rgb_screen_size
                # )
            ),
            # feature_screen_size=self.features_screen_size,
            # feature_minimap_size=self.feature_minimap_size,
            # rgb_screen_size=self.rgb_screen_size,
            # rgb_minimap_size=self.rgb_screen_size,
            disable_fog=disable_fog,
            random_seed=self.seed)

        # print all the available actions
        # self.env = available_actions_printer.AvailableActionsPrinter(self.env)

        self.reset_internal_state(True)
        """
        feature_screen:  [height_map, visibility_map, creep, power, player_id, player_relative, unit_type, selected,
                          unit_hit_points, unit_hit_points_ratio, unit_energy, unit_energy_ratio, unit_shields,
                          unit_shields_ratio, unit_density, unit_density_aa, effects]
        feature_minimap: [height_map, visibility_map, creep, camera, player_id, player_relative, selected]
        player:          [player_id, minerals, vespene, food_cap, food_army, food_workers, idle_worker_count,
                          army_count, warp_gate_count, larva_count]
        """
        self.screen_shape = np.array(
            self.env.observation_spec()[0]['feature_screen'])
        self.screen_shape[0] = len(self.feature_screen_maps_to_use)
        self.minimap_shape = np.array(
            self.env.observation_spec()[0]['feature_minimap'])
        self.minimap_shape[0] = len(self.feature_minimap_maps_to_use)
        self.state_space = StateSpace({
            "screen":
            PlanarMapsObservationSpace(shape=self.screen_shape,
                                       low=0,
                                       high=255,
                                       channels_axis=0),
            "minimap":
            PlanarMapsObservationSpace(shape=self.minimap_shape,
                                       low=0,
                                       high=255,
                                       channels_axis=0),
            "measurements":
            VectorObservationSpace(self.env.observation_spec()[0]["player"][0])
        })
        if self.use_full_action_space:
            action_identifiers = list(self.env.action_spec()[0].functions)
            num_action_identifiers = len(action_identifiers)
            action_arguments = [(arg.name, arg.sizes)
                                for arg in self.env.action_spec()[0].types]
            sub_action_spaces = [DiscreteActionSpace(num_action_identifiers)]
            for argument in action_arguments:
                for dimension in argument[1]:
                    sub_action_spaces.append(DiscreteActionSpace(dimension))
            self.action_space = CompoundActionSpace(sub_action_spaces)
        else:
            self.action_space = BoxActionSpace(2,
                                               0,
                                               self.screen_size - 1,
                                               ["X-Axis, Y-Axis"],
                                               default_action=np.array([
                                                   self.screen_size / 2,
                                                   self.screen_size / 2
                                               ]))

        self.target_success_rate = target_success_rate
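
The full action space above flattens the pysc2 function identifier and every argument dimension into one list of discrete sub-actions. Below is a minimal sketch, under assumed names and ordering, of mapping such a flattened action back into a pysc2 FunctionCall; it follows the way the sub-action spaces are built above and is not part of the original example.

from pysc2.lib import actions as sc2_actions

def compound_action_to_function_call(action_spec, flat_action):
    # flat_action[0] selects the function id; the remaining entries cover every
    # argument dimension of the full action space, in declaration order (assumed)
    function_id = int(flat_action[0])
    per_arg_values = {}
    cursor = 1
    for arg in action_spec.types:
        per_arg_values[arg.name] = [int(v) for v in flat_action[cursor:cursor + len(arg.sizes)]]
        cursor += len(arg.sizes)
    # keep only the arguments that the selected function actually uses
    args = [per_arg_values[arg.name] for arg in action_spec.functions[function_id].args]
    return sc2_actions.FunctionCall(function_id, args)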
Example No. 21
0
    def __init__(self, level: LevelSelection, seed: int, frame_skip: int,
                 human_control: bool, custom_reward_threshold: Union[int,
                                                                     float],
                 visualization_parameters: VisualizationParameters,
                 cameras: List[CameraTypes], **kwargs):
        super().__init__(level, seed, frame_skip, human_control,
                         custom_reward_threshold, visualization_parameters)

        self.cameras = cameras

        # load the emulator with the required level
        self.level = DoomLevel[level.upper()]
        local_scenarios_path = path.join(
            os.path.dirname(os.path.realpath(__file__)), 'doom')
        self.scenarios_dir = local_scenarios_path if 'COACH_LOCAL' in level \
            else path.join(environ.get('VIZDOOM_ROOT'), 'scenarios')

        self.game = vizdoom.DoomGame()
        self.game.load_config(path.join(self.scenarios_dir, self.level.value))
        self.game.set_window_visible(False)
        self.game.add_game_args("+vid_forcesurface 1")

        self.wait_for_explicit_human_action = True
        if self.human_control:
            self.game.set_screen_resolution(
                vizdoom.ScreenResolution.RES_640X480)
        elif self.is_rendered:
            self.game.set_screen_resolution(
                vizdoom.ScreenResolution.RES_320X240)
        else:
            # lower resolution since we actually take only 76x60 and we don't need to render
            self.game.set_screen_resolution(
                vizdoom.ScreenResolution.RES_160X120)

        self.game.set_render_hud(False)
        self.game.set_render_crosshair(False)
        self.game.set_render_decals(False)
        self.game.set_render_particles(False)
        for camera in self.cameras:
            if hasattr(self.game, 'set_{}_enabled'.format(camera.value[1])):
                getattr(self.game,
                        'set_{}_enabled'.format(camera.value[1]))(True)
        self.game.init()

        # actions
        actions_description = ['NO-OP']
        actions_description += [
            str(action).split(".")[1]
            for action in self.game.get_available_buttons()
        ]
        actions_description = actions_description[::-1]
        self.action_space = MultiSelectActionSpace(
            self.game.get_available_buttons_size(),
            max_simultaneous_selected_actions=1,
            descriptions=actions_description,
            allow_no_action_to_be_selected=True)

        # human control
        if self.human_control:
            # TODO: add this to the action space
            # map keyboard keys to actions
            for idx, action in enumerate(self.action_space.descriptions):
                if action in key_map.keys():
                    self.key_to_action[(key_map[action], )] = idx

        # states
        self.state_space = StateSpace({
            "measurements":
            VectorObservationSpace(
                self.game.get_state().game_variables.shape[0],
                measurements_names=[
                    str(m) for m in self.game.get_available_game_variables()
                ])
        })
        for camera in self.cameras:
            self.state_space[camera.value[0]] = ImageObservationSpace(
                shape=np.array([
                    self.game.get_screen_height(),
                    self.game.get_screen_width(), 3
                ]),
                high=255)

        # seed
        if seed is not None:
            self.game.set_seed(seed)
        self.reset_internal_state()

        # render
        if self.is_rendered:
            image = self.get_rendered_image()
            self.renderer.create_screen(image.shape[1], image.shape[0])
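
For reference, a MultiSelectActionSpace action here is a binary button vector with at most one button pressed, which is exactly the list of button states that vizdoom.DoomGame.make_action expects. The stepping helper below is an assumed usage sketch, not part of the example above.

def step_doom(game, action_vector, frame_skip=4):
    # make_action presses the given buttons, repeats them for `frame_skip` tics,
    # and returns the accumulated reward
    reward = game.make_action([int(b) for b in action_vector], frame_skip)
    done = game.is_episode_finished()
    observation = None if done else game.get_state().screen_buffer
    return observation, reward, done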
Example No. 22
0
    def __init__(self,
                 level: LevelSelection,
                 frame_skip: int,
                 visualization_parameters: VisualizationParameters,
                 additional_simulator_parameters: Dict[str, Any] = None,
                 seed: Union[None, int] = None,
                 human_control: bool = False,
                 custom_reward_threshold: Union[int, float] = None,
                 random_initialization_steps: int = 1,
                 max_over_num_frames: int = 1,
                 **kwargs):
        super().__init__(level, seed, frame_skip, human_control,
                         custom_reward_threshold, visualization_parameters)

        self.random_initialization_steps = random_initialization_steps
        self.max_over_num_frames = max_over_num_frames
        self.additional_simulator_parameters = additional_simulator_parameters

        # hide warnings
        gym.logger.set_level(40)
        """
        load and initialize environment
        environment ids can be defined in 3 ways:
        1. Native gym environments like BreakoutDeterministic-v0 for example
        2. Custom gym environments written and installed as python packages.
           This environments should have a python module with a class inheriting gym.Env, implementing the
           relevant functions (_reset, _step, _render) and defining the observation and action space
           For example: my_environment_package:MyEnvironmentClass will run an environment defined in the
           MyEnvironmentClass class
        3. Custom gym environments written as an independent module which is not installed.
           This environments should have a python module with a class inheriting gym.Env, implementing the
           relevant functions (_reset, _step, _render) and defining the observation and action space.
           For example: path_to_my_environment.sub_directory.my_module:MyEnvironmentClass will run an
           environment defined in the MyEnvironmentClass class which is located in the module in the relative path
           path_to_my_environment.sub_directory.my_module
        """
        if ':' in self.env_id:
            # custom environments
            if '/' in self.env_id or '.' in self.env_id:
                # environment in an absolute path module written as a unix path or in a relative path module
                # written as a python import path
                env_class = short_dynamic_import(self.env_id)
            else:
                # environment in a python package
                env_class = gym.envs.registration.load(self.env_id)

            # instantiate the environment
            if self.additional_simulator_parameters:
                self.env = env_class(**self.additional_simulator_parameters)
            else:
                self.env = env_class()
        else:
            self.env = gym.make(self.env_id)

        # for classic control we want to use the native renderer because otherwise we will get 2 renderer windows
        environment_to_always_use_with_native_rendering = [
            'classic_control', 'mujoco', 'robotics'
        ]
        self.native_rendering = self.native_rendering or \
                                any([env in str(self.env.unwrapped.__class__)
                                     for env in environment_to_always_use_with_native_rendering])
        if self.native_rendering:
            if hasattr(self, 'renderer'):
                self.renderer.close()

        # seed
        if self.seed is not None:
            self.env.seed(self.seed)
            np.random.seed(self.seed)
            random.seed(self.seed)

        # frame skip and max between consecutive frames
        self.is_robotics_env = 'robotics' in str(self.env.unwrapped.__class__)
        self.is_mujoco_env = 'mujoco' in str(self.env.unwrapped.__class__)
        self.is_atari_env = 'Atari' in str(self.env.unwrapped.__class__)
        self.timelimit_env_wrapper = self.env
        if self.is_atari_env:
            self.env.unwrapped.frameskip = 1  # this accesses the atari env that is wrapped with a timelimit wrapper env
            if self.env_id == "SpaceInvadersDeterministic-v4" and self.frame_skip == 4:
                screen.warning(
                    "Warning: The frame-skip for Space Invaders was automatically updated from 4 to 3. "
                    "This is following the DQN paper where it was noticed that a frame-skip of 3 makes the "
                    "laser rays disappear. To force frame-skip of 4, please use SpaceInvadersNoFrameskip-v4."
                )
                self.frame_skip = 3
            self.env = MaxOverFramesAndFrameskipEnvWrapper(
                self.env,
                frameskip=self.frame_skip,
                max_over_num_frames=self.max_over_num_frames)
        else:
            self.env.unwrapped.frameskip = self.frame_skip

        self.state_space = StateSpace({})

        # observations
        if not isinstance(self.env.observation_space,
                          gym.spaces.dict_space.Dict):
            state_space = {'observation': self.env.observation_space}
        else:
            state_space = self.env.observation_space.spaces

        for observation_space_name, observation_space in state_space.items():
            if len(observation_space.shape
                   ) == 3 and observation_space.shape[-1] == 3:
                # we assume gym image observations are RGB and their values are within 0-255
                self.state_space[
                    observation_space_name] = ImageObservationSpace(
                        shape=np.array(observation_space.shape),
                        high=255,
                        channels_axis=-1)
            else:
                self.state_space[
                    observation_space_name] = VectorObservationSpace(
                        shape=observation_space.shape[0],
                        low=observation_space.low,
                        high=observation_space.high)
        if 'desired_goal' in state_space.keys():
            self.goal_space = self.state_space['desired_goal']

        # actions
        if type(self.env.action_space) == gym.spaces.box.Box:
            self.action_space = BoxActionSpace(
                shape=self.env.action_space.shape,
                low=self.env.action_space.low,
                high=self.env.action_space.high)
        elif type(self.env.action_space) == gym.spaces.discrete.Discrete:
            actions_description = []
            if hasattr(self.env.unwrapped, 'get_action_meanings'):
                actions_description = self.env.unwrapped.get_action_meanings()
            self.action_space = DiscreteActionSpace(
                num_actions=self.env.action_space.n,
                descriptions=actions_description)

        if self.human_control:
            # TODO: add this to the action space
            # map keyboard keys to actions
            self.key_to_action = {}
            if hasattr(self.env.unwrapped, 'get_keys_to_action'):
                self.key_to_action = self.env.unwrapped.get_keys_to_action()

        # initialize the state by getting a new state from the environment
        self.reset_internal_state(True)

        # render
        if self.is_rendered:
            image = self.get_rendered_image()
            scale = 1
            if self.human_control:
                scale = 2
            if not self.native_rendering:
                self.renderer.create_screen(image.shape[1] * scale,
                                            image.shape[0] * scale)

        # measurements
        if self.env.spec is not None:
            self.timestep_limit = self.env.spec.timestep_limit
        else:
            self.timestep_limit = None

        # the info is only updated after the first step
        self.state = self.step(self.action_space.default_action).next_state
        self.state_space['measurements'] = VectorObservationSpace(
            shape=len(self.info.keys()))

        if self.env.spec and custom_reward_threshold is None:
            self.reward_success_threshold = self.env.spec.reward_threshold
            self.reward_space = RewardSpace(
                1, reward_success_threshold=self.reward_success_threshold)
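
The MaxOverFramesAndFrameskipEnvWrapper used above combines action repetition with a pixel-wise maximum over the last few frames (the usual Atari trick for flickering sprites). The standalone sketch below illustrates the idea under the old gym 4-tuple step API used in this example; the function name is illustrative and this is not the wrapper's actual implementation.

import numpy as np

def step_with_max_over_frames(env, action, frame_skip=4, max_over_num_frames=2):
    # repeat the action for `frame_skip` frames and return the pixel-wise
    # maximum of the last `max_over_num_frames` observations
    total_reward, frames, done, info = 0.0, [], False, {}
    for _ in range(frame_skip):
        observation, reward, done, info = env.step(action)
        frames.append(observation)
        total_reward += reward
        if done:
            break
    max_observation = np.max(np.stack(frames[-max_over_num_frames:]), axis=0)
    return max_observation, total_reward, done, info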
Example No. 23
0
    def __init__(
            self,
            level: LevelSelection,
            frame_skip: int,
            visualization_parameters: VisualizationParameters,
            target_success_rate: float = 1.0,
            seed: Union[None, int] = None,
            human_control: bool = False,
            observation_type: ObservationType = ObservationType.Measurements,
            custom_reward_threshold: Union[int, float] = None,
            **kwargs):
        """
        :param level: (str)
            A string representing the control suite level to run. This can also be a LevelSelection object.
            For example, cartpole:swingup.

        :param frame_skip: (int)
            The number of frames to skip between any two actions given by the agent. The action will be repeated
            for all the skipped frames.

        :param visualization_parameters: (VisualizationParameters)
            The parameters used for visualizing the environment, such as the render flag, storing videos etc.

        :param target_success_rate: (float)
            Stop the experiment once the given target success rate has been achieved.

        :param seed: (int)
            A seed to use for the random number generator when running the environment.

        :param human_control: (bool)
            A flag that allows controlling the environment using the keyboard keys.

        :param observation_type: (ObservationType)
            An enum which defines which observation to use. The current options are to use:
            * Measurements only - a vector of joint torques and similar measurements
            * Image only - an image of the environment as seen by a camera attached to the simulator
            * Measurements & Image - both type of observations will be returned in the state using the keys
            'measurements' and 'pixels' respectively.

        :param custom_reward_threshold: (float)
            Allows defining a custom reward threshold that will be used to decide when the agent has succeeded in the environment.

        """
        super().__init__(level, seed, frame_skip, human_control,
                         custom_reward_threshold, visualization_parameters,
                         target_success_rate)

        self.observation_type = observation_type

        # load and initialize environment
        domain_name, task_name = self.env_id.split(":")
        self.env = suite.load(domain_name=domain_name,
                              task_name=task_name,
                              task_kwargs={'random': seed})

        if observation_type != ObservationType.Measurements:
            self.env = pixels.Wrapper(
                self.env,
                pixels_only=observation_type == ObservationType.Image)

        # seed
        if self.seed is not None:
            np.random.seed(self.seed)
            random.seed(self.seed)

        self.state_space = StateSpace({})

        # image observations
        if observation_type != ObservationType.Measurements:
            self.state_space['pixels'] = ImageObservationSpace(
                shape=self.env.observation_spec()['pixels'].shape, high=255)

        # measurements observations
        if observation_type != ObservationType.Image:
            measurements_space_size = 0
            measurements_names = []
            for observation_space_name, observation_space in self.env.observation_spec(
            ).items():
                if len(observation_space.shape) == 0:
                    measurements_space_size += 1
                    measurements_names.append(observation_space_name)
                elif len(observation_space.shape) == 1:
                    measurements_space_size += observation_space.shape[0]
                    measurements_names.extend([
                        "{}_{}".format(observation_space_name, i)
                        for i in range(observation_space.shape[0])
                    ])
            self.state_space['measurements'] = VectorObservationSpace(
                shape=measurements_space_size,
                measurements_names=measurements_names)

        # actions
        self.action_space = BoxActionSpace(
            shape=self.env.action_spec().shape[0],
            low=self.env.action_spec().minimum,
            high=self.env.action_spec().maximum)

        # initialize the state by getting a new state from the environment
        self.reset_internal_state(True)

        # render
        if self.is_rendered:
            image = self.get_rendered_image()
            scale = 1
            if self.human_control:
                scale = 2
            if not self.native_rendering:
                self.renderer.create_screen(image.shape[1] * scale,
                                            image.shape[0] * scale)

        self.target_success_rate = target_success_rate
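
The measurements space above is built by flattening every scalar and 1-D entry of the dm_control observation spec into a single vector. A matching runtime helper for flattening an actual observation could look like the sketch below (an assumed helper, not part of the class above).

import numpy as np

def flatten_dm_control_observation(observation):
    # dm_control observations are ordered dicts of scalars and 1-D arrays;
    # concatenating them in order yields the "measurements" vector
    parts = []
    for name, value in observation.items():
        parts.append(np.asarray(value, dtype=np.float32).reshape(-1))
    return np.concatenate(parts) if parts else np.zeros(0, dtype=np.float32)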
Example No. 24
0
    def __init__(self,
                 level: LevelSelection,
                 seed: int,
                 frame_skip: int,
                 human_control: bool,
                 custom_reward_threshold: Union[int, float],
                 visualization_parameters: VisualizationParameters,
                 cameras: List[CameraTypes],
                 target_success_rate: float = 1.0,
                 **kwargs):
        """
        :param level: (str)
            A string representing the doom level to run. This can also be a LevelSelection object.
            This should be one of the levels defined in the DoomLevel enum. For example, HEALTH_GATHERING.

        :param seed: (int)
            A seed to use for the random number generator when running the environment.

        :param frame_skip: (int)
            The number of frames to skip between any two actions given by the agent. The action will be repeated
            for all the skipped frames.

        :param human_control: (bool)
            A flag that allows controlling the environment using the keyboard keys.

        :param custom_reward_threshold: (float)
            Allows defining a custom reward threshold that will be used to decide when the agent has succeeded in the environment.

        :param visualization_parameters: (VisualizationParameters)
            The parameters used for visualizing the environment, such as the render flag, storing videos etc.

        :param cameras: (List[CameraTypes])
            A list of camera types to use as observation in the state returned from the environment.
            Each camera should be an enum from CameraTypes, and there are several options like an RGB observation,
            a depth map, a segmentation map, and a top-down map of the environment.

        :param target_success_rate: (float)
            Stop the experiment once the given target success rate has been achieved.

        """
        super().__init__(level, seed, frame_skip, human_control,
                         custom_reward_threshold, visualization_parameters,
                         target_success_rate)

        self.cameras = cameras

        # load the emulator with the required level
        self.level = DoomLevel[level.upper()]
        local_scenarios_path = path.join(
            os.path.dirname(os.path.realpath(__file__)), 'doom')
        if 'COACH_LOCAL' in level:
            self.scenarios_dir = local_scenarios_path
        elif 'VIZDOOM_ROOT' in environ:
            self.scenarios_dir = path.join(environ.get('VIZDOOM_ROOT'),
                                           'scenarios')
        else:
            self.scenarios_dir = path.join(
                os.path.dirname(os.path.realpath(vizdoom.__file__)),
                'scenarios')

        self.game = vizdoom.DoomGame()
        self.game.load_config(path.join(self.scenarios_dir, self.level.value))
        self.game.set_window_visible(False)
        self.game.add_game_args("+vid_forcesurface 1")

        self.wait_for_explicit_human_action = True
        if self.human_control:
            self.game.set_screen_resolution(
                vizdoom.ScreenResolution.RES_640X480)
        elif self.is_rendered:
            self.game.set_screen_resolution(
                vizdoom.ScreenResolution.RES_320X240)
        else:
            # lower resolution since we actually take only 76x60 and we don't need to render
            self.game.set_screen_resolution(
                vizdoom.ScreenResolution.RES_160X120)

        self.game.set_render_hud(False)
        self.game.set_render_crosshair(False)
        self.game.set_render_decals(False)
        self.game.set_render_particles(False)
        for camera in self.cameras:
            if hasattr(self.game, 'set_{}_enabled'.format(camera.value[1])):
                getattr(self.game,
                        'set_{}_enabled'.format(camera.value[1]))(True)
        self.game.init()

        # actions
        actions_description = ['NO-OP']
        actions_description += [
            str(action).split(".")[1]
            for action in self.game.get_available_buttons()
        ]
        actions_description = actions_description[::-1]
        self.action_space = MultiSelectActionSpace(
            self.game.get_available_buttons_size(),
            max_simultaneous_selected_actions=1,
            descriptions=actions_description,
            allow_no_action_to_be_selected=True)

        # human control
        if self.human_control:
            # TODO: add this to the action space
            # map keyboard keys to actions
            for idx, action in enumerate(self.action_space.descriptions):
                if action in key_map.keys():
                    self.key_to_action[(key_map[action], )] = idx

        # states
        self.state_space = StateSpace({
            "measurements":
            VectorObservationSpace(
                self.game.get_state().game_variables.shape[0],
                measurements_names=[
                    str(m) for m in self.game.get_available_game_variables()
                ])
        })
        for camera in self.cameras:
            self.state_space[camera.value[0]] = ImageObservationSpace(
                shape=np.array([
                    self.game.get_screen_height(),
                    self.game.get_screen_width(), 3
                ]),
                high=255)

        # seed
        if seed is not None:
            self.game.set_seed(seed)
        self.reset_internal_state()

        # render
        if self.is_rendered:
            image = self.get_rendered_image()
            self.renderer.create_screen(image.shape[1], image.shape[0])

        self.target_success_rate = target_success_rate
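
The camera loop in this example enables extra ViZDoom buffers by name, calling set_{camera.value[1]}_enabled whenever such a setter exists on the DoomGame object. Assuming camera.value[1] holds buffer names such as 'depth_buffer', 'labels_buffer' or 'automap_buffer' (the enum contents are not shown in the example), the loop resolves to explicit calls like:

game.set_depth_buffer_enabled(True)    # depth map observation
game.set_labels_buffer_enabled(True)   # segmentation / labels map observation
game.set_automap_buffer_enabled(True)  # top-down automap observation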