def gen_obs_space(self, encoder_dict=None):
    state = self.state_function()
    if isinstance(state, dict):
        # One Box sub-space per state key; encoders may override the shape.
        ob_space_dict = OrderedDict()
        for space_name in state.keys():
            if encoder_dict is not None and "im" in space_name:
                if "im" in encoder_dict.keys():
                    shape = encoder_dict[space_name].get_output_shape_at(0)[1:]
                else:
                    shape = state[space_name].shape
            elif encoder_dict is not None and space_name == "forces":
                if "forces" in encoder_dict.keys():
                    shape = (encoder_dict[space_name].flow[-1].output_dim,)
                else:
                    shape = state["forces"].shape
            else:
                shape = state[space_name].shape
            high = np.inf * np.ones(shape)
            low = -high
            ob_space_dict[space_name] = spaces.Box(low, high, dtype=np.float32)
        self.observation_space = Dict(ob_space_dict)
    else:
        high = np.inf * np.ones(state.shape)
        low = -high
        self.observation_space = spaces.Box(low, high, dtype=np.float32)
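# For illustration, a minimal, hedged sketch of the Dict space the method
# above produces for a dict-valued state. The state keys and shapes here
# ('im_rgb', 'forces') are hypothetical stand-ins for self.state_function().
from collections import OrderedDict
import numpy as np
from gym import spaces
from gym.spaces import Dict

state = OrderedDict(im_rgb=np.zeros((64, 64, 3)), forces=np.zeros(6))
ob_space = Dict(OrderedDict(
    (name, spaces.Box(-np.inf * np.ones(arr.shape),
                      np.inf * np.ones(arr.shape), dtype=np.float32))
    for name, arr in state.items()
))
sample = ob_space.sample()  # dict with 'im_rgb' and 'forces' arrays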
def __init__(self, action_scale=1, frame_skip=5, reward_type='vel_distance',
             indicator_threshold=.1, fixed_goal=5, fix_goal=False, max_speed=6):
    self.quick_init(locals())
    MultitaskEnv.__init__(self)
    self.action_scale = action_scale
    MujocoEnv.__init__(self, self.model_name, frame_skip=frame_skip)
    bounds = self.model.actuator_ctrlrange.copy()
    low = bounds[:, 0]
    high = bounds[:, 1]
    self.action_space = Box(low=low, high=high)
    self.reward_type = reward_type
    self.indicator_threshold = indicator_threshold
    self.fixed_goal = fixed_goal
    self.fix_goal = fix_goal
    self._state_goal = None
    self.goal_space = Box(np.array(-1 * max_speed), np.array(max_speed))
    obs_size = self._get_env_obs().shape[0]
    high = np.inf * np.ones(obs_size)
    low = -high
    self.obs_space = Box(low, high)
    self.achieved_goal_space = Box(self.obs_space.low[8], self.obs_space.high[8])
    self.observation_space = Dict([
        ('observation', self.obs_space),
        ('desired_goal', self.goal_space),
        ('achieved_goal', self.achieved_goal_space),
        ('state_observation', self.obs_space),
        ('state_desired_goal', self.goal_space),
        ('state_achieved_goal', self.achieved_goal_space),
    ])
    self.reset()
def __init__(self, config):
    self.observation_space = config.get(
        "space", Tuple([Discrete(2), Dict({"a": Box(-1.0, 1.0, (2,))})]))
    self.action_space = self.observation_space
    self.flattened_action_space = flatten_space(self.action_space)
    self.episode_len = config.get("episode_len", 100)
def __init__(self, env):
    super().__init__(env)
    self._num_action_parts = 1
    self._action_params_offset = 0
    if not self.has_avatar:
        self._num_action_parts += 1
        self._action_params_offset = 1
    self._action_splits = np.zeros(self._num_action_parts)
    self._total_position_params = 0
    if not self.has_avatar:
        self._action_splits[0] = self.width * self.height
        self._total_position_params += self.width * self.height
    self._action_logit_offsets = {}
    total_action_params = 0
    for action_name in self.env.action_names:
        self._action_logit_offsets[action_name] = (
            total_action_params + self._total_position_params)
        total_action_params += self.num_action_ids[action_name]
    self._action_splits[self._action_params_offset] = total_action_params
    self._total_actions = int(np.sum(self._action_splits))
    self.action_space = MultiDiscrete(self._action_splits)
    self.observation_space = Dict({
        'obs': self.observation_space,
        'mask': Box(0, 1, shape=(self._total_actions,)),
    })
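# A hedged sketch (helper name and flat-logit layout are assumptions) of how
# a policy can apply the combined mask from the wrapper above: the flat mask
# and flat logits are split into contiguous segments, one per MultiDiscrete
# component, and invalid ids are suppressed before the argmax.
import numpy as np

def masked_argmax_per_part(logits, mask, action_splits):
    actions = []
    offset = 0
    for split in np.asarray(action_splits, dtype=int):
        seg_logits = logits[offset:offset + split]
        seg_mask = mask[offset:offset + split]
        # Invalidate masked-out ids before taking the argmax.
        seg_logits = np.where(seg_mask > 0, seg_logits, -np.inf)
        actions.append(int(np.argmax(seg_logits)))
        offset += split
    return actions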
def _augment_observation_space(self) -> None:
    """Update the observation space with augmented dimensions."""
    for agent, space in self.observation_spaces.items():
        if self._use_dict_obs_space:
            spaces = {
                'original_obs': space,
                'random_noise': Box(
                    low=np.full(self._latent_parameter_dim, -np.inf),
                    high=np.full(self._latent_parameter_dim, np.inf),
                    dtype=np.float32)
            }
            # This 'Dict' is gym.spaces.Dict
            new_space = Dict(spaces)
        else:
            if not isinstance(space, Box):
                raise TypeError("Cannot handle spaces that are not Box for now")
            if len(space.shape) > 1:
                raise TypeError("Cannot handle non-flat spaces for now")
            original_low = space.low
            original_high = space.high
            low = np.concatenate(
                [original_low, np.full(self._latent_parameter_dim, -np.inf)])
            high = np.concatenate(
                [original_high, np.full(self._latent_parameter_dim, np.inf)])
            new_space = Box(low=low, high=high, dtype=np.float32)
        self.observation_spaces[agent] = new_space
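# A companion sketch of how each step's observation could be augmented to
# match the space above. The _latent_parameters mapping (one vector of
# length _latent_parameter_dim per agent) is a hypothetical attribute.
def _augment_observation(self, agent, obs):
    latent = np.asarray(self._latent_parameters[agent], dtype=np.float32)
    if self._use_dict_obs_space:
        return {'original_obs': obs, 'random_noise': latent}
    return np.concatenate([obs, latent]).astype(np.float32)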
def __init__(self, env, keys_self, keys_copy=[]):
    super().__init__(env)
    self.keys_self = sorted(keys_self)
    self.keys_copy = sorted(keys_copy)
    self.n_agents = self.metadata['n_actors']
    new_spaces = {}
    for k, v in self.observation_space.spaces.items():
        # If obs is a self obs, then we only want to include other agents' obs,
        # as we will pass the self obs separately.
        assert len(v.shape) > 1, f'Obs {k} has shape {v.shape}'
        if 'mask' in k:
            if k in self.keys_self:
                new_spaces[k] = Box(low=v.low[:, 1:], high=v.high[:, 1:], dtype=v.dtype)
            else:
                new_spaces[k] = v
        elif k in self.keys_self:
            assert v.shape[0] == self.n_agents, (
                f"For self obs, obs dim 0 must equal number of agents. "
                f"{k} has shape {v.shape}")
            obs_shape = (v.shape[0], self.n_agents - 1, v.shape[1])
            lows = np.tile(v.low, self.n_agents - 1).reshape(obs_shape)
            highs = np.tile(v.high, self.n_agents - 1).reshape(obs_shape)
            new_spaces[k] = Box(low=lows, high=highs, dtype=v.dtype)
        elif k in self.keys_copy:
            new_spaces[k] = deepcopy(v)
        else:
            obs_shape = (v.shape[0], self.n_agents, v.shape[1])
            lows = np.tile(v.low, self.n_agents).reshape(obs_shape).transpose((1, 0, 2))
            highs = np.tile(v.high, self.n_agents).reshape(obs_shape).transpose((1, 0, 2))
            new_spaces[k] = Box(low=lows, high=highs, dtype=v.dtype)
    for k in self.keys_self:
        new_spaces[k + '_self'] = self.observation_space.spaces[k]
    self.observation_space = Dict(new_spaces)
def __init__(self, **kwargs):
    """
    kwargs
    ------
    apple_num : int
        Number of apples in a map. Default is 1.
    eat_apple : float
        Reward given when an apple is eaten. Default is 1.0.
    hit_wall : float
        Punishment (or reward?) given when hitting a wall. Default is 0.
    """
    # Turn left 45°, move forward, turn right 45°
    self.action_space = Discrete(3)
    self._done = False
    self.viewer = None
    self.engine = None
    kwargs.setdefault('apple_num', 1)
    kwargs.setdefault('eat_apple', 1.0)
    kwargs.setdefault('hit_wall', 0)
    self._options = kwargs
    self.max_step = 1000
    self.cur_step = 0
    self.image_size = (720, 720)
    self.seed()
    # 3 continuous inputs from both eyes
    self.observation_space = Dict({
        'Right': Box(0, 255, shape=(100, ec.CacheNum, 3), dtype=np.uint8),
        'Left': Box(0, 255, shape=(100, ec.CacheNum, 3), dtype=np.uint8),
    })
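# A usage sketch for the kwargs documented above. The class name AppleEnv is
# hypothetical; any omitted kwarg falls back to its documented default.
env = AppleEnv(apple_num=3, eat_apple=2.0, hit_wall=-0.5)
obs = env.reset()  # expected to be a dict with 'Right' and 'Left' images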
def __init__(self, x_dim=5, y_dim=5, **kwargs):
    # From https://github.com/openai/gym/blob/master/gym/envs/robotics/fetch/pick_and_place.py
    initial_qpos = {
        'robot0:slide0': 0.405,
        'robot0:slide1': 0.48,
        'robot0:slide2': 0.0,
        'object0:joint': [1.3, 0.75, 0.4, 1., 0., 0., 0.],
    }
    fetch_env.FetchEnv.__init__(
        self, MODEL_XML_PATH, has_object=True, block_gripper=False,
        n_substeps=20, gripper_extra_height=0.2, target_in_the_air=False,
        target_offset=0.0, obj_range=0.10, target_range=0.10,
        distance_threshold=0.05, initial_qpos=initial_qpos,
        reward_type="sparse")
    self.x_states = x_dim
    self.y_states = y_dim
    self._old_action_space = self.action_space
    self._old_observation_space = self.observation_space
    self.action_space = Discrete(5)  # Right, Up, Left, Down, Grab
    self.observation_space = Dict(
        dict(desired_goal=Discrete(x_dim * y_dim),
             achieved_goal=Discrete(x_dim * y_dim),
             observation=Dict(
                 dict(position=Discrete(x_dim * y_dim),
                      target_position=Discrete(x_dim * y_dim + 1)))))
    self.action_handlers = [
        self._move_function((0., 1., 0., -1.),
                            no_move_if=lambda s: s % self.x_states == 0),
        self._move_function((1., 0., 0., -1.),
                            no_move_if=lambda s: s < self.x_states),
        self._move_function((0., -1., 0., -1.),
                            no_move_if=lambda s: (s + 1) % self.x_states == 0),
        # Integer division so every cell in the last row is blocked.
        self._move_function((-1., 0., 0., -1.),
                            no_move_if=lambda s: s // self.x_states >= (self.y_states - 1)),
    ]
    self.action_handlers.append(self._grab)
def __init__(self,
             frame_skip=30,
             action_scale=10,
             keep_vel_in_obs=True,
             use_safety_box=False,
             fix_goal=False,
             fixed_goal=(0.05, 0.6, 0.15),
             reward_type='hand_distance',
             indicator_threshold=.05,
             goal_low=None,
             goal_high=None,
             ):
    self.quick_init(locals())
    MultitaskEnv.__init__(self)
    self.action_scale = action_scale
    MujocoEnv.__init__(self, self.model_name, frame_skip=frame_skip)
    bounds = self.model.actuator_ctrlrange.copy()
    low = bounds[:, 0]
    high = bounds[:, 1]
    self.action_space = Box(low=low, high=high)
    if goal_low is None:
        goal_low = np.array([-0.1, 0.5, 0.02])
    else:
        goal_low = np.array(goal_low)
    if goal_high is None:
        goal_high = np.array([0.1, 0.7, 0.2])
    else:
        goal_high = np.array(goal_high)
    self.safety_box = Box(goal_low, goal_high)
    self.keep_vel_in_obs = keep_vel_in_obs
    self.goal_space = Box(goal_low, goal_high)
    obs_size = self._get_env_obs().shape[0]
    high = np.inf * np.ones(obs_size)
    low = -high
    self.obs_space = Box(low, high)
    self.achieved_goal_space = Box(-np.inf * np.ones(3), np.inf * np.ones(3))
    self.observation_space = Dict([
        ('observation', self.obs_space),
        ('desired_goal', self.goal_space),
        ('achieved_goal', self.achieved_goal_space),
        ('state_observation', self.obs_space),
        ('state_desired_goal', self.goal_space),
        ('state_achieved_goal', self.achieved_goal_space),
    ])
    self.fix_goal = fix_goal
    self.fixed_goal = np.array(fixed_goal)
    self.use_safety_box = use_safety_box
    self.prev_qpos = self.init_angles.copy()
    self.reward_type = reward_type
    self.indicator_threshold = indicator_threshold
    goal = self.sample_goal()
    self._state_goal = goal['state_desired_goal']
    self.reset()
def __init__(
        self,
        wrapped_env,
        vae,
        pixel_cnn=None,
        vae_input_key_prefix='image',
        sample_from_true_prior=False,
        decode_goals=False,
        decode_goals_on_reset=True,
        render_goals=False,
        render_rollouts=False,
        reward_params=None,
        goal_sampling_mode="vae_prior",
        imsize=84,
        obs_size=None,
        norm_order=2,
        epsilon=20,
        presampled_goals=None,
):
    if reward_params is None:
        reward_params = dict()
    super().__init__(
        wrapped_env,
        vae,
        vae_input_key_prefix,
        sample_from_true_prior,
        decode_goals,
        decode_goals_on_reset,
        render_goals,
        render_rollouts,
        reward_params,
        goal_sampling_mode,
        imsize,
        obs_size,
        norm_order,
        epsilon,
        presampled_goals,
    )
    if isinstance(pixel_cnn, str):
        self.pixel_cnn = load_local_or_remote_file(pixel_cnn)
    self.representation_size = self.vae.representation_size
    self.imsize = self.vae.imsize
    latent_space = Box(
        -10 * np.ones(obs_size or self.representation_size),
        10 * np.ones(obs_size or self.representation_size),
        dtype=np.float32,
    )
    spaces = self.wrapped_env.observation_space.spaces
    spaces['observation'] = latent_space
    spaces['desired_goal'] = latent_space
    spaces['achieved_goal'] = latent_space
    spaces['latent_observation'] = latent_space
    spaces['latent_desired_goal'] = latent_space
    spaces['latent_achieved_goal'] = latent_space
    self.observation_space = Dict(spaces)
def __init__(
        self,
        goal_low=(-.25, .3, .12, -1.5708),
        goal_high=(.25, .6, .12, 0),
        action_reward_scale=0,
        reward_type='angle_difference',
        indicator_threshold=(.02, .03),
        fix_goal=False,
        fixed_goal=(0, .45, .12, -.25),
        reset_free=False,
        fixed_hand_z=0.12,
        hand_low=(-0.25, 0.3, .12),
        hand_high=(0.25, 0.6, .12),
        target_pos_scale=1,
        target_angle_scale=1,
        min_angle=-1.5708,
        max_angle=0,
        xml_path='sawyer_xyz/sawyer_door_pull.xml',
        **sawyer_xyz_kwargs
):
    self.quick_init(locals())
    self.model_name = get_asset_full_path(xml_path)
    SawyerXYZEnv.__init__(
        self,
        self.model_name,
        hand_low=hand_low,
        hand_high=hand_high,
        **sawyer_xyz_kwargs
    )
    MultitaskEnv.__init__(self)
    self.reward_type = reward_type
    self.indicator_threshold = indicator_threshold
    self.fix_goal = fix_goal
    self.fixed_goal = np.array(fixed_goal)
    self.goal_space = Box(np.array(goal_low), np.array(goal_high))
    self._state_goal = None
    self.fixed_hand_z = fixed_hand_z
    self.action_space = Box(np.array([-1, -1]), np.array([1, 1]))
    self.state_space = Box(
        np.concatenate((hand_low, [min_angle])),
        np.concatenate((hand_high, [max_angle])),
    )
    self.observation_space = Dict([
        ('observation', self.state_space),
        ('desired_goal', self.goal_space),
        ('achieved_goal', self.state_space),
        ('state_observation', self.state_space),
        ('state_desired_goal', self.goal_space),
        ('state_achieved_goal', self.state_space),
    ])
    self.action_reward_scale = action_reward_scale
    self.target_pos_scale = target_pos_scale
    self.target_angle_scale = target_angle_scale
    self.reset_free = reset_free
    self.door_angle_idx = self.model.get_joint_qpos_addr('doorjoint')
    self.reset()
def __init__(self):
    self.counter = 0
    self.observation_space = Dict({
        'test1d': Box(low=-1e3, high=1e3, dtype=np.float32, shape=[5]),
        'test2d': Box(low=-1e3, high=1e3, dtype=np.float32, shape=[5]),
        'test3d': Box(low=-1e3, high=1e3, dtype=np.float32, shape=[5]),
    })
    self.action_space = self.observation_space
def __init__(self):
    self.action_space = Discrete(max_action_size)
    self.observation_space = Dict({
        "state": REAL_OBSERVATION_SPACE,
        "action_mask": Box(low=0, high=1, shape=(max_action_size,)),
    })
def __init__(self,
             puck_low=None,
             puck_high=None,
             reward_type='hand_and_puck_distance',
             indicator_threshold=0.06,
             fix_goal=False,
             fixed_goal=(0.15, 0.6, 0.055, -0.15, 0.6),
             goal_low=None,
             goal_high=None,
             hide_goal_markers=False,
             **kwargs):
    self.quick_init(locals())
    MultitaskEnv.__init__(self)
    SawyerXYZEnv.__init__(self, model_name=self.model_name, **kwargs)
    if puck_low is None:
        puck_low = self.hand_low[:2]
    if puck_high is None:
        puck_high = self.hand_high[:2]
    puck_low = np.array(puck_low)
    puck_high = np.array(puck_high)
    self.puck_low = puck_low
    self.puck_high = puck_high
    if goal_low is None:
        goal_low = np.hstack((self.hand_low, puck_low))
    if goal_high is None:
        goal_high = np.hstack((self.hand_high, puck_high))
    goal_low = np.array(goal_low)
    goal_high = np.array(goal_high)
    self.reward_type = reward_type
    self.indicator_threshold = indicator_threshold
    self.fix_goal = fix_goal
    self.fixed_goal = np.array(fixed_goal)
    self._state_goal = None
    self.hide_goal_markers = hide_goal_markers
    self.action_space = Box(np.array([-1, -1, -1]), np.array([1, 1, 1]))
    self.hand_and_puck_space = Box(
        np.hstack((self.hand_low, puck_low)),
        np.hstack((self.hand_high, puck_high)),
    )
    self.hand_space = Box(self.hand_low, self.hand_high)
    self.observation_space = Dict([
        ('observation', self.hand_and_puck_space),
        ('desired_goal', self.hand_and_puck_space),
        ('achieved_goal', self.hand_and_puck_space),
        ('state_observation', self.hand_and_puck_space),
        ('state_desired_goal', self.hand_and_puck_space),
        ('state_achieved_goal', self.hand_and_puck_space),
        ('proprio_observation', self.hand_space),
        ('proprio_desired_goal', self.hand_space),
        ('proprio_achieved_goal', self.hand_space),
    ])
    self.init_puck_z = self.get_puck_pos()[2]
def __init__(
        self,
        wrapped_env,
        encoder: Encoder,
        encoder_input_prefix,
        key_prefix='encoder',
        reward_mode='encoder_distance',
):
    """
    :param wrapped_env:
    :param encoder:
    :param encoder_input_prefix:
    :param key_prefix:
    :param reward_mode:
        - 'encoder_distance': l1 distance in encoder space
        - 'vectorized_encoder_distance': vectorized l1 distance in encoder
          space, i.e. the negative absolute difference per dimension
        - 'env': use the wrapped env's reward
    """
    super().__init__(wrapped_env)
    if reward_mode not in {
        self.ENCODER_DISTANCE_REWARD,
        self.VECTORIZED_ENCODER_DISTANCE_REWARD,
        self.ENV_REWARD,
    }:
        raise ValueError(reward_mode)
    self._encoder = encoder
    self._encoder_input_obs_key = '{}_observation'.format(encoder_input_prefix)
    self._encoder_input_desired_goal_key = '{}_desired_goal'.format(
        encoder_input_prefix)
    self._encoder_input_achieved_goal_key = '{}_achieved_goal'.format(
        encoder_input_prefix)
    self._reward_mode = reward_mode
    spaces = self.wrapped_env.observation_space.spaces
    latent_space = Box(
        encoder.min_embedding,
        encoder.max_embedding,
        dtype=np.float32,
    )
    self._embedding_size = encoder.min_embedding.size
    self._obs_key = '{}_observation'.format(key_prefix)
    self._desired_goal_key = '{}_desired_goal'.format(key_prefix)
    self._achieved_goal_key = '{}_achieved_goal'.format(key_prefix)
    self._distance_name = '{}_distance'.format(key_prefix)
    self._key_prefix = key_prefix
    self._desired_goal = {
        self._desired_goal_key: np.zeros_like(latent_space.sample())
    }
    spaces[self._obs_key] = latent_space
    spaces[self._desired_goal_key] = latent_space
    spaces[self._achieved_goal_key] = latent_space
    self.observation_space = Dict(spaces)
    self._goal_sampling_mode = 'env'
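# A minimal sketch (hypothetical helper, not part of the wrapper) of the two
# encoder-distance reward modes described in the docstring above.
import numpy as np

def encoder_distance_reward(achieved_latent, desired_latent, vectorized=False):
    diff = np.abs(achieved_latent - desired_latent)
    if vectorized:
        return -diff           # one (negative) distance per latent dimension
    return -diff.sum(axis=-1)  # scalar negative l1 distance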
def __init__(
        self,
        wrapped_env,
        vae,
        vae_input_key_prefix='image',
        sample_from_true_prior=False,
        decode_goals=False,
        decode_goals_on_reset=True,
        render_goals=False,
        render_rollouts=False,
        reward_params=None,
        goal_sampling_mode="vae_prior",
        num_goals_to_presample=0,
        presampled_goals_path=None,
        imsize=48,
        obs_size=None,
        norm_order=2,
        epsilon=20,
        presampled_goals=None,
):
    if reward_params is None:
        reward_params = dict()
    super().__init__(
        wrapped_env,
        vae,
        vae_input_key_prefix,
        sample_from_true_prior,
        decode_goals,
        decode_goals_on_reset,
        render_goals,
        render_rollouts,
        reward_params,
        goal_sampling_mode,
        presampled_goals_path,
        num_goals_to_presample,
        imsize,
        obs_size,
        norm_order,
        epsilon,
        presampled_goals,
    )
    self.representation_size = self.vae.representation_size
    latent_space = Box(
        -10 * np.ones(obs_size or self.representation_size),
        10 * np.ones(obs_size or self.representation_size),
        dtype=np.float32,
    )
    spaces = self.wrapped_env.observation_space.spaces
    spaces['observation'] = latent_space
    spaces['desired_goal'] = latent_space
    spaces['achieved_goal'] = latent_space
    spaces['latent_observation'] = latent_space
    spaces['latent_desired_goal'] = latent_space
    spaces['latent_achieved_goal'] = latent_space
    self.observation_space = Dict(spaces)
def __init__(
        self,
        frame_skip=30,
        goal_low=(-.1, .5, .06, 0),
        goal_high=(.1, .6, .06, .5),
        action_reward_scale=0,
        pos_action_scale=1 / 100,
        reward_type='angle_difference',
        indicator_threshold=(.02, .03),
        fix_goal=False,
        fixed_goal=(0.15, 0.6, 0.3, 0),
        target_pos_scale=0.25,
        target_angle_scale=1,
        max_x_pos=.1,
        max_y_pos=.7,
):
    self.quick_init(locals())
    MultitaskEnv.__init__(self)
    MujocoEnv.__init__(self, self.model_name, frame_skip=frame_skip)
    self.reward_type = reward_type
    self.indicator_threshold = indicator_threshold
    self.fix_goal = fix_goal
    self.fixed_goal = np.array(fixed_goal)
    self.goal_space = Box(
        np.array(goal_low),
        np.array(goal_high),
    )
    self._state_goal = None
    self.action_space = Box(np.array([-1, -1, -1, -1]), np.array([1, 1, 1, 1]))
    max_angle = 1.5708
    self.state_space = Box(
        np.array([-1, -1, -1, -max_angle]),
        np.array([1, 1, 1, max_angle]),
    )
    self.observation_space = Dict([
        ('observation', self.state_space),
        ('desired_goal', self.state_space),
        ('achieved_goal', self.state_space),
        ('state_observation', self.state_space),
        ('state_desired_goal', self.state_space),
        ('state_achieved_goal', self.state_space),
    ])
    self._pos_action_scale = pos_action_scale
    self.target_pos_scale = target_pos_scale
    self.action_reward_scale = action_reward_scale
    self.target_angle_scale = target_angle_scale
    self.max_x_pos = max_x_pos
    self.max_y_pos = max_y_pos
    self.min_y_pos = .5
    self.reset()
    self.reset_mocap_welds()
def __init__(self,
             obj_low=None,
             obj_high=None,
             reward_type='hand_and_obj_distance',
             indicator_threshold=0.06,
             obj_init_pos=(0, 0.65, 0.03),
             fix_goal=False,
             fixed_goal=(0.15, 0.6, 0.055, -0.15, 0.6),
             goal_low=None,
             goal_high=None,
             hide_goal_markers=False,
             hide_arm=False,
             num_objects=1,
             **kwargs):
    self.quick_init(locals())
    MultitaskEnv.__init__(self)
    self.hide_arm = hide_arm
    SawyerXYZEnv.__init__(self, model_name=self.model_name, **kwargs)
    self.num_objects = num_objects
    if obj_low is None:
        obj_low = self.hand_low
    if obj_high is None:
        obj_high = self.hand_high
    if goal_low is None:
        goal_low = np.hstack((self.hand_low, np.tile(obj_low, num_objects)))
    if goal_high is None:
        goal_high = np.hstack((self.hand_high, np.tile(obj_high, num_objects)))
    self.reward_type = reward_type
    self.indicator_threshold = indicator_threshold
    self.obj_init_pos = np.array(obj_init_pos)
    self.fix_goal = fix_goal
    self.fixed_goal = np.array(fixed_goal)
    self._state_goal = None
    self.hide_goal_markers = hide_goal_markers
    self.action_space = Box(
        np.array([-1, -1, -1, -1]),
        np.array([1, 1, 1, 1]),
    )
    self.hand_and_obj_space = Box(
        np.hstack((self.hand_low, np.tile(obj_low, num_objects))),
        np.hstack((self.hand_high, np.tile(obj_high, num_objects))),
    )
    self.observation_space = Dict([
        ('observation', self.hand_and_obj_space),
        ('desired_goal', self.hand_and_obj_space),
        ('achieved_goal', self.hand_and_obj_space),
        ('state_observation', self.hand_and_obj_space),
        ('state_desired_goal', self.hand_and_obj_space),
        ('state_achieved_goal', self.hand_and_obj_space),
    ])
def __init__(self,
             env_name=None,
             mode=None,
             act_dim=None,
             reward_type='threshold',
             img_dim=32,
             camera_id=0,
             path_length=200,
             threshold=0.8,
             gpu_id=0,
             **kwargs):
    self.dm_env = suite.load(env_name, mode)
    self.task = self.dm_env._task
    self.camera_id = camera_id
    self.physics = self.dm_env.physics
    self.max_steps = path_length
    self.threshold = threshold
    self.reward_type = reward_type
    self.device = torch.device(
        "cuda:" + str(gpu_id) if torch.cuda.is_available() else "cpu")
    # Actions are always between -1.0 and 1.0.
    self.action_space = Box(
        high=1.0,
        low=-1.0,
        shape=self.dm_env.action_spec().shape if act_dim is None else (act_dim,))
    # Observations are, in principle, unbounded.
    state_space = Box(high=float("inf"), low=-float("inf"), shape=(128,))
    goal_space = Box(high=float("inf"), low=-float("inf"), shape=(128,))
    self.observation_space = Dict([
        ('observation', state_space),
        ('desired_goal', goal_space),
        ('achieved_goal', goal_space),
        # ('state_observation', state_space),
        # ('state_desired_goal', goal_space),
        # ('state_achieved_goal', goal_space),
    ])
    self.render_kwargs = dict(width=img_dim, height=img_dim, camera_id=self.camera_id)
    self.model1 = VAE(img_dim=img_dim, image_channels=3, z_dim=128, device=self.device)
    self.model2 = VAE(img_dim=img_dim, image_channels=3, z_dim=128, device=self.device)
    self.model1 = self.model1.to(self.device)
    self.model2 = self.model2.to(self.device)
def __init__(self, config: Config):
    spaces = {
        "pointgoal": Box(
            low=np.finfo(np.float32).min,
            high=np.finfo(np.float32).max,
            shape=(2,),
            dtype=np.float32,
        )
    }
    if config.INPUT_TYPE in ["depth", "rgbd"]:
        spaces["depth"] = Box(
            low=0,
            high=1,
            shape=(config.RESOLUTION, config.RESOLUTION, 1),
            dtype=np.float32,
        )
    if config.INPUT_TYPE in ["rgb", "rgbd"]:
        spaces["rgb"] = Box(
            low=0,
            high=255,
            shape=(config.RESOLUTION, config.RESOLUTION, 3),
            dtype=np.uint8,
        )
    observation_spaces = Dict(spaces)
    action_spaces = Discrete(4)

    self.device = torch.device("cuda:{}".format(config.PTH_GPU_ID))
    self.hidden_size = config.HIDDEN_SIZE
    random.seed(config.RANDOM_SEED)
    torch.random.manual_seed(config.RANDOM_SEED)
    torch.backends.cudnn.deterministic = True

    self.actor_critic = Policy(
        observation_space=observation_spaces,
        action_space=action_spaces,
        hidden_size=self.hidden_size,
    )
    self.actor_critic.to(self.device)

    if config.MODEL_PATH:
        ckpt = torch.load(config.MODEL_PATH, map_location=self.device)
        # Filter only actor_critic weights.
        self.actor_critic.load_state_dict({
            k.replace("actor_critic.", ""): v
            for k, v in ckpt["state_dict"].items()
            if "actor_critic" in k
        })
    else:
        habitat.logger.error(
            "Model checkpoint wasn't loaded, evaluating a random model.")

    self.test_recurrent_hidden_states = None
    self.not_done_masks = None
class AvailActionsTestEnv(MultiAgentEnv):
    num_actions = 10
    action_space = Discrete(num_actions)
    observation_space = Dict({
        "obs": Dict({
            "test": Dict({
                "a": Discrete(2),
                "b": MultiDiscrete([2, 3, 4]),
            }),
            "state": MultiDiscrete([2, 2, 2]),
        }),
        "action_mask": Box(0, 1, (num_actions,)),
    })

    def __init__(self, env_config):
        self.state = None
        self.avail = env_config["avail_action"]
        self.action_mask = np.array([0] * self.num_actions)
        self.action_mask[env_config["avail_action"]] = 1

    def reset(self):
        self.state = 0
        return {
            "agent_1": {
                "obs": self.observation_space["obs"].sample(),
                "action_mask": self.action_mask,
            }
        }

    def step(self, action_dict):
        if self.state > 0:
            assert action_dict["agent_1"] == self.avail, \
                "Failed to obey available actions mask!"
        self.state += 1
        rewards = {"agent_1": 1}
        obs = {
            "agent_1": {
                "obs": self.observation_space["obs"].sample(),
                "action_mask": self.action_mask,
            }
        }
        dones = {"__all__": self.state > 20}
        return obs, rewards, dones, {}
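# A short driver sketch showing the mask being obeyed; it only assumes the
# reset/step API used by the test env above.
env = AvailActionsTestEnv({"avail_action": 3})
obs = env.reset()
done = False
while not done:
    # A mask-respecting policy may only pick actions whose mask entry is 1.
    action = int(np.argmax(obs["agent_1"]["action_mask"]))
    obs, rewards, dones, info = env.step({"agent_1": action})
    done = dones["__all__"]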
def test_seed_Dict():
    test_space = Dict(
        {
            "a": Box(low=0, high=1, shape=(3, 3)),
            "b": Dict(
                {
                    "b_1": Box(low=-100, high=100, shape=(2,)),
                    "b_2": Box(low=-1, high=1, shape=(2,)),
                }
            ),
            "c": Discrete(5),
        }
    )
    seed_dict = {
        "a": 0,
        "b": {
            "b_1": 1,
            "b_2": 2,
        },
        "c": 3,
    }
    test_space.seed(seed_dict)

    # "Unpack" the dict sub-spaces into individual spaces
    a = Box(low=0, high=1, shape=(3, 3))
    a.seed(0)
    b_1 = Box(low=-100, high=100, shape=(2,))
    b_1.seed(1)
    b_2 = Box(low=-1, high=1, shape=(2,))
    b_2.seed(2)
    c = Discrete(5)
    c.seed(3)

    for _ in range(10):
        test_s = test_space.sample()
        a_s = a.sample()
        assert (test_s["a"] == a_s).all()
        b_1_s = b_1.sample()
        assert (test_s["b"]["b_1"] == b_1_s).all()
        b_2_s = b_2.sample()
        assert (test_s["b"]["b_2"] == b_2_s).all()
        c_s = c.sample()
        assert test_s["c"] == c_s
def observation_space(self):
    return Dict({
        "x": self._x_space,
        "y": self._y_space,
        "theta": self._theta_space,
        "item": self._item_space,
        "command": self._command_space,
        "completed_tasks": self._task_completed_space,
    })
def set_image_obsSpace(self):
    if self.camera_name == 'robotview_zoomed':
        self.observation_space = Dict([
            # We append the robot config to the image.
            ('img_observation', Box(0, 1,
                                    (3 * (48 * 64) + self.action_space.shape[0],),
                                    dtype=np.float32)),
            ('state_observation', self.hand_env_space),
        ])
def __init__(self, env):
    super().__init__(env)
    self.n_agents = self.metadata['n_actors']
    # Split the flat action bounds into one Box per agent.
    lows = np.split(self.action_space.low, self.n_agents)
    highs = np.split(self.action_space.high, self.n_agents)
    self.action_space = Dict({
        'action_movement': Tuple([
            Box(low=low, high=high, dtype=self.action_space.dtype)
            for low, high in zip(lows, highs)
        ])
    })
def __init__(self,
             rules_file,
             leaf_threshold=16,
             max_cuts_per_dimension=5,
             max_actions_per_episode=5000,
             max_depth=100,
             partition_mode=None,
             reward_shape="linear",
             depth_weight=1.0,
             dump_dir=None):
    self.reward_shape = {
        "linear": lambda x: x,
        "log": lambda x: np.log(x),
    }[reward_shape]
    assert partition_mode in [None, "simple", "efficuts", "cutsplit"]
    self.partition_enabled = partition_mode == "simple"
    if partition_mode in ["efficuts", "cutsplit"]:
        self.force_partition = partition_mode
    else:
        self.force_partition = False
    self.dump_dir = dump_dir and os.path.expanduser(dump_dir)
    if self.dump_dir:
        os.makedirs(self.dump_dir, exist_ok=True)
    self.best_time = float("inf")
    self.best_space = float("inf")
    self.depth_weight = depth_weight
    self.rules_file = rules_file
    self.rules = load_rules_from_file(rules_file)
    self.leaf_threshold = leaf_threshold
    self.max_actions_per_episode = max_actions_per_episode
    self.max_depth = max_depth
    self.num_actions = None
    self.tree = None
    self.node_map = None
    self.child_map = None
    self.max_cuts_per_dimension = max_cuts_per_dimension
    if self.partition_enabled:
        self.num_part_levels = NUM_PART_LEVELS
    else:
        self.num_part_levels = 0
    self.action_space = Tuple([
        Discrete(NUM_DIMENSIONS),
        Discrete(max_cuts_per_dimension + self.num_part_levels),
    ])
    self.observation_space = Dict({
        "real_obs": Box(0, 1, (278,), dtype=np.float32),
        "action_mask": Box(
            0, 1,
            (NUM_DIMENSIONS + max_cuts_per_dimension + self.num_part_levels,),
            dtype=np.float32),
    })
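# A hedged sketch (function name is an assumption) of how a policy could
# split the flat "action_mask" above back into per-component masks for the
# Tuple action space: the first NUM_DIMENSIONS entries mask the cut
# dimension, the remainder masks the cut/partition level.
def split_action_mask(mask, num_dimensions, num_cut_levels):
    dim_mask = mask[:num_dimensions]
    cut_mask = mask[num_dimensions:num_dimensions + num_cut_levels]
    return dim_mask, cut_mask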
def __init__(
        self,
        image_size=48,
        **kwargs
):
    PointmassEnv.__init__(self, **kwargs)
    self.image_size = image_size
    self.observation_space = Dict({
        **self.observation_space,
        "image_observation": Box(
            np.zeros([image_size, image_size, 3]),
            np.ones([image_size, image_size, 3])),
    })
def __init__(
        self,
        obj_low=None,
        obj_high=None,
        reward_type='hand_and_obj_distance',
        indicator_threshold=0.06,
        obj_init_pos=(0, 0.6, 0.02),
        fix_goal=True,
        fixed_goal=(1.571,),  # target angle for the door
        goal_low=None,
        goal_high=None,
        hide_goal_markers=False,
        **kwargs):
    self.quick_init(locals())
    MultitaskEnv.__init__(self)
    SawyerXYZEnv.__init__(self, model_name=self.model_name, **kwargs)
    if obj_low is None:
        obj_low = self.hand_low
    if obj_high is None:
        obj_high = self.hand_high
    if goal_low is None:
        goal_low = np.hstack((self.hand_low, obj_low))
    if goal_high is None:
        goal_high = np.hstack((self.hand_high, obj_high))
    self.max_path_length = 150
    self.reward_type = reward_type
    self.indicator_threshold = indicator_threshold
    self.obj_init_pos = np.array(obj_init_pos)
    self.fix_goal = fix_goal
    self.fixed_goal = np.array(fixed_goal)
    self.hide_goal_markers = hide_goal_markers
    self.action_space = Box(
        np.array([-1, -1, -1, -1]),
        np.array([1, 1, 1, 1]),
    )
    self.hand_and_obj_space = Box(
        np.hstack((self.hand_low, obj_low)),
        np.hstack((self.hand_high, obj_high)),
    )
    self.observation_space = Dict([
        ('observation', self.hand_and_obj_space),
        ('desired_goal', self.hand_and_obj_space),
        ('achieved_goal', self.hand_and_obj_space),
        ('state_observation', self.hand_and_obj_space),
        ('state_desired_goal', self.hand_and_obj_space),
        ('state_achieved_goal', self.hand_and_obj_space),
    ])
    self.reset()
def test_convert_element_to_space_type(self):
    """Test if the space converter works for all element/space permutations."""
    box_space = Box(low=-1, high=1, shape=(2,))
    discrete_space = Discrete(2)
    multi_discrete_space = MultiDiscrete([2, 2])
    multi_binary_space = MultiBinary(2)
    tuple_space = Tuple((box_space, discrete_space))
    dict_space = Dict({
        "box": box_space,
        "discrete": discrete_space,
        "multi_discrete": multi_discrete_space,
        "multi_binary": multi_binary_space,
        "dict_space": Dict({
            "box2": box_space,
            "discrete2": discrete_space,
        }),
        "tuple_space": tuple_space,
    })

    box_space_unconverted = box_space.sample().astype(np.float64)
    multi_discrete_unconverted = multi_discrete_space.sample().astype(np.int32)
    multi_binary_unconverted = multi_binary_space.sample().astype(np.int32)
    tuple_unconverted = (box_space_unconverted, float(0))
    modified_element = {
        "box": box_space_unconverted,
        "discrete": float(0),
        "multi_discrete": multi_discrete_unconverted,
        "multi_binary": multi_binary_unconverted,
        "tuple_space": tuple_unconverted,
        "dict_space": {
            "box2": box_space_unconverted,
            "discrete2": float(0),
        },
    }

    element_with_correct_types = convert_element_to_space_type(
        modified_element, dict_space.sample())
    assert dict_space.contains(element_with_correct_types)
def test_compatibility():
    # Create all testable spaces.
    spaces = [
        Box(-1.0, 1.0, (20,), np.float32),
        Box(-1.0, 1.0, (40,), np.float32),
        Box(-1.0, 1.0, (20, 20), np.float32),
        Box(-1.0, 1.0, (20, 24), np.float32),
        Dict({
            'A': Box(-1.0, 1.0, (20,), np.float32),
            'B': Box(-1.0, 1.0, (20,), np.float32),
        }),
        Dict({
            'A': Box(-1.0, 1.0, (20,), np.float32),
            'B': Box(-1.0, 1.0, (40,), np.float32),
        }),
        Dict({
            'A': Box(-1.0, 1.0, (40,), np.float32),
            'B': Box(-1.0, 1.0, (20,), np.float32),
        }),
        Tuple([
            Box(-1.0, 1.0, (20,), np.float32),
            Box(-1.0, 1.0, (20,), np.float32),
        ]),
        Tuple([
            Box(-1.0, 1.0, (40,), np.float32),
            Box(-1.0, 1.0, (20,), np.float32),
        ]),
        Tuple([
            Box(-1.0, 1.0, (20,), np.float32),
            Box(-1.0, 1.0, (40,), np.float32),
        ]),
    ]
    # Iterate over and compare all combinations.
    spaces_range = range(len(spaces))
    for x in spaces_range:
        space1 = spaces[x]
        for y in spaces_range:
            # Create a copy of the space with randomized bounds.
            space2 = randomize_space_bounds(spaces[y])
            expected_compatible = x == y
            actual_compatible = space1.compatible(space2)
            assert expected_compatible == actual_compatible