def __init__(self, params=None):
    # Avoid a mutable default argument; fall back to an empty dict.
    params = dict(params) if params is not None else {}
    params.setdefault('obs_shape', (64, 36))  # not for size b/c don't want aliasing
    params.setdefault('collision_reward_only', False)
    params.setdefault('steer_limits', [-30., 30.])
    params.setdefault('speed_limits', [2., 2.])
    params.setdefault('use_depth', False)
    params.setdefault('do_back_up', False)

    self._model_path = params.get(
        'model_path',
        os.path.join(os.path.dirname(os.path.abspath(__file__)),
                     'models/cylinder.egg'))
    self._obs_shape = params['obs_shape']

    CarEnv.__init__(self, params=params)

    self._fixed_speed = (params['speed_limits'][0] == params['speed_limits'][1])
    assert self._fixed_speed
    self._steer_limits = params['steer_limits']
    self._speed_limits = params['speed_limits']
    if self._fixed_speed:
        # Speed dimension is pinned to 1, so only steering is free.
        self.action_space = Box(low=np.array([-1., 1.]), high=np.array([1., 1.]))
    else:
        # Both steering and speed are free in [-1, 1]. (Fixed: the original
        # had low == high == [-1., 1.] here, a degenerate box.)
        self.action_space = Box(low=np.array([-1., -1.]), high=np.array([1., 1.]))
    self._unnormalized_action_space = Box(
        low=np.array([self._steer_limits[0], self._speed_limits[0]]),
        high=np.array([self._steer_limits[1], self._speed_limits[1]]))
    self.observation_space = Box(low=0, high=255,
                                 shape=tuple(self._get_observation().shape))
    self._collision_reward_only = params['collision_reward_only']
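# Usage sketch (an assumption, not from the source): how a normalized action
# in `action_space` would map onto `_unnormalized_action_space` via the usual
# affine rescaling. `env` and the helper name are hypothetical.
def unnormalize_action(env, action):
    """Map action from [-1, 1]^2 to [steer_low, steer_high] x [speed_low, speed_high]."""
    low = env._unnormalized_action_space.low
    high = env._unnormalized_action_space.high
    return low + 0.5 * (action + 1.0) * (high - low)

# e.g. with steer_limits=[-30, 30] and speed_limits=[2, 2], the fixed-speed
# action [0., 1.] maps to steer=0.0, speed=2.0.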
def transformed_observation_space(self, wrapped_observation_space):
    if isinstance(wrapped_observation_space, Box):
        if self.use_activation:
            return Box(0.0, 1.0, self.out_shape)
        else:
            return Box(0.0, 10.0, self.out_shape)
    else:
        raise NotImplementedError(
            "Currently only support Box observation spaces for InceptionTransformer")
def __init__(self, app_name, time_state=False, idx=0, is_render=False,
             no_graphics=False, recording=True):
    Serializable.quick_init(self, locals())
    # Unity scene
    self._env = UnityEnvironment(file_name=app_name, worker_id=idx,
                                 no_graphics=no_graphics)
    self.id = 0
    self.name = app_name
    self.idx = idx
    self.is_render = is_render
    self.time_state = time_state
    self.time_step = 0

    # Check brain configuration
    assert len(self._env.brains) == 1
    self.brain_name = self._env.external_brain_names[0]
    brain = self._env.brains[self.brain_name]

    # Check for number of agents in scene
    initial_info = self._env.reset()[self.brain_name]
    # Visual observations are deliberately disabled here (`and False`);
    # only vector observations are used below.
    self.use_visual = (brain.number_visual_observations == 1) and False
    self.recording = brain.number_visual_observations == 1 and recording

    # Set observation and action spaces
    if brain.vector_action_space_type == "discrete":
        self._action_space = Discrete(1)
    else:
        high = np.array([np.inf] * brain.vector_action_space_size)
        self._action_space = Box(-high, high)
    if self.use_visual and False and no_graphics:  # fixed: was undefined `no_graphic`
        high = np.array([np.inf] * brain.camera_resolutions[0]["height"] *
                        brain.camera_resolutions[0]["width"] * 3)
        self._observation_space = Box(-high, high)
    else:
        if self.time_state:
            high = np.array([np.inf] * (brain.vector_observation_space_size + 1))
        else:
            high = np.array([np.inf] * brain.vector_observation_space_size)
        self._observation_space = Box(-high, high)
    # video buffer
    self.frames = []
def convert_gym_space(space, box_additional_dim=0):
    if isinstance(space, gym.spaces.Box):
        if box_additional_dim != 0:
            # Append unbounded extra dimensions to the converted Box.
            low = np.concatenate([space.low, [-np.inf] * box_additional_dim])
            high = np.concatenate([space.high, [np.inf] * box_additional_dim])
            return Box(low=low, high=high)
        return Box(low=space.low, high=space.high)
    elif isinstance(space, gym.spaces.Discrete):
        return Discrete(n=space.n)
    elif isinstance(space, gym.spaces.Tuple):
        return Product([convert_gym_space(x) for x in space.spaces])
    else:
        raise NotImplementedError
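# Usage sketch (hedged): appending one unbounded dimension, e.g. for a time
# feature, when converting a gym Box to the rllab-style Box used above.
original = gym.spaces.Box(low=-1.0, high=1.0, shape=(3,))
augmented = convert_gym_space(original, box_additional_dim=1)
# augmented.low  -> [-1., -1., -1., -inf]
# augmented.high -> [ 1.,  1.,  1.,  inf]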
def env_create(self):
    route = '/v1/envs/'
    data = {'env_id': 'lol'}
    resp = self._post_request(route, data)
    self.instance_id = resp['instance_id']
    asi = self.env_action_space_info(self.instance_id)
    obi = self.env_observation_space_info(self.instance_id)
    self.action_space = Box(np.array(asi['low']), np.array(asi['high']))
    self.observation_space = Box(np.array(obi['low']), np.array(obi['high']))
    self.spec = EnvSpec(self.action_space, self.observation_space)
    self.spec.id = 0
    self.spec.timestep_limit = 500
def convert_gym_space(space, n_agents=1):
    if isinstance(space, (gym.spaces.Box, Box)):
        if len(space.shape) > 1:
            assert n_agents == 1, \
                "multi-dimensional inputs for centralized agents not supported"
            return Box(low=np.min(space.low), high=np.max(space.high),
                       shape=space.shape)
        else:
            # Concatenate the per-agent vectors into one flat Box.
            return Box(low=np.min(space.low), high=np.max(space.high),
                       shape=(space.shape[0] * n_agents,))
    elif isinstance(space, (gym.spaces.Discrete, Discrete)):
        # Joint discrete space: one action per agent -> n**n_agents combinations.
        return Discrete(n=space.n ** n_agents)
    else:
        raise NotImplementedError
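# Usage sketch (hedged): with n_agents=2, a per-agent Box of shape (4,)
# becomes a concatenated Box of shape (8,), and a per-agent Discrete(3)
# becomes the joint space Discrete(9).
per_agent_obs = gym.spaces.Box(low=-1.0, high=1.0, shape=(4,))
central_obs = convert_gym_space(per_agent_obs, n_agents=2)   # Box, shape (8,)
per_agent_act = gym.spaces.Discrete(3)
central_act = convert_gym_space(per_agent_act, n_agents=2)   # Discrete(9)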
def __init__(self,
             optimizer=None,
             optimizer_args=None,
             step_size=0.003,
             num_latents=6,
             latents=None,  # some sort of iterable of the actual latent vectors
             period=10,  # how often I choose a latent
             truncate_local_is_ratio=None,
             epsilon=0.1,
             train_pi_iters=10,
             use_skill_dependent_baseline=False,
             mlp_skill_dependent_baseline=False,
             freeze_manager=False,
             freeze_skills=False,
             **kwargs):
    if optimizer is None:
        if optimizer_args is None:
            optimizer_args = dict(batch_size=None)
        optimizer = FirstOrderOptimizer(learning_rate=step_size,
                                        max_epochs=train_pi_iters,
                                        **optimizer_args)
    # Fixed: a passed-in optimizer was previously ignored (self.optimizer was
    # only assigned inside the `optimizer is None` branch).
    self.optimizer = optimizer
    self.step_size = step_size
    self.truncate_local_is_ratio = truncate_local_is_ratio
    self.epsilon = epsilon

    super(Concurrent_PPO, self).__init__(**kwargs)
    self.num_latents = kwargs['policy'].latent_dim
    self.latents = latents
    self.period = period
    self.freeze_manager = freeze_manager
    self.freeze_skills = freeze_skills
    assert (not freeze_manager) or (not freeze_skills)

    # TODO: fix this sampler stuff
    self.sampler = HierBatchSampler(self, self.period)
    self.diagonal = DiagonalGaussian(self.policy.low_policy.action_space.flat_dim)
    self.debug_fns = []

    assert isinstance(self.policy, HierarchicalPolicy)
    if self.policy is not None:
        self.period = self.policy.period
    assert self.policy.period == self.period

    # skill dependent baseline
    self.use_skill_dependent_baseline = use_skill_dependent_baseline
    self.mlp_skill_dependent_baseline = mlp_skill_dependent_baseline
    if use_skill_dependent_baseline:
        curr_env = kwargs['env']
        skill_dependent_action_space = curr_env.action_space
        new_obs_space_no_bi = curr_env.observation_space.shape[0] + 1  # +1 for t_remaining
        skill_dependent_obs_space_dim = (
            new_obs_space_no_bi * (self.num_latents + 1) + self.num_latents,)
        skill_dependent_obs_space = Box(-1.0, 1.0,
                                        shape=skill_dependent_obs_space_dim)
        skill_dependent_env_spec = EnvSpec(skill_dependent_obs_space,
                                           skill_dependent_action_space)
        if self.mlp_skill_dependent_baseline:
            self.skill_dependent_baseline = GaussianMLPBaseline(
                env_spec=skill_dependent_env_spec)
        else:
            self.skill_dependent_baseline = LinearFeatureBaseline(
                env_spec=skill_dependent_env_spec)
def convert_gym_space(space):
    if isinstance(space, gym.spaces.Box):
        return Box(low=space.low, high=space.high)
    elif isinstance(space, gym.spaces.Discrete):
        return Discrete(n=space.n)
    else:
        raise NotImplementedError
def _set_action_space(self):
    max_torques = 0.5 * np.array([8, 7, 6, 5, 4, 3, 2])
    self.joint_torque_high = max_torques
    self.joint_torque_low = -1 * max_torques
    if self.action_mode == 'position':
        # Position mode: bounded end-effector deltas in x, y, z.
        delta_factor = 0.1
        delta_high = delta_factor * np.ones(3)
        delta_low = -delta_factor * np.ones(3)
        self._action_space = Box(delta_low, delta_high)
    else:
        # Torque mode: per-joint torque limits.
        self._action_space = Box(self.joint_torque_low, self.joint_torque_high)
def convert_gym_space(space):
    if isinstance(space, gym.spaces.Box):
        return Box(low=space.low, high=space.high)
    elif isinstance(space, gym.spaces.Discrete):
        return Discrete(n=space.n)
    elif isinstance(space, gym.spaces.Tuple):
        return Product([convert_gym_space(x) for x in space.spaces])
    # added for robotics environments: flatten the goal-conditioned Dict
    # observation into a single Box
    elif isinstance(space, gym.spaces.Dict):
        b_low = np.concatenate((space.spaces["desired_goal"].low,
                                space.spaces["achieved_goal"].low,
                                space.spaces["observation"].low))
        b_high = np.concatenate((space.spaces["desired_goal"].high,
                                 space.spaces["achieved_goal"].high,
                                 space.spaces["observation"].high))
        return Box(low=b_low, high=b_high)
    else:
        raise NotImplementedError
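# Usage sketch (hedged): flattening a goal-conditioned Dict space of the kind
# used by the gym robotics envs. The shapes here are made up for illustration.
dict_space = gym.spaces.Dict({
    "desired_goal": gym.spaces.Box(low=-1.0, high=1.0, shape=(3,)),
    "achieved_goal": gym.spaces.Box(low=-1.0, high=1.0, shape=(3,)),
    "observation": gym.spaces.Box(low=-np.inf, high=np.inf, shape=(10,)),
})
flat = convert_gym_space(dict_space)  # Box with 3 + 3 + 10 = 16 dims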
def __init__(self, env):
    self._env = env
    # Use the local `env` consistently (the original mixed `self._env` with
    # an undefined `self.env`).
    if isinstance(env.action_processor, base_environment.BaseDiscreteAction):
        self.action_space = spaces.Discrete(env.num_actions())
    else:
        assert isinstance(env.action_processor,
                          base_environment.BaseContinuousAction)
        self.action_space = Box(
            low=env.action_processor.minval(),
            high=env.action_processor.maxval(),
            shape=(env.action_processor.action_dim(),))  # fixed: shape must be a tuple
    obsNdim = env.observation_ndim()
    obsKeys = list(obsNdim.keys())
    assert len(obsKeys) == 1, 'gym only supports one observation type'
    self._obsKey = obsKeys[0]
    self.observation_space = Box(low=0, high=255, shape=obsNdim[self._obsKey])
def __init__(self, env):
    super().__init__(env)
    Serializable.quick_init(self, locals())
    self.keys = ["robot-state", "object-state"]

    # set up observation and action spaces
    flat_ob = self._flatten_obs(self.env.reset(), verbose=True)
    self.obs_dim = flat_ob.size
    high = np.inf * np.ones(self.obs_dim)
    low = -high
    self.observation_space = Box(low=low, high=high)
    low, high = self.env.action_spec
    self.action_space = Box(low=low, high=high)

    self.spec = EnvSpec(
        observation_space=self.observation_space,
        action_space=self.action_space,
    )
def __init__(self,
             optimizer=None,
             optimizer_args=None,
             step_size=1e-2,
             num_latents=6,
             latents=None,  # some sort of iterable of the actual latent vectors
             period=10,  # how often I choose a latent
             truncate_local_is_ratio=None,
             use_skill_dependent_baseline=False,
             **kwargs):
    Serializable.quick_init(self, locals())
    if optimizer is None:
        default_args = dict(batch_size=None, max_epochs=1)
        if optimizer_args is None:
            optimizer_args = default_args
        else:
            optimizer_args = dict(default_args, **optimizer_args)
        optimizer = FirstOrderOptimizer(learning_rate=step_size, **optimizer_args)
    self.optimizer = optimizer
    self.step_size = step_size
    self.truncate_local_is_ratio = truncate_local_is_ratio

    super(PG_concurrent_approx, self).__init__(**kwargs)
    self.num_latents = kwargs['policy'].latent_dim
    self.latents = latents
    self.period = period

    # TODO: fix this sampler stuff
    self.sampler = HierBatchSampler(self, self.period)
    self.diagonal = DiagonalGaussian(self.policy.low_policy.action_space.flat_dim)
    self.debug_fns = []

    assert isinstance(self.policy, HierarchicalPolicy)
    if self.policy is not None:
        self.period = self.policy.period
    assert self.policy.period == self.period
    self.trainable_manager = self.policy.trainable_manager

    # skill dependent baseline
    self.use_skill_dependent_baseline = use_skill_dependent_baseline
    if use_skill_dependent_baseline:
        curr_env = kwargs['env']
        skill_dependent_action_space = curr_env.action_space
        skill_dependent_obs_space_dim = (
            (curr_env.observation_space.shape[0] + 1) * self.num_latents,)
        skill_dependent_obs_space = Box(-1.0, 1.0,
                                        shape=skill_dependent_obs_space_dim)
        # fixed typo: was "skill_depdendent_env_spec"
        skill_dependent_env_spec = EnvSpec(skill_dependent_obs_space,
                                           skill_dependent_action_space)
        self.skill_dependent_baseline = LinearFeatureBaseline(
            env_spec=skill_dependent_env_spec)
def convert_gym_space(space):
    # Accept a gym Box as well as an already-converted rllab Box (the
    # original checked only the bare `Box`, which never matched gym spaces).
    if isinstance(space, (gym.spaces.Box, Box)):
        return Box(low=space.low, high=space.high)
    elif isinstance(space, gym.spaces.Discrete):
        return Discrete(n=space.n)
    elif isinstance(space, gym.spaces.Tuple):
        return Product([convert_gym_space(x) for x in space.spaces])
    else:
        raise NotImplementedError
def __init__(
        self,
        update_hz=20,
        action_mode='torque',
        safety_box=True,
        reward='MSE',
        huber_delta=10,
        safety_force_magnitude=8,
        safety_force_temp=15,
        safe_reset_length=150,
        reward_magnitude=1,
):
    Serializable.quick_init(self, locals())
    self.init_rospy(update_hz)

    self.safety_box_lows = np.array([-0.03900771, -0.32655392, -1])
    self.safety_box_highs = np.array([0.79529649, 0.3227083, 1])
    self.joint_names = [
        'right_j0', 'right_j1', 'right_j2', 'right_j3',
        'right_j4', 'right_j5', 'right_j6'
    ]
    self.link_names = [
        'right_l2', 'right_l3', 'right_l4', 'right_l5', 'right_l6', '_hand'
    ]
    self.action_mode = action_mode
    self.reward_magnitude = reward_magnitude
    self.safety_box = safety_box
    self.safe_reset_length = safe_reset_length
    self.huber_delta = huber_delta
    self.safety_force_magnitude = safety_force_magnitude
    self.safety_force_temp = safety_force_temp
    self.AnglePDController = AnglePDController()

    max_torques = 0.5 * np.array([8, 7, 6, 5, 4, 3, 2])
    self.joint_torque_high = max_torques
    self.joint_torque_low = -1 * max_torques
    self._action_space = Box(self.joint_torque_low, self.joint_torque_high)

    if reward == 'MSE':
        self.reward_function = self._MSE_reward
    elif reward == 'huber':
        self.reward_function = self._Huber_reward
    else:
        self.reward_function = self._Norm_reward

    self._set_action_space()
    self._set_observation_space()
    self.get_latest_pose_jacobian_dict()
    self.in_reset = True
    self.amplify = np.ones(7) * 2.5
    self.pd_time_steps = 50
    self.jacobian_pseudo_inverse_torques_scale = 10
    self.damping_scale = 5
def __init__(self, env_name, dict_keys):
    Serializable.quick_init(self, locals())
    super(FlattenDictWrapper, self).__init__(env_name)
    self.dict_keys = dict_keys

    # Total flattened size is the sum of the per-key observation sizes.
    size = 0
    for key in dict_keys:
        shape = self.env.observation_space.spaces[key].shape
        size += np.prod(shape)
    self._observation_space = Box(-np.inf, np.inf, shape=(size,))
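# Usage sketch (hedged): the size computation above, standalone. The example
# shapes are made up; multi-dimensional entries such as images are handled by
# np.prod over the shape tuple.
example_shapes = {"observation": (10,), "desired_goal": (3,), "image": (8, 8)}
size = sum(int(np.prod(shape)) for shape in example_shapes.values())
flat_space = Box(-np.inf, np.inf, shape=(size,))  # here size == 10 + 3 + 64 == 77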
def __init__(self, wrapped_env):
    Serializable.quick_init(self, locals())
    self._wrapped_env = wrapped_env
    low = np.min(self._wrapped_env.spec.observation_space.low)
    high = np.max(self._wrapped_env.spec.observation_space.high)
    # assert len(wrapped_env.spec.observation_space.shape) == 1
    # Augment the wrapped observation with one extra dimension.
    shape = (self._wrapped_env.spec.observation_space.shape[0] + 1,)
    self.obs_space = Box(low, high, shape)
def convert_gym_space(space):
    if isinstance(space, gym.spaces.Box):
        # Hardcoded to 80x80 pixel observations, presumably matching a
        # downstream frame-preprocessing step.
        return Box(low=0, high=255, shape=(80, 80))
    elif isinstance(space, gym.spaces.Discrete):
        return Discrete(n=space.n)
    elif isinstance(space, gym.spaces.Tuple):
        return Product([convert_gym_space(x) for x in space.spaces])
    else:
        raise NotImplementedError
def __init__(self, dim=2, state_bounds=None, action_bounds=None,
             control_mode='linear', *args, **kwargs):
    """
    :param dim: dimension of the position space; the observation will be 2*dim
    :param state_bounds: list or np.array of 2*dim with the ub of the state space
    :param action_bounds: list or np.array of 2*dim with the ub of the action space
    :param goal_generator: procedure to sample and keep the goals
    :param reward_dist_threshold:
    :param control_mode:
    """
    Serializable.quick_init(self, locals())
    self.dim = dim
    self.control_mode = control_mode
    self.dt = 0.02
    self.pos = np.zeros(dim)
    self.vel = np.zeros(dim)
    self.state_ub = UB * np.ones(self.dim * 2) if state_bounds is None \
        else np.array(state_bounds)
    self.action_ub = UB * np.ones(self.dim) if action_bounds is None \
        else np.array(action_bounds)
    self._observation_space = Box(-1 * self.state_ub, self.state_ub)
    self._action_space = Box(-1 * self.action_ub, self.action_ub)
def __init__(self, *args, **kwargs):
    super().__init__(*args, **kwargs)
    self._reward_values = list(range(1, int(self._max_reward_magnitude) + 1))
    # Observation: a one-hot block plus one extra slot bounded by the
    # maximum reward magnitude.
    low = np.zeros(self._onehot_size + 1)
    low[-1] = -self._max_reward_magnitude
    high = np.ones(self._onehot_size + 1)
    high[-1] = self._max_reward_magnitude
    self._observation_space = Box(low, high)
def convert_gym_space(space):
    if isinstance(space, gym.spaces.Box):
        return Box(low=space.low, high=space.high)
    elif isinstance(space, gym.spaces.Discrete):
        return Discrete(n=space.n)
    elif isinstance(space, gym.spaces.Tuple):
        return Product([convert_gym_space(x) for x in space.spaces])
    elif isinstance(space, list):
        # For multiagent envs: convert each agent's space independently.
        # TODO(cathywu): refactor multiagent envs to use gym.spaces.Tuple
        # (may be needed for pickling?)
        return list(map(convert_gym_space, space))
    else:
        raise NotImplementedError
def goal_env_spec(self, dict_keys=('observation', 'desired_goal')):
    """
    Convert selected keys of a Dict observation space into a single flat Box
    space and return the corresponding env_spec.

    :param dict_keys: (`tuple`) keys of the Dict space to include.
    :return: (`EnvSpec`) converted object
    """
    assert isinstance(self.observation_space, Dict)
    obs_dim = np.sum([self.observation_space.spaces[key].flat_dim
                      for key in dict_keys])
    obs_space = Box(-np.inf, np.inf, shape=(obs_dim,))
    return EnvSpec(
        observation_space=obs_space,
        action_space=self.action_space,
    )
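# Usage sketch (hedged; `env` is a hypothetical goal-conditioned env whose
# Dict observation space has flat_dims {"observation": 10, "desired_goal": 3,
# "achieved_goal": 3}). With the default keys, only "observation" and
# "desired_goal" contribute to the flattened Box:
spec = env.goal_env_spec()
assert spec.observation_space.shape == (13,)  # 10 + 3
assert spec.action_space is env.action_space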
def convert_gym_space(space):
    """
    Convert a gym.Space to an rllab.Space.

    :param space: (obj:`gym.Space`) the Space object to convert
    :return: the converted rllab.Space object
    """
    if isinstance(space, gym.spaces.Box):
        return Box(low=space.low, high=space.high)
    elif isinstance(space, gym.spaces.Discrete):
        return Discrete(n=space.n)
    elif isinstance(space, gym.spaces.Tuple):
        return Product([convert_gym_space(x) for x in space.spaces])
    elif isinstance(space, gym.spaces.Dict):
        return Dict(space.spaces)
    else:
        raise TypeError("Unsupported space type: %s" % type(space))
def _set_observation_space(self):
    # Note: END_EFFECTOR_VALUE_*['position'] appears twice in the original
    # layout (before and after the angle block); preserved as-is.
    lows = np.hstack((
        JOINT_VALUE_LOW['position'],
        JOINT_VALUE_LOW['velocity'],
        JOINT_VALUE_LOW['torque'],
        END_EFFECTOR_VALUE_LOW['position'],
        END_EFFECTOR_VALUE_LOW['angle'],
        END_EFFECTOR_VALUE_LOW['position'],
    ))
    highs = np.hstack((
        JOINT_VALUE_HIGH['position'],
        JOINT_VALUE_HIGH['velocity'],
        JOINT_VALUE_HIGH['torque'],
        END_EFFECTOR_VALUE_HIGH['position'],
        END_EFFECTOR_VALUE_HIGH['angle'],
        END_EFFECTOR_VALUE_HIGH['position'],
    ))
    self._observation_space = Box(lows, highs)
def action_space(self):
    return Box(low=-self.vel_bound, high=self.vel_bound,
               shape=(self.dynamics.a_dim,))
def observation_space(self):
    return Box(low=np.array((self.xlim[0], self.ylim[0])),
               high=np.array((self.xlim[1], self.ylim[1])),
               shape=None)
        return samples  # closes the preceding method

    def get_action(self, t, observation, policy, **kwargs):
        # Apply MC dropout: query the stochastic policy several times and
        # return the mean action.
        action, _ = policy.get_action(observation)
        mc_dropout = 10
        all_actions = np.zeros(shape=(mc_dropout, action.shape[0]))
        for d in range(mc_dropout):
            action, _ = policy.get_action(observation)
            all_actions[d, :] = action
        mean_action = np.mean(all_actions, axis=0)
        return mean_action


if __name__ == "__main__":
    ou = MCDropout(
        env_spec=AttrDict(action_space=Box(low=-1, high=1, shape=(1,))),
        mu=0,
        theta=0.15,
        sigma=0.3)
    states = []
    for i in range(1000):
        states.append(ou.evolve_state()[0])
    import matplotlib.pyplot as plt
    plt.plot(states)
    plt.show()
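# Minimal sketch of the MC-dropout averaging above, using a stub policy whose
# get_action is stochastic. Everything here is hypothetical illustration:
class NoisyPolicy:
    def get_action(self, observation):
        # A real dropout policy would resample dropout masks on each call.
        return np.array([1.0]) + 0.1 * np.random.randn(1), {}

policy = NoisyPolicy()
actions = np.stack([policy.get_action(None)[0] for _ in range(10)])
mean_action = actions.mean(axis=0)  # averaging shrinks the noise roughly by 1/sqrt(10)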
def _get_2D_Box_space(self):
    return Box(
        low=np.array((self.xlim[0], self.ylim[0])),
        high=np.array((self.xlim[1], self.ylim[1])),
        shape=None,
    )
def __init__(
        self,
        n=2,
        num_steps=10,
        reward_for_remembering=1,
        max_reward_magnitude=1,
        softmax_action=False,
        zero_observation=False,
        output_target_number=False,
        output_time=False,
        episode_boundary_flags=False,
):
    """
    :param n: Number of different values that could be returned
    :param num_steps: How many steps the policy needs to output before the
        episode ends. AKA the horizon. So, the policy will see num_steps+1
        total observations if you include the observation returned when
        reset() is called. The last observation will be the "terminal state"
        observation.
    :param reward_for_remembering: The reward bonus for remembering the
        number. This number is added to the usual reward if the correct
        number has the maximum probability.
    :param max_reward_magnitude: Clip the reward magnitude to this value.
    :param softmax_action: If true, put the action through a softmax.
    :param zero_observation: If true, all observations after the first will
        be just zeros (NOT the zero one-hot).
    :param episode_boundary_flags: If true, add a boolean flag to the
        observation for whether or not the episode is starting or
        terminating.
    """
    assert max_reward_magnitude >= reward_for_remembering
    self.num_steps = num_steps
    self.n = n
    self._onehot_size = n + 1
    # Time step for the NEXT observation to be returned:
    #
    #   env = OneCharMemory()   # t == 0 after this line
    #   obs = env.reset()       # t == 1
    #   _ = env.step()          # t == 2
    #   _ = env.step()          # t == 3
    #   ...
    #   done = env.step()[2]    # t == num_steps and done == False
    #   done = env.step()[2]    # t == num_steps+1 and done == True
    self._t = 0
    self._reward_for_remembering = reward_for_remembering
    self._max_reward_magnitude = max_reward_magnitude
    self._softmax_action = softmax_action
    self._zero_observation = zero_observation
    self._output_target_number = output_target_number
    self._output_time = output_time
    self._episode_boundary_flags = episode_boundary_flags

    self._action_space = Box(np.zeros(self._onehot_size),
                             np.ones(self._onehot_size))
    obs_low = np.zeros(self._onehot_size)
    # fixed: the original used np.zeros here, which pins the one-hot
    # observation entries to a degenerate [0, 0] range
    obs_high = np.ones(self._onehot_size)
    if self._output_target_number:
        obs_low = np.hstack((obs_low, [0]))
        obs_high = np.hstack((obs_high, [self.n]))
    if self._output_time:
        obs_low = np.hstack((obs_low, [0] * (self.num_steps + 1)))
        obs_high = np.hstack((obs_high, [1] * (self.num_steps + 1)))
    if self._episode_boundary_flags:
        obs_low = np.hstack((obs_low, [0] * 2))
        obs_high = np.hstack((obs_high, [1] * 2))
    self._observation_space = Box(obs_low, obs_high)

    self._target_number = None

    # For rendering
    self._last_reward = None
    self._last_action = None
    self._last_t = None
def action_space(self):
    return Box(low=-5.0, high=5.0, shape=(1,))