def discount_spec(self) -> t.Tuple[specs.BoundedArray, specs.BoundedArray]:
    """Describe the pair of discounts exposed by this object.

    Returns:
        A 2-tuple of scalar (shape ``()``) `BoundedArray` specs with dtype
        `float`, each bounded to the closed interval [0, 1]. The first is
        named 'P-discount' and the second 'D-discount'.
    """

    def _unit_interval(label):
        # Scalar float spec restricted to [0, 1]; only the name varies.
        return specs.BoundedArray(
            shape=(), dtype=float, minimum=0., maximum=1., name=label)

    return _unit_interval('P-discount'), _unit_interval('D-discount')
def __init__(self, env):
    """Wrap `env` and precompute the observation spec exposed by the wrapper.

    The resulting spec (stored in `self._obs_spec`) contains:
      * 'features': a flat float32 array whose length is the summed element
        count of every wrapped observation other than `MANIP_PIXELS_KEY`.
      * 'pixels': only if `MANIP_PIXELS_KEY` is present in the wrapped spec;
        a bounded array with the wrapped spec's dtype and bounds and its
        shape minus the leading axis.
      * 'state': a float32 array shaped like `env.physics.get_state()`.

    Args:
        env: The environment to wrap. It must provide `observation_spec()`
            and a `physics` attribute with `get_state()`.
    """
    self._env = env
    self._obs_spec = OrderedDict()
    source_spec = env.observation_spec().copy()

    # Accumulate the flattened size of all non-pixel observations.
    feature_dim = 0
    for name, entry in source_spec.items():
        if name == MANIP_PIXELS_KEY:
            continue
        # Non-pixel entries must be plain (unbounded) float64 arrays.
        assert entry.dtype == np.float64
        assert type(entry) == specs.Array
        feature_dim += np.prod(entry.shape)
    self._obs_spec['features'] = specs.Array(
        shape=(feature_dim,), dtype=np.float32, name='features')

    if MANIP_PIXELS_KEY in source_spec:
        pixel_spec = source_spec[MANIP_PIXELS_KEY]
        # The leading axis is dropped (presumably a batch/camera dimension
        # -- TODO confirm against the wrapped environment).
        self._obs_spec['pixels'] = specs.BoundedArray(
            shape=pixel_spec.shape[1:],
            dtype=pixel_spec.dtype,
            minimum=pixel_spec.minimum,
            maximum=pixel_spec.maximum,
            name='pixels')

    state_shape = self._env.physics.get_state().shape
    self._obs_spec['state'] = specs.Array(
        shape=state_shape, dtype=np.float32, name='state')
def __init__(self, scaling_factor=1., action_layers='agent',
             constrained_lr=False, control_velocity=False, momentum=0.):
    """Constructor.

    Args:
        scaling_factor: Scalar. Scaling factor multiplied to the action.
        action_layers: String, or list/tuple of strings. Elements (or
            itself if string) must be keys in the environment state. All
            sprites in these layers will be acted upon by this action
            space. Any value that is not a list or tuple is treated as a
            single layer and wrapped in a 1-tuple.
        constrained_lr: Bool. If True, joystick is constrained to actions
            parallel to the x-axis, by zeroing out the y-axis (component 1)
            of the action.
        control_velocity: Bool. Whether to control velocity (True) or force
            (False).
        momentum: Float in [0, 1]. Discount factor for previous action.
            This should be zero if control_velocity is False, because
            imparting forces automatically gives momentum to the agent(s)
            being controlled. If control_velocity is True, setting this
            greater than zero gives the controlled agent(s) momentum.
            However, the velocity is clipped at scaling_factor, so the
            agent only retains momentum when stopping or changing
            direction and does not accelerate.
    """
    # Normalise a lone layer name into a one-element tuple.
    layers = (
        action_layers
        if isinstance(action_layers, (list, tuple))
        else (action_layers,)
    )

    self._scaling_factor = scaling_factor
    self._action_layers = layers
    self._constrained_lr = constrained_lr
    self._control_velocity = control_velocity
    self._momentum = momentum

    # Two-component float32 action with each component in [-1, 1].
    self._action_spec = specs.BoundedArray(
        shape=(2,), dtype=np.float32, minimum=-1, maximum=1)
def action_spec(self, physics):
    """Returns a fixed three-component `BoundedArray` action spec.

    Args:
        physics: Not used by this implementation; the returned spec does
            not depend on it.

    Returns:
        A `BoundedArray` of shape (3,) and dtype float64 whose components
        are each bounded to [-1.0, 1.0].
    """
    # `np.float` was only an alias for the builtin `float` and was removed
    # in NumPy 1.24; `np.float64` is the same dtype and works on all
    # NumPy versions.
    return specs.BoundedArray(
        shape=(3,),
        dtype=np.float64,
        minimum=[-1.0, -1.0, -1.0],
        maximum=[1.0, 1.0, 1.0])
def action_spec(self):
    """Returns the action spec.

    Returns:
        A `BoundedArray` named "buy_sell_action" with shape (1,) and dtype
        `int`, bounded below by the first entry of `self._actions` and
        above by the last entry.
    """
    # NOTE(review): assumes self._actions is ordered ascending so that its
    # first/last entries are the min/max action -- confirm.
    lowest = self._actions[0]
    highest = self._actions[-1]
    return specs.BoundedArray(
        shape=(1,),
        dtype=int,
        minimum=[lowest],
        maximum=[highest],
        name="buy_sell_action")
def action_spec(self, physics):
    """Returns the `BoundedArray` action spec for the controlled joints.

    In relative-step mode (`self.relative_step` truthy) the spec has
    `self.DOF` components, each bounded to
    [-self.relative_rad_max, +self.relative_rad_max].

    Otherwise the spec is derived from `physics.action_spec()`: it is
    returned unchanged when it has exactly `self.DOF` components, and is
    truncated to its first `self.DOF` components when it has more.

    Args:
        physics: Object providing `action_spec()`. It is only queried when
            not in relative-step mode.

    Returns:
        A `BoundedArray` with `self.DOF` components, or the physics spec
        itself when its size already matches.

    Raises:
        NotImplementedError: If the physics spec has fewer than `self.DOF`
            components.
    """
    if self.relative_step:
        limit = np.ones(self.DOF) * self.relative_rad_max
        # `np.float` was removed in NumPy 1.24; `np.float64` is the same
        # dtype.
        return specs.BoundedArray(
            shape=(self.DOF,),
            dtype=np.float64,
            minimum=-limit,
            maximum=limit)

    # TODO this will only work if we are using Mujoco - add to robot
    spec = physics.action_spec()
    num_actuators = spec.shape[0]
    if num_actuators == self.DOF:
        return spec
    if num_actuators > self.DOF:
        # Sometimes we only want to control a few joints.
        return specs.BoundedArray(
            shape=(self.DOF,),
            dtype=np.float64,
            minimum=spec.minimum[:self.DOF],
            maximum=spec.maximum[:self.DOF])
    # `NotImplemented` is a comparison sentinel, not an exception; raising
    # it produces a confusing TypeError instead of the intended error.
    raise NotImplementedError(
        'physics action spec has {} actuators but {} DOF are required'
        .format(num_actuators, self.DOF))