Beispiel #1
0
 def discount_spec(self) -> t.Tuple[specs.BoundedArray, specs.BoundedArray]:
     """Describe the pair of scalar discounts, each bounded to [0, 1].

     Returns:
         A 2-tuple of scalar float `BoundedArray` specs named 'P-discount'
         and 'D-discount', in that order.
     """
     # Both entries share everything except their name.
     unit_interval = dict(shape=(), dtype=float, minimum=0., maximum=1.)
     p_discount = specs.BoundedArray(name='P-discount', **unit_interval)
     d_discount = specs.BoundedArray(name='D-discount', **unit_interval)
     return p_discount, d_discount
Beispiel #2
0
    def __init__(self, env):
        """Wrap `env` and build the observation spec exposed by this wrapper.

        The resulting spec has up to three entries:
          * 'features': a flat float32 vector whose length is the summed
            element count of every wrapped observation other than the one
            stored under `MANIP_PIXELS_KEY`.
          * 'pixels': only present when the wrapped spec contains
            `MANIP_PIXELS_KEY`; same dtype and bounds as the wrapped entry,
            with the first axis of its shape dropped.
          * 'state': a float32 array shaped like `env.physics.get_state()`.

        Args:
            env: Environment to wrap. Must provide `observation_spec()` and
                a `physics` attribute with `get_state()`.
        """
        self._env = env
        self._obs_spec = OrderedDict()
        wrapped_obs_spec = env.observation_spec().copy()

        # Count how many scalars all non-pixel observations contribute.
        feature_dim = 0
        for obs_name, obs_spec in wrapped_obs_spec.items():
            if obs_name == MANIP_PIXELS_KEY:
                continue
            # Only plain (unbounded) float64 arrays are expected here.
            assert obs_spec.dtype == np.float64
            assert type(obs_spec) == specs.Array
            feature_dim += np.prod(obs_spec.shape)

        self._obs_spec['features'] = specs.Array(
            shape=(feature_dim, ), dtype=np.float32, name='features')

        if MANIP_PIXELS_KEY in wrapped_obs_spec:
            pixel_spec = wrapped_obs_spec[MANIP_PIXELS_KEY]
            self._obs_spec['pixels'] = specs.BoundedArray(
                shape=pixel_spec.shape[1:],  # drop the leading axis
                dtype=pixel_spec.dtype,
                minimum=pixel_spec.minimum,
                maximum=pixel_spec.maximum,
                name='pixels')

        state_shape = self._env.physics.get_state().shape
        self._obs_spec['state'] = specs.Array(
            shape=state_shape, dtype=np.float32, name='state')
Beispiel #3
0
    def __init__(self, scaling_factor=1., action_layers='agent',
                 constrained_lr=False, control_velocity=False, momentum=0.):
        """Constructor.

        Args:
            scaling_factor: Scalar. Scaling factor multiplied to the action.
            action_layers: String, or list/tuple of strings. Elements (or
                itself if string) must be keys in the environment state. All
                sprites in these layers will be acted upon by this action
                space. Any value that is not a list or tuple is stored as a
                one-element tuple.
            constrained_lr: Bool. If True, joystick is constrained to actions
                parallel to the x-axis, by zeroing out the y-axis (component
                1) of the action.
            control_velocity: Bool. Whether to control velocity (True) or
                force (False).
            momentum: Float in [0, 1]. Discount factor for previous action.
                This should be zero if control_velocity is False, because
                imparting forces automatically gives momentum to the agent(s)
                being controlled. If control_velocity is True, setting this
                greater than zero gives the controlled agent(s) momentum.
                However, the velocity is clipped at scaling_factor, so the
                agent only retains momentum when stopping or changing
                direction and does not accelerate.
        """
        # Normalise a lone layer name to a tuple; lists/tuples are kept as-is.
        layers = (
            action_layers
            if isinstance(action_layers, (list, tuple))
            else (action_layers,)
        )

        self._scaling_factor = scaling_factor
        self._action_layers = layers
        self._constrained_lr = constrained_lr
        self._control_velocity = control_velocity
        self._momentum = momentum

        # Two-component float32 action, each component within [-1, 1].
        self._action_spec = specs.BoundedArray(
            minimum=-1, maximum=1, shape=(2,), dtype=np.float32)
Beispiel #4
0
    def action_spec(self, physics):
        """Returns a fixed 3-element `BoundedArray` bounded to [-1, 1].

        Args:
            physics: Not consulted here; the returned spec does not depend
                on it.

        Returns:
            A float `BoundedArray` of shape (3,) whose components each range
            from -1.0 to 1.0.
        """
        # `np.float` was only an alias of the builtin `float` and was removed
        # in NumPy 1.24, so the builtin is used directly.
        return specs.BoundedArray(shape=(3, ),
                                  dtype=float,
                                  minimum=[-1.0, -1.0, -1.0],
                                  maximum=[1.0, 1.0, 1.0])
Beispiel #5
0
 def action_spec(self):
     """Describe the single integer action accepted here.

     Returns:
         A `BoundedArray` named "buy_sell_action" with shape (1,) and int
         dtype, bounded below by the first entry of `self._actions` and
         above by its last entry.
     """
     lowest_action = self._actions[0]
     highest_action = self._actions[-1]
     return specs.BoundedArray(
         name="buy_sell_action",
         shape=(1,),
         dtype=int,
         minimum=[lowest_action],
         maximum=[highest_action],
     )
Beispiel #6
0
 def action_spec(self, physics):
     """Return the action spec for the `self.DOF` controlled joints.

     In relative-step mode the spec is a symmetric box of half-width
     `self.relative_rad_max` for every joint. Otherwise the spec is taken
     from `physics.action_spec()`, truncated to its first `self.DOF`
     entries when the physics exposes more actions than are controlled.

     Args:
         physics: Object providing `action_spec()`. Only consulted when
             `self.relative_step` is false.

     Returns:
         A float `BoundedArray` of shape `(self.DOF,)`, or the physics spec
         itself when its length already equals `self.DOF`.

     Raises:
         NotImplementedError: If the physics spec has fewer than
             `self.DOF` entries.
     """
     # NOTE: builtin `float` replaces `np.float`, which was an alias of it
     # and no longer exists in NumPy >= 1.24.
     if self.relative_step:
         return specs.BoundedArray(
             shape=(self.DOF,),
             dtype=float,
             minimum=np.ones(self.DOF) * -self.relative_rad_max,
             maximum=np.ones(self.DOF) * self.relative_rad_max)

     # TODO this will only work if we are using Mujoco - add to robot
     spec = physics.action_spec()
     n_actions = spec.shape[0]
     if n_actions == self.DOF:
         return spec
     if n_actions > self.DOF:
         # sometimes we only want to control a few joints
         return specs.BoundedArray(
             shape=(self.DOF,),
             dtype=float,
             minimum=spec.minimum[:self.DOF],
             maximum=spec.maximum[:self.DOF])
     # `NotImplemented` is a comparison sentinel, not an exception; raising
     # it produces a TypeError instead of the intended error.
     raise NotImplementedError(
         "physics exposes fewer actions than the robot's DOF")