コード例 #1
0
 def __init__(self):
   self._step_counter = 0
   self._observables = {
       'twice': observable.Generic(FakePhysics.twice),
       'repeated': observable.Generic(FakePhysics.repeated, update_interval=5),
       'matrix': observable.Generic(FakePhysics.matrix, update_interval=3)
   }
コード例 #2
0
    def testNestedSpecsAndValues(self, list_or_tuple):
        observables = list_or_tuple(
            ({
                'one': observable.Generic(lambda _: 1.),
                'two': observable.Generic(lambda _: [2, 2]),
            },
             collections.OrderedDict([
                 ('three', observable.Generic(lambda _: np.full((2, 2), 3))),
                 ('four', observable.Generic(lambda _: [4.])),
                 ('five', observable.Generic(lambda _: 5)),
             ])))

        observables[0]['two'].enabled = True
        observables[1]['three'].enabled = True
        observables[1]['five'].enabled = True

        observation_updater = updater.Updater(observables)
        observation_updater.reset(physics=None, random_state=None)

        def make_spec(obs):
            array = np.array(obs.observation_callable(None, None)())
            return specs.ArraySpec((1, ) + array.shape, array.dtype)

        expected_specs = list_or_tuple(
            ({
                'two': make_spec(observables[0]['two'])
            },
             collections.OrderedDict([
                 ('three', make_spec(observables[1]['three'])),
                 ('five', make_spec(observables[1]['five']))
             ])))

        actual_specs = observation_updater.observation_spec()
        self.assertIs(type(actual_specs), type(expected_specs))
        for actual_dict, expected_dict in zip(actual_specs, expected_specs):
            self.assertIs(type(actual_dict), type(expected_dict))
            self.assertEqual(actual_dict, expected_dict)

        expected_values = list_or_tuple(
            ({
                'two': observables[0]['two'](physics=None, random_state=None)
            },
             collections.OrderedDict([
                 ('three', observables[1]['three'](physics=None,
                                                   random_state=None)),
                 ('five', observables[1]['five'](physics=None,
                                                 random_state=None))
             ])))

        actual_values = observation_updater.get_observation()
        self.assertIs(type(actual_values), type(expected_values))
        for actual_dict, expected_dict in zip(actual_values, expected_values):
            self.assertIs(type(actual_dict), type(expected_dict))
            self.assertLen(actual_dict, len(expected_dict))
            for actual, expected in zip(six.iteritems(actual_dict),
                                        six.iteritems(expected_dict)):
                actual_name, actual_value = actual
                expected_name, expected_value = expected
                self.assertEqual(actual_name, expected_name)
                np.testing.assert_array_equal(actual_value[0], expected_value)
コード例 #3
0
    def _add_player_observables_on_other(self, player, other, prefix):
        """Add observables of another player in this player's egocentric frame.

    Args:
      player: A `Walker` instance, the player we are adding observables to.
      other: A `Walker` instance corresponding to a different player.
      prefix: A string specifying a prefix to apply to the names of observables
        belonging to `player`.
    """
        if player is other:
            raise ValueError(
                'Cannot add egocentric observables of player on itself.')
        # Origin callable in xpos, xvel for `player`.
        xpos_xyz_callable = lambda p: p.bind(player.walker.root_body).xpos
        xvel_xyz_callable = lambda p: p.bind(player.walker.root_body).cvel[3:]

        # Egocentric observation of other's position, orientation and
        # linear velocities.
        def _cvel_observation(physics, other=other):
            # Velocitmeter reads in local frame but we need world frame observable
            # for egocentric transformation.
            return physics.bind(other.walker.root_body).cvel[3:]

        def _egocentric_end_effectors_xpos(physics, other=other):
            origin_xpos = xpos_xyz_callable(physics)
            egocentric_end_effectors_xpos = []
            for end_effector_body in other.walker.end_effectors:
                xpos = physics.bind(end_effector_body).xpos
                delta = xpos - origin_xpos
                ego_xpos = player.walker.transform_vec_to_egocentric_frame(
                    physics, delta)
                egocentric_end_effectors_xpos.append(ego_xpos)
            return np.concatenate(egocentric_end_effectors_xpos)

        player.walker.observables.add_egocentric_vector(
            '{}_ego_linear_velocity'.format(prefix),
            base_observable.Generic(_cvel_observation),
            origin_callable=xvel_xyz_callable)
        player.walker.observables.add_egocentric_vector(
            '{}_ego_position'.format(prefix),
            other.walker.observables.position,
            origin_callable=xpos_xyz_callable)
        player.walker.observables.add_egocentric_xmat(
            '{}_ego_orientation'.format(prefix),
            other.walker.observables.orientation)

        # Adds end effectors of the other agents in the player's egocentric frame.
        player.walker.observables.add_observable(
            '{}_ego_end_effectors_pos'.format(prefix),
            base_observable.Generic(_egocentric_end_effectors_xpos))

        # Adds end effectors of the other agents in the other's egocentric frame.
        # A is seeing B's hand extended to B's right.
        player.walker.observables.add_observable(
            '{}_end_effectors_pos'.format(prefix),
            other.walker.observables.end_effectors_pos)
コード例 #4
0
ファイル: observables.py プロジェクト: xgpeng/dm_control
  def __call__(self, task, player):
    """Adds observables to a player for the given task.

    Args:
      task: A `soccer.Task` instance.
      player: A `Walker` instance to which observables will be added.
    """

    def _stats_i_received_ball(unused_physics):
      if (task.ball.hit and task.ball.repossessed and
          task.ball.last_hit is player):
        return 1.
      return 0.

    player.walker.observables.add_observable(
        'stats_i_received_ball',
        base_observable.Generic(_stats_i_received_ball))

    def _stats_opponent_intercepted_ball(unused_physics):
      """Indicator on if an opponent intercepted the ball."""
      if (task.ball.hit and task.ball.intercepted and
          task.ball.last_hit.team != player.team):
        return 1.
      return 0.

    player.walker.observables.add_observable(
        'stats_opponent_intercepted_ball',
        base_observable.Generic(_stats_opponent_intercepted_ball))

    for dist in [5, 10, 15]:

      def _stats_i_received_ball_dist(physics, dist=dist):
        if (_stats_i_received_ball(physics) and
            task.ball.dist_between_last_hits is not None and
            task.ball.dist_between_last_hits > dist):
          return 1.
        return 0.

      player.walker.observables.add_observable(
          'stats_i_received_ball_%dm' % dist,
          base_observable.Generic(_stats_i_received_ball_dist))

      def _stats_opponent_intercepted_ball_dist(physics, dist=dist):
        if (_stats_opponent_intercepted_ball(physics) and
            task.ball.dist_between_last_hits is not None and
            task.ball.dist_between_last_hits > dist):
          return 1.
        return 0.

      player.walker.observables.add_observable(
          'stats_opponent_intercepted_ball_%dm' % dist,
          base_observable.Generic(_stats_opponent_intercepted_ball_dist))
コード例 #5
0
  def contact_forces(self):
    def clipped_contact_forces(physics):
      raw_contact_forces = physics.data.cfrc_ext
      contact_forces = np.clip(raw_contact_forces, -1.0, 1.0)
      return contact_forces.flat.copy()

    return observable.Generic(clipped_contact_forces)
コード例 #6
0
ファイル: jaco_arm.py プロジェクト: yangyi0318/dm_control
 def joints_pos(self):
   # Because most of the Jaco arm joints are unlimited, we return the joint
   # angles as sine/cosine pairs so that the observations are bounded.
   def get_sin_cos_joint_angles(physics):
     joint_pos = physics.bind(self._entity.joints).qpos
     return np.vstack([np.sin(joint_pos), np.cos(joint_pos)]).T
   return observable.Generic(get_sin_cos_joint_angles)
コード例 #7
0
 def sensors_rangefinder(self):
   def tanh_rangefinder(physics):
     raw = physics.bind(self._entity.mjcf_model.sensor.rangefinder).sensordata
     raw = np.array(raw)
     raw[raw == -1.0] = np.inf
     return _RANGEFINDER_SCALE * np.tanh(raw / _RANGEFINDER_SCALE)
   return observable.Generic(tanh_rangefinder)
コード例 #8
0
  def __init__(
      self,
      walker: Callable[..., 'legacy_base.Walker'],
      arena: composer.Arena,
      ref_path: Text,
      ref_steps: Sequence[int],
      dataset: Union[Text, Sequence[Any]],
      termination_error_threshold: float = 0.3,
      min_steps: int = 10,
      reward_type: Text = 'termination_reward',
      physics_timestep: float = DEFAULT_PHYSICS_TIMESTEP,
      always_init_at_clip_start: bool = False,
      proto_modifier: Optional[Any] = None,
      ghost_offset: Optional[Sequence[Union[int, float]]] = None,
      body_error_multiplier: Union[int, float] = 1.0,
  ):
    """Mocap tracking task.

    Args:
      walker: Walker constructor to be used.
      arena: Arena to be used.
      ref_path: Path to the dataset containing reference poses.
      ref_steps: tuples of indices of reference observation. E.g if
        ref_steps=(1, 2, 3) the walker/reference observation at time t will
        contain information from t+1, t+2, t+3.
      dataset: dataset: A ClipCollection instance or a named dataset that
        appears as a key in DATASETS in datasets.py
      termination_error_threshold: Error threshold for episode terminations.
      min_steps: minimum number of steps within an episode. This argument
        determines the latest allowable starting point within a given reference
        trajectory.
      reward_type: type of reward to use, must be a string that appears as a key
        in the REWARD_FN dict in rewards.py.
      physics_timestep: Physics timestep to use for simulation.
      always_init_at_clip_start: only initialize epsidodes at the start of a
        reference trajectory.
      proto_modifier: Optional proto modifier to modify reference trajectories,
        e.g. adding a vertical offset.
      ghost_offset: if not None, include a ghost rendering of the walker with
        the reference pose at the specified position offset.
      body_error_multiplier: A multiplier that is applied to the body error term
        when determining failure termination condition.
    """
    super(MultiClipMocapTracking, self).__init__(
        walker=walker,
        arena=arena,
        ref_path=ref_path,
        ref_steps=ref_steps,
        termination_error_threshold=termination_error_threshold,
        min_steps=min_steps,
        dataset=dataset,
        reward_type=reward_type,
        physics_timestep=physics_timestep,
        always_init_at_clip_start=always_init_at_clip_start,
        proto_modifier=proto_modifier,
        ghost_offset=ghost_offset,
        body_error_multiplier=body_error_multiplier)
    self._walker.observables.add_observable(
        'time_in_clip',
        base_observable.Generic(self.get_normalized_time_in_clip))
コード例 #9
0
    def __init__(self,
                 walker,
                 arena,
                 walker_spawn_position=(0, 0, 0),
                 walker_spawn_rotation=None,
                 physics_timestep=0.005,
                 control_timestep=0.025):
        """Initializes this task.

    Args:
      walker: an instance of `locomotion.walkers.base.Walker`.
      arena: an instance of `locomotion.arenas`.
      walker_spawn_position: a sequence of 3 numbers, or a `composer.Variation`
        instance that generates such sequences, specifying the position at
        which the walker is spawned at the beginning of an episode.
      walker_spawn_rotation: a number, or a `composer.Variation` instance that
        generates a number, specifying the yaw angle offset (in radians) that is
        applied to the walker at the beginning of an episode.
      physics_timestep: a number specifying the timestep (in seconds) of the
        physics simulation.
      control_timestep: a number specifying the timestep (in seconds) at which
        the agent applies its control inputs (in seconds).
    """

        self._arena = arena
        self._walker = walker
        self._walker.create_root_joints(self._arena.attach(self._walker))
        self._walker_spawn_position = walker_spawn_position
        self._walker_spawn_rotation = walker_spawn_rotation

        enabled_observables = []
        enabled_observables += self._walker.observables.proprioception
        enabled_observables += self._walker.observables.kinematic_sensors
        enabled_observables += self._walker.observables.dynamic_sensors
        enabled_observables.append(self._walker.observables.sensors_touch)
        enabled_observables.append(self._walker.observables.egocentric_camera)
        for observable in enabled_observables:
            observable.enabled = True

        if 'CMUHumanoid' in str(type(self._walker)):
            core_body = 'walker/root'
            self._reward_body = 'walker/root'
        elif 'Rat' in str(type(self._walker)):
            core_body = 'walker/torso'
            self._reward_body = 'walker/head'
        else:
            raise ValueError('Expects Rat or CMUHumanoid.')

        def _origin(physics):
            """Returns origin position in the torso frame."""
            torso_frame = physics.named.data.xmat[core_body].reshape(3, 3)
            torso_pos = physics.named.data.xpos[core_body]
            return -torso_pos.dot(torso_frame)

        self._walker.observables.add_observable(
            'origin', base_observable.Generic(_origin))

        self.set_timesteps(physics_timestep=physics_timestep,
                           control_timestep=control_timestep)
コード例 #10
0
 def end_effectors_pos(self):
   """Position of end effectors relative to torso, in the egocentric frame."""
   def relative_pos_in_egocentric_frame(physics):
     end_effector = physics.bind(self._entity.end_effectors).xpos
     torso = physics.bind(self._entity.root_body).xpos
     xmat = np.reshape(physics.bind(self._entity.root_body).xmat, (3, 3))
     return np.reshape(np.dot(end_effector - torso, xmat), -1)
   return observable.Generic(relative_pos_in_egocentric_frame)
コード例 #11
0
    def orientation(self):
        def bodies_orientation(physics):
            """Compute relative orientation of the bodies."""
            # Get the bodies
            bodies = self._entity.bodies
            return np.stack(
                [physics.bind(bodies).xmat[1:, 0], physics.bind(bodies).xmat[1:, 2]]).T.ravel()

        return observable.Generic(bodies_orientation)
コード例 #12
0
  def add_egocentric_xmat(self, name, xmat_observable, enabled=True, **kwargs):

    def _egocentric(physics):
      return self._entity.transform_xmat_to_egocentric_frame(
          physics,
          xmat_observable.observation_callable(physics)())

    self._observables[name] = observable.Generic(_egocentric, **kwargs)
    self._observables[name].enabled = enabled
コード例 #13
0
ファイル: jaco_arm.py プロジェクト: yangyi0318/dm_control
 def joints_torque(self):
   # MuJoCo's torque sensors are 3-axis, but we are only interested in torques
   # acting about the axis of rotation of the joint. We therefore project the
   # torques onto the joint axis.
   def get_torques(physics):
     torques = physics.bind(self._entity.joint_torque_sensors).sensordata
     joint_axes = physics.bind(self._entity.joints).axis
     return np.einsum('ij,ij->i', torques.reshape(-1, 3), joint_axes)
   return observable.Generic(get_torques)
コード例 #14
0
 def appendages_pos(self):
   """Equivalent to `end_effectors_pos` with the head's position appended."""
   def relative_pos_in_egocentric_frame(physics):
     end_effectors_with_head = (
         self._entity.end_effectors + (self._entity.head,))
     end_effector = physics.bind(end_effectors_with_head).xpos
     torso = physics.bind(self._entity.root_body).xpos
     xmat = np.reshape(physics.bind(self._entity.root_body).xmat, (3, 3))
     return np.reshape(np.dot(end_effector - torso, xmat), -1)
   return observable.Generic(relative_pos_in_egocentric_frame)
コード例 #15
0
 def appendages_pos(self):
     """Equivalent to `end_effectors_pos` with the head's position appended."""
     def appendages_pos_in_egocentric_frame(physics):
         appendages = self._entity.end_effectors
         appendages_xpos = physics.bind(appendages).xpos
         root_xpos = physics.bind(self._entity.root_body).xpos
         root_xmat = np.reshape(physics.bind(self._entity.root_body).xmat, (3, 3))
         return np.reshape(
             np.dot(appendages_xpos - root_xpos, root_xmat), -1)
     return observable.Generic(appendages_pos_in_egocentric_frame)
コード例 #16
0
    def __init__(self, arena, arm, hand, num_bricks, randomize_initial_order,
                 randomize_desired_order, obs_settings, workspace,
                 control_timestep):
        """Initializes a new `Reassemble` task.

    Args:
      arena: `composer.Entity` instance.
      arm: `robot_base.RobotArm` instance.
      hand: `robot_base.RobotHand` instance.
      num_bricks: The total number of bricks; must be between 2 and 6.
      randomize_initial_order: Boolean specifying whether to randomize the
        initial order  of bricks in the stack at the start of each episode.
      randomize_desired_order: Boolean specifying whether to independently
        randomize the desired order of bricks in the stack at the start of each
        episode. By default the desired order will be the reverse of the initial
        order, with the exception of the base brick which is always the same as
        in the initial order since it is welded in place.
      obs_settings: `observations.ObservationSettings` instance.
      workspace: A `_BrickWorkspace` instance.
      control_timestep: Float specifying the control timestep in seconds.

    Raises:
      ValueError: If `num_bricks` is not between 2 and 6.
    """
        super(Reassemble, self).__init__(arena=arena,
                                         arm=arm,
                                         hand=hand,
                                         num_bricks=num_bricks,
                                         obs_settings=obs_settings,
                                         workspace=workspace,
                                         control_timestep=control_timestep)
        self._randomize_initial_order = randomize_initial_order
        self._randomize_desired_order = randomize_desired_order

        # Randomized at the start of each episode if `randomize_initial_order` is
        # True.
        self._initial_order = np.arange(num_bricks)

        # Randomized at the start of each episode if `randomize_desired_order` is
        # True.
        self._desired_order = self._initial_order.copy()
        self._desired_order[1:] = self._desired_order[-1:0:-1]

        # In the random order case, create a `prop_pose` observable that informs the
        # agent of the desired order.
        if randomize_desired_order:
            desired_order_observable = observable.Generic(
                self._get_desired_order)
            desired_order_observable.configure(
                **obs_settings.prop_pose._asdict())
            self._task_observables['desired_order'] = desired_order_observable

        # Distributions of positions and orientations for the base of the stack.
        self._base_pos = distributions.Uniform(*workspace.prop_bbox)
        self._base_quat = workspaces.uniform_z_rotation
コード例 #17
0
    def __init__(self, arena, arm, hand, num_bricks, target_height,
                 moveable_base, randomize_order, obs_settings, workspace,
                 control_timestep):
        """Initializes a new `Stack` task.

    Args:
      arena: `composer.Entity` instance.
      arm: `robot_base.RobotArm` instance.
      hand: `robot_base.RobotHand` instance.
      num_bricks: The total number of bricks; must be between 2 and 6.
      target_height: The target number of bricks in the stack in order to get
        maximum reward. Must be between 2 and `num_bricks`.
      moveable_base: Boolean specifying whether or not the bottom brick should
        be moveable.
      randomize_order: Boolean specifying whether to randomize the desired order
        of bricks in the stack at the start of each episode.
      obs_settings: `observations.ObservationSettings` instance.
      workspace: A `_BrickWorkspace` instance.
      control_timestep: Float specifying the control timestep in seconds.

    Raises:
      ValueError: If `num_bricks` is not between 2 and 6, or if
        `target_height` is not between 2 and `num_bricks - 1`.
    """
        if not 2 <= target_height <= num_bricks:
            raise ValueError(
                '`target_height` must be between 2 and {}, got {}.'.format(
                    num_bricks, target_height))

        super(Stack, self).__init__(arena=arena,
                                    arm=arm,
                                    hand=hand,
                                    num_bricks=num_bricks,
                                    obs_settings=obs_settings,
                                    workspace=workspace,
                                    control_timestep=control_timestep)

        self._moveable_base = moveable_base
        self._randomize_order = randomize_order
        self._target_height = target_height
        self._prop_bbox = workspace.prop_bbox

        # Shuffled at the start of each episode if `randomize_order` is True.
        self._desired_order = np.arange(target_height)

        # In the random order case, create a `prop_pose` observable that informs the
        # agent of the desired order.
        if randomize_order:
            desired_order_observable = observable.Generic(
                self._get_desired_order)
            desired_order_observable.configure(
                **obs_settings.prop_pose._asdict())
            self._task_observables['desired_order'] = desired_order_observable
コード例 #18
0
ファイル: observables.py プロジェクト: zwbgood6/dm_control
    def _add_player_arena_observables(self, player, arena):
        """Add observables of the arena.

    Args:
      player: A `Walker` instance to which observables will be added.
      arena: A `Pitch` instance.
    """
        # Enable egocentric view of position detectors (goal, field).
        # Corners named according to walker *facing towards opponent goal*.
        clockwise_names = [
            'team_goal_back_right',
            'team_goal_mid',
            'team_goal_front_left',
            'field_front_left',
            'opponent_goal_back_left',
            'opponent_goal_mid',
            'opponent_goal_front_right',
            'field_back_right',
        ]
        clockwise_features = [
            lambda _: arena.home_goal.lower[:2],
            lambda _: arena.home_goal.mid,
            lambda _: arena.home_goal.upper[:2],
            lambda _: arena.field.upper,
            lambda _: arena.away_goal.upper[:2],
            lambda _: arena.away_goal.mid,
            lambda _: arena.away_goal.lower[:2],
            lambda _: arena.field.lower,
        ]
        xpos_xyz_callable = lambda p: p.bind(player.walker.root_body).xpos
        xpos_xy_callable = lambda p: p.bind(player.walker.root_body).xpos[:2]
        # A list of egocentric reference origin for each one of clockwise_features.
        clockwise_origins = [
            xpos_xy_callable,
            xpos_xyz_callable,
            xpos_xy_callable,
            xpos_xy_callable,
            xpos_xy_callable,
            xpos_xyz_callable,
            xpos_xy_callable,
            xpos_xy_callable,
        ]
        if player.team != team_lib.Team.HOME:
            half = len(clockwise_features) // 2
            clockwise_features = clockwise_features[
                half:] + clockwise_features[:half]
            clockwise_origins = clockwise_origins[
                half:] + clockwise_origins[:half]

        for name, feature, origin in zip(clockwise_names, clockwise_features,
                                         clockwise_origins):
            player.walker.observables.add_egocentric_vector(
                name, base_observable.Generic(feature), origin_callable=origin)
コード例 #19
0
 def appendages_pos(self):
   """Equivalent to `end_effectors_pos` with the head's position appended."""
   def relative_pos_in_egocentric_frame(physics):
     if hasattr(self._entity, 'head'):
       appendages = (tuple(self._entity.end_effectors) + (self._entity.head,))
     else:
       appendages = self._entity.end_effectors
     appendages_pos_global = physics.bind(appendages).xpos
     torso = physics.bind(self._entity.root_body).xpos
     xmat = np.reshape(physics.bind(self._entity.root_body).xmat, (3, 3))
     return np.reshape(np.dot(appendages_pos_global - torso, xmat), -1)
   return observable.Generic(relative_pos_in_egocentric_frame)
コード例 #20
0
    def __init__(self, observation_settings, opponent, game_logic, board,
                 markers):
        """Initializes the task.

    Args:
      observation_settings: An `observations.ObservationSettings` namedtuple
        specifying configuration options for each category of observation.
      opponent: Opponent used for generating opponent moves.
      game_logic: Logic for keeping track of the logical state of the board.
      board: Board to use.
      markers: Markers to use.
    """
        self._game_logic = game_logic
        self._game_opponent = opponent
        arena = arenas.Standard(observable_options=observations.make_options(
            observation_settings, observations.ARENA_OBSERVABLES))
        arena.attach(board)
        arm = kinova.JacoArm(observable_options=observations.make_options(
            observation_settings, observations.JACO_ARM_OBSERVABLES))
        hand = kinova.JacoHand(observable_options=observations.make_options(
            observation_settings, observations.JACO_HAND_OBSERVABLES))
        arm.attach(hand)
        arena.attach_offset(arm, offset=(0, _ARM_Y_OFFSET, 0))
        arena.attach(markers)

        # Geoms belonging to the arm and hand are placed in a custom group in order
        # to disable their visibility to the top-down camera. NB: we assume that
        # there are no other geoms in ROBOT_GEOM_GROUP that don't belong to the
        # robot (this is usually the case since the default geom group is 0). If
        # there are then these will also be invisible to the top-down camera.
        for robot_geom in arm.mjcf_model.find_all('geom'):
            robot_geom.group = arenas.ROBOT_GEOM_GROUP

        self._arena = arena
        self._board = board
        self._arm = arm
        self._hand = hand
        self._markers = markers
        self._tcp_initializer = initializers.ToolCenterPointInitializer(
            hand=hand,
            arm=arm,
            position=distributions.Uniform(_TCP_LOWER_BOUNDS,
                                           _TCP_UPPER_BOUNDS),
            quaternion=_uniform_downward_rotation())

        # Add an observable exposing the logical state of the board.
        board_state_observable = observable.Generic(
            lambda physics: self._game_logic.get_board_state())
        board_state_observable.configure(
            **observation_settings.board_state._asdict())
        self._task_observables = {'board_state': board_state_observable}
コード例 #21
0
 def bodies_pos(self):
     """Position of bodies relative to root, in the egocentric frame."""
     def bodies_pos_in_egocentric_frame(physics):
         """Compute relative orientation of the bodies."""
         # Get the bodies
         bodies = self._entity.bodies
         # Get the positions of all the bodies & root in the global frame
         bodies_xpos = physics.bind(bodies).xpos
         root_xpos, _ = self._entity.get_pose(physics)
         # Compute the relative position of the bodies in the root frame
         root_xmat = np.reshape(physics.bind(self._entity.root_body).xmat, (3, 3))
         return np.reshape(
             np.dot(bodies_xpos - root_xpos, root_xmat), -1)
     return observable.Generic(bodies_pos_in_egocentric_frame)
コード例 #22
0
    def __init__(self,
                 walker,
                 maze_arena,
                 target=None,
                 target_reward_scale=1.0,
                 randomize_spawn_position=True,
                 randomize_spawn_rotation=True,
                 rotation_bias_factor=0,
                 aliveness_reward=0.0,
                 aliveness_threshold=DEFAULT_ALIVE_THRESHOLD,
                 contact_termination=True,
                 max_repeats=0,
                 enable_global_task_observables=False,
                 physics_timestep=DEFAULT_PHYSICS_TIMESTEP,
                 control_timestep=DEFAULT_CONTROL_TIMESTEP,
                 regenerate_maze_on_repeat=False):
        super(RepeatSingleGoalMaze, self).__init__(
            walker=walker,
            maze_arena=maze_arena,
            randomize_spawn_position=randomize_spawn_position,
            randomize_spawn_rotation=randomize_spawn_rotation,
            rotation_bias_factor=rotation_bias_factor,
            aliveness_reward=aliveness_reward,
            aliveness_threshold=aliveness_threshold,
            contact_termination=contact_termination,
            enable_global_task_observables=enable_global_task_observables,
            physics_timestep=physics_timestep,
            control_timestep=control_timestep)
        if target is None:
            target = target_sphere.TargetSphere()
        self._target = target
        self._rewarded_this_step = False
        self._maze_arena.attach(target)
        self._target_reward_scale = target_reward_scale
        self._max_repeats = max_repeats
        self._targets_obtained = 0
        self._regenerate_maze_on_repeat = regenerate_maze_on_repeat

        if enable_global_task_observables:
            xpos_origin_callable = lambda phys: phys.bind(walker.root_body
                                                          ).xpos

            def _target_pos(physics, target=target):
                return physics.bind(target.geom).xpos

            walker.observables.add_egocentric_vector(
                'target_0',
                observable_lib.Generic(_target_pos),
                origin_callable=xpos_origin_callable)
コード例 #23
0
  def add_egocentric_vector(self,
                            name,
                            world_frame_observable,
                            enabled=True,
                            origin_callable=None,
                            **kwargs):

    def _egocentric(physics, origin_callable=origin_callable):
      vec = world_frame_observable.observation_callable(physics)()
      origin_callable = origin_callable or (lambda physics: np.zeros(vec.size))
      delta = vec - origin_callable(physics)
      return self._entity.transform_vec_to_egocentric_frame(physics, delta)

    self._observables[name] = observable.Generic(_egocentric, **kwargs)
    self._observables[name].enabled = enabled
コード例 #24
0
    def bodies_quats(self):
        """Orientations of the bodies as quaternions, in the egocentric frame."""
        def bodies_orientations_in_egocentric_frame(physics):
            """Compute relative orientation of the bodies."""
            # Get the bodies
            bodies = self._entity.bodies
            # Get the quaternions of all the bodies &root in the global frame
            bodies_xquat = physics.bind(bodies).xquat
            root_xquat = physics.bind(self._entity.root_body).xquat
            # Compute the relative quaternion of the bodies in the root frame
            bodies_quat_diff = transformations.quat_diff(
                np.tile(root_xquat, len(bodies)).reshape(-1, 4),
                bodies_xquat)  # q1^-1 * q2
            return np.reshape(bodies_quat_diff, -1)

        return observable.Generic(bodies_orientations_in_egocentric_frame)
コード例 #25
0
 def bodies_quats(self):
     """Orientations of the bodies as quaternions, in the egocentric frame."""
     def bodies_orientations_in_egocentric_frame(physics):
         """Compute relative orientation of the bodies."""
         # Get the bodies
         bodies = self._entity.bodies
         # Get the quaternions of all the bodies &root in the global frame
         bodies_xquat = physics.bind(bodies).xquat
         root_xquat = physics.bind(self._entity.root_body).xquat
         # Compute the relative quaternion of the bodies in the root frame
         bodies_quat_diff = []
         for k in range(len(bodies)):
             bodies_quat_diff.append(
                 mjmath.mj_quatdiff(root_xquat, bodies_xquat[k]))  # q1^-1 * q2
         return np.reshape(np.stack(bodies_quat_diff, 0), -1)
     return observable.Generic(bodies_orientations_in_egocentric_frame)
コード例 #26
0
    def testObservationSpecInference(self):
        physics = fake_physics.FakePhysics()
        physics.observables['repeated'].buffer_size = 5
        physics.observables['matrix'].buffer_size = 4
        physics.observables['sqrt'] = observable.Generic(
            fake_physics.FakePhysics.sqrt, buffer_size=3)

        for obs in six.itervalues(physics.observables):
            obs.enabled = True

        observation_updater = updater.Updater(physics.observables)
        observation_updater.reset(physics=physics, random_state=None)

        spec = observation_updater.observation_spec()
        self.assertCorrectSpec(spec['repeated'], (5, 2), np.int, 'repeated')
        self.assertCorrectSpec(spec['matrix'], (4, 2, 3), np.int, 'matrix')
        self.assertCorrectSpec(spec['sqrt'], (3, ), np.float, 'sqrt')
コード例 #27
0
 def body_quats(self):
   """Orientations of the bodies as quaternions, in the egocentric frame."""
   def body_orientations_in_egocentric_frame(physics):
     """Compute relative orientation of the bodies."""
     # Get the bodies
     if hasattr(self._entity, 'mocap_bodies'):
       bodies = self._entity.mocap_bodies
     elif hasattr(self._entity, 'bodies'):
       bodies = self._entity.bodies[1:]  # remove presumed root
     else:
       bodies = self._entity.mjcf_model.find_all('body')
     # Get the quaternions of all the bodies in the global frame
     bodies_xquat = physics.bind(bodies).xquat
     root_xquat = physics.bind(self._entity.root_body).xquat
     # Compute the relative quaternion of the bodies in the torso frame
     bodies_quat_diff = []
     for k in range(len(bodies)):
       bodies_quat_diff.append(
           mjmath.mj_quatdiff(root_xquat, bodies_xquat[k]))  # q1^-1 * q2
     return np.reshape(np.stack(bodies_quat_diff, 0), -1)
   return observable.Generic(body_orientations_in_egocentric_frame)
コード例 #28
0
  def __init__(self, board_size, observation_settings, opponent=None,
               reset_arm_after_move=True):
    """Initializes a `Go` task.

    Args:
      board_size: board size
      observation_settings: An `observations.ObservationSettings` namedtuple
        specifying configuration options for each category of observation.
      opponent: Go opponent to use for the opponent player actions.
      reset_arm_after_move: Whether to reset arm to random position after every
        piece being placed on the board.
    """
    game_logic = go_logic.GoGameLogic(board_size=board_size)

    if opponent is None:
      opponent = go_logic.GoGTPOpponent(board_size=board_size,
                                        mixture_p=_DEFAULT_OPPONENT_MIXTURE)

    self._last_valid_move_is_pass = False
    super(Go, self).__init__(observation_settings=observation_settings,
                             opponent=opponent,
                             game_logic=game_logic,
                             board=boards.GoBoard(boardsize=board_size),
                             markers=pieces.Markers(
                                 player_colors=(_BLACK, _WHITE),
                                 halfwidth=_GO_PIECE_SIZE,
                                 num_per_player=board_size*board_size*2,
                                 observable_options=observations.make_options(
                                     observation_settings,
                                     observations.MARKER_OBSERVABLES),
                                 board_size=board_size))
    self._reset_arm_after_move = reset_arm_after_move
    # Add an observable exposing the move history (to reconstruct game states)
    move_history_observable = observable.Generic(
        lambda physics: self._game_logic.get_move_history())
    move_history_observable.configure(
        **observation_settings.board_state._asdict())
    self._task_observables['move_history'] = move_history_observable
コード例 #29
0
  def camera_joints_pos(self):

    def _sin(value, random_state):
      del random_state
      return np.sin(value)

    def _cos(value, random_state):
      del random_state
      return np.cos(value)

    sin_rotation_joints = observable.MJCFFeature(
        'qpos', self._entity.observable_camera_joints, corruptor=_sin)

    cos_rotation_joints = observable.MJCFFeature(
        'qpos', self._entity.observable_camera_joints, corruptor=_cos)

    def _camera_joints(physics):
      return np.concatenate([
          sin_rotation_joints(physics),
          cos_rotation_joints(physics)
      ], -1)

    return observable.Generic(_camera_joints)
コード例 #30
0
    def testVariableRatesAndDelays(self):
        physics = fake_physics.FakePhysics()
        physics.observables['time'] = observable.Generic(
            lambda physics: physics.time(),
            buffer_size=3,
            # observations produced on step numbers 20*N + [0, 3, 5, 8, 11, 15, 16]
            update_interval=DeterministicSequence([3, 2, 3, 3, 4, 1, 4]),
            # observations arrive on step numbers 20*N + [3, 8, 7, 12, 11, 17, 20]
            delay=DeterministicSequence([3, 5, 2, 5, 1, 2, 4]))
        physics.observables['time'].enabled = True

        physics_steps_per_control_step = 10
        observation_updater = updater.Updater(physics.observables,
                                              physics_steps_per_control_step)
        observation_updater.reset(physics=physics, random_state=None)

        # Run through a few cycles of the variation sequences to make sure that
        # cross-control-boundary behaviour is correct.
        for i in range(5):
            observation_updater.prepare_for_next_control_step()
            for _ in range(physics_steps_per_control_step):
                physics.step()
                observation_updater.update()
            np.testing.assert_array_equal(
                observation_updater.get_observation()['time'],
                20 * i + np.array([0, 5, 3]))

            observation_updater.prepare_for_next_control_step()
            for _ in range(physics_steps_per_control_step):
                physics.step()
                observation_updater.update()
            # Note that #11 is dropped since it arrives after #8,
            # whose large delay caused it to cross the control step boundary at #10.
            np.testing.assert_array_equal(
                observation_updater.get_observation()['time'],
                20 * i + np.array([8, 15, 16]))