def _init():
        env_config = solo8v2vanilla.Solo8VanillaConfig()
        env = gym.make('solo8vanilla-v0',
                       config=env_config,
                       normalize_actions=False)

        env.obs_factory.register_observation(obs.TorsoIMU(env.robot))
        env.obs_factory.register_observation(obs.MotorEncoder(env.robot))
        env.termination_factory.register_termination(
            terms.TimeBasedTermination(length))

        stand_reward = rewards.AdditiveReward()
        stand_reward.client = env.client

        stand_reward.add_term(0.5, rewards.FlatTorsoReward(env.robot))
        stand_reward.add_term(
            0.5, rewards.TorsoHeightReward(env.robot, quad_standing_height))

        home_pos_reward = rewards.MultiplicitiveReward(
            1, stand_reward, rewards.SmallControlReward(env.robot),
            rewards.HorizontalMoveSpeedReward(env.robot, 0))
        home_pos_reward.client = env.client

        env.reward_factory.register_reward(1, home_pos_reward)

        return env
Esempio n. 2
0
    def test_client_passthrough(self):
        client = "client"
        sub_r0 = ReflectiveReward(1)
        sub_r1 = ReflectiveReward(1)

        r = rewards.MultiplicitiveReward(1, sub_r0, sub_r1)
        r.client = client

        self.assertEqual(sub_r0.client, client)
        self.assertEqual(sub_r1.client, client)
        self.assertEqual(r.client, client)
Esempio n. 3
0
    def _init():
        env_config = solo8v2vanilla.Solo8VanillaConfig()
        env_config.max_motor_rotation = max_motor_rot
        env = gym.make('solo8vanilla-v0',
                       config=env_config,
                       normalize_actions=True,
                       normalize_observations=True)

        env.obs_factory.register_observation(obs.TorsoIMU(env.robot))
        env.obs_factory.register_observation(
            obs.MotorEncoder(env.robot, max_rotation=max_motor_rot))
        env.termination_factory.register_termination(
            terms.TimeBasedTermination(length))

        stand_reward = rewards.AdditiveReward()
        stand_reward.client = env.client

        stand_reward.add_term(
            0.5,
            rewards.FlatTorsoReward(env.robot,
                                    hard_margin=fhm,
                                    soft_margin=fsm))
        stand_reward.add_term(
            0.5,
            rewards.TorsoHeightReward(env.robot,
                                      stand_height,
                                      hard_margin=thm,
                                      soft_margin=tsm))

        home_pos_reward = rewards.MultiplicitiveReward(
            1, stand_reward, rewards.SmallControlReward(env.robot, margin=scm),
            rewards.HorizontalMoveSpeedReward(env.robot,
                                              0,
                                              hard_margin=hmhm,
                                              soft_margin=hmsm))

        env.reward_factory.register_reward(1, home_pos_reward)
        return env
  env.obs_factory.register_observation(obs.TorsoIMU(env.robot))
  env.termination_factory.register_termination(terms.PerpetualTermination())

  flat = rewards.FlatTorsoReward(env.robot, hard_margin=.1, soft_margin=np.pi)
  height = rewards.TorsoHeightReward(env.robot, 0.33698, 0.025, 0.15)
  
  small_control = rewards.SmallControlReward(env.robot, margin=10)
  no_move = rewards.HorizontalMoveSpeedReward(env.robot, 0, hard_margin=.5, 
                                              soft_margin=3)
  
  stand = rewards.AdditiveReward()
  stand.client = env.client
  stand.add_term(0.5, flat)
  stand.add_term(0.5, height)

  home_pos = rewards.MultiplicitiveReward(1, stand, small_control, no_move)
  env.reward_factory.register_reward(1, home_pos)

  joint_params = []
  num_joints = env.client.getNumJoints(env.robot)

  for joint in range(num_joints):
    joint_params.append(env.client.addUserDebugParameter(
      'Joint {}'.format(
        env.client.getJointInfo(env.robot, joint)[1].decode('UTF-8')),
      -2 * np.pi, 2 * np.pi, 0))

  camera_params = {
    'fov': env.client.addUserDebugParameter('fov', 30, 150, 80),
    'distance': env.client.addUserDebugParameter('distance', .1, 5, 1.5),
    'yaw': env.client.addUserDebugParameter('yaw', -90, 90, 0),
Esempio n. 5
0
 def test_compute(self, name, coeff, values, expected_result):
     sub_r = (ReflectiveReward(v) for v in values)
     r = rewards.MultiplicitiveReward(coeff, *sub_r)
     r.client = 'client'
     self.assertEqual(r.compute(), expected_result)
Esempio n. 6
0
    def test_empty(self):
        r = rewards.MultiplicitiveReward(0)
        self.assertTupleEqual(r._terms, ())

        with self.assertRaises(ValueError):
            r.compute()