def _init(): env_config = solo8v2vanilla.Solo8VanillaConfig() env = gym.make('solo8vanilla-v0', config=env_config, normalize_actions=False) env.obs_factory.register_observation(obs.TorsoIMU(env.robot)) env.obs_factory.register_observation(obs.MotorEncoder(env.robot)) env.termination_factory.register_termination( terms.TimeBasedTermination(length)) stand_reward = rewards.AdditiveReward() stand_reward.client = env.client stand_reward.add_term(0.5, rewards.FlatTorsoReward(env.robot)) stand_reward.add_term( 0.5, rewards.TorsoHeightReward(env.robot, quad_standing_height)) home_pos_reward = rewards.MultiplicitiveReward( 1, stand_reward, rewards.SmallControlReward(env.robot), rewards.HorizontalMoveSpeedReward(env.robot, 0)) home_pos_reward.client = env.client env.reward_factory.register_reward(1, home_pos_reward) return env
def test_client_passthrough(self): client = "client" sub_r0 = ReflectiveReward(1) sub_r1 = ReflectiveReward(1) r = rewards.MultiplicitiveReward(1, sub_r0, sub_r1) r.client = client self.assertEqual(sub_r0.client, client) self.assertEqual(sub_r1.client, client) self.assertEqual(r.client, client)
def _init(): env_config = solo8v2vanilla.Solo8VanillaConfig() env_config.max_motor_rotation = max_motor_rot env = gym.make('solo8vanilla-v0', config=env_config, normalize_actions=True, normalize_observations=True) env.obs_factory.register_observation(obs.TorsoIMU(env.robot)) env.obs_factory.register_observation( obs.MotorEncoder(env.robot, max_rotation=max_motor_rot)) env.termination_factory.register_termination( terms.TimeBasedTermination(length)) stand_reward = rewards.AdditiveReward() stand_reward.client = env.client stand_reward.add_term( 0.5, rewards.FlatTorsoReward(env.robot, hard_margin=fhm, soft_margin=fsm)) stand_reward.add_term( 0.5, rewards.TorsoHeightReward(env.robot, stand_height, hard_margin=thm, soft_margin=tsm)) home_pos_reward = rewards.MultiplicitiveReward( 1, stand_reward, rewards.SmallControlReward(env.robot, margin=scm), rewards.HorizontalMoveSpeedReward(env.robot, 0, hard_margin=hmhm, soft_margin=hmsm)) env.reward_factory.register_reward(1, home_pos_reward) return env
env.obs_factory.register_observation(obs.TorsoIMU(env.robot)) env.termination_factory.register_termination(terms.PerpetualTermination()) flat = rewards.FlatTorsoReward(env.robot, hard_margin=.1, soft_margin=np.pi) height = rewards.TorsoHeightReward(env.robot, 0.33698, 0.025, 0.15) small_control = rewards.SmallControlReward(env.robot, margin=10) no_move = rewards.HorizontalMoveSpeedReward(env.robot, 0, hard_margin=.5, soft_margin=3) stand = rewards.AdditiveReward() stand.client = env.client stand.add_term(0.5, flat) stand.add_term(0.5, height) home_pos = rewards.MultiplicitiveReward(1, stand, small_control, no_move) env.reward_factory.register_reward(1, home_pos) joint_params = [] num_joints = env.client.getNumJoints(env.robot) for joint in range(num_joints): joint_params.append(env.client.addUserDebugParameter( 'Joint {}'.format( env.client.getJointInfo(env.robot, joint)[1].decode('UTF-8')), -2 * np.pi, 2 * np.pi, 0)) camera_params = { 'fov': env.client.addUserDebugParameter('fov', 30, 150, 80), 'distance': env.client.addUserDebugParameter('distance', .1, 5, 1.5), 'yaw': env.client.addUserDebugParameter('yaw', -90, 90, 0),
def test_compute(self, name, coeff, values, expected_result): sub_r = (ReflectiveReward(v) for v in values) r = rewards.MultiplicitiveReward(coeff, *sub_r) r.client = 'client' self.assertEqual(r.compute(), expected_result)
def test_empty(self): r = rewards.MultiplicitiveReward(0) self.assertTupleEqual(r._terms, ()) with self.assertRaises(ValueError): r.compute()