def test_action_normalization(self):
    """Verify normalized actions map [-1, 1] onto the motor rotation limits.

    With normalize_actions=True, a per-joint action of -1 / 0 / +1 should be
    translated to -max / 0 / +max motor rotation before being forwarded to
    the physics client.
    """
    joint_cnt = 12
    config = solo_env.Solo8VanillaConfig()
    config.max_motor_rotation = 10

    env = solo_env.Solo8VanillaEnv(config=config, normalize_actions=True)
    env.obs_factory.register_observation(CompliantObs(None))
    env.termination_factory.register_termination(DummyTermination(0, True))
    env.reward_factory.register_reward(1, SimpleReward())

    # Swap in a mock physics client so the motor commands can be inspected.
    mock_client = mock.MagicMock()
    env.client = mock_client

    cases = (
        ('min bounds', -1., -config.max_motor_rotation),
        ('max bounds', 1., config.max_motor_rotation),
        ('at 0', 0., 0),
    )
    for label, action, expected in cases:
        with self.subTest(label):
            env.step([action] * joint_cnt)
            _, kwargs = mock_client.setJointMotorControlArray.call_args_list[-1]
            np.testing.assert_array_equal(
                kwargs['targetPositions'],
                np.array([expected] * joint_cnt))
def _init():
    """Build a solo8vanilla env rewarded for holding its home standing pose."""
    env_config = solo8v2vanilla.Solo8VanillaConfig()
    env = gym.make('solo8vanilla-v0', config=env_config,
                   normalize_actions=False)

    env.obs_factory.register_observation(obs.TorsoIMU(env.robot))
    env.obs_factory.register_observation(obs.MotorEncoder(env.robot))
    env.termination_factory.register_termination(
        terms.TimeBasedTermination(length))

    # Standing = flat torso AND correct torso height, weighted equally.
    standing = rewards.AdditiveReward()
    standing.client = env.client
    standing.add_term(0.5, rewards.FlatTorsoReward(env.robot))
    standing.add_term(
        0.5, rewards.TorsoHeightReward(env.robot, quad_standing_height))

    # Gate the standing reward by small-control and no-horizontal-motion
    # factors so the robot is rewarded for standing *still*.
    home_pose = rewards.MultiplicitiveReward(
        1, standing,
        rewards.SmallControlReward(env.robot),
        rewards.HorizontalMoveSpeedReward(env.robot, 0))
    home_pose.client = env.client

    env.reward_factory.register_reward(1, home_pose)
    return env
def test_disjoint_environments(self):
    """Two environments must not share simulation state.

    Stepping the first environment for many frames should not perturb the
    home position that a freshly constructed second environment resets to.
    """
    def build():
        # Identical construction for both environments under test.
        env = solo_env.Solo8VanillaEnv(config=solo_env.Solo8VanillaConfig())
        env.obs_factory.register_observation(solo_obs.TorsoIMU(env.robot))
        env.obs_factory.register_observation(solo_obs.MotorEncoder(env.robot))
        env.reward_factory.register_reward(1, SimpleReward())
        env.termination_factory.register_termination(DummyTermination(0, True))
        return env

    env1 = build()
    home_position = env1.reset()
    for _ in range(1000):
        env1.step(env1.action_space.sample())

    env2 = build()
    np.testing.assert_array_almost_equal(home_position, env2.reset())
def test_action_space(self):
    """The action space should span +/- the configured torque limit per joint."""
    limit = 2 * np.pi
    joint_cnt = 12  # 8 dof + 4 "ankle" joints
    expected = spaces.Box(-limit, limit, shape=(joint_cnt, ))

    config = solo_env.Solo8VanillaConfig(motor_torque_limit=limit)
    env = solo_env.Solo8VanillaEnv(config=config)

    self.assertEqual(env.action_space, expected)
def test_realtime(self, mock_time):
    """A realtime environment should invoke the (mocked) time call on step."""
    env = solo_env.Solo8VanillaEnv(
        config=solo_env.Solo8VanillaConfig(), realtime=True)
    env.reward_factory.register_reward(1, SimpleReward())
    env.obs_factory.register_observation(CompliantObs(None))
    env.termination_factory.register_termination(DummyTermination(0, True))

    env.step(env.action_space.sample())

    self.assertTrue(mock_time.called)
def _init():
    """Create a solo8vanilla env with a single upright-torso reward."""
    cfg = solo8v2vanilla.Solo8VanillaConfig()
    environment = gym.make('solo8vanilla-v0', config=cfg,
                           normalize_actions=False)

    environment.obs_factory.register_observation(
        obs.TorsoIMU(environment.robot))
    environment.obs_factory.register_observation(
        obs.MotorEncoder(environment.robot))
    environment.reward_factory.register_reward(
        1, rewards.UprightReward(environment.robot))
    environment.termination_factory.register_termination(
        terms.TimeBasedTermination(length))
    return environment
def _init():
    """Create a solo8vanilla env with a weighted sum of standing rewards."""
    env_config = solo8v2vanilla.Solo8VanillaConfig()
    env = gym.make('solo8vanilla-v0', config=env_config,
                   normalize_actions=False)

    env.obs_factory.register_observation(obs.TorsoIMU(env.robot))
    env.obs_factory.register_observation(obs.MotorEncoder(env.robot))
    env.termination_factory.register_termination(
        terms.TimeBasedTermination(length))

    # Weights sum to 1: control effort and stillness get .2 each,
    # torso flatness and torso height get .3 each.
    weighted_rewards = (
        (.2, rewards.SmallControlReward(env.robot)),
        (.2, rewards.HorizontalMoveSpeedReward(env.robot, 0)),
        (.3, rewards.FlatTorsoReward(env.robot)),
        (.3, rewards.TorsoHeightReward(env.robot, quad_standing_height)),
    )
    for weight, reward in weighted_rewards:
        env.reward_factory.register_reward(weight, reward)
    return env
def _init():
    """Create a normalized solo8vanilla env rewarded for standing in place.

    Reads the closure variables max_motor_rot, length, stand_height, and the
    margin parameters (fhm/fsm, thm/tsm, scm, hmhm/hmsm) to shape the reward
    terms.
    """
    env_config = solo8v2vanilla.Solo8VanillaConfig()
    env_config.max_motor_rotation = max_motor_rot
    env = gym.make('solo8vanilla-v0', config=env_config,
                   normalize_actions=True, normalize_observations=True)

    env.obs_factory.register_observation(obs.TorsoIMU(env.robot))
    env.obs_factory.register_observation(
        obs.MotorEncoder(env.robot, max_rotation=max_motor_rot))
    env.termination_factory.register_termination(
        terms.TimeBasedTermination(length))

    # Standing = flat torso AND correct torso height, weighted equally.
    stand_reward = rewards.AdditiveReward()
    stand_reward.client = env.client
    stand_reward.add_term(
        0.5, rewards.FlatTorsoReward(env.robot, hard_margin=fhm,
                                     soft_margin=fsm))
    stand_reward.add_term(
        0.5, rewards.TorsoHeightReward(env.robot, stand_height,
                                       hard_margin=thm, soft_margin=tsm))

    # Gate the standing reward by small-control and no-horizontal-motion
    # factors so the robot is rewarded for standing *still*.
    home_pos_reward = rewards.MultiplicitiveReward(
        1, stand_reward,
        rewards.SmallControlReward(env.robot, margin=scm),
        rewards.HorizontalMoveSpeedReward(env.robot, 0, hard_margin=hmhm,
                                          soft_margin=hmsm))
    # Fix: attach the physics client to the composite reward as well; the
    # sibling _init factory does this, but here only the inner AdditiveReward
    # previously had its client set.
    home_pos_reward.client = env.client

    env.reward_factory.register_reward(1, home_pos_reward)
    return env
"""A demo for the Solo8 v2 Vanilla with configurable position control on the the joints. """ import gym import numpy as np from gym_solo.envs import solo8v2vanilla from gym_solo.core import obs from gym_solo.core import rewards from gym_solo.core import termination as terms if __name__ == '__main__': config = solo8v2vanilla.Solo8VanillaConfig() env: solo8v2vanilla.Solo8VanillaEnv = gym.make('solo8vanilla-v0', use_gui=True, realtime=True, config=config) env.obs_factory.register_observation(obs.TorsoIMU(env.robot)) env.termination_factory.register_termination(terms.PerpetualTermination()) flat = rewards.FlatTorsoReward(env.robot, hard_margin=.1, soft_margin=np.pi) height = rewards.TorsoHeightReward(env.robot, 0.33698, 0.025, 0.15) small_control = rewards.SmallControlReward(env.robot, margin=10) no_move = rewards.HorizontalMoveSpeedReward(env.robot, 0, hard_margin=.5, soft_margin=3) stand = rewards.AdditiveReward() stand.client = env.client stand.add_term(0.5, flat)
def test_GUI_default(self, fake_reset, mock_client):
    """By default the env should connect to pybullet headless (DIRECT)."""
    config = solo_env.Solo8VanillaConfig()
    solo_env.Solo8VanillaEnv(config=config)
    mock_client.assert_called_with(connection_mode=p.DIRECT)
def setUp(self):
    """Create a fresh vanilla env with a constant reward for each test."""
    config = solo_env.Solo8VanillaConfig()
    self.env = solo_env.Solo8VanillaEnv(config=config)
    self.env.reward_factory.register_reward(1, SimpleReward())
def test_step_no_rewards(self):
    """Stepping an env with no registered rewards should raise ValueError."""
    env = solo_env.Solo8VanillaEnv(config=solo_env.Solo8VanillaConfig())
    with self.assertRaises(ValueError):
        # Fix: size the zero action from the local reward-less env under
        # test rather than self.env, which setUp already gave a reward.
        env.step(np.zeros(env.action_space.shape[0]))
def test_normalized_action_space(self):
    """With normalize_actions=True every joint action is bounded to [-1, 1]."""
    env = solo_env.Solo8VanillaEnv(
        config=solo_env.Solo8VanillaConfig(), normalize_actions=True)
    expected = spaces.Box(-1, 1, shape=(12,))
    self.assertEqual(env.action_space, expected)