def _init(): env_config = solo8v2vanilla.Solo8VanillaConfig() env = gym.make('solo8vanilla-v0', config=env_config, normalize_actions=False) env.obs_factory.register_observation(obs.TorsoIMU(env.robot)) env.obs_factory.register_observation(obs.MotorEncoder(env.robot)) env.termination_factory.register_termination( terms.TimeBasedTermination(length)) stand_reward = rewards.AdditiveReward() stand_reward.client = env.client stand_reward.add_term(0.5, rewards.FlatTorsoReward(env.robot)) stand_reward.add_term( 0.5, rewards.TorsoHeightReward(env.robot, quad_standing_height)) home_pos_reward = rewards.MultiplicitiveReward( 1, stand_reward, rewards.SmallControlReward(env.robot), rewards.HorizontalMoveSpeedReward(env.robot, 0)) home_pos_reward.client = env.client env.reward_factory.register_reward(1, home_pos_reward) return env
def test_reset(self): max_step_delta = 3 term = termination.TimeBasedTermination(max_step_delta) for i in range(max_step_delta): self.assertFalse(term.is_terminated()) term.reset() self.assertEqual(0, term.step_delta)
def test_is_terminated(self): max_step_delta = 3 term = termination.TimeBasedTermination(max_step_delta) for i in range(max_step_delta): self.assertEqual(False, term.is_terminated()) self.assertEqual(i + 1, term.step_delta) self.assertEqual(True, term.is_terminated()) self.assertEqual(max_step_delta + 1, term.step_delta)
def _init(): env_config = solo8v2vanilla.Solo8VanillaConfig() env = gym.make('solo8vanilla-v0', config=env_config, normalize_actions=False) env.obs_factory.register_observation(obs.TorsoIMU(env.robot)) env.obs_factory.register_observation(obs.MotorEncoder(env.robot)) env.reward_factory.register_reward(1, rewards.UprightReward(env.robot)) env.termination_factory.register_termination(terms.TimeBasedTermination(length)) return env
def _init(): env_config = solo8v2vanilla.Solo8VanillaConfig() env = gym.make('solo8vanilla-v0', config=env_config, normalize_actions=False) env.obs_factory.register_observation(obs.TorsoIMU(env.robot)) env.obs_factory.register_observation(obs.MotorEncoder(env.robot)) env.termination_factory.register_termination( terms.TimeBasedTermination(length)) env.reward_factory.register_reward( .2, rewards.SmallControlReward(env.robot)) env.reward_factory.register_reward( .2, rewards.HorizontalMoveSpeedReward(env.robot, 0)) env.reward_factory.register_reward(.3, rewards.FlatTorsoReward(env.robot)) env.reward_factory.register_reward( .3, rewards.TorsoHeightReward(env.robot, quad_standing_height)) return env
def _init(): env_config = solo8v2vanilla.Solo8VanillaConfig() env_config.max_motor_rotation = max_motor_rot env = gym.make('solo8vanilla-v0', config=env_config, normalize_actions=True, normalize_observations=True) env.obs_factory.register_observation(obs.TorsoIMU(env.robot)) env.obs_factory.register_observation( obs.MotorEncoder(env.robot, max_rotation=max_motor_rot)) env.termination_factory.register_termination( terms.TimeBasedTermination(length)) stand_reward = rewards.AdditiveReward() stand_reward.client = env.client stand_reward.add_term( 0.5, rewards.FlatTorsoReward(env.robot, hard_margin=fhm, soft_margin=fsm)) stand_reward.add_term( 0.5, rewards.TorsoHeightReward(env.robot, stand_height, hard_margin=thm, soft_margin=tsm)) home_pos_reward = rewards.MultiplicitiveReward( 1, stand_reward, rewards.SmallControlReward(env.robot, margin=scm), rewards.HorizontalMoveSpeedReward(env.robot, 0, hard_margin=hmhm, soft_margin=hmsm)) env.reward_factory.register_reward(1, home_pos_reward) return env
def test_attributes(self): max_step_delta = 3 term = termination.TimeBasedTermination(max_step_delta) self.assertEqual(max_step_delta, term.max_step_delta) self.assertEqual(0, term.step_delta)
from gym_solo.envs import solo8v2vanilla from gym_solo.core import obs from gym_solo.core import rewards from gym_solo.core import termination as terms if __name__ == '__main__': config = solo8v2vanilla.Solo8VanillaConfig() env = gym.make('solo8vanilla-v0', use_gui=True, realtime=True, config=config) env.obs_factory.register_observation(obs.TorsoIMU(env.robot)) env.reward_factory.register_reward(1, rewards.UprightReward(env.robot)) env.termination_factory.register_termination( terms.TimeBasedTermination(1000)) try: print("""\n ============================================= Solo 8 v2 Vanilla Episode Test Simulation Active. Exit with ^C. ============================================= """) for i in range(10): env.reset() done = False