def _init():
  env_config = solo8v2vanilla.Solo8VanillaConfig()
  env = gym.make('solo8vanilla-v0', config=env_config,
                 normalize_actions=False)

  # Observations: torso IMU readings plus raw joint angles.
  env.obs_factory.register_observation(obs.TorsoIMU(env.robot))
  env.obs_factory.register_observation(obs.MotorEncoder(env.robot))

  # Episodes end after a fixed number of timesteps.
  env.termination_factory.register_termination(
    terms.TimeBasedTermination(length))

  # "Standing" is a weighted sum of keeping the torso flat and holding it
  # at the target quadrupedal standing height.
  stand_reward = rewards.AdditiveReward()
  stand_reward.client = env.client
  stand_reward.add_term(0.5, rewards.FlatTorsoReward(env.robot))
  stand_reward.add_term(
    0.5, rewards.TorsoHeightReward(env.robot, quad_standing_height))

  # The final reward multiplies the standing term by terms that favor small
  # control effort and near-zero horizontal speed, so all must hold at once.
  home_pos_reward = rewards.MultiplicitiveReward(
    1, stand_reward,
    rewards.SmallControlReward(env.robot),
    rewards.HorizontalMoveSpeedReward(env.robot, 0))
  home_pos_reward.client = env.client

  env.reward_factory.register_reward(1, home_pos_reward)
  return env
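A factory like _init is typically handed to a vectorized-environment wrapper during training. The following is a minimal sketch only, assuming stable-baselines3 as the training library, that _init is importable at module level, and that length and quad_standing_height are the closure variables it captures; none of this is prescribed by the snippet above.

# Hypothetical training harness for the _init factory above.
# stable-baselines3, the placeholder values, and the PPO choice are all
# assumptions made for illustration.
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import SubprocVecEnv

length = 1000                # episode length before TimeBasedTermination fires (placeholder)
quad_standing_height = 0.2   # target torso height in meters (placeholder)

if __name__ == '__main__':
  # Each callable builds an independent simulation; SubprocVecEnv runs them
  # in separate worker processes.
  vec_env = SubprocVecEnv([_init for _ in range(4)])
  model = PPO('MlpPolicy', vec_env, verbose=1)
  model.learn(total_timesteps=100_000)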
def test_disjoint_environments(self):
  env1 = solo_env.Solo8VanillaEnv(config=solo_env.Solo8VanillaConfig())
  env1.obs_factory.register_observation(solo_obs.TorsoIMU(env1.robot))
  env1.obs_factory.register_observation(solo_obs.MotorEncoder(env1.robot))
  env1.reward_factory.register_reward(1, SimpleReward())
  env1.termination_factory.register_termination(DummyTermination(0, True))

  # Drive the first environment well away from its home position.
  home_position = env1.reset()
  for i in range(1000):
    env1.step(env1.action_space.sample())

  # A second, independently constructed environment must be unaffected by
  # the first one's simulation state and reset to the same home position.
  env2 = solo_env.Solo8VanillaEnv(config=solo_env.Solo8VanillaConfig())
  env2.obs_factory.register_observation(solo_obs.TorsoIMU(env2.robot))
  env2.obs_factory.register_observation(solo_obs.MotorEncoder(env2.robot))
  env2.reward_factory.register_reward(1, SimpleReward())
  env2.termination_factory.register_termination(DummyTermination(0, True))

  np.testing.assert_array_almost_equal(home_position, env2.reset())
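The test above relies on SimpleReward and DummyTermination fixtures that are not shown here. A plausible shape for these test doubles is sketched below; the module path gym_solo.core.termination, the base classes Reward/Termination, and the method names compute(), reset(), and is_terminated() are assumptions about the library's interfaces, not confirmed signatures.

# Assumed test doubles for the compliance test above -- every interface
# detail here is an assumption made for illustration.
from gym_solo.core import rewards as solo_rewards
from gym_solo.core import termination as solo_terms


class SimpleReward(solo_rewards.Reward):
  def compute(self) -> float:
    # Constant reward; the test only checks determinism of reset().
    return 1.


class DummyTermination(solo_terms.Termination):
  def __init__(self, dummy_value, is_term: bool):
    self.dummy_value = dummy_value
    self.is_term = is_term

  def reset(self):
    pass

  def is_terminated(self) -> bool:
    # Always report the flag passed at construction time.
    return self.is_term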
def _init():
  env_config = solo8v2vanilla.Solo8VanillaConfig()
  env = gym.make('solo8vanilla-v0', config=env_config,
                 normalize_actions=False)

  env.obs_factory.register_observation(obs.TorsoIMU(env.robot))
  env.obs_factory.register_observation(obs.MotorEncoder(env.robot))
  env.reward_factory.register_reward(1, rewards.UprightReward(env.robot))
  env.termination_factory.register_termination(
    terms.TimeBasedTermination(length))
  return env
def _init():
  env_config = solo8v2vanilla.Solo8VanillaConfig()
  env = gym.make('solo8vanilla-v0', config=env_config,
                 normalize_actions=False)

  env.obs_factory.register_observation(obs.TorsoIMU(env.robot))
  env.obs_factory.register_observation(obs.MotorEncoder(env.robot))
  env.termination_factory.register_termination(
    terms.TimeBasedTermination(length))

  # Each reward term is registered directly with its own weight instead of
  # being composed into Additive/Multiplicitive wrappers.
  env.reward_factory.register_reward(
    .2, rewards.SmallControlReward(env.robot))
  env.reward_factory.register_reward(
    .2, rewards.HorizontalMoveSpeedReward(env.robot, 0))
  env.reward_factory.register_reward(.3, rewards.FlatTorsoReward(env.robot))
  env.reward_factory.register_reward(
    .3, rewards.TorsoHeightReward(env.robot, quad_standing_height))
  return env
def test_step_and_reset(self):
  # Realtime only works in GUI mode
  env = solo_env.RealtimeSolo8VanillaEnv(use_gui=True)
  env.obs_factory.register_observation(obs.TorsoIMU(env.robot))
  env.reset()

  # With no step command issued, the robot should hold its pose even as
  # wall-clock time passes.
  starting_pos, _ = env.obs_factory.get_obs()
  time.sleep(.5)
  sleep_pos, _ = env.obs_factory.get_obs()

  # After a step command, the robot keeps moving toward the commanded
  # positions in real time, so later readings should differ.
  env.step(env.action_space.sample())
  init_step_pos, _ = env.obs_factory.get_obs()
  time.sleep(.5)
  end_step_pos, _ = env.obs_factory.get_obs()

  self.assertEqual(len(starting_pos), len(sleep_pos))
  self.assertEqual(len(sleep_pos), len(init_step_pos))
  self.assertEqual(len(init_step_pos), len(end_step_pos))

  np.testing.assert_array_almost_equal(starting_pos, sleep_pos, decimal=3)
  with self.assertRaises(AssertionError):
    np.testing.assert_array_almost_equal(init_step_pos, end_step_pos)
def _init():
  env_config = solo8v2vanilla.Solo8VanillaConfig()
  env_config.max_motor_rotation = max_motor_rot
  env = gym.make('solo8vanilla-v0', config=env_config,
                 normalize_actions=True,
                 normalize_observations=True)

  env.obs_factory.register_observation(obs.TorsoIMU(env.robot))
  env.obs_factory.register_observation(
    obs.MotorEncoder(env.robot, max_rotation=max_motor_rot))
  env.termination_factory.register_termination(
    terms.TimeBasedTermination(length))

  stand_reward = rewards.AdditiveReward()
  stand_reward.client = env.client

  stand_reward.add_term(
    0.5, rewards.FlatTorsoReward(env.robot, hard_margin=fhm,
                                 soft_margin=fsm))
  stand_reward.add_term(
    0.5, rewards.TorsoHeightReward(env.robot, stand_height,
                                   hard_margin=thm, soft_margin=tsm))

  home_pos_reward = rewards.MultiplicitiveReward(
    1, stand_reward,
    rewards.SmallControlReward(env.robot, margin=scm),
    rewards.HorizontalMoveSpeedReward(env.robot, 0,
                                      hard_margin=hmhm, soft_margin=hmsm))

  env.reward_factory.register_reward(1, home_pos_reward)
  return env
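This variant leaves the motor range, reward margins, and episode length as free variables (max_motor_rot, fhm, fsm, thm, tsm, scm, hmhm, hmsm, stand_height, length) that are captured from the enclosing scope. The sketch below only illustrates what such a binding might look like; every value is a placeholder, not a tuned or recommended setting.

# Illustrative placeholder hyperparameters for the normalized _init above.
import numpy as np

length = 1000                # steps before TimeBasedTermination fires
max_motor_rot = np.pi / 2    # clamp motor commands to +/- 90 degrees
stand_height = 0.2           # target torso height, meters

fhm, fsm = 0.05, 0.25        # FlatTorsoReward hard/soft margins
thm, tsm = 0.02, 0.15        # TorsoHeightReward hard/soft margins
scm = 5.                     # SmallControlReward margin
hmhm, hmsm = 0.05, 0.5       # HorizontalMoveSpeedReward hard/soft margins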
""" A demo for the Solo v2 Vanilla with realtime control. This environment is designed to act like a real robot would. There is no concept of a "timestep"; rather the step command should be interprated as sending position values to the robot's PID controllers. """ import gym import numpy as np import gym_solo from gym_solo.envs import solo8v2vanilla from gym_solo.core import obs if __name__ == '__main__': env = gym.make('solo8vanilla-realtime-v0') env.obs_factory.register_observation(obs.TorsoIMU(env.robot)) try: print("""\n ============================================= Solo 8 v2 Vanilla Realtime Simulation Simulation Active. Exit with ^C. ============================================= """) env.reset() while True: pos = float(