def test_register_termination(self): termination_factory = termination.TerminationFactory() dummy_termination_1 = DummyTermination(0, True) dummy_termination_2 = DummyTermination(0, False) termination_factory.register_termination(dummy_termination_1, dummy_termination_2) self.assertEqual(len(termination_factory._terminations), 2)
def test_actions(self): no_op = np.zeros(self.env.action_space.shape[0]) self.env.obs_factory.register_observation(CompliantObs(None)) self.env.termination_factory.register_termination( DummyTermination(0, True)) # Let the robot stabilize first for i in range(1000): self.env.step(no_op) position, orientation = p.getBasePositionAndOrientation(self.env.robot) with self.subTest('no action'): for i in range(10): self.env.step(no_op) new_pos, new_or = p.getBasePositionAndOrientation(self.env.robot) np.testing.assert_array_almost_equal(position, new_pos) np.testing.assert_array_almost_equal(orientation, new_or) with self.subTest('with action'): action = np.array([5.] * self.env.action_space.shape[0]) for i in range(10): self.env.step(action) new_pos, new_or = p.getBasePositionAndOrientation(self.env.robot) self.assert_array_not_almost_equal(position, new_pos) self.assert_array_not_almost_equal(orientation, new_or)
def test_step_simple_reward(self): self.env.obs_factory.register_observation(CompliantObs(None)) self.env.termination_factory.register_termination( DummyTermination(0, True)) obs, reward, done, info = self.env.step(self.env.action_space.sample()) self.assertEqual(reward, 1)
def test_action_normalization(self): joint_cnt = 12 config = solo_env.Solo8VanillaConfig() config.max_motor_rotation = 10 env = solo_env.Solo8VanillaEnv(config=config, normalize_actions=True) env.obs_factory.register_observation(CompliantObs(None)) env.termination_factory.register_termination(DummyTermination(0, True)) env.reward_factory.register_reward(1, SimpleReward()) mock_client = mock.MagicMock() env.client = mock_client with self.subTest('min bounds'): env.step([-1.] * joint_cnt) _, kwargs = mock_client.setJointMotorControlArray.call_args_list[-1] np.testing.assert_array_equal( kwargs['targetPositions'], np.array([-config.max_motor_rotation] * joint_cnt)) with self.subTest('max bounds'): env.step([1.] * joint_cnt) _, kwargs = mock_client.setJointMotorControlArray.call_args_list[-1] np.testing.assert_array_equal( kwargs['targetPositions'], np.array([config.max_motor_rotation] * joint_cnt)) with self.subTest('at 0'): env.step([0.] * joint_cnt) _, kwargs = mock_client.setJointMotorControlArray.call_args_list[-1] np.testing.assert_array_equal( kwargs['targetPositions'], np.array([0] * joint_cnt))
def test_reset(self): dummy_termination = DummyTermination(0, True) termination_factory = termination.TerminationFactory() termination_factory.register_termination(dummy_termination) self.assertEqual(1, dummy_termination.reset_counter) termination_factory.reset() self.assertEqual(2, dummy_termination.reset_counter)
def test_is_terminated(self): dummy_termination_true_1 = DummyTermination(0, True) dummy_termination_false_1 = DummyTermination(0, False) dummy_termination_false_2 = DummyTermination(0, False) with self.subTest('True and False Termination'): termination_factory = termination.TerminationFactory() termination_factory.register_termination( dummy_termination_true_1, dummy_termination_false_1) self.assertTrue(termination_factory.is_terminated()) with self.subTest('False and False Termination'): termination_factory = termination.TerminationFactory() termination_factory.register_termination( dummy_termination_false_1, dummy_termination_false_2) self.assertFalse(termination_factory.is_terminated())
def test_disjoint_environments(self): env1 = solo_env.Solo8VanillaEnv(config=solo_env.Solo8VanillaConfig()) env1.obs_factory.register_observation(solo_obs.TorsoIMU(env1.robot)) env1.obs_factory.register_observation(solo_obs.MotorEncoder(env1.robot)) env1.reward_factory.register_reward(1, SimpleReward()) env1.termination_factory.register_termination(DummyTermination(0, True)) home_position = env1.reset() for i in range(1000): env1.step(env1.action_space.sample()) env2 = solo_env.Solo8VanillaEnv(config=solo_env.Solo8VanillaConfig()) env2.obs_factory.register_observation(solo_obs.TorsoIMU(env2.robot)) env2.obs_factory.register_observation(solo_obs.MotorEncoder(env2.robot)) env2.reward_factory.register_reward(1, SimpleReward()) env2.termination_factory.register_termination(DummyTermination(0, True)) np.testing.assert_array_almost_equal(home_position, env2.reset())
def test_realtime(self, mock_time): env = solo_env.Solo8VanillaEnv(config=solo_env.Solo8VanillaConfig(), realtime=True) env.reward_factory.register_reward(1, SimpleReward()) env.obs_factory.register_observation(CompliantObs(None)) env.termination_factory.register_termination(DummyTermination(0, True)) env.step(env.action_space.sample()) self.assertTrue(mock_time.called)
def test_reset(self): self.env.obs_factory.register_observation(CompliantObs(None)) self.env.termination_factory.register_termination(DummyTermination(0, True)) base_pos, base_or = p.getBasePositionAndOrientation(self.env.robot) action = np.array([5.] * self.env.action_space.shape[0]) for _ in range(100): self.env.step(action) self.assertEqual( self.env.termination_factory._terminations[0].reset_counter, 1) new_pos, new_or = p.getBasePositionAndOrientation(self.env.robot) self.assert_array_not_almost_equal(base_pos, new_pos) self.assert_array_not_almost_equal(base_or, new_or) self.env.reset() self.assertEqual( self.env.termination_factory._terminations[0].reset_counter, 2) new_pos, new_or = p.getBasePositionAndOrientation(self.env.robot) np.testing.assert_array_almost_equal(base_pos, new_pos) np.testing.assert_array_almost_equal(base_or, new_or)