def test_action_generator():
    """Check the empty and random actions built by a ``BehaviorSpec``.

    A continuous spec (30 actions) and a discrete spec (branches of size
    10, 20 and 30) are each asked for a batch of 4 actions. The empty
    action must be all zeros with the expected dtype and shape; the random
    action must have the expected dtype and shape and stay within bounds.
    """
    n_agents = 4

    # Continuous
    continuous_size = 30
    continuous_spec = BehaviorSpec(
        observation_shapes=[(5,)],
        action_type=ActionType.CONTINUOUS,
        action_shape=continuous_size,
    )
    continuous_dims = (n_agents, continuous_size)

    empty = continuous_spec.create_empty_action(n_agents)
    assert np.array_equal(empty, np.zeros(continuous_dims, dtype=np.float32))

    sampled = continuous_spec.create_random_action(n_agents)
    assert sampled.dtype == np.float32
    assert sampled.shape == continuous_dims
    # Random continuous actions are expected to lie in [-1, 1].
    assert np.min(sampled) >= -1
    assert np.max(sampled) <= 1

    # Discrete
    branches = (10, 20, 30)
    discrete_spec = BehaviorSpec(
        observation_shapes=[(5,)],
        action_type=ActionType.DISCRETE,
        action_shape=branches,
    )
    # One column per branch.
    discrete_dims = (n_agents, len(branches))

    empty = discrete_spec.create_empty_action(n_agents)
    assert np.array_equal(empty, np.zeros(discrete_dims, dtype=np.int32))

    sampled = discrete_spec.create_random_action(n_agents)
    assert sampled.dtype == np.int32
    assert sampled.shape == discrete_dims
    assert np.min(sampled) >= 0
    # Each column must stay strictly below the size of its own branch.
    for column, limit in enumerate(branches):
        assert np.max(sampled[:, column]) < limit
def test_specs():
    """Check the derived properties of continuous and discrete specs.

    For each action type the test verifies ``discrete_action_branches``,
    ``action_size``, and the shape and dtype of the array returned by
    ``create_empty_action`` for a batch of 5.
    """
    obs_shapes = [(3, 2), (5,)]
    batch = 5

    continuous = BehaviorSpec(
        observation_shapes=obs_shapes,
        action_type=ActionType.CONTINUOUS,
        action_shape=3,
    )
    # A continuous spec exposes no discrete branches.
    assert continuous.discrete_action_branches is None
    assert continuous.action_size == 3
    assert continuous.create_empty_action(batch).shape == (batch, 3)
    assert continuous.create_empty_action(batch).dtype == np.float32

    discrete = BehaviorSpec(
        observation_shapes=obs_shapes,
        action_type=ActionType.DISCRETE,
        action_shape=(3,),
    )
    # A single branch of size 3 gives an action size of 1.
    assert discrete.discrete_action_branches == (3,)
    assert discrete.action_size == 1
    assert discrete.create_empty_action(batch).shape == (batch, 1)
    assert discrete.create_empty_action(batch).dtype == np.int32