# Example #1
def test_action_generator():
    """Check that BehaviorSpec builds well-formed empty and random actions
    for both continuous and discrete action spaces."""
    batch = 4

    # --- Continuous action space ---
    n_actions = 30
    spec = BehaviorSpec(
        observation_shapes=[(5, )],
        action_type=ActionType.CONTINUOUS,
        action_shape=n_actions,
    )
    empty = spec.create_empty_action(batch)
    assert np.array_equal(empty,
                          np.zeros((batch, n_actions), dtype=np.float32))

    sampled = spec.create_random_action(batch)
    assert sampled.dtype == np.float32
    assert sampled.shape == (batch, n_actions)
    # Continuous samples are expected to lie in [-1, 1].
    assert np.min(sampled) >= -1
    assert np.max(sampled) <= 1

    # --- Discrete action space ---
    branches = (10, 20, 30)
    spec = BehaviorSpec(
        observation_shapes=[(5, )],
        action_type=ActionType.DISCRETE,
        action_shape=branches,
    )
    empty = spec.create_empty_action(batch)
    assert np.array_equal(empty,
                          np.zeros((batch, len(branches)), dtype=np.int32))

    sampled = spec.create_random_action(batch)
    assert sampled.dtype == np.int32
    assert sampled.shape == (batch, len(branches))
    assert np.min(sampled) >= 0
    # Every sampled index must stay below its branch's size.
    for branch_idx, branch_size in enumerate(branches):
        assert np.max(sampled[:, branch_idx]) < branch_size
# Example #2
def create_agent_buffer(behavior_spec: BehaviorSpec,
                        number: int,
                        reward: float = 0.0) -> AgentBuffer:
    """Build an AgentBuffer containing ``number`` identical experience steps.

    Observations and the action are drawn once from ``behavior_spec`` and
    repeated for every step; ``done`` is always 0 and ``masks`` always 1.
    """
    buffer = AgentBuffer()
    # Sample one set of current/next observations up front; every step in
    # the buffer reuses these same arrays.
    current_obs = [
        np.random.normal(size=shape)
        for shape in behavior_spec.observation_shapes
    ]
    next_obs = [
        np.random.normal(size=shape)
        for shape in behavior_spec.observation_shapes
    ]
    action = behavior_spec.create_random_action(1)[0, :]

    for _ in range(number):
        split_curr = SplitObservations.from_observations(current_obs)
        split_next = SplitObservations.from_observations(next_obs)
        for idx, visual in enumerate(split_curr.visual_observations):
            buffer[f"visual_obs{idx}"].append(visual)
            buffer[f"next_visual_obs{idx}"].append(
                split_next.visual_observations[idx])
        buffer["vector_obs"].append(split_curr.vector_observations)
        buffer["next_vector_in"].append(split_next.vector_observations)
        buffer["actions"].append(action)
        buffer["done"].append(np.zeros(1, dtype=np.float32))
        buffer["reward"].append(np.full(1, reward, dtype=np.float32))
        buffer["masks"].append(np.ones(1, dtype=np.float32))
    return buffer