Example #1
def test_handles_done_in_substep_with_recorded_episode_stats():
    """Recording episode stats and handling environments that return done during a (non-last) sub-step."""
    env = build_dummy_structured_env()
    env = _FiveSubstepsLimitWrapper.wrap(env)
    policy = RandomPolicy(env.action_spaces_dict)

    # -- Normal operation (should reset the env automatically and continue rollout) --
    rollout_generator = RolloutGenerator(env=env, record_episode_stats=True)
    trajectory = rollout_generator.rollout(policy, n_steps=10)
    assert len(trajectory) == 10

    # The done step records should have data for the first sub-step only
    dones = 0
    for step_record in trajectory.step_records:
        if step_record.is_done():
            assert [0] == list(step_record.observations_dict.keys())
            dones += 1
            assert step_record.episode_stats is not None
        else:
            assert [0, 1] == list(step_record.observations_dict.keys())
            assert step_record.episode_stats is None
    assert dones == 3  # Each episode hits the 5-sub-step limit during its 3rd structured step, so it spans 3 records => 3 full episodes fit into the 10 recorded steps

    # -- Terminate on done --
    rollout_generator = RolloutGenerator(env=env, terminate_on_done=True)
    trajectory = rollout_generator.rollout(policy, n_steps=10)
    assert len(trajectory) == 3
    assert trajectory.is_done()
    assert [0] == list(trajectory.step_records[-1].observations_dict.keys())
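A hedged follow-up sketch (reusing the same fixture names and only the record attributes exercised above): the per-episode stats attached to done records can be pulled straight out of a stats-recording rollout.

env = _FiveSubstepsLimitWrapper.wrap(build_dummy_structured_env())
policy = RandomPolicy(env.action_spaces_dict)
trajectory = RolloutGenerator(env=env, record_episode_stats=True).rollout(policy, n_steps=10)
# Only done records carry episode stats; one entry per completed episode (3 here, as asserted above).
episode_stats = [r.episode_stats for r in trajectory.step_records if r.is_done()]
assert len(episode_stats) == 3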
Example #2
def test_records_next_observations():
    """Recording next observations."""
    env = build_dummy_structured_env()
    rollout_generator = RolloutGenerator(env=env,
                                         record_next_observations=True)
    policy = RandomPolicy(env.action_spaces_dict)
    trajectory = rollout_generator.rollout(policy, n_steps=10)

    assert len(trajectory) == 10

    sub_step_keys = env.action_spaces_dict.keys()
    last_next_obs = None
    for record in trajectory.step_records:
        assert sub_step_keys == record.observations_dict.keys()
        assert sub_step_keys == record.next_observations_dict.keys()
        assert record.batch_shape is None

        for step_key in sub_step_keys:
            curr_obs = record.observations_dict[step_key]

            # Next obs from the previous sub-step should be equal to the current observation
            if last_next_obs is not None:
                assert list(curr_obs.keys()) == list(last_next_obs.keys())
                for obs_key in curr_obs.keys():
                    assert np.all(curr_obs[obs_key] == last_next_obs[obs_key])

            last_next_obs = record.next_observations_dict[step_key]
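A hedged sketch of what record_next_observations enables, using only record attributes that appear in these examples (actions_dict is shown in Example #8): assembling per-sub-step (observation, action, next observation) transitions from the trajectory above.

# Flatten the rollout into per-sub-step transitions; attribute names as used in the tests.
transitions = []
for record in trajectory.step_records:
    for step_key in record.observations_dict:
        transitions.append((record.observations_dict[step_key],
                            record.actions_dict[step_key],
                            record.next_observations_dict[step_key]))
assert len(transitions) == 10 * len(env.action_spaces_dict)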
Example #3
def run_observation_normalization_pipeline(
        normalization_config) -> ObservationNormalizationWrapper:
    """ observation normalization test """

    # wrap env with observation normalization
    env = GymMazeEnv("CartPole-v0")
    env = ObservationNormalizationWrapper(
        env,
        default_strategy=normalization_config["default_strategy"],
        default_strategy_config=normalization_config[
            "default_strategy_config"],
        default_statistics=normalization_config["default_statistics"],
        statistics_dump=normalization_config["statistics_dump"],
        exclude=normalization_config["exclude"],
        sampling_policy=RandomPolicy(env.action_spaces_dict),
        manual_config=normalization_config["manual_config"])

    # estimate normalization statistics
    statistics = obtain_normalization_statistics(env, n_samples=1000)

    # check statistics
    for sub_step_key in env.observation_spaces_dict:
        for obs_key in env.observation_spaces_dict[sub_step_key].spaces:
            assert obs_key in statistics
            for stats_key in statistics[obs_key]:
                stats = statistics[obs_key][stats_key]
                assert isinstance(stats, np.ndarray)

    # test normalization
    random_env_steps(env, steps=100)

    return env
Example #4
def test_random_sampling_seeding():
    """Test the seeding with a random env version and random sampling (fully stochastic)"""
    env = GymMazeEnv(env="CartPole-v0")
    policy = RandomPolicy(env.action_spaces_dict)

    perform_seeding_test(env,
                         policy,
                         is_deterministic_env=False,
                         is_deterministic_agent=False)
Example #5
def test_terminates_on_done():
    """Resetting the env or terminating rollout early when the env is done."""
    env = build_dummy_maze_env()
    env = TimeLimitWrapper.wrap(env, max_episode_steps=5)
    policy = RandomPolicy(env.action_spaces_dict)

    # Normal operation (should reset the env automatically and continue rollout)
    rollout_generator = RolloutGenerator(env=env)
    trajectory = rollout_generator.rollout(policy, n_steps=10)
    assert len(trajectory) == 10

    # Terminate on done
    rollout_generator = RolloutGenerator(env=env, terminate_on_done=True)
    trajectory = rollout_generator.rollout(policy, n_steps=10)
    assert len(trajectory) == 5
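A hedged follow-up sketch (fresh env, same fixtures, is_done() as used in Example #1): with the default auto-reset behaviour, episode boundaries inside a longer rollout can be recovered from the done flags of the individual records.

env = TimeLimitWrapper.wrap(build_dummy_maze_env(), max_episode_steps=5)
trajectory = RolloutGenerator(env=env).rollout(RandomPolicy(env.action_spaces_dict), n_steps=10)
# Count records per episode by cutting at done records.
episode_lengths, current = [], 0
for record in trajectory.step_records:
    current += 1
    if record.is_done():
        episode_lengths.append(current)
        current = 0
assert episode_lengths == [5, 5]  # the TimeLimitWrapper ends each episode after 5 steps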
Example #6
def test_observation_normalization_manual_default_stats():
    """ observation normalization test """

    # init environment
    env = GymMazeEnv("CartPole-v0")

    # normalization config
    normalization_config = {
        "default_strategy":
        "maze.normalization_strategies.MeanZeroStdOneObservationNormalizationStrategy",
        "default_strategy_config": {
            "clip_range": (0, 1),
            "axis": 0
        },
        "default_statistics": {
            "mean": [0, 0, 0, 0],
            "std": [1, 1, 1, 1]
        },
        "statistics_dump": "statistics.pkl",
        "sampling_policy": RandomPolicy(env.action_spaces_dict),
        "exclude": None,
        "manual_config": None,
    }

    # wrap env with observation normalization
    env = ObservationNormalizationWrapper(
        env,
        default_strategy=normalization_config["default_strategy"],
        default_strategy_config=normalization_config[
            "default_strategy_config"],
        default_statistics=normalization_config["default_statistics"],
        statistics_dump=normalization_config["statistics_dump"],
        sampling_policy=normalization_config['sampling_policy'],
        exclude=normalization_config["exclude"],
        manual_config=normalization_config["manual_config"])

    # check if observation space clipping was applied
    assert np.all(env.observation_space["observation"].high <= 1.0)
    assert np.all(env.observation_space["observation"].low >= 0.0)

    # check if stats have been set properly
    statistics = env.get_statistics()
    assert np.all(statistics["observation"]["mean"] == np.zeros(shape=4))
    assert np.all(statistics["observation"]["std"] == np.ones(shape=4))

    # test sampling
    obs = random_env_steps(env, steps=100)
    assert np.min(obs) >= 0 and np.max(obs) <= 1
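For intuition (a hedged note, not part of the test): with mean 0 and std 1 the mean-zero-std-one normalization is the identity, so it is the configured clip_range (0, 1) that bounds the observations checked above. A small NumPy sketch of the transformation applied under these statistics:

import numpy as np

# (x - mean) / std followed by clipping to the configured range; with mean=0 and std=1
# this reduces to plain clipping, which is why min(obs) >= 0 and max(obs) <= 1 above.
x = np.array([-2.0, 0.3, 5.0])
normalized = np.clip((x - 0.0) / 1.0, 0.0, 1.0)
assert normalized.min() >= 0.0 and normalized.max() <= 1.0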
Example #7
def test_works_with_gym_maze_envs():
    env = GymMazeEnv("CartPole-v0")
    policy = RandomPolicy(action_spaces_dict=env.action_spaces_dict)

    agent_deployment = AgentDeployment(
        policy=policy,
        env=env
    )

    external_env = gym.make("CartPole-v0")

    maze_state = external_env.reset()
    reward, done, info = 0, False, {}

    for i in range(10):
        maze_action = agent_deployment.act(maze_state, reward, done, info)
        maze_state, reward, done, info = external_env.step(maze_action)

    agent_deployment.close(maze_state, reward, done, info)
Example #8
def test_standard_rollout():
    """Rollout with a single structured env."""
    env = build_dummy_structured_env()
    rollout_generator = RolloutGenerator(env=env)
    policy = RandomPolicy(env.action_spaces_dict)
    trajectory = rollout_generator.rollout(policy, n_steps=10)

    assert len(trajectory) == 10

    sub_step_keys = env.action_spaces_dict.keys()
    for record in trajectory.step_records:
        assert sub_step_keys == record.actions_dict.keys()
        assert sub_step_keys == record.observations_dict.keys()
        assert sub_step_keys == record.rewards_dict.keys()

        assert record.batch_shape is None
        for step_key in sub_step_keys:
            assert record.observations_dict[
                step_key] in env.observation_spaces_dict[step_key]
            assert record.actions_dict[step_key] in env.action_spaces_dict[
                step_key]
Example #9
def test_custom_model_composer_with_shared_embedding():
    env = build_dummy_structured_env()

    policies = {
        "_target_":
        "maze.perception.models.policies.ProbabilisticPolicyComposer",
        "networks": [{
            "_target_":
            "maze.perception.models.built_in.flatten_concat_shared_embedding.FlattenConcatSharedEmbeddingPolicyNet",
            "non_lin": "torch.nn.SELU",
            "hidden_units": [16],
            "head_units": [16]
        }, {
            "_target_":
            "maze.perception.models.built_in.flatten_concat_shared_embedding.FlattenConcatSharedEmbeddingPolicyNet",
            "non_lin": "torch.nn.SELU",
            "hidden_units": [16],
            "head_units": [16]
        }],
        "substeps_with_separate_agent_nets": []
    }

    step_critic = {
        "_target_":
        "maze.perception.models.critics.StepStateCriticComposer",
        "networks": [{
            "_target_":
            "maze.perception.models.built_in.flatten_concat_shared_embedding.FlattenConcatSharedEmbeddingStateValueNet",
            "non_lin": "torch.nn.SELU",
            "head_units": [16]
        }, {
            "_target_":
            "maze.perception.models.built_in.flatten_concat_shared_embedding.FlattenConcatSharedEmbeddingStateValueNet",
            "non_lin": "torch.nn.SELU",
            "head_units": [16]
        }]
    }

    # check if model config is fine
    CustomModelComposer.check_model_config({"critic": step_critic})

    composer = CustomModelComposer(
        action_spaces_dict=env.action_spaces_dict,
        observation_spaces_dict=env.observation_spaces_dict,
        agent_counts_dict=env.agent_counts_dict,
        distribution_mapper_config=[],
        policy=policies,
        critic=step_critic)

    assert isinstance(composer.distribution_mapper, DistributionMapper)
    assert isinstance(composer.critic, TorchStepStateCritic)
    assert isinstance(composer.critic.networks, dict)

    # test saving models
    composer.save_models()

    try:
        import pygraphviz

        for model_file in [
                "critic_0.pdf", "critic_1.pdf", "policy_0.pdf", "policy_1.pdf"
        ]:
            file_path = os.path.join(os.getcwd(), model_file)
            assert os.path.exists(file_path)
            os.remove(file_path)
    except ImportError:
        pass  # no output generated as pygraphviz is not installed.

    rollout_generator = RolloutGenerator(env=env,
                                         record_next_observations=False)
    policy = RandomPolicy(env.action_spaces_dict)
    trajectory = rollout_generator.rollout(
        policy, n_steps=10).stack().to_torch(device='cpu')

    policy_output = composer.policy.compute_policy_output(trajectory)
    critic_input = StateCriticInput.build(policy_output, trajectory)
    _ = composer.critic.predict_values(critic_input)
Example #10
def test_redistributes_actor_reward_if_available():
    env = build_dummy_maze_env_with_structured_core_env()
    rollout_generator = RolloutGenerator(env=env)
    policy = RandomPolicy(env.action_spaces_dict)
    trajectory = rollout_generator.rollout(policy, n_steps=1)
    assert np.all(trajectory.step_records[0].rewards == [1, 1])
Example #11
def build_single_step_with_critic_type(
    critics_composer_type: type(BaseStateCriticComposer),
    critics_type: type(TorchStateCritic),
    shared_embedding_keys: Optional[Union[List[str], Dict[StepKeyType,
                                                          List[str]]]]):
    """ helper function """
    # init environment
    env = GymMazeEnv('CartPole-v0')
    observation_space = env.observation_space
    action_space = env.action_space

    # map observations to a modality
    obs_modalities = {
        obs_key: "feature"
        for obs_key in observation_space.spaces.keys()
    }
    # define how to process a modality
    modality_config = dict()
    modality_config["feature"] = {
        "block_type": "maze.perception.blocks.DenseBlock",
        "block_params": {
            "hidden_units": [32, 32],
            "non_lin": "torch.nn.ReLU"
        }
    }
    modality_config["hidden"] = {
        "block_type": "maze.perception.blocks.DenseBlock",
        "block_params": {
            "hidden_units": [64],
            "non_lin": "torch.nn.ReLU"
        }
    }
    modality_config["recurrence"] = {}

    model_builder = {
        '_target_': 'maze.perception.builders.concat.ConcatModelBuilder',
        'modality_config': modality_config,
        'observation_modality_mapping': obs_modalities,
        'shared_embedding_keys': shared_embedding_keys
    }

    # initialize default model builder
    default_builder = TemplateModelComposer(
        action_spaces_dict={0: action_space},
        observation_spaces_dict={0: observation_space},
        agent_counts_dict={0: 1},
        distribution_mapper_config={},
        model_builder=model_builder,
        policy={'_target_': policy_composer_type},  # policy_composer_type: assumed to be a module-level constant in the original test file
        critic={'_target_': critics_composer_type})

    # create model pdf
    default_builder.save_models()

    assert isinstance(default_builder.distribution_mapper, DistributionMapper)
    assert isinstance(default_builder.policy.networks[0], nn.Module)
    assert isinstance(default_builder.critic.networks[0], nn.Module)
    assert isinstance(default_builder.critic, critics_type)

    # test default policy gradient actor
    policy_net = default_builder.policy.networks[0]
    assert isinstance(policy_net, InferenceBlock)

    assert "action" in policy_net.out_keys
    assert policy_net.out_shapes()[0] == (2, )

    # test standalone critic
    value_net = default_builder.critic.networks[0]
    assert isinstance(value_net, InferenceBlock)
    assert "value" in value_net.out_keys
    assert value_net.out_shapes()[0] == (1, )

    if shared_embedding_keys is not None:
        if isinstance(shared_embedding_keys, list):
            assert all([
                shared_key in policy_net.out_keys
                for shared_key in shared_embedding_keys
            ])
            assert all([
                shared_key in value_net.in_keys
                for shared_key in shared_embedding_keys
            ])
        else:
            assert all([
                shared_key in policy_net.out_keys
                for shared_keylist in shared_embedding_keys.values()
                for shared_key in shared_keylist
            ])
            assert all([
                shared_key in value_net.in_keys
                for shared_keylist in shared_embedding_keys.values()
                for shared_key in shared_keylist
            ])
    else:
        assert value_net.in_keys == policy_net.in_keys

    rollout_generator = RolloutGenerator(env=env,
                                         record_next_observations=False)
    policy = RandomPolicy(env.action_spaces_dict)
    trajectory = rollout_generator.rollout(
        policy, n_steps=10).stack().to_torch(device='cpu')

    policy_output = default_builder.policy.compute_policy_output(trajectory)
    critic_input = StateCriticInput.build(policy_output, trajectory)
    _ = default_builder.critic.predict_values(critic_input)
Example #12
def test_observation_statistics_logging():
    """ observation normalization logging test """

    # normalization config
    normalization_config = {
        "default_strategy":
        "maze.normalization_strategies.MeanZeroStdOneObservationNormalizationStrategy",
        "default_strategy_config": {
            "clip_range": (None, None),
            "axis": 0
        },
        "default_statistics": None,
        "statistics_dump": "statistics.pkl",
        "exclude": None,
        "manual_config": {
            "observation": {
                "strategy":
                "maze.normalization_strategies.MeanZeroStdOneObservationNormalizationStrategy",
                "strategy_config": {
                    "clip_range": (0, 1)
                },
                "statistics": {
                    "mean": [0, 0, 0, 0],
                    "std": [1, 1, 1, 1]
                }
            }
        }
    }
    writer = LogStatsWriterTensorboard(log_dir='test_log',
                                       tensorboard_render_figure=True)
    register_log_stats_writer(writer)
    # attach a console writer as well for immediate console feedback
    register_log_stats_writer(LogStatsWriterConsole())

    # init environment
    env = GymMazeEnv("CartPole-v0")

    # wrap env with observation normalization
    env = ObservationNormalizationWrapper(
        env,
        default_strategy=normalization_config["default_strategy"],
        default_strategy_config=normalization_config[
            "default_strategy_config"],
        default_statistics=normalization_config["default_statistics"],
        statistics_dump=normalization_config["statistics_dump"],
        sampling_policy=RandomPolicy(env.action_spaces_dict),
        exclude=normalization_config["exclude"],
        manual_config=normalization_config["manual_config"])

    env = LogStatsWrapper.wrap(env, logging_prefix="train")

    n_episodes = 10
    n_steps_per_episode = 100
    for episode in range(n_episodes):
        _ = env.reset()
        for step in range(n_steps_per_episode):
            # take random action
            action = env.action_space.sample()

            # take step in env and trigger log stats writing
            _, _, done, _ = env.step(action)

            if done:
                break

        increment_log_step()
Example #13
def test_observation_normalization_pipeline():
    """ observation normalization test """

    # wrap env with observation normalization
    env = GymMazeEnv("CartPole-v0")
    # normalization config
    normalization_config = {
        "default_strategy":
        "maze.normalization_strategies.RangeZeroOneObservationNormalizationStrategy",
        "default_strategy_config": {
            "clip_range": (None, None),
            "axis": 0
        },
        "default_statistics": None,
        "sampling_policy": RandomPolicy(env.action_spaces_dict),
        "statistics_dump": "statistics.pkl",
        "exclude": None,
        "manual_config": None
    }
    env = ObservationNormalizationWrapper(
        env,
        default_strategy=normalization_config["default_strategy"],
        default_strategy_config=normalization_config[
            "default_strategy_config"],
        default_statistics=normalization_config["default_statistics"],
        statistics_dump=normalization_config["statistics_dump"],
        sampling_policy=normalization_config['sampling_policy'],
        exclude=normalization_config["exclude"],
        manual_config=normalization_config["manual_config"])

    # check statistics
    statistics = env.get_statistics()
    assert statistics["observation"] is None, statistics

    # check that assertion is thrown
    with pytest.raises(AssertionError):
        random_env_steps(env, steps=1)

    # estimate normalization statistics
    statistics = obtain_normalization_statistics(env, n_samples=1000)

    # check statistics
    for sub_step_key in env.observation_spaces_dict:
        for obs_key in env.observation_spaces_dict[sub_step_key].spaces:
            assert obs_key in statistics
            for stats_key in statistics[obs_key]:
                stats = statistics[obs_key][stats_key]
                assert isinstance(stats, np.ndarray)

    # test normalization
    random_env_steps(env, steps=100)

    # test file dump and loading
    statistics_copy = copy.deepcopy(env.get_statistics())
    assert os.path.exists("statistics.pkl")

    # wrap env with observation normalization
    env = GymMazeEnv("CartPole-v0")
    env = ObservationNormalizationWrapper(
        env,
        default_strategy=normalization_config["default_strategy"],
        default_strategy_config=normalization_config[
            "default_strategy_config"],
        default_statistics=normalization_config["default_statistics"],
        statistics_dump=normalization_config["statistics_dump"],
        sampling_policy=normalization_config['sampling_policy'],
        exclude=normalization_config["exclude"],
        manual_config=normalization_config["manual_config"])

    # check if stats loading worked properly
    statistics = env.get_statistics()
    for _ in env.observation_spaces_dict:
        for obs_key in statistics:
            for stats_key in statistics[obs_key]:
                assert np.all(statistics[obs_key][stats_key] ==
                              statistics_copy[obs_key][stats_key])

    # check if stepping works
    random_env_steps(env, steps=100)
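The statistics dump is only exercised indirectly above (an existence check plus re-loading through a fresh wrapper). A hedged sketch of inspecting the file directly, assuming statistics.pkl is a plain pickle dump (an assumption; the format is not stated here):

import pickle

# Assumption: the statistics_dump file written by the wrapper is a standard pickle file.
with open("statistics.pkl", "rb") as f:
    dumped_statistics = pickle.load(f)
print(type(dumped_statistics))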
Example #14
def test_default_action_space_sampling():
    env = build_dummy_maze_env()
    policy = RandomPolicy(env.action_spaces_dict)
    action = policy.compute_action(observation=env.observation_space.sample(), maze_state=None)
    assert action in env.action_space
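A small follow-up sketch using only the calls from the test above: RandomPolicy samples independently on every call, so repeating the check exercises different actions from the same spaces.

# Repeat the sampling check a few times; each call draws a fresh random action.
for _ in range(5):
    action = policy.compute_action(observation=env.observation_space.sample(), maze_state=None)
    assert action in env.action_space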
Example #15
# OBSERVATION NORMALIZATION
# -------------------------

# we wrap the environment with the ObservationNormalizationWrapper
# (you can find details on this in the section on observation normalization)
env = ObservationNormalizationWrapper(
    env=env,
    default_strategy=
    "maze.normalization_strategies.MeanZeroStdOneObservationNormalizationStrategy",
    default_strategy_config={
        "clip_range": (None, None),
        "axis": 0
    },
    default_statistics=None,
    statistics_dump="statistics.pkl",
    sampling_policy=RandomPolicy(env.action_spaces_dict),
    exclude=None,
    manual_config=None)

# next we estimate the normalization statistics by
# (1) collecting observations by randomly sampling 1000 transitions from the environment
# (2) computing the statistics according to the defined normalization strategy
normalization_statistics = obtain_normalization_statistics(env, n_samples=1000)
env.set_normalization_statistics(normalization_statistics)

# after this step all observations returned by the environment will be normalized

# stable-baselines does not support dict spaces so we have to remove them
env = NoDictSpacesWrapper(env)

# TRAINING AND ROLLOUT (remains unchanged)
Example #16
import gym

from maze.core.agent.random_policy import RandomPolicy
from maze.core.agent_deployment.agent_deployment import AgentDeployment
from maze.core.wrappers.maze_gym_env_wrapper import GymMazeEnv

env = GymMazeEnv("CartPole-v0")
policy = RandomPolicy(action_spaces_dict=env.action_spaces_dict)

agent_deployment = AgentDeployment(policy=policy, env=env)

# Simulate an external production environment that does not use Maze
external_env = gym.make("CartPole-v0")

maze_state = external_env.reset()
reward, done, info = 0, False, {}

for i in range(10):
    # Query the agent deployment for maze action, then step the environment with it
    maze_action = agent_deployment.act(maze_state, reward, done, info)
    maze_state, reward, done, info = external_env.step(maze_action)

agent_deployment.close(maze_state, reward, done, info)
Example #17
def build_structured_with_critic_type(
    env, critics_composer_type: type(BaseStateCriticComposer),
    critics_type: type(TorchStateCritic),
    shared_embedding_keys: Optional[Union[List[str], Dict[StepKeyType,
                                                          List[str]]]]):
    """ helper function """

    # map observations to a modality
    obs_modalities = {
        "observation_0": "image",
        "observation_1": "feature",
        DeltaStateCriticComposer.prev_value_key: 'feature'
    }

    # define how to process a modality
    modality_config = dict()
    modality_config["feature"] = {
        "block_type": "maze.perception.blocks.DenseBlock",
        "block_params": {
            "hidden_units": [32, 32],
            "non_lin": "torch.nn.ReLU"
        }
    }
    modality_config['image'] = {
        'block_type': 'maze.perception.blocks.StridedConvolutionDenseBlock',
        'block_params': {
            'hidden_channels': [8, 16, 32],
            'hidden_kernels': [8, 4, 4],
            'convolution_dimension': 2,
            'hidden_strides': [4, 2, 2],
            'hidden_dilations': None,
            'hidden_padding': [1, 1, 1],
            'padding_mode': None,
            'hidden_units': [],
            'non_lin': 'torch.nn.SELU'
        }
    }

    modality_config["hidden"] = {
        "block_type": "maze.perception.blocks.DenseBlock",
        "block_params": {
            "hidden_units": [64],
            "non_lin": "torch.nn.ReLU"
        }
    }
    modality_config["recurrence"] = {}

    model_builder = {
        '_target_': 'maze.perception.builders.concat.ConcatModelBuilder',
        'modality_config': modality_config,
        'observation_modality_mapping': obs_modalities,
        'shared_embedding_keys': shared_embedding_keys
    }

    # initialize default model builder
    default_builder = TemplateModelComposer(
        action_spaces_dict=env.action_spaces_dict,
        observation_spaces_dict=env.observation_spaces_dict,
        agent_counts_dict=env.agent_counts_dict,
        distribution_mapper_config={},
        model_builder=model_builder,
        policy={'_target_': policy_composer_type},
        critic={'_target_': critics_composer_type})

    # create model pdf
    default_builder.save_models()

    assert isinstance(default_builder.distribution_mapper, DistributionMapper)
    for pp in default_builder.policy.networks.values():
        assert isinstance(pp, nn.Module)
    for cc in default_builder.critic.networks.values():
        assert isinstance(cc, nn.Module)

    assert isinstance(default_builder.critic, critics_type)

    rollout_generator = RolloutGenerator(env=env,
                                         record_next_observations=False)
    policy = RandomPolicy(env.action_spaces_dict)
    trajectory = rollout_generator.rollout(
        policy, n_steps=10).stack().to_torch(device='cpu')

    policy_output = default_builder.policy.compute_policy_output(trajectory)
    critic_input = StateCriticInput.build(policy_output, trajectory)
    _ = default_builder.critic.predict_values(critic_input)