Code example #1
def eval_domain_params(
        pool: SamplerPool,
        env: SimEnv,
        policy: Policy,
        params: List[Dict],
        init_state: Optional[np.ndarray] = None) -> List[StepSequence]:
    """
    Evaluate a policy on a multidimensional grid of domain parameters.

    :param pool: parallel sampler
    :param env: environment to evaluate in
    :param policy: policy to evaluate
    :param params: multidimensional grid of domain parameters
    :param init_state: initial state of the environment; the state is fixed to this value if not `None`
    :return: list of rollouts
    """
    # Strip all domain randomization wrappers from the environment
    env = remove_all_dr_wrappers(env, verbose=True)
    if init_state is not None:
        env.init_space = SingularStateSpace(fixed_state=init_state)

    pool.invoke_all(_ps_init, pickle.dumps(env), pickle.dumps(policy))

    # Run with progress bar
    with tqdm(leave=False, file=sys.stdout, unit="rollouts",
              desc="Sampling") as pb:
        return pool.run_map(
            functools.partial(_ps_run_one_domain_param, eval=True), params, pb)
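A minimal usage sketch (hypothetical: the environment and policy are assumed to exist already, and the SamplerPool constructor argument is an assumption): sweep a single domain parameter over a grid and collect one return per grid point.

# Hypothetical usage sketch for eval_domain_params; `env` and `policy` are
# assumed to be constructed elsewhere, and the SamplerPool argument is an
# assumption for illustration.
pool = SamplerPool(num_threads=4)
param_grid = [dict(mass_pend_pole=m) for m in np.linspace(0.02, 0.03, num=10)]
rollouts = eval_domain_params(pool, env, policy, param_grid)
returns = [np.sum(ro.rewards) for ro in rollouts]  # one return per grid point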
Code example #2
def test_setting_dp_vals(env: SimEnv):
    # Loop over all possible domain parameters and set them to a random value
    for _ in range(5):
        for dp_key in env.supported_domain_param:
            if any([
                    s in dp_key for s in [
                        "slip", "compliance", "linearvelocitydamping",
                        "angularvelocitydamping"
                    ]
            ]):
                # Skip the parameters that are only available in Vortex but not in Bullet
                assert True
            else:
                nominal_val = env.domain_param.get(dp_key)
                rand_val = nominal_val + nominal_val * np.random.rand() / 10
                env.reset(domain_param={dp_key: rand_val})
                assert env.domain_param[dp_key] == pytest.approx(
                    rand_val, abs=5e-4)  # rolling friction is imprecise
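The pattern under test, shown in isolation (a sketch; the parameter name mass_pend_pole is an illustrative assumption):

# Sketch: set a single domain parameter at reset time and read it back.
nominal = env.domain_param["mass_pend_pole"]
env.reset(domain_param={"mass_pend_pole": 1.1 * nominal})  # +10 % perturbation
assert env.domain_param["mass_pend_pole"] == pytest.approx(1.1 * nominal)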
Code example #3
def test_parallel_sampling_deterministic_wo_min_steps(
    env: SimEnv,
    policy: Policy,
    min_rollouts: Optional[int],
    init_states: Optional[int],
    domain_params: Optional[List[dict]],
):
    env.max_steps = 20

    if init_states is not None:
        init_states = [
            env.spec.state_space.sample_uniform() for _ in range(init_states)
        ]

    nums_workers = (1, 2, 4)

    all_rollouts = []
    for num_workers in nums_workers:
        # Add an exploration strategy to test that it works too (it should, as the policy gets pickled and distributed
        # anyway).
        all_rollouts.append(
            ParallelRolloutSampler(
                env,
                NormalActNoiseExplStrat(policy, std_init=1.0),
                num_workers=num_workers,
                min_rollouts=min_rollouts,
                seed=0,
            ).sample(init_states=init_states, domain_params=domain_params))

    # Test that the rollouts are actually different, i.e., that the same seed is not used for all rollouts.
    for ros in all_rollouts:
        for ro_a, ro_b in [(a, b) for a in ros for b in ros if a is not b]:
            # The idle policy is deterministic and always outputs the zero action. Hence, do not check that the actions
            # are different when using the idle policy.
            if isinstance(policy, IdlePolicy):
                # The Quanser Ball Balancer is a deterministic environment (conditioned on the initial state). As the
                # idle policy is a deterministic policy, this will result in the rollouts being equivalent for each
                # initial state, so do not check for differences if the initial states were set.
                if init_states is None:
                    assert ro_a.rewards != pytest.approx(ro_b.rewards)
                    assert ro_a.observations != pytest.approx(
                        ro_b.observations)
            else:
                assert ro_a.rewards != pytest.approx(ro_b.rewards)
                assert ro_a.observations != pytest.approx(ro_b.observations)
                assert ro_a.actions != pytest.approx(ro_b.actions)

    # Test that the rollouts for all numbers of workers are equal.
    for ros_a, ros_b in [(a, b) for a in all_rollouts for b in all_rollouts]:
        assert len(ros_a) == len(ros_b)
        for ro_a, ro_b in zip(ros_a, ros_b):
            assert ro_a.rewards == pytest.approx(ro_b.rewards)
            assert ro_a.observations == pytest.approx(ro_b.observations)
            assert ro_a.actions == pytest.approx(ro_b.actions)
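The scraped snippet omits the fixtures and parametrization that supply min_rollouts, init_states, and domain_params. A hedged sketch of what such decorators could look like (names and values are assumptions, not the project's actual test setup):

# Hypothetical parametrization for the test above; the values are illustrative.
@pytest.mark.parametrize("min_rollouts", [None, 4])
@pytest.mark.parametrize("init_states", [None, 2])
@pytest.mark.parametrize("domain_params", [None, [dict(act_delay=2)]])
def test_parallel_sampling_deterministic_wo_min_steps(env, policy, min_rollouts, init_states, domain_params):
    ...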
Code example #4
def test_act_noise_simple(env: SimEnv):
    # Typical case with zero mean and non-zero std
    wrapped_env = GaussianActNoiseWrapper(env,
                                          noise_std=0.2 *
                                          np.ones(env.act_space.shape))
    for _ in range(3):
        # Sample some values
        rand_act = env.act_space.sample_uniform()
        wrapped_env.reset()
        obs_nom, _, _, _ = env.step(rand_act)
        obs_wrapped, _, _, _ = wrapped_env.step(rand_act)
        # Different actions cannot lead to the same observation
        assert not np.all(obs_nom == obs_wrapped)

    # Unusual case with non-zero mean and zero std
    wrapped_env = GaussianActNoiseWrapper(env,
                                          noise_mean=0.1 *
                                          np.ones(env.act_space.shape))
    for _ in range(3):
        # Sample some values
        rand_act = env.act_space.sample_uniform()
        wrapped_env.reset()
        obs_nom, _, _, _ = env.step(rand_act)
        obs_wrapped, _, _, _ = wrapped_env.step(rand_act)
        # Different actions cannot lead to the same observation
        assert not np.all(obs_nom == obs_wrapped)

    # General case with non-zero mean and non-zero std
    wrapped_env = GaussianActNoiseWrapper(
        env,
        noise_mean=0.1 * np.ones(env.act_space.shape),
        noise_std=0.2 * np.ones(env.act_space.shape))
    for _ in range(3):
        # Sample some values
        rand_act = env.act_space.sample_uniform()
        wrapped_env.reset()
        obs_nom, _, _, _ = env.step(rand_act)
        obs_wrapped, _, _, _ = wrapped_env.step(rand_act)
        # Different actions cannot lead to the same observation
        assert not np.all(obs_nom == obs_wrapped)
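Conceptually, the wrapper perturbs the commanded action before it reaches the inner environment. A plain-NumPy sketch of the effective action (not the pyrado API itself):

# Sketch: with noise_mean=0.1 and noise_std=0.2, the inner environment
# effectively receives the commanded action plus Gaussian noise, so identical
# commands yield different observations.
noise = 0.1 + 0.2 * np.random.randn(*rand_act.shape)
noisy_act = rand_act + noise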
Code example #5
def test_parallel_sampling_deterministic_w_min_steps(
    env: SimEnv,
    policy: Policy,
    min_rollouts: Optional[int],
    min_steps: int,
    domain_params: Optional[List[dict]],
):
    env.max_steps = 20

    nums_workers = (1, 2, 4)

    all_rollouts = []
    for num_workers in nums_workers:
        # Add an exploration strategy to test that it works too (it should, as the policy gets pickled and distributed
        # anyway).
        all_rollouts.append(
            ParallelRolloutSampler(
                env,
                NormalActNoiseExplStrat(policy, std_init=1.0),
                num_workers=num_workers,
                min_rollouts=min_rollouts,
                min_steps=min_steps * env.max_steps,
                seed=0,
            ).sample(domain_params=domain_params))

    # Test that the rollouts are actually different, i.e., that the same seed is not used for all rollouts.
    for ros in all_rollouts:
        for ro_a, ro_b in [(a, b) for a in ros for b in ros if a is not b]:
            # The idle policy is deterministic and always outputs the zero action. Hence, do not check that the actions
            # are different when using the idle policy.
            if not isinstance(policy, IdlePolicy):
                assert ro_a.rewards != pytest.approx(ro_b.rewards)
                assert ro_a.observations != pytest.approx(ro_b.observations)
                assert ro_a.actions != pytest.approx(ro_b.actions)

    # Test that the rollouts for all numbers of workers are equal.
    for ros_a, ros_b in [(a, b) for a in all_rollouts for b in all_rollouts]:
        assert sum([len(ro) for ro in ros_a]) == sum([len(ro) for ro in ros_b])
        assert sum([len(ro) for ro in ros_a]) >= min_steps * env.max_steps
        assert sum([len(ro) for ro in ros_b]) >= min_steps * env.max_steps
        assert len(ros_a) == len(ros_b)
        if min_rollouts is not None:
            assert len(ros_a) >= min_rollouts
            assert len(ros_b) >= min_rollouts
        for ro_a, ro_b in zip(ros_a, ros_b):
            assert ro_a.rewards == pytest.approx(ro_b.rewards)
            assert ro_a.observations == pytest.approx(ro_b.observations)
            assert ro_a.actions == pytest.approx(ro_b.actions)
Code example #6
def test_domain_param_transforms(env: SimEnv, trafo_class: Type):
    pyrado.set_seed(0)

    # Create a mask for a random domain parameter
    offset = 1
    idx = random.randint(0, len(env.supported_domain_param) - 1)
    sel_dp_change = list(env.supported_domain_param)[idx]
    sel_dp_fix = list(
        env.supported_domain_param)[(idx + offset) %
                                    len(env.supported_domain_param)]
    while (offset == 1 or any([
            item in sel_dp_change for item in VORTEX_ONLY_DOMAIN_PARAM_LIST
    ]) or any([item in sel_dp_fix for item in VORTEX_ONLY_DOMAIN_PARAM_LIST])):
        idx = random.randint(0, len(env.supported_domain_param) - 1)
        sel_dp_change = list(env.supported_domain_param)[idx]
        sel_dp_fix = list(
            env.supported_domain_param)[(idx + offset) %
                                        len(env.supported_domain_param)]
        offset += 1

    mask = (sel_dp_change, )
    wenv = trafo_class(env, mask)
    assert isinstance(wenv, DomainParamTransform)

    # Check 5 random values
    for _ in range(5):
        # Change the selected domain parameter
        new_dp_val = random.random() * env.get_nominal_domain_param(
        )[sel_dp_change]
        new_dp_val = abs(new_dp_val) + 1e-6  # ensure a positive value, as required by the transform's domain
        transformed_new_dp_val = wenv.forward(new_dp_val)
        wenv.domain_param = {
            sel_dp_change: transformed_new_dp_val
        }  # calls inverse transform
        if not isinstance(inner_env(wenv), SimPyEnv):
            wenv.reset(
            )  # the RcsPySim and MujocoSim classes need to be reset to apply the new domain param

        # Test the actual domain param and the getters
        assert inner_env(wenv)._domain_param[sel_dp_change] == pytest.approx(
            new_dp_val, abs=1e-5)
        assert wenv.domain_param[sel_dp_change] == pytest.approx(new_dp_val,
                                                                 abs=1e-5)
        assert wenv.domain_param[sel_dp_fix] != pytest.approx(new_dp_val)
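A concrete forward/inverse pair, sketched for a log-transform (consistent with code example #10 below, where the prior bounds are passed through np.log when LogDomainParamTransform is used):

# Sketch: assigning wenv.domain_param applies the inverse transform, so the
# round trip forward -> inverse must recover the original value.
x = 15.0                         # untransformed parameter, e.g. a stiffness
y = np.log(x)                    # forward: the value the learner operates on
assert np.isclose(np.exp(y), x)  # inverse: the value applied to the simulator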
Code example #7
def test_parallel_sampling_deterministic_smoke_test_w_min_steps(
        tmpdir_factory, env: SimEnv, policy: Policy, algo, min_rollouts: int,
        min_steps: int):
    env.max_steps = 20

    seeds = (0, 1)
    nums_workers = (1, 2, 4)

    logging_results = []
    rollout_results: List[List[List[List[StepSequence]]]] = []
    for seed in seeds:
        logging_results.append((seed, []))
        rollout_results.append([])
        for num_workers in nums_workers:
            pyrado.set_seed(seed)
            policy.init_param(None)
            ex_dir = str(
                tmpdir_factory.mktemp(
                    f"seed={seed}-num_workers={num_workers}"))
            set_log_prefix_dir(ex_dir)
            vfcn = FNN(input_size=env.obs_space.flat_dim,
                       output_size=1,
                       hidden_sizes=[16, 16],
                       hidden_nonlin=to.tanh)
            critic = GAE(vfcn,
                         gamma=0.98,
                         lamda=0.95,
                         batch_size=32,
                         lr=1e-3,
                         standardize_adv=False)
            alg = algo(
                ex_dir,
                env,
                policy,
                critic,
                max_iter=3,
                min_rollouts=min_rollouts,
                min_steps=min_steps * env.max_steps,
                num_workers=num_workers,
            )
            alg.sampler = RolloutSavingWrapper(alg.sampler)
            alg.train()
            with open(f"{ex_dir}/progress.csv") as f:
                logging_results[-1][1].append(str(f.read()))
            rollout_results[-1].append(alg.sampler.rollouts)

    # Test that the observations for all numbers of workers are equal.
    for rollouts in rollout_results:
        for ros_a, ros_b in [(a, b) for a in rollouts for b in rollouts]:
            assert len(ros_a) == len(ros_b)
            for ro_a, ro_b in zip(ros_a, ros_b):
                assert len(ro_a) == len(ro_b)
                for r_a, r_b in zip(ro_a, ro_b):
                    assert r_a.observations == pytest.approx(r_b.observations)

    # Test that different seeds actually produce different results.
    for results_a, results_b in [(a, b) for seed_a, a in logging_results
                                 for seed_b, b in logging_results
                                 if seed_a != seed_b]:
        for result_a, result_b in [(a, b) for a in results_a for b in results_b
                                   if a is not b]:
            assert result_a != result_b

    # Test that same seeds produce same results.
    for _, results in logging_results:
        for result_a, result_b in [(a, b) for a in results for b in results]:
            assert result_a == result_b
Code example #8
def test_npdr_and_bayessim(
    ex_dir,
    algo_name: str,
    env: SimEnv,
    num_segments: int,
    len_segments: int,
    num_real_rollouts: int,
    num_sbi_rounds: int,
    use_rec_act: bool,
):
    pyrado.set_seed(0)

    # Create a fake ground truth target domain
    env_real = deepcopy(env)
    dp_nom = env.get_nominal_domain_param()
    env_real.domain_param = dict(mass_pend_pole=dp_nom["mass_pend_pole"] * 1.2,
                                 length_pend_pole=dp_nom["length_pend_pole"] *
                                 0.8)

    # Reduce the number of steps to make this test run faster
    env.max_steps = 40
    env_real.max_steps = 40

    # Policy
    policy = QQubeSwingUpAndBalanceCtrl(env.spec)

    # Define a mapping: index - domain parameter
    dp_mapping = {1: "mass_pend_pole", 2: "length_pend_pole"}

    # Prior
    prior_hparam = dict(
        low=to.tensor(
            [dp_nom["mass_pend_pole"] * 0.5,
             dp_nom["length_pend_pole"] * 0.5]),
        high=to.tensor(
            [dp_nom["mass_pend_pole"] * 1.5,
             dp_nom["length_pend_pole"] * 1.5]),
    )
    prior = sbiutils.BoxUniform(**prior_hparam)

    # Time series embedding
    embedding = BayesSimEmbedding(
        env.spec,
        RolloutSamplerForSBI.get_dim_data(env.spec),
        downsampling_factor=3,
    )

    # Posterior (normalizing flow)
    posterior_hparam = dict(model="maf",
                            embedding_net=nn.Identity(),
                            hidden_features=20,
                            num_transforms=3)

    # Policy optimization subroutine
    subrtn_policy_hparam = dict(
        max_iter=1,
        pop_size=2,
        num_init_states_per_domain=1,
        num_domains=2,
        expl_std_init=0.1,
        expl_factor=1.1,
        num_workers=1,
    )
    subrtn_policy = HCNormal(ex_dir, env, policy, **subrtn_policy_hparam)

    # Algorithm
    algo_hparam = dict(
        max_iter=1,
        num_sim_per_round=20,
        num_real_rollouts=num_real_rollouts,
        num_sbi_rounds=num_sbi_rounds,
        simulation_batch_size=1,
        normalize_posterior=False,
        num_eval_samples=2,
        num_segments=num_segments,
        len_segments=len_segments,
        use_rec_act=use_rec_act,
        stop_on_done=True,
        subrtn_sbi_training_hparam=dict(
            max_num_epochs=1),  # only train for one epoch
        # subrtn_sbi_sampling_hparam=dict(sample_with_mcmc=True, mcmc_parameters=dict(warmup_steps=20)),
        num_workers=1,
    )
    skip = False
    if algo_name == NPDR.name:
        algo = NPDR(
            save_dir=ex_dir,
            env_sim=env,
            env_real=env_real,
            policy=policy,
            dp_mapping=dp_mapping,
            prior=prior,
            embedding=embedding,
            subrtn_sbi_class=SNPE_C,
            posterior_hparam=posterior_hparam,
            subrtn_policy=subrtn_policy,
            **algo_hparam,
        )
    elif algo_name == BayesSim.name:
        # We are not checking multi-round SNPE-A since it has known issues
        if algo_hparam["num_sbi_rounds"] > 1:
            skip = True
        algo = BayesSim(
            save_dir=ex_dir,
            env_sim=env,
            env_real=env_real,
            policy=policy,
            dp_mapping=dp_mapping,
            embedding=embedding,
            prior=prior,
            subrtn_policy=subrtn_policy,
            **algo_hparam,
        )
    else:
        raise NotImplementedError

    if not skip:
        algo.train()
        # Just checking the interface here
        assert algo.curr_iter == algo.max_iter
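For orientation, the prior above is a box-uniform distribution over the two parameters in dp_mapping. A short sketch of what sampling from it yields (the shapes follow from the two-dimensional bounds):

# Sketch: samples from the BoxUniform prior are (n, 2) tensors bounded by the
# low/high tensors defined in prior_hparam.
samples = prior.sample((5,))
assert samples.shape == (5, 2)
assert to.all(samples >= prior_hparam["low"])
assert to.all(samples <= prior_hparam["high"])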
Code example #9
def test_sbi_embedding(
    ex_dir,
    env: SimEnv,
    embedding_name: str,
    num_segments: int,
    len_segments: int,
    stop_on_done: bool,
    state_mask_labels: Union[None, List[str]],
    act_mask_labels: Union[None, List[str]],
):
    pyrado.set_seed(0)

    # Reduce the number of steps to make this test run faster
    env.max_steps = 80

    # Policy
    policy = QQubeSwingUpAndBalanceCtrl(env.spec)

    # Define a mapping: index - domain parameter
    dp_mapping = {1: "mass_pend_pole", 2: "length_pend_pole"}

    # Time series embedding
    if embedding_name == LastStepEmbedding.name:
        embedding = LastStepEmbedding(
            env.spec,
            RolloutSamplerForSBI.get_dim_data(env.spec),
            state_mask_labels=state_mask_labels,
            act_mask_labels=act_mask_labels,
        )
    elif embedding_name == AllStepsEmbedding.name:
        embedding = AllStepsEmbedding(
            env.spec,
            RolloutSamplerForSBI.get_dim_data(env.spec),
            env.max_steps,
            downsampling_factor=3,
            state_mask_labels=state_mask_labels,
            act_mask_labels=act_mask_labels,
        )
    elif embedding_name == DeltaStepsEmbedding.name:
        embedding = DeltaStepsEmbedding(
            env.spec,
            RolloutSamplerForSBI.get_dim_data(env.spec),
            env.max_steps,
            downsampling_factor=3,
            state_mask_labels=state_mask_labels,
            act_mask_labels=act_mask_labels,
        )
    elif embedding_name == BayesSimEmbedding.name:
        embedding = BayesSimEmbedding(
            env.spec,
            RolloutSamplerForSBI.get_dim_data(env.spec),
            downsampling_factor=3,
            state_mask_labels=state_mask_labels,
            act_mask_labels=act_mask_labels,
        )
    elif embedding_name == DynamicTimeWarpingEmbedding.name:
        embedding = DynamicTimeWarpingEmbedding(
            env.spec,
            RolloutSamplerForSBI.get_dim_data(env.spec),
            downsampling_factor=3,
            state_mask_labels=state_mask_labels,
            act_mask_labels=act_mask_labels,
        )
    elif embedding_name == RNNEmbedding.name:
        embedding = RNNEmbedding(
            env.spec,
            RolloutSamplerForSBI.get_dim_data(env.spec),
            hidden_size=10,
            num_recurrent_layers=1,
            output_size=1,
            len_rollouts=env.max_steps,
            downsampling_factor=1,
            state_mask_labels=state_mask_labels,
            act_mask_labels=act_mask_labels,
        )
    else:
        raise NotImplementedError

    sampler = SimRolloutSamplerForSBI(
        env,
        policy,
        dp_mapping,
        embedding,
        num_segments,
        len_segments,
        stop_on_done,
        rollouts_real=None,
        use_rec_act=False,
    )

    # Test with 7 domain parameter sets
    data_sim = sampler(to.abs(to.randn(7, 2)))
    assert data_sim.shape == (7, embedding.dim_output)
Code example #10
File: test_plotting.py  Project: fdamken/SimuRLacra
def test_pair_plot_scatter(
    env: SimEnv,
    policy: Policy,
    layout: str,
    labels: Optional[str],
    legend_labels: Optional[str],
    axis_limits: Optional[str],
    use_kde: bool,
    use_trafo: bool,
):
    def _simulator(dp: to.Tensor) -> to.Tensor:
        """The most simple interface of a simulation to sbi, using `env` and `policy` from outer scope"""
        ro = rollout(
            env,
            policy,
            eval=True,
            reset_kwargs=dict(domain_param=dict(m=dp[0], k=dp[1], d=dp[2])))
        observation_sim = to.from_numpy(
            ro.observations[-1]).to(dtype=to.float32)
        return to.atleast_2d(observation_sim)

    # Fix the init state
    env.init_space = SingularStateSpace(env.init_space.sample_uniform())
    env_real = deepcopy(env)
    env_real.domain_param = {"mass": 0.8, "stiffness": 15, "d": 0.7}

    # Optionally transformed domain parameters for inference
    if use_trafo:
        env = LogDomainParamTransform(env, mask=["stiffness"])

    # Domain parameter mapping and prior
    dp_mapping = {0: "mass", 1: "stiffness", 2: "d"}
    k_low = np.log(10) if use_trafo else 10
    k_up = np.log(20) if use_trafo else 20
    prior = sbiutils.BoxUniform(low=to.tensor([0.5, k_low, 0.2]),
                                high=to.tensor([1.5, k_up, 0.8]))

    # Learn a likelihood from the simulator
    density_estimator = sbiutils.posterior_nn(model="maf",
                                              hidden_features=10,
                                              num_transforms=3)
    snpe = SNPE(prior, density_estimator)
    simulator, prior = prepare_for_sbi(_simulator, prior)
    domain_param, data_sim = simulate_for_sbi(simulator=simulator,
                                              proposal=prior,
                                              num_simulations=50,
                                              num_workers=1)
    snpe.append_simulations(domain_param, data_sim)
    density_estimator = snpe.train(max_num_epochs=5)
    posterior = snpe.build_posterior(density_estimator)

    # Create a fake (random) true domain parameter
    domain_param_gt = to.tensor([
        env_real.domain_param[dp_mapping[key]]
        for key in sorted(dp_mapping.keys())
    ])
    domain_param_gt += domain_param_gt * to.randn(len(dp_mapping)) / 10
    domain_param_gt = domain_param_gt.unsqueeze(0)
    data_real = simulator(domain_param_gt)

    domain_params, log_probs = SBIBase.eval_posterior(
        posterior,
        data_real,
        num_samples=6,
        normalize_posterior=False,
        subrtn_sbi_sampling_hparam=dict(sample_with_mcmc=False),
    )
    dp_samples = [
        domain_params.reshape(1, -1, domain_params.shape[-1]).squeeze()
    ]

    if layout == "inside":
        num_rows, num_cols = len(dp_mapping), len(dp_mapping)
    else:
        num_rows, num_cols = len(dp_mapping) + 1, len(dp_mapping) + 1

    _, axs = plt.subplots(num_rows,
                          num_cols,
                          figsize=(8, 8),
                          tight_layout=True)
    fig = draw_posterior_pairwise_scatter(
        axs=axs,
        dp_samples=dp_samples,
        dp_mapping=dp_mapping,
        prior=prior if axis_limits == "use_prior" else None,
        env_sim=env,
        env_real=env_real,
        axis_limits=axis_limits,
        marginal_layout=layout,
        labels=labels,
        legend_labels=legend_labels,
        use_kde=use_kde,
    )
    assert fig is not None
Code example #11
File: test_plotting.py  Project: fdamken/SimuRLacra
def test_pair_plot(
    env: SimEnv,
    policy: Policy,
    layout: str,
    labels: Optional[str],
    prob_labels: Optional[str],
    use_prior: bool,
    use_trafo: bool,
):
    def _simulator(dp: to.Tensor) -> to.Tensor:
        """The most simple interface of a simulation to sbi, using `env` and `policy` from outer scope"""
        ro = rollout(
            env,
            policy,
            eval=True,
            reset_kwargs=dict(domain_param=dict(m=dp[0], k=dp[1], d=dp[2])))
        observation_sim = to.from_numpy(
            ro.observations[-1]).to(dtype=to.float32)
        return to.atleast_2d(observation_sim)

    # Fix the init state
    env.init_space = SingularStateSpace(env.init_space.sample_uniform())
    env_real = deepcopy(env)
    env_real.domain_param = {"mass": 0.8, "stiffness": 35, "d": 0.7}

    # Optionally transformed domain parameters for inference
    if use_trafo:
        env = SqrtDomainParamTransform(env, mask=["stiffness"])

    # Domain parameter mapping and prior
    dp_mapping = {0: "mass", 1: "stiffness", 2: "d"}
    prior = sbiutils.BoxUniform(low=to.tensor([0.5, 20, 0.2]),
                                high=to.tensor([1.5, 40, 0.8]))

    # Learn a likelihood from the simulator
    density_estimator = sbiutils.posterior_nn(model="maf",
                                              hidden_features=10,
                                              num_transforms=3)
    snpe = SNPE(prior, density_estimator)
    simulator, prior = prepare_for_sbi(_simulator, prior)
    domain_param, data_sim = simulate_for_sbi(simulator=simulator,
                                              proposal=prior,
                                              num_simulations=50,
                                              num_workers=1)
    snpe.append_simulations(domain_param, data_sim)
    density_estimator = snpe.train(max_num_epochs=5)
    posterior = snpe.build_posterior(density_estimator)

    # Create a fake (random) true domain parameter
    domain_param_gt = to.tensor(
        [env_real.domain_param[key] for _, key in dp_mapping.items()])
    domain_param_gt += domain_param_gt * to.randn(len(dp_mapping)) / 5
    domain_param_gt = domain_param_gt.unsqueeze(0)
    data_real = simulator(domain_param_gt)

    # Get a (random) condition
    condition = Embedding.pack(domain_param_gt.clone())

    if layout == "inside":
        num_rows, num_cols = len(dp_mapping), len(dp_mapping)
    else:
        num_rows, num_cols = len(dp_mapping) + 1, len(dp_mapping) + 1

    if use_prior:
        grid_bounds = None
    else:
        prior = None
        grid_bounds = to.cat(
            [to.zeros((len(dp_mapping), 1)),
             to.ones((len(dp_mapping), 1))],
            dim=1)

    _, axs = plt.subplots(num_rows,
                          num_cols,
                          figsize=(14, 14),
                          tight_layout=True)
    fig = draw_posterior_pairwise_heatmap(
        axs,
        posterior,
        data_real,
        dp_mapping,
        condition,
        prior=prior,
        env_real=env_real,
        marginal_layout=layout,
        grid_bounds=grid_bounds,
        grid_res=100,
        normalize_posterior=False,
        rescale_posterior=True,
        labels=None if labels is None else [""] * len(dp_mapping),
        prob_labels=prob_labels,
    )

    assert fig is not None
Code example #12
def test_combination(env: SimEnv):
    pyrado.set_seed(0)
    env.max_steps = 20

    randomizer = create_default_randomizer(env)
    env_r = DomainRandWrapperBuffer(env, randomizer)
    env_r.fill_buffer(num_domains=3)

    dp_before = []
    dp_after = []
    for i in range(4):
        dp_before.append(env_r.domain_param)
        rollout(env_r,
                DummyPolicy(env_r.spec),
                eval=True,
                seed=0,
                render_mode=RenderMode())
        dp_after.append(env_r.domain_param)
        assert dp_after[i] != dp_before[i]
    assert dp_after[0] == dp_after[3]

    env_rn = ActNormWrapper(env)
    elb = {"x_dot": -213.0, "theta_dot": -42.0}
    eub = {"x_dot": 213.0, "theta_dot": 42.0, "x": 0.123}
    env_rn = ObsNormWrapper(env_rn, explicit_lb=elb, explicit_ub=eub)
    alb, aub = env_rn.act_space.bounds
    assert all(alb == -1)
    assert all(aub == 1)
    olb, oub = env_rn.obs_space.bounds
    assert all(olb == -1)
    assert all(oub == 1)

    ro_r = rollout(env_r,
                   DummyPolicy(env_r.spec),
                   eval=True,
                   seed=0,
                   render_mode=RenderMode())
    ro_rn = rollout(env_rn,
                    DummyPolicy(env_rn.spec),
                    eval=True,
                    seed=0,
                    render_mode=RenderMode())
    assert np.allclose(env_rn._process_obs(ro_r.observations),
                       ro_rn.observations)

    env_rnp = ObsPartialWrapper(
        env_rn, idcs=[env.obs_space.labels[2], env.obs_space.labels[3]])
    ro_rnp = rollout(env_rnp,
                     DummyPolicy(env_rnp.spec),
                     eval=True,
                     seed=0,
                     render_mode=RenderMode())

    env_rnpa = GaussianActNoiseWrapper(
        env_rnp,
        noise_mean=0.5 * np.ones(env_rnp.act_space.shape),
        noise_std=0.1 * np.ones(env_rnp.act_space.shape))
    ro_rnpa = rollout(env_rnpa,
                      DummyPolicy(env_rnpa.spec),
                      eval=True,
                      seed=0,
                      render_mode=RenderMode())
    assert not np.allclose(
        ro_rnp.observations,
        ro_rnpa.observations)  # the action noise changed the rollout

    env_rnpd = ActDelayWrapper(env_rnp, delay=3)
    ro_rnpd = rollout(env_rnpd,
                      DummyPolicy(env_rnpd.spec),
                      eval=True,
                      seed=0,
                      render_mode=RenderMode())
    assert np.allclose(ro_rnp.actions, ro_rnpd.actions)
    assert not np.allclose(ro_rnp.observations, ro_rnpd.observations)

    assert type(inner_env(env_rnpd)) == type(env)
    assert typed_env(env_rnpd, ObsPartialWrapper) is not None
    assert isinstance(env_rnpd, ActDelayWrapper)
    env_rnpdr = remove_env(env_rnpd, ActDelayWrapper)
    assert not isinstance(env_rnpdr, ActDelayWrapper)
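The final assertions exercise pyrado's wrapper-chain helpers. A short sketch of the chain built in this test and how the helpers navigate it (the ObsNormWrapper lookup is an extra illustrative call, not part of the test above):

# Chain, innermost to outermost:
#   env -> ActNormWrapper -> ObsNormWrapper -> ObsPartialWrapper -> ActDelayWrapper
assert inner_env(env_rnpd) is not None                  # bare env at the bottom
assert typed_env(env_rnpd, ObsNormWrapper) is not None  # find a wrapper by type
env_no_delay = remove_env(env_rnpd, ActDelayWrapper)    # strip one wrapper type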