def __init__(
        self,
        env,
        policy,
        num_workers: int,
        *,
        min_rollouts: int = None,
        min_steps: int = None,
        show_progress_bar: bool = True,
        seed: int = NO_SEED_PASSED,
    ):
        """
        Constructor

        :param env: environment to sample from
        :param policy: policy to act in the environment (can also be an exploration strategy)
        :param num_workers: number of parallel samplers
        :param min_rollouts: minimum number of complete rollouts to sample
        :param min_steps: minimum total number of steps to sample
        :param show_progress_bar: if `True`, display a progress bar using `tqdm`
        :param seed: seed value for the random number generators, pass `None` for no seeding; defaults to the last seed
                     that was set with `pyrado.set_seed`
        """
        Serializable._init(self, locals())
        super().__init__(min_rollouts=min_rollouts, min_steps=min_steps)

        self.env = env
        self.policy = policy
        self.show_progress_bar = show_progress_bar

        # Set method to spawn if using cuda
        if mp.get_start_method(allow_none=True) != "spawn":
            mp.set_start_method("spawn", force=True)

        # Create parallel pool. We use one thread per env because it's easier.
        self.pool = SamplerPool(num_workers)

        if seed is NO_SEED_PASSED:
            seed = pyrado.get_base_seed()
        self._seed = seed
        # Initialize with -1 such that we start with the 0-th sample. Incrementing after sampling may cause issues when
        # the sampling crashes and the sample count is not incremented.
        self._sample_count = -1

        # Distribute environments. We use pickle to make sure a copy is created for n_envs=1
        self.pool.invoke_all(_ps_init, pickle.dumps(self.env),
                             pickle.dumps(self.policy))
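The invoke_all(_ps_init, pickle.dumps(...)) call above hands a pickled copy of the environment and the policy to every worker. A minimal sketch of that pattern with plain multiprocessing (all names below are hypothetical stand-ins, not pyrado API):

import multiprocessing as mp
import pickle

_worker_env = None  # per-process copy, filled in by the initializer


def _init_worker(env_pkl: bytes):
    # Unpickling inside the worker guarantees that each process owns its own
    # copy of the environment, even in the degenerate case of a single worker
    global _worker_env
    _worker_env = pickle.loads(env_pkl)


def _rollout(step: int) -> int:
    # Stand-in for sampling from the per-worker environment copy
    return _worker_env["offset"] + step


if __name__ == "__main__":
    mp.set_start_method("spawn", force=True)  # same start method as above
    env = {"offset": 10}  # hypothetical stand-in for a simulation environment
    with mp.Pool(2, initializer=_init_worker, initargs=(pickle.dumps(env),)) as pool:
        print(pool.map(_rollout, range(4)))  # -> [10, 11, 12, 13]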
Example #2
    def __init__(self,
                 env: Env,
                 policy: Policy,
                 num_workers: int,
                 num_rollouts_per_param: int,
                 seed: int = None):
        """
        Constructor

        :param env: environment to sample from
        :param policy: policy used for sampling
        :param num_workers: number of parallel samplers
        :param num_rollouts_per_param: number of rollouts per policy parameter set (and init state if specified)
        :param seed: seed value for the random number generators, pass `None` for no seeding
        """
        if not isinstance(num_rollouts_per_param, int):
            raise pyrado.TypeErr(given=num_rollouts_per_param,
                                 expected_type=int)
        if num_rollouts_per_param < 1:
            raise pyrado.ValueErr(given=num_rollouts_per_param,
                                  ge_constraint='1')

        Serializable._init(self, locals())

        # Check environment for domain randomization wrappers (stops after finding the outermost)
        self._dr_wrapper = typed_env(env, DomainRandWrapper)
        if self._dr_wrapper is not None:
            assert isinstance(inner_env(env), SimEnv)
            # Remove them all from the env chain since we sample the domain parameter later explicitly
            env = remove_all_dr_wrappers(env)

        self.env, self.policy = env, policy
        self.num_rollouts_per_param = num_rollouts_per_param

        # Create parallel pool. We use one thread per environment because it's easier.
        self.pool = SamplerPool(num_workers)

        # Set all rngs' seeds
        if seed is not None:
            self.pool.set_seed(seed)

        # Distribute environments. We use pickle to make sure a copy is created for n_envs = 1
        self.pool.invoke_all(_pes_init, pickle.dumps(self.env),
                             pickle.dumps(self.policy))
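typed_env and remove_all_dr_wrappers search the wrapper chain for a DomainRandWrapper and strip all such wrappers before the domain parameters are sampled explicitly. A rough, simplified sketch of what these helpers conceptually do (not the pyrado implementation):

class Wrapper:
    # Hypothetical minimal env wrapper that forwards to an inner env
    def __init__(self, wrapped):
        self.wrapped_env = wrapped


class RandWrapper(Wrapper):
    # Stand-in for a domain randomization wrapper
    pass


def find_wrapper(env, wrapper_type):
    # Walk the chain from the outside in; return the first match or None
    while isinstance(env, Wrapper):
        if isinstance(env, wrapper_type):
            return env
        env = env.wrapped_env
    return None


def remove_wrappers(env, wrapper_type):
    # Rebuild the chain without any wrapper of the given type
    if not isinstance(env, Wrapper):
        return env
    inner = remove_wrappers(env.wrapped_env, wrapper_type)
    if isinstance(env, wrapper_type):
        return inner
    env.wrapped_env = inner
    return env


chain = RandWrapper(Wrapper(object()))  # randomized -> plain -> core env
assert find_wrapper(chain, RandWrapper) is chain
assert find_wrapper(remove_wrappers(chain, RandWrapper), RandWrapper) is None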
Example #3
    def __init__(
        self,
        wrapped_env: Env,
        parameters: Sequence[DomainParam],
        inner_policy: Policy,
        discriminator,
        step_length: float = 0.01,
        horizon: int = 50,
        num_rollouts_per_config: int = 8,
        num_workers: int = 4,
    ):
        """
        Constructor

        :param wrapped_env: the environment to wrap
        :param parameters: which physics parameters should be randomized
        :param inner_policy: the policy to train the subroutine on
        :param discriminator: the discriminator to distinguish reference environments from randomized ones
        :param step_length: the step size
        :param horizon: the SVPG horizon
        :param num_rollouts_per_config: number of trajectories to sample per physics configuration
        :param num_workers: number of environments for parallel sampling
        """
        Serializable._init(self, locals())

        EnvWrapper.__init__(self, wrapped_env)

        self.parameters: Sequence[DomainParam] = parameters
        self.pool = SamplerPool(num_workers)
        self.inner_policy = inner_policy
        self.svpg_state = None
        self.count = 0
        self.num_trajs = num_rollouts_per_config
        self.svpg_max_step_length = step_length
        self.discriminator = discriminator
        self.max_steps = 8
        self._adapter_obs_space = BoxSpace(-np.ones(len(parameters)),
                                           np.ones(len(parameters)))
        self._adapter_act_space = BoxSpace(-np.ones(len(parameters)),
                                           np.ones(len(parameters)))
        self.horizon = horizon
        self.horizon_count = 0
Example #4
    def __init__(self,
                 env,
                 policy,
                 num_workers: int,
                 *,
                 min_rollouts: int = None,
                 min_steps: int = None,
                 show_progress_bar: bool = True,
                 seed: int = None):
        """
        Constructor

        :param env: environment to sample from
        :param policy: policy to act in the environment (can also be an exploration strategy)
        :param num_workers: number of parallel samplers
        :param min_rollouts: minimum number of complete rollouts to sample
        :param min_steps: minimum total number of steps to sample
        :param show_progress_bar: if `True`, display a progress bar using `tqdm`
        :param seed: seed value for the random number generators, pass `None` for no seeding
        """
        Serializable._init(self, locals())
        super().__init__(min_rollouts=min_rollouts, min_steps=min_steps)

        self.env = env
        self.policy = policy
        self.show_progress_bar = show_progress_bar

        # Set method to spawn if using cuda
        if self.policy.device == 'cuda':
            mp.set_start_method('spawn', force=True)

        # Create parallel pool. We use one thread per env because it's easier.
        self.pool = SamplerPool(num_workers)

        # Set all rngs' seeds
        if seed is not None:
            self.set_seed(seed)

        # Distribute environments. We use pickle to make sure a copy is created for n_envs=1
        self.pool.invoke_all(_ps_init, pickle.dumps(self.env),
                             pickle.dumps(self.policy))
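Unlike Example #1, this variant switches to the spawn start method only when the policy lives on a CUDA device; forked workers cannot safely re-initialize CUDA. A guarded version of that switch, which avoids touching an already configured start method (a sketch, not the original code):

import multiprocessing as mp


def ensure_spawn_start_method():
    # Sampling with a CUDA policy requires the "spawn" start method
    if mp.get_start_method(allow_none=True) != "spawn":
        mp.set_start_method("spawn", force=True)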
Example #5
    def __init__(self,
                 env,
                 policy,
                 num_envs: int,
                 *,
                 min_rollouts: int = None,
                 min_steps: int = None,
                 bernoulli_reset: bool = None,
                 seed: int = None):
        """
        Constructor

        :param env: environment to sample from
        :param policy: policy to act in the environment (can also be an exploration strategy)
        :param num_envs: number of parallel samplers
        :param min_rollouts: minimum number of complete rollouts to sample.
        :param min_steps: minimum total number of steps to sample.
        :param bernoulli_reset: probability for resetting after the current time step
        :param seed: Seed to use. Every subprocess is seeded with seed+thread_number
        """
        Serializable._init(self, locals())
        super().__init__(min_rollouts=min_rollouts, min_steps=min_steps)

        self.env = env
        self.policy = policy
        self.bernoulli_reset = bernoulli_reset

        # Set method to spawn if using cuda
        if self.policy.device == 'cuda':
            mp.set_start_method('spawn', force=True)

        # Create parallel pool. We use one thread per env because it's easier.
        self.pool = SamplerPool(num_envs)

        if seed is not None:
            self.pool.set_seed(seed)

        # Distribute environments. We use pickle to make sure a copy is created for n_envs=1
        self.pool.invoke_all(_ps_init, pickle.dumps(self.env),
                             pickle.dumps(self.policy),
                             pickle.dumps(self.bernoulli_reset))
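The docstring above states that every subprocess is seeded with seed + thread_number. A minimal sketch of that scheme (hypothetical helper, not the SamplerPool.set_seed implementation):

import random

import numpy as np


def seed_worker(base_seed: int, worker_idx: int) -> int:
    # Derive a per-worker seed so runs are reproducible while the workers
    # still produce different (but deterministic) rollouts
    seed = base_seed + worker_idx
    random.seed(seed)
    np.random.seed(seed)
    return seed


worker_seeds = [seed_worker(42, i) for i in range(4)]  # -> [42, 43, 44, 45]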
Example #6
    def __init__(self,
                 wrapped_env: Env,
                 parameters: ADR.PhysicsParameters,
                 inner_policy: Policy,
                 discriminator: RewardGenerator,
                 step_length=0.01,
                 horizon=50,
                 num_trajs_per_config=8,
                 num_sampler_envs: int = 4):
        """
        Constructor

        :param wrapped_env: the environment to wrap
        :param parameters: which physics parameters should be randomized
        :param inner_policy: the policy to train the subroutine on
        :param discriminator: the discriminator to distinguish reference envs from randomized ones
        :param step_length: the step size
        :param horizon: the SVPG horizon
        :param num_trajs_per_config: number of trajectories to sample per physics configuration
        :param num_sampler_envs: the number of samplers operating in parallel
        """
        Serializable._init(self, locals())
        EnvWrapper.__init__(self, wrapped_env)
        self.parameters = parameters
        self.pool = SamplerPool(num_sampler_envs)
        self.inner_policy = inner_policy
        self.state = None
        self.count = 0
        self.num_trajs = num_trajs_per_config
        self.svpg_max_step_length = step_length
        self.discriminator = discriminator
        self.max_steps = 8
        self._adapter_obs_space = BoxSpace(-np.ones(self.parameters.length), np.ones(self.parameters.length))
        self._adapter_act_space = BoxSpace(-np.ones(self.parameters.length), np.ones(self.parameters.length))
        self.horizon = horizon

        self.horizon_count = 0
Example #7
    def __init__(self,
                 save_dir: str,
                 env: Env,
                 subroutine: Algorithm,
                 max_iter: int,
                 svpg_particle_hparam: dict,
                 num_svpg_particles: int,
                 num_discriminator_epoch: int,
                 batch_size: int,
                 svpg_learning_rate: float = 3e-4,
                 svpg_temperature: float = 10,
                 svpg_evaluation_steps: int = 10,
                 svpg_horizon: int = 50,
                 svpg_kl_factor: float = 0.03,
                 svpg_warmup: int = 0,
                 svpg_serial: bool = False,
                 num_sampler_envs: int = 4,
                 num_trajs_per_config: int = 8,
                 max_step_length: float = 0.05,
                 randomized_params=None,
                 logger: StepLogger = None):
        """
        Constructor

        :param save_dir: directory to save the snapshots i.e. the results in
        :param env: the environment to train in
        :param subroutine: algorithm which performs the policy / value-function optimization
        :param max_iter: maximum number of iterations
        :param svpg_particle_hparam: SVPG particle hyperparameters
        :param num_svpg_particles: number of SVPG particles
        :param num_discriminator_epoch: epochs in discriminator training
        :param batch_size: batch size for training
        :param svpg_learning_rate: SVPG particle optimizers' learning rate
        :param svpg_temperature: SVPG temperature coefficient (how strong is the influence of the particles on each other)
        :param svpg_evaluation_steps: how many configurations to sample between training
        :param svpg_horizon: how many steps until the particles are reset
        :param svpg_kl_factor: kl reward coefficient
        :param svpg_warmup: number of iterations without SVPG training in the beginning
        :param svpg_serial: serial mode (see SVPG)
        :param num_sampler_envs: number of environments for parallel sampling
        :param num_trajs_per_config: number of trajectories to sample from each config
        :param max_step_length: maximum change of physics parameters per step
        :param randomized_params: which parameters to randomize
        :param logger: logger for every step of the algorithm, if `None` the default logger will be created
        """
        if not isinstance(env, Env):
            raise pyrado.TypeErr(given=env, expected_type=Env)
        if not isinstance(subroutine, Algorithm):
            raise pyrado.TypeErr(given=subroutine, expected_type=Algorithm)
        if not isinstance(subroutine.policy, Policy):
            raise pyrado.TypeErr(given=subroutine.policy, expected_type=Policy)

        # Call Algorithm's constructor
        super().__init__(save_dir, max_iter, subroutine.policy, logger)
        self.log_loss = True

        # Store the inputs
        self.env = env
        self.subroutine = subroutine
        self.num_particles = num_svpg_particles
        self.num_discriminator_epoch = num_discriminator_epoch
        self.batch_size = batch_size
        self.num_trajs_per_config = num_trajs_per_config
        self.warm_up_time = svpg_warmup
        self.svpg_evaluation_steps = svpg_evaluation_steps
        self.svpg_temperature = svpg_temperature
        self.svpg_lr = svpg_learning_rate
        self.svpg_max_step_length = max_step_length
        self.svpg_horizon = svpg_horizon
        self.svpg_kl_factor = svpg_kl_factor

        # Get the number of params
        self.params = self.PhysicsParameters(env, randomized_params)
        self.num_params = self.params.length

        # Initialize the sampler
        self.pool = SamplerPool(num_sampler_envs)

        # Initialize reward generator
        self.reward_generator = RewardGenerator(
            env.spec,
            self.batch_size,
            reward_multiplier=1,
            lr=1e-3,
            logger=self.logger
        )

        # Initialize step counter
        self.curr_time_step = 0

        # Initialize logbook
        self.sim_instances_full_horizon = np.random.random_sample(
            (self.num_particles, self.svpg_horizon, self.svpg_evaluation_steps, self.num_params)
        )

        self.svpg_wrapper = SVPGAdapter(
            env,
            self.params,
            subroutine.expl_strat,
            self.reward_generator,
            horizon=self.svpg_horizon,
            num_trajs_per_config=self.num_trajs_per_config,
            num_sampler_envs=num_sampler_envs,
        )

        # Initialize SVPG
        self.svpg = SVPG(
            save_dir,
            self.svpg_wrapper,
            svpg_particle_hparam,
            max_iter,
            self.num_particles,
            self.svpg_temperature,
            self.svpg_lr,
            self.svpg_horizon,
            serial=svpg_serial,
            num_sampler_envs=num_sampler_envs,
            logger=logger
        )
Example #8
    def __init__(
        self,
        env: Union[SimEnv, EnvWrapper],
        policy: Policy,
        num_init_states_per_domain: int,
        num_domains: int,
        num_workers: int,
        seed: Optional[int] = None,
    ):
        """
        Constructor

        :param env: environment to sample from
        :param policy: policy used for sampling
        :param num_init_states_per_domain: number of rollouts to cover the variance over initial states
        :param num_domains: number of rollouts due to the variance over domain parameters
        :param num_workers: number of parallel samplers
        :param seed: seed value for the random number generators, pass `None` for no seeding; defaults to the last seed
                     that was set with `pyrado.set_seed`
        """
        if not isinstance(num_init_states_per_domain, int):
            raise pyrado.TypeErr(given=num_init_states_per_domain,
                                 expected_type=int)
        if num_init_states_per_domain < 1:
            raise pyrado.ValueErr(given=num_init_states_per_domain,
                                  ge_constraint="1")
        if not isinstance(num_domains, int):
            raise pyrado.TypeErr(given=num_domains, expected_type=int)
        if num_domains < 1:
            raise pyrado.ValueErr(given=num_domains, ge_constraint="1")

        Serializable._init(self, locals())

        # Check environment for domain randomization wrappers (stops after finding the outermost)
        self._dr_wrapper = typed_env(env, DomainRandWrapper)
        if self._dr_wrapper is not None:
            assert isinstance(inner_env(env), SimEnv)
            # Remove them all from the env chain since we sample the domain parameter later explicitly
            env = remove_all_dr_wrappers(env)

        self.env, self.policy = env, policy
        self.num_init_states_per_domain = num_init_states_per_domain
        self.num_domains = num_domains

        # Set method to spawn if using cuda
        if mp.get_start_method(allow_none=True) != "spawn":
            mp.set_start_method("spawn", force=True)

        # Create parallel pool. We use one thread per environment because it's easier.
        self.pool = SamplerPool(num_workers)

        if seed is NO_SEED_PASSED:
            seed = pyrado.get_base_seed()
        self._seed = seed
        # Initialize with -1 such that we start with the 0-th sample. Incrementing after sampling may cause issues when
        # the sampling crashes and the sample count is not incremented.
        self._sample_count = -1

        # Distribute environments. We use pickle to make sure a copy is created for n_envs = 1
        self.pool.invoke_all(_pes_init, pickle.dumps(self.env),
                             pickle.dumps(self.policy))
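Per call, this sampler covers both sources of variance, so the total number of rollouts is the product of the two counts (assuming one rollout per combination of initial state and domain, as the parameter documentation above suggests):

num_init_states_per_domain = 5  # hypothetical values
num_domains = 8
total_rollouts_per_call = num_init_states_per_domain * num_domains  # 40 rollouts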
Example #9
    env_sim_list = []
    policy_list = []
    for ex_dir in ex_dirs:
        env_sim, policy, _ = load_experiment(ex_dir, args)
        env_sim_list.append(env_sim)
        policy_list.append(policy)

    # Fix initial state (set to None if it should not be fixed)
    init_state_list = [None]*args.num_ro_per_config

    # Create empty data frame
    df = pd.DataFrame(columns=['policy', 'ret', 'len'])

    # Evaluate all policies
    for i, (env_sim, policy) in enumerate(zip(env_sim_list, policy_list)):
        # Create a new sampler pool for every policy to synchronize the random seeds i.e. init states
        pool = SamplerPool(args.num_workers)

        # Seed the sampler
        if args.seed is not None:
            pool.set_seed(args.seed)
            print_cbt(f"Set the random number generators' seed to {args.seed}.", 'w')
        else:
            print_cbt('No seed was set', 'y')

        # Add the same wrappers as during training
        env = wrap_like_other_env(env, env_sim)

        # Sample rollouts
        ros = eval_randomized_domain(pool, env, pert, policy, init_state_list)  # internally calls DomainRandWrapperLive

        # Compute results metrics
Example #10
        policies.append(policy)

    # Create one-dim results grid and ensure right number of rollouts
    param_list = param_grid(param_spec)
    param_list *= args.num_ro_per_config

    # Fix initial state (set to None if it should not be fixed)
    init_state = None

    # Create empty data frame
    df = pd.DataFrame(columns=['policy', 'ret', 'len', varied_param_key])

    # Evaluate all policies
    for i, policy in enumerate(policies):
        # Create a new sampler pool for every policy to synchronize the random seeds i.e. init states
        pool = SamplerPool(args.num_envs)

        # Seed the sampler
        if args.seed is not None:
            pool.set_seed(args.seed)
            print_cbt(f'Set seed to {args.seed}', 'y')
        else:
            print_cbt('No seed was set', 'r', bright=True)

        # Add an action normalization wrapper if the policy was trained with one
        env = conditional_actnorm_wrapper(env, ex_dirs, i)

        # Sample rollouts
        ros = eval_domain_params(pool, env, policy, param_list, init_state)

        # Compute results metrics
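param_grid(param_spec) builds one domain-parameter configuration per combination of the specified values, and multiplying the resulting list repeats every configuration for the desired number of rollouts. A small sketch of that idea (hypothetical helper, not the pyrado param_grid function, and assuming all values are given as lists):

import itertools


def make_param_grid(param_spec: dict) -> list:
    # One dict per combination of the specified parameter values
    keys = list(param_spec.keys())
    return [dict(zip(keys, values)) for values in itertools.product(*param_spec.values())]


grid = make_param_grid({"gravity_const": [9.81, 11.0], "ball_mass": [0.003, 0.03]})
assert len(grid) == 4
grid = grid * 3  # repeat every configuration for 3 rollouts -> 12 entries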
Example #11
    def __init__(
        self,
        save_dir: pyrado.PathLike,
        env: Env,
        subrtn: Algorithm,
        max_iter: int,
        svpg_particle_hparam: dict,
        num_svpg_particles: int,
        num_discriminator_epoch: int,
        batch_size: int,
        svpg_learning_rate: float = 3e-4,
        svpg_temperature: float = 10,
        svpg_evaluation_steps: int = 10,
        svpg_horizon: int = 50,
        svpg_kl_factor: float = 0.03,
        svpg_warmup: int = 0,
        svpg_serial: bool = False,
        num_workers: int = 4,
        num_trajs_per_config: int = 8,
        max_step_length: float = 0.05,
        randomized_params: Sequence[str] = None,
        logger: Optional[StepLogger] = None,
    ):
        """
        Constructor

        :param save_dir: directory to save the snapshots i.e. the results in
        :param env: the environment to train in
        :param subrtn: algorithm which performs the policy / value-function optimization
        :param max_iter: maximum number of iterations
        :param svpg_particle_hparam: SVPG particle hyperparameters
        :param num_svpg_particles: number of SVPG particles
        :param num_discriminator_epoch: epochs in discriminator training
        :param batch_size: batch size for training
        :param svpg_learning_rate: SVPG particle optimizers' learning rate
        :param svpg_temperature: SVPG temperature coefficient (how strong is the influence of the particles on each other)
        :param svpg_evaluation_steps: how many configurations to sample between training
        :param svpg_horizon: how many steps until the particles are reset
        :param svpg_kl_factor: kl reward coefficient
        :param svpg_warmup: number of iterations without SVPG training in the beginning
        :param svpg_serial: serial mode (see SVPG)
        :param num_workers: number of environments for parallel sampling
        :param num_trajs_per_config: number of trajectories to sample from each config
        :param max_step_length: maximum change of physics parameters per step
        :param randomized_params: which parameters to randomize
        :param logger: logger for every step of the algorithm, if `None` the default logger will be created
        """
        if not isinstance(env, Env):
            raise pyrado.TypeErr(given=env, expected_type=Env)
        if not isinstance(subrtn, Algorithm):
            raise pyrado.TypeErr(given=subrtn, expected_type=Algorithm)
        if not isinstance(subrtn.policy, Policy):
            raise pyrado.TypeErr(given=subrtn.policy, expected_type=Policy)

        # Call Algorithm's constructor
        super().__init__(save_dir, max_iter, subrtn.policy, logger)
        self.log_loss = True

        # Store the inputs
        self.env = env
        self._subrtn = subrtn
        self._subrtn.save_name = "subrtn"
        self.num_particles = num_svpg_particles
        self.num_discriminator_epoch = num_discriminator_epoch
        self.batch_size = batch_size
        self.num_trajs_per_config = num_trajs_per_config
        self.warm_up_time = svpg_warmup
        self.svpg_evaluation_steps = svpg_evaluation_steps
        self.svpg_temperature = svpg_temperature
        self.svpg_lr = svpg_learning_rate
        self.svpg_max_step_length = max_step_length
        self.svpg_horizon = svpg_horizon
        self.svpg_kl_factor = svpg_kl_factor

        self.pool = SamplerPool(num_workers)
        self.curr_time_step = 0

        # Get the number of params
        if isinstance(randomized_params, list) and len(randomized_params) == 0:
            randomized_params = inner_env(
                self.env).get_nominal_domain_param().keys()
        self.params = [DomainParam(param, 1) for param in randomized_params]
        self.num_params = len(self.params)

        # Initialize reward generator
        self.reward_generator = RewardGenerator(env.spec,
                                                self.batch_size,
                                                reward_multiplier=1,
                                                lr=1e-3,
                                                logger=self.logger)

        # Initialize logbook
        self.sim_instances_full_horizon = np.random.random_sample(
            (self.num_particles, self.svpg_horizon, self.svpg_evaluation_steps,
             self.num_params))

        # Initialize SVPG
        self.svpg_wrapper = SVPGAdapter(
            env,
            self.params,
            subrtn.expl_strat,
            self.reward_generator,
            horizon=self.svpg_horizon,
            num_rollouts_per_config=self.num_trajs_per_config,
            num_workers=num_workers,
        )
        self.svpg = SVPG(
            save_dir,
            self.svpg_wrapper,
            svpg_particle_hparam,
            max_iter,
            self.num_particles,
            self.svpg_temperature,
            self.svpg_lr,
            self.svpg_horizon,
            serial=svpg_serial,
            num_workers=num_workers,
            logger=logger,
        )
        self.svpg.save_name = "subrtn_svpg"
Example #12
def evaluate_policy(args, ex_dir):
    """Helper function to evaluate the policy from an experiment in the associated environment."""
    env, policy, _ = load_experiment(ex_dir, args)

    # Create multi-dim evaluation grid
    param_spec = dict()
    param_spec_dim = None

    if isinstance(inner_env(env), BallOnPlateSim):
        param_spec["ball_radius"] = np.linspace(0.02, 0.08, num=2, endpoint=True)
        param_spec["ball_rolling_friction_coefficient"] = np.linspace(0.0295, 0.9, num=2, endpoint=True)

    elif isinstance(inner_env(env), QQubeSwingUpSim):
        eval_num = 200
        # Use nominal values for all other parameters.
        for param, nominal_value in env.get_nominal_domain_param().items():
            param_spec[param] = nominal_value
        # param_spec["gravity_const"] = np.linspace(5.0, 15.0, num=eval_num, endpoint=True)
        param_spec["damping_pend_pole"] = np.linspace(0.0, 0.0001, num=eval_num, endpoint=True)
        param_spec["damping_rot_pole"] = np.linspace(0.0, 0.0006, num=eval_num, endpoint=True)
        param_spec_dim = 2

    elif isinstance(inner_env(env), QBallBalancerSim):
        # param_spec["gravity_const"] = np.linspace(7.91, 11.91, num=11, endpoint=True)
        # param_spec["ball_mass"] = np.linspace(0.003, 0.3, num=11, endpoint=True)
        # param_spec["ball_radius"] = np.linspace(0.01, 0.1, num=11, endpoint=True)
        param_spec["plate_length"] = np.linspace(0.275, 0.275, num=11, endpoint=True)
        param_spec["arm_radius"] = np.linspace(0.0254, 0.0254, num=11, endpoint=True)
        # param_spec["load_inertia"] = np.linspace(5.2822e-5*0.5, 5.2822e-5*1.5, num=11, endpoint=True)
        # param_spec["motor_inertia"] = np.linspace(4.6063e-7*0.5, 4.6063e-7*1.5, num=11, endpoint=True)
        # param_spec["gear_ratio"] = np.linspace(60, 80, num=11, endpoint=True)
        # param_spec["gear_efficiency"] = np.linspace(0.6, 1.0, num=11, endpoint=True)
        # param_spec["motor_efficiency"] = np.linspace(0.49, 0.89, num=11, endpoint=True)
        # param_spec["motor_back_emf"] = np.linspace(0.006, 0.066, num=11, endpoint=True)
        # param_spec["motor_resistance"] = np.linspace(2.6*0.5, 2.6*1.5, num=11, endpoint=True)
        # param_spec["combined_damping"] = np.linspace(0.0, 0.05, num=11, endpoint=True)
        # param_spec["friction_coeff"] = np.linspace(0, 0.015, num=11, endpoint=True)
        # param_spec["voltage_thold_x_pos"] = np.linspace(0.0, 1.0, num=11, endpoint=True)
        # param_spec["voltage_thold_x_neg"] = np.linspace(-1., 0.0, num=11, endpoint=True)
        # param_spec["voltage_thold_y_pos"] = np.linspace(0.0, 1.0, num=11, endpoint=True)
        # param_spec["voltage_thold_y_neg"] = np.linspace(-1.0, 0, num=11, endpoint=True)
        # param_spec["offset_th_x"] = np.linspace(-5/180*np.pi, 5/180*np.pi, num=11, endpoint=True)
        # param_spec["offset_th_y"] = np.linspace(-5/180*np.pi, 5/180*np.pi, num=11, endpoint=True)

    else:
        raise NotImplementedError

    # Always add an action delay wrapper (with 0 delay by default)
    if typed_env(env, ActDelayWrapper) is None:
        env = ActDelayWrapper(env)
    # param_spec['act_delay'] = np.linspace(0, 30, num=11, endpoint=True, dtype=int)

    add_info = "-".join(param_spec.keys())

    # Create multidimensional results grid and ensure right number of rollouts
    param_list = param_grid(param_spec)
    param_list *= args.num_rollouts_per_config

    # Fix initial state (set to None if it should not be fixed)
    init_state = np.array([0.0, 0.0, 0.0, 0.0])

    # Create sampler
    pool = SamplerPool(args.num_workers)
    if args.seed is not None:
        pool.set_seed(args.seed)
        print_cbt(f"Set the random number generators' seed to {args.seed}.", "w")
    else:
        print_cbt("No seed was set", "y")

    # Sample rollouts
    ros = eval_domain_params(pool, env, policy, param_list, init_state)

    # Compute metrics
    lod = []
    for ro in ros:
        d = dict(**ro.rollout_info["domain_param"], ret=ro.undiscounted_return(), len=ro.length)
        # Simply remove the observation noise from the domain parameters
        try:
            d.pop("obs_noise_mean")
            d.pop("obs_noise_std")
        except KeyError:
            pass
        lod.append(d)

    df = pd.DataFrame(lod)
    metrics = dict(
        avg_len=df["len"].mean(),
        avg_ret=df["ret"].mean(),
        median_ret=df["ret"].median(),
        min_ret=df["ret"].min(),
        max_ret=df["ret"].max(),
        std_ret=df["ret"].std(),
    )
    pprint(metrics, indent=4)

    # Create subfolder and save
    timestamp = datetime.datetime.now()
    add_info = timestamp.strftime(pyrado.timestamp_format) + "--" + add_info
    save_dir = osp.join(ex_dir, "eval_domain_grid", add_info)
    os.makedirs(save_dir, exist_ok=True)

    save_dicts_to_yaml(
        {"ex_dir": str(ex_dir)},
        {"varied_params": list(param_spec.keys())},
        {"num_rpp": args.num_rollouts_per_config, "seed": args.seed},
        {"metrics": dict_arraylike_to_float(metrics)},
        save_dir=save_dir,
        file_name="summary",
    )
    pyrado.save(df, f"df_sp_grid_{len(param_spec) if param_spec_dim is None else param_spec_dim}d.pkl", save_dir)