Example #1
def eval_domain_params(
        pool: SamplerPool,
        env: SimEnv,
        policy: Policy,
        params: List[Dict],
        init_state: Optional[np.ndarray] = None) -> List[StepSequence]:
    """
    Evaluate a policy on a multidimensional grid of domain parameters.

    :param pool: parallel sampler
    :param env: environment to evaluate in
    :param policy: policy to evaluate
    :param params: list of domain parameter dicts, e.g. the points of a multidimensional grid
    :param init_state: initial state of the environment; if not `None`, every rollout starts from this fixed state
    :return: list of rollouts
    """
    # Strip all domain randomization wrappers from the environment
    env = remove_all_dr_wrappers(env, verbose=True)
    if init_state is not None:
        env.init_space = SingularStateSpace(fixed_state=init_state)

    pool.invoke_all(_ps_init, pickle.dumps(env), pickle.dumps(policy))

    # Run with progress bar
    with tqdm(leave=False, file=sys.stdout, unit="rollouts",
              desc="Sampling") as pb:
        return pool.run_map(
            functools.partial(_ps_run_one_domain_param, eval=True), params, pb)
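
A minimal usage sketch for `eval_domain_params`, assuming a Pyrado `SimEnv` instance `env` and a trained `Policy` instance `policy` already exist; the parameter names `m_pole` and `l_pole` are placeholders for whatever domain parameters the environment actually exposes.

import itertools

import numpy as np

# Build a flat list of domain parameter dicts from a 2D grid (parameter names are hypothetical)
masses = np.linspace(0.8, 1.2, num=5)
lengths = np.linspace(0.9, 1.1, num=5)
params = [dict(m_pole=m, l_pole=l) for m, l in itertools.product(masses, lengths)]

pool = SamplerPool(4)  # 4 parallel workers
rollouts = eval_domain_params(pool, env, policy, params)
print(f"Collected {len(rollouts)} rollouts.")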
Example #2
def eval_randomized_domain(
        pool: SamplerPool, env: SimEnv, randomizer: DomainRandomizer,
        policy: Policy, init_states: List[np.ndarray]) -> List[StepSequence]:
    """
    Evaluate a policy in a randomized domain.

    :param pool: parallel sampler
    :param env: environment to evaluate in
    :param randomizer: randomizer used to sample domain instances, an instance of a `DomainRandomizer` subclass
    :param policy: policy to evaluate
    :param init_states: list of initial states of the environment; one rollout is sampled per state
    :return: list of rollouts
    """
    # Randomize the environments
    env = remove_all_dr_wrappers(env)
    env = DomainRandWrapperLive(env, randomizer)

    pool.invoke_all(_ps_init, pickle.dumps(env), pickle.dumps(policy))

    # Run with progress bar
    with tqdm(leave=False, file=sys.stdout, unit="rollouts",
              desc="Sampling") as pb:
        return pool.run_map(
            functools.partial(_ps_run_one_init_state, eval=True), init_states,
            pb)
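
A usage sketch for `eval_randomized_domain`; `env`, `policy`, and a `DomainRandomizer` instance `randomizer` are assumed to exist, and drawing start states via `env.init_space.sample_uniform()` is an assumption about the environment's init space.

# Draw a batch of start states, then evaluate the policy under live domain randomization
num_rollouts = 50
init_states = [env.init_space.sample_uniform() for _ in range(num_rollouts)]  # assumed API

pool = SamplerPool(4)  # 4 parallel workers
rollouts = eval_randomized_domain(pool, env, randomizer, policy, init_states)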
Example #3
    def __init__(self,
                 env: Env,
                 policy: Policy,
                 num_workers: int,
                 num_rollouts_per_param: int,
                 seed: Optional[int] = None):
        """
        Constructor

        :param env: environment to sample from
        :param policy: policy used for sampling
        :param num_workers: number of parallel samplers
        :param num_rollouts_per_param: number of rollouts per policy parameter set (and init state if specified)
        :param seed: seed value for the random number generators, pass `None` for no seeding
        """
        if not isinstance(num_rollouts_per_param, int):
            raise pyrado.TypeErr(given=num_rollouts_per_param,
                                 expected_type=int)
        if num_rollouts_per_param < 1:
            raise pyrado.ValueErr(given=num_rollouts_per_param,
                                  ge_constraint='1')

        Serializable._init(self, locals())

        # Check environment for domain randomization wrappers (stops after finding the outermost)
        self._dr_wrapper = typed_env(env, DomainRandWrapper)
        if self._dr_wrapper is not None:
            assert isinstance(inner_env(env), SimEnv)
            # Remove them all from the env chain since we sample the domain parameter later explicitly
            env = remove_all_dr_wrappers(env)

        self.env, self.policy = env, policy
        self.num_rollouts_per_param = num_rollouts_per_param

        # Create parallel pool. We use one thread per environment because it's easier.
        self.pool = SamplerPool(num_workers)

        # Set all rngs' seeds
        if seed is not None:
            self.pool.set_seed(seed)

        # Distribute environments. We use pickle to make sure a copy is created for n_envs = 1
        self.pool.invoke_all(_pes_init, pickle.dumps(self.env),
                             pickle.dumps(self.policy))
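
The constructor above is shown without its class name; judging from the `_pes_init` helper, it belongs to a parallel parameter-exploration sampler. A hypothetical instantiation, where the class name `ParameterExplorationSampler` is an assumption rather than taken from the snippet:

sampler = ParameterExplorationSampler(  # hypothetical class name
    env=env,                    # SimEnv (possibly wrapped), assumed to exist
    policy=policy,              # Policy whose parameter sets will be rolled out, assumed to exist
    num_workers=4,              # size of the SamplerPool
    num_rollouts_per_param=8,   # rollouts per candidate parameter set
    seed=1001,                  # seed for all workers' random number generators
)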
Example #4
def eval_nominal_domain(pool: SamplerPool, env: SimEnv, policy: Policy,
                        init_states: List[np.ndarray]) -> List[StepSequence]:
    """
    Evaluate a policy using the nominal domain parameters, i.e. those set in the given environment.

    :param pool: parallel sampler
    :param env: environment to evaluate in
    :param policy: policy to evaluate
    :param init_states: list of initial states of the environment; one rollout is sampled per state
    :return: list of rollouts
    """
    # Strip all domain randomization wrappers from the environment
    env = remove_all_dr_wrappers(env)

    pool.invoke_all(_ps_init, pickle.dumps(env), pickle.dumps(policy))

    # Run with progress bar
    with tqdm(leave=False, file=sys.stdout, unit="rollouts",
              desc="Sampling") as pb:
        return pool.run_map(
            functools.partial(_ps_run_one_init_state, eval=True), init_states,
            pb)
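
A sketch of aggregating the result of `eval_nominal_domain`; it assumes `pool`, `env`, `policy`, and `init_states` exist as in the examples above, and that each returned `StepSequence` exposes its per-step rewards via a `rewards` attribute.

import numpy as np

rollouts = eval_nominal_domain(pool, env, policy, init_states)
returns = np.array([np.sum(ro.rewards) for ro in rollouts])  # `rewards` attribute is an assumption
print(f"Mean return over {len(returns)} rollouts: {returns.mean():.3f} +/- {returns.std():.3f}")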
Example #5
def load_experiment(
    ex_dir: str,
    args: Any = None
) -> Tuple[Optional[Union[SimEnv, EnvWrapper]], Optional[Policy],
           Optional[dict]]:
    """
    Load the (training) environment and the policy.
    This helper function first tries to read the hyper-parameters YAML file in the experiment's directory to infer
    which entities should be loaded. If no such file is found, we fall back to a heuristic and hope for the best.

    :param ex_dir: experiment's parent directory
    :param args: arguments from the argument parser, pass `None` to fall back to the values from the default argparser
    :return: environment, policy, and a dict of optional extra outputs (e.g. the value function)
    """
    env, policy, extra = None, None, dict()

    if args is None:
        # Fall back to default arguments. By passing [], we ignore the command line arguments
        args = get_argparser().parse_args([])

    # Hyper-parameters
    extra["hparams"] = load_hyperparameters(ex_dir)

    # Algorithm specific
    algo = Algorithm.load_snapshot(load_dir=ex_dir, load_name="algo")

    if algo.name == "spota":
        # Environment
        env = pyrado.load("env.pkl", ex_dir)
        if getattr(env, "randomizer", None) is not None:
            if not isinstance(env, DomainRandWrapperBuffer):
                raise pyrado.TypeErr(given=env,
                                     expected_type=DomainRandWrapperBuffer)
            typed_env(env, DomainRandWrapperBuffer).fill_buffer(10)
            print_cbt(
                f"Loaded the domain randomizer\n{env.randomizer}\nand filled it with 10 random instances.",
                "w")
        else:
            print_cbt("Loaded environment has no randomizer, or it is None.",
                      "r")
        # Policy
        policy = pyrado.load(algo.subroutine_cand.policy,
                             f"{args.policy_name}.pt",
                             ex_dir,
                             verbose=True)
        # Extra (value function)
        if isinstance(algo.subroutine_cand, ActorCritic):
            extra["vfcn"] = pyrado.load(algo.subroutine_cand.critic.vfcn,
                                        f"{args.vfcn_name}.pt",
                                        ex_dir,
                                        verbose=True)

    elif algo.name == "bayrn":
        # Environment
        env = pyrado.load("env_sim.pkl", ex_dir)
        if hasattr(env, "randomizer"):
            last_cand = to.load(osp.join(ex_dir, "candidates.pt"))[-1, :]
            env.adapt_randomizer(last_cand.numpy())
            print_cbt(f"Loaded the domain randomizer\n{env.randomizer}", "w")
        else:
            print_cbt("Loaded environment has no randomizer, or it is None.",
                      "r")
        # Policy
        policy = pyrado.load(f"{args.policy_name}.pt",
                             ex_dir,
                             obj=algo.policy,
                             verbose=True)
        # Extra (value function)
        if isinstance(algo.subroutine, ActorCritic):
            extra["vfcn"] = pyrado.load(f"{args.vfcn_name}.pt",
                                        ex_dir,
                                        obj=algo.subroutine.critic.vfcn,
                                        verbose=True)

    elif algo.name == "simopt":
        # Environment
        env = pyrado.load("env_sim.pkl", ex_dir)
        if getattr(env, "randomizer", None) is not None:
            last_cand = to.load(osp.join(ex_dir, "candidates.pt"))[-1, :]
            env.adapt_randomizer(last_cand.numpy())
            print_cbt(f"Loaded the domain randomizer\n{env.randomizer}", "w")
        else:
            print_cbt("Loaded environment has no randomizer, or it is None.",
                      "r")
        # Policy
        policy = pyrado.load(f"{args.policy_name}.pt",
                             ex_dir,
                             obj=algo.subroutine_policy.policy,
                             verbose=True)
        # Extra (domain parameter distribution policy)
        extra["ddp_policy"] = pyrado.load("ddp_policy.pt",
                                          ex_dir,
                                          obj=algo.subroutine_distr.policy,
                                          verbose=True)

    elif algo.name in ["epopt", "udr"]:
        # Environment
        env = pyrado.load("env_sim.pkl", ex_dir)
        if getattr(env, "randomizer", None) is not None:
            if not isinstance(env, DomainRandWrapperLive):
                raise pyrado.TypeErr(given=env,
                                     expected_type=DomainRandWrapperLive)
            print_cbt(f"Loaded the domain randomizer\n{env.randomizer}", "w")
        else:
            print_cbt("Loaded environment has no randomizer, or it is None.",
                      "y")
        # Policy
        policy = pyrado.load(f"{args.policy_name}.pt",
                             ex_dir,
                             obj=algo.policy,
                             verbose=True)
        # Extra (value function)
        if isinstance(algo.subroutine, ActorCritic):
            extra["vfcn"] = pyrado.load(f"{args.vfcn_name}.pt",
                                        ex_dir,
                                        obj=algo.subroutine.critic.vfcn,
                                        verbose=True)

    elif algo.name in ["bayessim", "npdr"]:
        # Environment
        env = pyrado.load("env_sim.pkl", ex_dir)
        if getattr(env, "randomizer", None) is not None:
            if not isinstance(env, DomainRandWrapperBuffer):
                raise pyrado.TypeErr(given=env,
                                     expected_type=DomainRandWrapperBuffer)
            typed_env(env, DomainRandWrapperBuffer).fill_buffer(10)
            print_cbt(
                f"Loaded the domain randomizer\n{env.randomizer}\nand filled it with 10 random instances.",
                "w")
        else:
            print_cbt("Loaded environment has no randomizer, or it is None.",
                      "y")
            env = remove_all_dr_wrappers(env, verbose=True)
        # Policy
        policy = pyrado.load(f"{args.policy_name}.pt",
                             ex_dir,
                             obj=algo.policy,
                             verbose=True)
        # Extra (prior, posterior, data)
        extra["prior"] = pyrado.load("prior.pt", ex_dir, verbose=True)
        # By default load the latest posterior (latest iteration and the last round)
        try:
            extra["posterior"] = algo.load_posterior(ex_dir,
                                                     args.iter,
                                                     args.round,
                                                     obj=None,
                                                     verbose=True)
            # Load the complete data or the data of the given iteration
            prefix = "" if args.iter == -1 else f"iter_{args.iter}"
            extra["data_real"] = pyrado.load(f"data_real.pt",
                                             ex_dir,
                                             prefix=prefix,
                                             verbose=True)
        except FileNotFoundError:
            pass

    elif algo.name in ["a2c", "ppo", "ppo2"]:
        # Environment
        env = pyrado.load("env.pkl", ex_dir)
        # Policy
        policy = pyrado.load(f"{args.policy_name}.pt",
                             ex_dir,
                             obj=algo.policy,
                             verbose=True)
        # Extra (value function)
        extra["vfcn"] = pyrado.load(f"{args.vfcn_name}.pt",
                                    ex_dir,
                                    obj=algo.critic.vfcn,
                                    verbose=True)

    elif algo.name in ["hc", "pepg", "power", "cem", "reps", "nes"]:
        # Environment
        env = pyrado.load("env.pkl", ex_dir)
        # Policy
        policy = pyrado.load(f"{args.policy_name}.pt",
                             ex_dir,
                             obj=algo.policy,
                             verbose=True)

    elif algo.name in ["dql", "sac"]:
        # Environment
        env = pyrado.load("env.pkl", ex_dir)
        # Policy
        policy = pyrado.load(f"{args.policy_name}.pt",
                             ex_dir,
                             obj=algo.policy,
                             verbose=True)
        # Target value functions
        if algo.name == "dql":
            extra["qfcn_target"] = pyrado.load("qfcn_target.pt",
                                               ex_dir,
                                               obj=algo.qfcn_targ,
                                               verbose=True)
        elif algo.name == "sac":
            extra["qfcn_target1"] = pyrado.load("qfcn_target1.pt",
                                                ex_dir,
                                                obj=algo.qfcn_targ_1,
                                                verbose=True)
            extra["qfcn_target2"] = pyrado.load("qfcn_target2.pt",
                                                ex_dir,
                                                obj=algo.qfcn_targ_2,
                                                verbose=True)
        else:
            raise NotImplementedError

    elif algo.name == "svpg":
        # Environment
        env = pyrado.load("env.pkl", ex_dir)
        # Policy
        policy = pyrado.load(f"{args.policy_name}.pt",
                             ex_dir,
                             obj=algo.policy,
                             verbose=True)
        # Extra (particles)
        for idx, p in enumerate(algo.particles):
            extra[f"particle{idx}"] = pyrado.load(f"particle_{idx}.pt",
                                                  ex_dir,
                                                  obj=p,
                                                  verbose=True)

    elif algo.name == "tspred":
        # Dataset
        extra["dataset"] = to.load(osp.join(ex_dir, "dataset.pt"))
        # Policy
        policy = pyrado.load(f"{args.policy_name}.pt",
                             ex_dir,
                             obj=algo.policy,
                             verbose=True)

    elif algo.name == "sprl":
        # Environment
        env = pyrado.load("env.pkl", ex_dir)
        print_cbt(f"Loaded {osp.join(ex_dir, 'env.pkl')}.", "g")
        # Policy
        policy = pyrado.load(f"{args.policy_name}.pt", ex_dir, obj=algo.policy)
        print_cbt(f"Loaded {osp.join(ex_dir, f'{args.policy_name}.pt')}", "g")
        # Extra (value function)
        if isinstance(algo._subroutine, ActorCritic):
            extra["vfcn"] = pyrado.load(f"{args.vfcn_name}.pt",
                                        ex_dir,
                                        obj=algo._subroutine.critic.vfcn,
                                        verbose=True)

    elif algo.name == "pddr":
        # Environment
        env = pyrado.load("env.pkl", ex_dir)
        # Policy
        policy = pyrado.load(f"{args.policy_name}.pt",
                             ex_dir,
                             obj=algo.policy,
                             verbose=True)
        # Teachers
        extra["teacher_policies"] = algo.teacher_policies
        extra["teacher_envs"] = algo.teacher_envs
        extra["teacher_expl_strats"] = algo.teacher_expl_strats
        extra["teacher_critics"] = algo.teacher_critics
        extra["teacher_ex_dirs"] = algo.teacher_ex_dirs

    else:
        raise pyrado.TypeErr(
            msg="No matching algorithm name found while loading the experiment!")

    # Check if the return types are correct. They can be None, too.
    if env is not None and not isinstance(env, (SimEnv, EnvWrapper)):
        raise pyrado.TypeErr(given=env, expected_type=[SimEnv, EnvWrapper])
    if policy is not None and not isinstance(policy, Policy):
        raise pyrado.TypeErr(given=policy, expected_type=Policy)
    if extra is not None and not isinstance(extra, dict):
        raise pyrado.TypeErr(given=extra, expected_type=dict)

    return env, policy, extra
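
A minimal sketch of calling `load_experiment` from an evaluation script, using `get_argparser` and `ask_for_experiment` the same way the surrounding scripts do.

args = get_argparser().parse_args()
ex_dir = ask_for_experiment() if args.dir is None else args.dir
env, policy, extra = load_experiment(ex_dir, args)
print(f"Loaded a {type(policy).__name__} policy with extra outputs: {list(extra.keys())}")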
Example #6
    if args.num_rollouts_per_config < 1:
        raise pyrado.ValueErr(given=args.num_rollouts_per_config, ge_constraint="1")
    num_ml_samples = args.num_rollouts_per_config
    if args.mode.lower() not in ["samples", "confidence"]:
        raise pyrado.ValueErr(given=args.mode, eq_constraint="samples or confidence")
    if args.cut_rollout is not None:
        if len(args.cut_rollout) != 2:
            raise pyrado.ValueErr(given=args.cut_rollout, eq_constraint="tuple of integers")
        else:
            args.cut_rollout = tuple(args.cut_rollout)

    # Get the experiment's directory to load from
    ex_dir = ask_for_experiment() if args.dir is None else args.dir

    # Load the environments, the policy, and the posterior
    env_sim, policy, kwout = load_experiment(ex_dir, args)
    env_sim = remove_all_dr_wrappers(env_sim)  # randomize manually later
    env_real = pyrado.load("env_real.pkl", ex_dir)
    prior = kwout["prior"]
    posterior = kwout["posterior"]
    data_real = kwout["data_real"]

    # Load the algorithm and the required data
    algo = Algorithm.load_snapshot(ex_dir)
    if not isinstance(algo, (NPDR, BayesSim)):
        raise pyrado.TypeErr(given=algo, expected_type=(NPDR, BayesSim))

    # Set seed if desired
    pyrado.set_seed(args.seed)

    # Load the rollouts
    rollouts_real, _ = load_rollouts_from_dir(ex_dir)