Example 1
def load_rollouts_from_dir(
    ex_dir: str,
    key: str = "rollout",
    file_exts: Tuple[str, ...] = ("pt", "pkl")
) -> Tuple[List[StepSequence], List[str]]:
    """
    Crawl through the given directory, sort the files, and load all rollouts, i.e. all files that include the key.

    :param ex_dir: directory, e.g. an experiment folder
    :param key: word or part of a word that needs to be in the name of a file for it to be loaded
    :param file_exts: file extensions to be considered for loading
    :return: list of loaded rollouts, and list of file names without extension
    """
    if not osp.isdir(ex_dir):
        raise pyrado.PathErr(given=ex_dir)
    if not isinstance(key, str):
        raise pyrado.TypeErr(given=key, expected_type=str)
    if not is_iterable(file_exts):
        raise pyrado.TypeErr(given=file_exts, expected_type=Iterable)

    rollouts = []
    names = []
    for root, dirs, files in os.walk(ex_dir):
        dirs.clear()  # prevents walk() from going into subdirectories
        files = natural_sort(files)  # natural_sort returns a new, sorted list
        for f in files:
            f_ext = f[f.rfind(".") + 1:]
            if key in f and f_ext in file_exts:
                name = f[:f.rfind(".")]
                names.append(name)
                rollouts.append(pyrado.load(f"{name}.{f_ext}", load_dir=root))

    if not rollouts:
        raise pyrado.ValueErr(msg="No rollouts have been found!")

    if isinstance(rollouts[0], list):
        if not check_all_types_equal(rollouts):
            raise pyrado.TypeErr(
                msg="Some rollout files contain lists of rollouts, others don't!"
            )
        # The rollout files contain lists of rollouts, flatten them
        rollouts = list(itertools.chain(*rollouts))

    return rollouts, names
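
A minimal usage sketch for the function above; the experiment path is a placeholder, and the `length` attribute on the loaded rollouts is an assumption about `StepSequence`, not something shown in this excerpt.

# Hedged usage sketch: the path below is a placeholder, not a real experiment directory.
ex_dir = "/tmp/experiments/my_env/my_algo/2021-01-01_12-00-00"
rollouts, names = load_rollouts_from_dir(ex_dir, key="rollout", file_exts=("pt", "pkl"))
for name, ro in zip(names, rollouts):
    print(f"{name}: {ro.length} steps")  # assumes StepSequence exposes a length attribute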
Example 2
    def eval_init_policies(self):
        """
        Execute the trained initial policies on the target device and store the estimated return per candidate.
        The number of initial policies to evaluate is the number of found policies.
        """
        # Crawl through the experiment's directory
        for root, dirs, files in os.walk(self.save_dir):
            dirs.clear()  # prevents walk() from going into subdirectories
            found_policies = [
                p for p in files
                if p.startswith("init_") and p.endswith("_policy.pt")
            ]
            found_cands = [
                c for c in files
                if c.startswith("init_") and c.endswith("_candidate.pt")
            ]
        if len(found_policies) != len(found_cands):
            raise pyrado.ValueErr(
                msg="Found a different number of initial policies than candidates!"
            )
        elif len(found_policies) == 0:
            raise pyrado.ValueErr(msg="No policies or candidates found!")

        num_init_cand = len(found_cands)
        cands_values = to.empty(num_init_cand)

        # Load all found candidates and stack them into a single tensor
        # The order is important since it determines the rows of the tensor
        found_cands = natural_sort(found_cands)
        cands = to.stack([to.load(osp.join(self.save_dir, c)) for c in found_cands])

        # Evaluate learned policies from random candidates on the target environment (real-world) system
        for i in range(num_init_cand):
            policy = pyrado.load("policy.pt",
                                 self.save_dir,
                                 prefix=f"init_{i}",
                                 obj=self.policy)
            cands_values[i] = self.eval_policy(
                self.save_dir,
                self._env_real,
                policy,
                self.mc_estimator,
                prefix=f"init_{i}",
                num_rollouts=self.num_eval_rollouts_real,
                num_workers=self.num_workers,
            )

        # Save candidates and their returns into tensors (policy is saved during training or exists already)
        pyrado.save(cands_values, "candidates_values.pt", self.save_dir)
        self.cands, self.cands_values = cands, cands_values
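
The comment above notes that the sorting order determines the rows of the candidate tensor. A tiny illustration of why natural sorting matters for the `init_*_candidate.pt` names; the file names below are made up, and the expected output assumes `natural_sort` orders embedded integers numerically.

# Illustration with made-up file names; natural_sort is assumed to sort embedded integers numerically.
names = ["init_10_candidate.pt", "init_2_candidate.pt", "init_1_candidate.pt"]
print(sorted(names))        # lexicographic: ['init_10_candidate.pt', 'init_1_candidate.pt', 'init_2_candidate.pt']
print(natural_sort(names))  # expected: ['init_1_candidate.pt', 'init_2_candidate.pt', 'init_10_candidate.pt']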
Example 3
def load_hyperparameters(ex_dir: pyrado.PathLike, verbose: bool = True) -> Optional[dict]:
    """
    Loads the hyper-parameters-dict from the given experiment directory. The hyper-parameters file is assumed to be
    named `hyperparams.yaml`.

    :param ex_dir: experiment's directory to load from
    :param verbose: if `True`, print a message if no hyper-parameter file was found
    :return: dictionary of hyper-parameters, or `None` if no hyper-parameter file was found
    """
    hparams_file_name = "hyperparams.yaml"

    for root, dirs, files in os.walk(ex_dir):
        dirs.clear()  # prevents walk() from going into subdirectories
        files = natural_sort(files)  # natural_sort returns a new, sorted list

        if hparams_file_name in files:
            # Default case
            return load_dict_from_yaml(osp.join(ex_dir, hparams_file_name))

        for file in files:
            # Recursively merge the hyper-parameter configurations
            if file.startswith("hparam") and file.endswith(".yaml"):
                hparam_args = load_dict_from_yaml(osp.join(ex_dir, file))
                setting_args = load_dict_from_yaml(
                    osp.join(ex_dir, "settings.yaml"))
                return update_matching_keys_recursively(
                    setting_args, hparam_args)

    # No hyper-parameter file was found
    if verbose:
        print_cbt(
            f"Did not find {hparams_file_name} in {ex_dir} or could not crawl the loaded hyper-parameters.",
            "y",
            bright=True,
        )
    return None
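
A short usage sketch for `load_hyperparameters`; the directory is a placeholder.

# Hedged usage sketch: the path is a placeholder.
hparams = load_hyperparameters("/tmp/experiments/my_env/my_algo/2021-01-01_12-00-00")
if hparams is not None:
    print(sorted(hparams.keys()))  # inspect which hyper-parameters were stored
else:
    print("No hyper-parameter file found.")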
Example 4
            if not c.startswith("init_") and c.endswith("_candidate.pt")
        ]

    # Check
    if not found_policies:
        raise pyrado.ShapeErr(msg="No policies found!")
    if not found_cands:
        raise pyrado.ShapeErr(msg="No candidates found!")
    if len(found_policies) != len(found_cands):  # don't count the final policy
        raise pyrado.ShapeErr(
            msg=f"Found {len(found_policies)} initial policies but {len(found_cands)} candidates!"
        )

    # Sort
    found_policies = natural_sort(found_policies)
    found_cands = natural_sort(found_cands)

    # Plot the candidate values
    fig, ax = plt.subplots(1)
    for i in range(len(found_cands)):
        cand = to.load(osp.join(ex_dir, found_cands[i])).numpy()
        ax.scatter(np.arange(cand.size),
                   cand,
                   label=r"$\phi_{" + str(i) + "}$",
                   c=f"C{i%10}",
                   s=16)
    ax.xaxis.set_major_locator(MaxNLocator(integer=True))
    ax.set_ylabel("parameter value")
    ax.set_xlabel("parameter index")
    plt.legend()
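
The loop above labels each candidate with a LaTeX symbol and cycles through Matplotlib's default color palette. A self-contained sketch of the same plotting pattern with synthetic data; the candidate values below are random placeholders, not loaded from an experiment.

import matplotlib.pyplot as plt
import numpy as np
from matplotlib.ticker import MaxNLocator

fig, ax = plt.subplots(1)
for i in range(3):
    cand = np.random.randn(5)  # synthetic stand-in for a loaded candidate tensor
    ax.scatter(np.arange(cand.size), cand, label=r"$\phi_{" + str(i) + "}$", c=f"C{i % 10}", s=16)
ax.xaxis.set_major_locator(MaxNLocator(integer=True))  # parameter indices are integers
ax.set_xlabel("parameter index")
ax.set_ylabel("parameter value")
ax.legend()
plt.show()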
Example 5
if __name__ == "__main__":
    # Parse command line arguments
    args = get_argparser().parse_args()
    plt.rc("text", usetex=args.use_tex)

    # Get the experiments' directories to load from
    if args.dir is None:
        parent_dir = input(
            "Please enter the directory for the experiments to compare:\n")
    else:
        parent_dir = args.dir
    if not osp.isdir(parent_dir):
        raise pyrado.PathErr(given=parent_dir)
    dirs = get_immediate_subdirs(parent_dir)
    dirs = natural_sort(dirs)

    # Collect average and best returns per iteration
    df = pd.DataFrame()
    best_returns = []

    # Plot progress of each experiment
    fig, axs = plt.subplots(2, figsize=(12, 8))
    for idx, d in enumerate(dirs):
        # Load an experiment's data
        file = os.path.join(d, "progress.csv")
        data = read_csv_w_replace(file)

        # Append one column per experiment
        df = pd.concat([df, pd.DataFrame({f"ex_{idx}": data.avg_return})],
                       axis=1)
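
The fragment above appends one `avg_return` column per experiment to `df`. A hedged sketch of how such a frame could be aggregated afterwards; this continuation is an assumption, not part of the original script.

# Assumed continuation: aggregate the per-experiment columns (NaNs appear where experiments have fewer iterations).
mean_return = df.mean(axis=1)
std_return = df.std(axis=1)
axs[0].plot(mean_return.index, mean_return.values, label="mean over experiments")
axs[0].fill_between(mean_return.index, mean_return - std_return, mean_return + std_return, alpha=0.3)
axs[0].legend()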
Example 6
    def __init__(
        self,
        name: str,
        parent_dir: str,
        incl_pattern: Optional[str] = None,
        excl_pattern: Optional[str] = None,
        latest_evals_only: bool = False,
        eval_subdir_name: str = "evaluation",
        sort: bool = False,
    ):
        """
        Constructor

        :param name: label for the data, e.g. name of the algorithm
        :param parent_dir: path to the algorithm's directory
        :param incl_pattern: only include experiments if their names partially contain the include pattern
        :param excl_pattern: exclude experiments if their names partially contain the exclude pattern
        :param latest_evals_only: if `True`, only the very latest evaluation file is loaded to estimate the returns
        :param eval_subdir_name: name of the subdirectory within each experiment that contains the evaluation results
        :param sort: sort the found experiments by name, i.e. by date
        """
        if not osp.isdir(parent_dir):
            raise pyrado.PathErr(given=parent_dir)
        if incl_pattern is not None and not isinstance(incl_pattern, str):
            raise pyrado.TypeErr(given=incl_pattern, expected_type=str)
        if excl_pattern is not None and not isinstance(excl_pattern, str):
            raise pyrado.TypeErr(given=excl_pattern, expected_type=str)

        self.name = name
        self.parent_dir = parent_dir
        self.incl_pattern = incl_pattern
        self.excl_pattern = excl_pattern
        self.latest_evals_only = latest_evals_only
        self.eval_subdir_name = eval_subdir_name

        # Include experiments
        self.matches = get_immediate_subdirs(parent_dir)
        if sort:
            self.matches = natural_sort(self.matches)

        if self.incl_pattern is not None:
            # Only include experiments if their names partially contain the include pattern
            self.matches = list(filter(lambda d: self.incl_pattern in d, self.matches))

        if self.excl_pattern is not None:
            # Exclude experiments if their names partially contain the exclude pattern
            self.matches = list(filter(lambda d: self.excl_pattern not in d, self.matches))

        self._returns_est_per_ex = []
        self.returns_est = []
        cnt_nonexist_dirs = 0
        for match in self.matches:
            # Get the evaluation subdirectory
            eval_dir = osp.join(match, self.eval_subdir_name)

            if osp.exists(eval_dir):
                # Crawl through the experiment's evaluation directory
                rets = []  # empirical returns from the experiments
                num_samples = []  # number of samples per return estimate
                for root, dirs, files in os.walk(eval_dir):
                    files.sort(reverse=True)  # in case there are multiple evaluations
                    # Only include the latest evaluation found in the folder if flag is set
                    for f in files if not self.latest_evals_only else files[:1]:
                        if f.endswith(".npy"):
                            rets.append(np.load(osp.join(eval_dir, f)))
                            num_samples.append(len(rets))
                        elif f.endswith(".pt"):
                            rets.append(to.load(osp.join(eval_dir, f)).cpu().numpy())
                        else:
                            raise FileNotFoundError

                # Store the estimated return per evaluation run (averaged over the rollouts of each evaluation)
                self._returns_est_per_ex.append(np.mean(np.asarray(rets), axis=1))
                self.returns_est.extend(np.mean(np.asarray(rets), axis=1))

            else:
                # No evaluation sub-directory found; nothing to store for this experiment
                cnt_nonexist_dirs += 1

        # Print what has been loaded
        ex_names = ["..." + m[m.rfind("/") :] for m in self.matches]  # cut off everything until the experiment's name
        print(
            tabulate(
                [[ex_name, ret] for ex_name, ret in zip(ex_names, self._returns_est_per_ex)],
                headers=["Loaded directory", "Returns averaged per experiment"],
            )
        )

        if cnt_nonexist_dirs == 0:
            print_cbt("All evaluation sub-directories have been found.", "g")
        else:
            print_cbt(f"{cnt_nonexist_dirs} evaluation sub-directories have been missed.", "y")