def logging_config(log_root, search):
    """Default logging directory: unique timestamped subdirectory under `log_root`."""
    # NOTE(review): `log_dir` looks unused but is presumably captured by the
    # enclosing config framework (Sacred-style config scope) -- hence the
    # F841/pylint suppressions. Confirm against the enclosing experiment.
    log_dir = os.path.join(  # noqa: F841 pylint:disable=unused-variable
        log_root,
        "plot_epic_heatmap",
        str(search).replace("/", "_"),  # sanitize path separators out of `search`
        util.make_unique_timestamp(),  # unique suffix avoids clobbering earlier runs
    )
def logging_config(log_root, tag):
    """Default logging configuration: hierarchical directory structure based on config."""
    # `log_dir` looks unused but is presumably captured by the enclosing
    # config framework (Sacred-style config scope) -- hence the suppressions.
    log_dir = os.path.join(  # noqa: F841 pylint:disable=unused-variable
        log_root,
        "combined_distances",
        tag,
        imit_util.make_unique_timestamp(),  # unique suffix avoids clobbering earlier runs
    )
def logging_config(log_root, env_name, dataset_tag, corr_kind, discount):
    """Default logging configuration: hierarchical directory structure based on config."""
    # `log_dir` looks unused but is presumably captured by the enclosing
    # config framework (Sacred-style config scope) -- hence the suppressions.
    log_dir = os.path.join(  # noqa: F841 pylint:disable=unused-variable
        log_root,
        "erc",
        env_name,
        dataset_tag,
        corr_kind,
        f"discount{discount}",  # encode the discount factor into the path
        imit_util.make_unique_timestamp(),  # unique suffix avoids clobbering earlier runs
    )
def paths(env_name, log_root, rollout_hint, data_dir): log_dir = os.path.join(log_root, env_name.replace("/", "_"), util.make_unique_timestamp()) # Recommended that user sets rollout_path manually. # By default we guess the named config associated with `env_name` # and attempt to load rollouts from `data/expert_models/`. if rollout_hint is None: rollout_hint = env_name.split("-")[0].lower() rollout_path = os.path.join(data_dir, "expert_models", f"{rollout_hint}_0", "rollouts", "final.pkl")
def logging_config(env_name, log_root):
    """Default logging configuration: hierarchical directory structure based on config."""
    timestamp = imit_util.make_unique_timestamp()  # unique suffix avoids clobbering earlier runs
    log_dir = os.path.join(
        log_root,
        "plot_heatmap",
        env_name,
        timestamp,
    )
    # Touch locals() to quieten linter "unused variable" warnings; the locals
    # themselves are presumably captured by the config framework (confirm).
    _ = locals()
    del _
def hook(config, command_name, logger):
    """Config hook: supply a default ``log_dir`` when the user left it unset.

    Args:
        config: configuration dict; only ``config["common"]`` is inspected.
        command_name: name of the command being run; becomes a path component.
        logger: unused, accepted to satisfy the hook signature.

    Returns:
        A dict of config updates -- either ``{"log_dir": ...}`` with a
        timestamped default path, or ``{}`` if ``log_dir`` was already set.
    """
    del logger  # unused, required by the hook signature
    common = config["common"]
    if common["log_dir"] is not None:
        # User supplied a log_dir explicitly; nothing to update.
        return {}
    sanitized_env = common["env_name"].replace("/", "_")
    return {
        "log_dir": os.path.join(
            "output",
            command_name,
            sanitized_env,
            util.make_unique_timestamp(),  # unique suffix avoids clobbering earlier runs
        ),
    }
def step(self, user_action: np.ndarray) -> Tuple[np.ndarray, float, bool, dict]:
    """Steps the environment, possibly substituting a robot action.

    DAgger needs to be able to inject imitation policy actions randomly at some
    subset of time steps. This method will replace the given action with a
    "robot" (i.e. imitation policy) action if necessary.

    Args:
        user_action: the _intended_ demonstrator action for the current state.
            This will be executed with probability `self.beta`. Otherwise, a
            "robot" action will be sampled and executed instead.

    Returns:
        next_obs, reward, done, info: unchanged output of `self.env.step()`.
    """
    assert self._is_reset, "call .reset() before .step()"

    # With probability (1 - beta), execute a robot action in place of the
    # demonstrator's intended action.
    if np.random.uniform(0, 1) > self.beta:
        executed_action = self.get_robot_act(self._last_obs)
    else:
        executed_action = user_action

    next_obs, reward, done, info = self.env.step(executed_action)
    self._last_obs = next_obs
    # Record the *intended* demonstrator action, not necessarily the executed
    # one (matches the docstring: `user_action` is the demonstrator label).
    self.traj_accum.add_step(
        {"acts": user_action, "obs": next_obs, "rews": reward, "infos": info}
    )

    if done:
        if not self._done_before:
            # Save the trajectory exactly once per episode, even if the user
            # keeps stepping the finished env before resetting.
            trajectory = self.traj_accum.finish_trajectory()
            stamp = util.make_unique_timestamp()
            trajectory_path = os.path.join(self.save_dir, "dagger-demo-" + stamp + ".npz")
            logging.info(f"Saving demo at '{trajectory_path}'")
            _save_trajectory(trajectory_path, trajectory)
        # Remember we already finished so we don't save the same demo twice.
        self._done_before = True
    return next_obs, reward, done, info
def config(): sacred_ex_name = "expert_demos" # The experiment to parallelize _uuid = make_unique_timestamp() run_name = (f"DEFAULT_{_uuid}" ) # CLI --name option. For analysis grouping. resources_per_trial = {} # Argument to `tune.run` base_named_configs = [ ] # Background settings before search_space is applied base_config_updates = { } # Background settings before search_space is applied search_space = { "named_configs": [], "config_updates": {}, } # `config` argument to `ray.tune.run(trainable, config)` local_dir = None # `local_dir` arg for `ray.tune.run` upload_dir = None # `upload_dir` arg for `ray.tune.run` n_seeds = 3 # Number of seeds to search over by default
def logging_config(log_root):
    """Default logging directory: unique timestamped subdirectory under `log_root`."""
    # `log_dir` looks unused but is presumably captured by the enclosing
    # config framework (Sacred-style config scope) -- hence the suppressions.
    log_dir = os.path.join(  # noqa: F841 pylint:disable=unused-variable
        log_root,
        "plot_gridworld_divergence",
        util.make_unique_timestamp(),  # unique suffix avoids clobbering earlier runs
    )
def logging(env_name, log_root):
    """Default logging directory: `log_root`/<sanitized env name>/<timestamp>."""
    # `log_dir` looks unused but is presumably captured by the enclosing
    # config framework (Sacred-style config scope) -- confirm.
    log_dir = os.path.join(log_root, env_name.replace("/", "_"), util.make_unique_timestamp())
def logging_config(log_root, env_name):
    """Default logging directory: `log_root`/<sanitized env name>/<timestamp>."""
    log_dir = os.path.join(log_root, env_name.replace("/", "_"), util.make_unique_timestamp())
    _ = locals()  # quieten flake8 unused variable warning
    del _
def logging_config(log_root, exp_name):
    """Default logging directory: `log_root`/`exp_name`/<timestamp>."""
    # `log_dir` looks unused but is presumably captured by the enclosing
    # config framework (Sacred-style config scope) -- hence the suppressions.
    log_dir = os.path.join(  # noqa: F841 pylint:disable=unused-variable
        log_root,
        exp_name,
        util.make_unique_timestamp(),  # unique suffix avoids clobbering earlier runs
    )
def logging_config(log_root, run_tag):
    """Logging configuration: timestamp plus unique UUID."""
    log_dir = os.path.join(log_root, "train_experts", run_tag, util.make_unique_timestamp())
    # Touch locals() to quieten linter "unused variable" warnings; the locals
    # themselves are presumably captured by the config framework (confirm).
    _ = locals()
    del _