from os.path import join

import numpy as np
import torch
from sacred import Experiment
from sacred.observers import FileStorageObserver

from onlikhorn.algorithm import (
    sinkhorn,
    online_sinkhorn,
    random_sinkhorn,
    subsampled_sinkhorn,
    schedule,
)
from onlikhorn.cache import torch_cached
from onlikhorn.dataset import get_output_dir, make_data
from onlikhorn.gaussian import sinkhorn_gaussian

# Sacred experiment; its runs are recorded by a FileStorageObserver rooted at
# <output dir>/<experiment name>.
exp_name = "online_grid_quiver_2"
exp = Experiment(exp_name)
exp_dir = join(get_output_dir(), exp_name)
exp.observers = [FileStorageObserver(exp_dir)]


# Sacred config scope: the local variables assigned in the body are the
# experiment's configuration entries, so their names must not be changed.
@exp.config
def config():
    data_source = "gmm_1d"
    n_samples = 10000
    max_length = 20000
    device = "cuda"

    # Overridden
    batch_size = 100
    seed = 0
    epsilon = 1e-2

    method = "sinkhorn"
config["problem_spec"], config["inference_algorithm"], config["temperature"], ) if config["inference_algorithm"] == "rlsp": custom_id += "_" + config["solver"] if config["solver"] == "ppo": custom_id += "_{}_{}".format(config["solver_iterations"], config["reset_solver"]) return custom_id # started_event returns the _run._id ex = Experiment("rlsp") ex.observers = [SetID(), FileStorageObserver.create("results")] def get_all_rewards_from_latent_space(env, latent_space, r_inferred, r_task, inferred_weight): all_rewards_inferred = np.zeros(env.nS) for state_id in range(env.nS): obs = env.s_to_obs(env.get_state_from_num(state_id)) state = latent_space.encoder(obs) reward = np.dot(r_inferred, state) all_rewards_inferred[state_id] = reward return env.f_matrix @ r_task + inferred_weight * all_rewards_inferred def print_rollout(env, start_state, policies, last_steps_printed, horizon): if last_steps_printed == 0:
priority = 50 # very high priority to set id def started_event(self, ex_info, command, host_info, start_time, config, meta_info, _id): timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S") if config["result_folder"] is not None: result_folder = config["result_folder"].strip("/").split("/")[-1] custom_id = f"{timestamp}_ablation_average_features_{result_folder}" else: custom_id = f"{timestamp}_ablation_average_features" return custom_id # started_event returns the _run._id ex = Experiment("mujoco-ablation-average-features") ex.observers = [ SetID(), FileStorageObserver.create("results/mujoco/ablation_average_features"), ] @ex.config def config(): result_folder = None # noqa:F841 @ex.automain def main(_run, result_folder, seed): ex = FileExperimentResults(result_folder) env_id = ex.config["env_id"] latent_model_checkpoint = ex.info["latent_model_checkpoint"] current_states = ex.info["current_states"]
# changes the run _id and thereby the path that the FileStorageObserver
# writes the results
# cf. https://github.com/IDSIA/sacred/issues/174
class SetID(RunObserver):
    """Run observer whose ``started_event`` supplies a timestamp as run id."""

    priority = 50  # very high priority to set id

    def started_event(
        self, ex_info, command, host_info, start_time, config, meta_info, _id
    ):
        """Return the current time formatted as ``%Y%m%d_%H%M%S``."""
        # started_event returns the _run._id
        return datetime.datetime.now().strftime("%Y%m%d_%H%M%S")


ex = Experiment("policy-discriminator")
ex.observers = [
    SetID(),
    FileStorageObserver.create("results/policy_discriminator"),
]


def get_trajectories(env, policy_path, policy_type, n_rollouts, time_horizon):
    if policy_type == "sac":
        from stable_baselines import SAC

        model = SAC.load(policy_path)

        def get_action(obs):
            return model.predict(obs, deterministic=True)[0]

    elif policy_type == "gail":
        from imitation.policies import serialize
        from stable_baselines3.common.vec_env import DummyVecEnv
# writes the results
# cf. https://github.com/IDSIA/sacred/issues/174
class SetID(RunObserver):
    """Run observer that names each run ``<timestamp>_<experiment folder>``."""

    priority = 50  # very high priority to set id

    def started_event(
        self, ex_info, command, host_info, start_time, config, meta_info, _id
    ):
        """Return the run id built from a timestamp and the experiment folder.

        The folder part is the last path component of
        ``config["experiment_folder"]`` after surrounding slashes are
        stripped.
        """
        stamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        folder_name = config["experiment_folder"].strip("/").split("/")[-1]
        # started_event returns the _run._id
        return "_".join((stamp, folder_name))


ex = Experiment("mujoco-eval")
ex.observers = [SetID(), FileStorageObserver.create("results/mujoco/eval")]


# NOTE(review): this function may continue past the end of the excerpt; its
# statements are reproduced unchanged, only the line structure is restored.
def print_rollout(env, policy, latent_space, decode=False):
    state = env.reset()
    done = False
    while not done:
        a, _ = policy.predict(state, deterministic=False)
        state, reward, done, info = env.step(a)
        if decode:
            obs = latent_space.decoder(state)
        else:
            obs = state
        print("action", a)
        print("obs", obs)
        print("reward", reward)
scheduler_D.step(G_iter) if extrapolation and update: # Reset extrapolated optimizers optimizer_D.deextrapolate() optimizer_G.deextrapolate() iteration += 1 iteration_time = time.perf_counter() - t0 elapsed_time += iteration_time cur_elapsed_time += iteration_time # Benchmark if upd_D and upd_G: key = 'DG' elif upd_D: key = 'D' else: key = 'G' sum_iteration_times[key] += iteration_time count_iterations[key] += 1 avg_iteration_times[ key] = sum_iteration_times[key] / count_iterations[key] if __name__ == '__main__': if not os.path.exists(exp_dir): os.makedirs(exp_dir) exp.observers = [FileStorageObserver.create(exp_dir)] exp.run_commandline()
priority = 50 # very high priority to set id def started_event(self, ex_info, command, host_info, start_time, config, meta_info, _id): timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S") if config["result_folder"] is not None: result_folder = config["result_folder"].strip("/").split("/")[-1] custom_id = f"{timestamp}_ablation_waypoints_{result_folder}" else: custom_id = f"{timestamp}_ablation_waypoints" return custom_id # started_event returns the _run._id ex = Experiment("mujoco-ablation-waypoints") ex.observers = [ SetID(), FileStorageObserver.create("results/mujoco/ablation_waypoints"), ] class LatentSpaceTargetStateRewardWrapper(gym.Wrapper): def __init__(self, env, latent_space, target_states): self.env = env self.latent_space = latent_space self.target_states = [ts / np.linalg.norm(ts) for ts in target_states] self.state = None self.timestep = 0 super().__init__(env) def reset(self): obs = super().reset() self.state = self.latent_space.encoder(obs)