def optimize_policy(self, itr, samples_data):
    """Run one policy-optimization step on a batch and log diagnostics.

    Records loss/KL before and after the optimizer step, the policy
    entropy, and a histogram of the sampled actions, then refits the
    baseline on the same batch.

    Args:
        itr: Current iteration number (kept for interface compatibility).
        samples_data: Sampled-trajectory batch; must contain an
            "actions" entry indexable as [num_traj, ...].
    """
    opt_inputs = self._policy_opt_input_values(samples_data)
    name = self.policy.name

    # Evaluate loss/KL around the optimizer step so the logged deltas
    # reflect exactly this update.
    logger.log("Computing loss before")
    loss_before = self.optimizer.loss(opt_inputs)
    logger.log("Computing KL before")
    kl_before = self.f_policy_kl(*opt_inputs)
    logger.log("Optimizing")
    self.optimizer.optimize(opt_inputs)
    logger.log("Computing KL after")
    kl_after = self.f_policy_kl(*opt_inputs)
    logger.log("Computing loss after")
    loss_after = self.optimizer.loss(opt_inputs)

    logger.record_tabular("{}/LossBefore".format(name), loss_before)
    logger.record_tabular("{}/LossAfter".format(name), loss_after)
    logger.record_tabular("{}/dLoss".format(name), loss_before - loss_after)
    logger.record_tabular("{}/KLBefore".format(name), kl_before)
    logger.record_tabular("{}/KL".format(name), kl_after)

    entropy = self.f_policy_entropy(*opt_inputs)
    logger.record_tabular("{}/Entropy".format(name), np.mean(entropy))

    # One leading action slice per trajectory in the batch.
    num_traj = self.batch_size // self.max_path_length
    actions = samples_data["actions"][:num_traj, ...]
    logger.record_histogram("{}/Actions".format(name), actions)

    self._fit_baseline(samples_data)
def visualize_distribution(self, samples_data):
    """Visualize embedding distribution.

    Emits, per latent dimension, a synthetic normal-distribution
    histogram built from the embedding's per-task mean and stddev, plus
    a histogram of the sampled actions.

    Args:
        samples_data: Sampled-trajectory batch; must contain an
            "actions" entry indexable as [num_traj, ...].
    """
    # One one-hot row per task: query the embedding for every task at once.
    num_tasks = self.policy.task_space.flat_dim
    all_tasks = np.eye(num_tasks, num_tasks)
    _, latent_infos = self.policy._embedding.get_latents(all_tasks)

    for i in range(self.policy.latent_space.flat_dim):
        log_stds = latent_infos["log_std"][:, i]
        if self.policy.embedding._std_parameterization == "exp":
            stds = np.exp(log_stds)
        elif self.policy.embedding._std_parameterization == "softplus":
            # BUG FIX: softplus(x) = log(1 + exp(x)). The original
            # computed np.log(1. + log_stds), omitting the exp() — wrong
            # stddevs, and NaN whenever log_stds <= -1.
            stds = np.log(1. + np.exp(log_stds))
        else:
            raise NotImplementedError
        logger.record_histogram_by_type(
            "normal",
            shape=[1000, num_tasks],
            key="Embedding/i={}".format(i),
            mean=latent_infos["mean"][:, i],
            stddev=stds)

    # Action distributions: one leading slice per trajectory in the batch.
    num_traj = self.batch_size // self.max_path_length
    actions = samples_data["actions"][:num_traj, ...]
    logger.record_histogram("Actions", actions)
#!/usr/bin/env python3
"""Demo script: stream evolving histogram distributions to TensorBoard."""
import tensorflow as tf

from garage.misc import logger

logger.set_tensorboard_dir("data/local/histogram_example")

N = 400

# BUG FIX: the original constructed a fresh tf.Session *inside* the loop on
# every iteration via sess.__enter__() and never exited/closed it — leaking
# N sessions and stacking N default-session contexts. One session, entered
# once as a context manager (which also installs it as the default session),
# is sufficient for all iterations.
with tf.Session():
    for i in range(N):
        k_val = i / float(N)
        logger.record_histogram_by_type('gamma', key='gamma', alpha=k_val)
        logger.record_histogram_by_type(
            'normal', key='normal', mean=5 * k_val, stddev=1.0)
        logger.record_histogram_by_type('poisson', key='poisson', lam=k_val)
        logger.record_histogram_by_type(
            'uniform', key='uniform', maxval=k_val * 10)
        logger.record_tabular("app", k_val)
        logger.record_histogram("gass", k_val)
        logger.dump_tensorboard(step=i)