def optimize_policy(self, itr, samples_data):
    """Run one policy-optimization step and snapshot the iteration.

    Builds the optimizer input values from ``samples_data``, logs the
    surrogate loss and policy KL before and after the optimizer update,
    records diagnostics (loss delta, KL, entropy, action histogram),
    refits the baseline, and returns the iteration snapshot.
    """
    opt_inputs = self._policy_opt_input_values(samples_data)
    pol_name = self.policy.name

    # Train policy network
    logger.log('Computing loss before')
    pre_loss = self.optimizer.loss(opt_inputs)
    logger.log('Computing KL before')
    pre_kl = self.f_policy_kl(*opt_inputs)
    logger.log('Optimizing')
    self.optimizer.optimize(opt_inputs)
    logger.log('Computing KL after')
    post_kl = self.f_policy_kl(*opt_inputs)
    logger.log('Computing loss after')
    post_loss = self.optimizer.loss(opt_inputs)

    tabular.record('{}/LossBefore'.format(pol_name), pre_loss)
    tabular.record('{}/LossAfter'.format(pol_name), post_loss)
    tabular.record('{}/dLoss'.format(pol_name), pre_loss - post_loss)
    tabular.record('{}/KLBefore'.format(pol_name), pre_kl)
    tabular.record('{}/KL'.format(pol_name), post_kl)
    entropy = self.f_policy_entropy(*opt_inputs)
    tabular.record('{}/Entropy'.format(pol_name), entropy)

    # Histogram over the actions of the first full batch of trajectories.
    n_traj = self.batch_size // self.max_path_length
    first_actions = samples_data['actions'][:n_traj, ...]
    tabular.record('{}/Actions'.format(pol_name), Histogram(first_actions))

    self._fit_baseline(samples_data)
    return self.get_itr_snapshot(itr, samples_data)
def test_record_histogram(self):
    """Recording a Histogram then dumping triggers one add_histogram call."""
    with mock.patch('tensorboardX.SummaryWriter'):
        dist = scipy.stats.norm(loc=[1., 0.], scale=[0.5, 1.5])
        draws = dist.rvs((10000, 2))
        self.tensor_board_output.record('Samples', Histogram(draws))
        self.tensor_board_output.dump()
        assert self.mock_writer.add_histogram.call_count == 1
def _visualize_distribution(self):
    """Visualize encoder distribution.

    For every task (one-hot encoded), samples each latent dimension of
    the encoder's output distribution and records the samples as a
    histogram under ``Encoder/task=<t>,i=<dim>``.
    """
    n_tasks = self.policy.task_space.flat_dim
    one_hot_tasks = np.eye(n_tasks, n_tasks)
    _, latent_infos = self.policy.encoder.get_latents(one_hot_tasks)
    latent_dim = self.policy.latent_space.flat_dim
    for task_idx in range(n_tasks):
        for dim in range(latent_dim):
            scale = np.exp(latent_infos['log_std'][task_idx, dim])
            dist = scipy.stats.norm(
                loc=latent_infos['mean'][task_idx, dim], scale=scale)
            draws = dist.rvs(100)
            tabular.record(
                'Encoder/task={},i={}'.format(task_idx, dim),
                Histogram(draws))