Code Example #1
File: npo_v1.py  Project: maliesa96/fyra
    def optimize_policy(self, itr, samples_data):
        policy_opt_input_values = self._policy_opt_input_values(samples_data)

        # Train policy network
        logger.log("Computing loss before")
        loss_before = self.optimizer.loss(policy_opt_input_values)
        logger.log("Computing KL before")
        policy_kl_before = self.f_policy_kl(*policy_opt_input_values)
        logger.log("Optimizing")
        self.optimizer.optimize(policy_opt_input_values)
        logger.log("Computing KL after")
        policy_kl = self.f_policy_kl(*policy_opt_input_values)
        logger.log("Computing loss after")
        loss_after = self.optimizer.loss(policy_opt_input_values)
        logger.record_tabular("{}/LossBefore".format(self.policy.name),
                              loss_before)
        logger.record_tabular("{}/LossAfter".format(self.policy.name),
                              loss_after)
        logger.record_tabular("{}/dLoss".format(self.policy.name),
                              loss_before - loss_after)
        logger.record_tabular("{}/KLBefore".format(self.policy.name),
                              policy_kl_before)
        logger.record_tabular("{}/KL".format(self.policy.name), policy_kl)

        pol_ent = self.f_policy_entropy(*policy_opt_input_values)
        logger.record_tabular("{}/Entropy".format(self.policy.name),
                              np.mean(pol_ent))

        num_traj = self.batch_size // self.max_path_length
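        # Record a histogram of actions from the complete trajectories in this batch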
        actions = samples_data["actions"][:num_traj, ...]
        logger.record_histogram("{}/Actions".format(self.policy.name), actions)

        self._fit_baseline(samples_data)
Code Example #2
    def visualize_distribution(self, samples_data):
        """ Visualize embedding distribution """

        # distributions
        num_tasks = self.policy.task_space.flat_dim
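        # One-hot vector per task, used to query the embedding for each task's latent distribution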
        all_tasks = np.eye(num_tasks, num_tasks)
        _, latent_infos = self.policy._embedding.get_latents(all_tasks)
        for i in range(self.policy.latent_space.flat_dim):
            log_stds = latent_infos["log_std"][:, i]
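            # Recover the standard deviation from the stored parameterization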
            if self.policy.embedding._std_parameterization == "exp":
                stds = np.exp(log_stds)
            elif self.policy.embedding._std_parameterization == "softplus":
                stds = np.log(1. + log_stds)
            else:
                raise NotImplementedError
            logger.record_histogram_by_type("normal",
                                            shape=[1000, num_tasks],
                                            key="Embedding/i={}".format(i),
                                            mean=latent_infos["mean"][:, i],
                                            stddev=stds)

        num_traj = self.batch_size // self.max_path_length
        # # samples
        # latents = samples_data["latents"][:num_traj, 0]
        # for i in range(self.policy.latent_space.flat_dim):
        #     logger.record_histogram("Embedding/samples/i={}".format(i),
        #                             latents[:, i])

        # action distributions
        actions = samples_data["actions"][:num_traj, ...]
        logger.record_histogram("Actions", actions)
Code Example #3
#!/usr/bin/env python3

import tensorflow as tf

from garage.misc import logger

logger.set_tensorboard_dir("data/local/histogram_example")
N = 400
# Enter a default TensorFlow session once up front (the histogram logging
# relies on it) instead of re-creating a new session on every iteration.
sess = tf.Session()
sess.__enter__()
for i in range(N):
    k_val = i / float(N)
    logger.record_histogram_by_type('gamma', key='gamma', alpha=k_val)
    logger.record_histogram_by_type('normal',
                                    key='normal',
                                    mean=5 * k_val,
                                    stddev=1.0)
    logger.record_histogram_by_type('poisson', key='poisson', lam=k_val)
    logger.record_histogram_by_type('uniform',
                                    key='uniform',
                                    maxval=k_val * 10)
    logger.record_tabular("app", k_val)
    logger.record_histogram("gass", k_val)
    logger.dump_tensorboard(step=i)
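
The histograms and the scalar recorded above are written to the directory passed to set_tensorboard_dir and can then be inspected in TensorBoard. As a minimal sketch (assuming the tensorboard CLI that ships with TensorFlow is on the PATH; the port is arbitrary), the hard-coded log directory from the example can be opened like this:

#!/usr/bin/env python3

import subprocess

# Sketch: launch TensorBoard on the log directory written by the example above.
subprocess.run(["tensorboard",
                "--logdir", "data/local/histogram_example",
                "--port", "6006"])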