Esempio n. 1
0
    def get(self):
        """
        Return the contents of the buffer and reset its write indices.

        Must only be called when the buffer is full, i.e. ``self.ptr`` equals
        ``self.max_size``; otherwise the assertion below fails.

        Side effects:
            * ``self.ptr`` and ``self.path_start_idx`` are set back to 0.
            * ``self.adv_buf`` is replaced by a copy that has the mean from
              ``mpi_statistics_scalar`` subtracted and is divided by the
              second returned statistic plus ``EPS``.
            * ``self.cadv_buf`` has its mean subtracted in place (no scaling).

        Returns:
            list: the ``obs``, ``act``, ``adv``, ``cadv``, ``ret``, ``cret``
            and ``logp`` buffers, in that order, followed by the items of
            ``values_as_sorted_list(self.pi_info_bufs)``.
        """
        # A partially filled buffer must never be consumed.
        assert self.ptr == self.max_size
        self.ptr = 0
        self.path_start_idx = 0

        # Policy-gradient advantages: center, then rescale. EPS keeps the
        # denominator away from zero.
        mean_adv, std_adv = mpi_statistics_scalar(self.adv_buf)
        centered_adv = self.adv_buf - mean_adv
        self.adv_buf = centered_adv / (std_adv + EPS)

        # Cost advantages are centered only -- deliberately NOT rescaled.
        mean_cadv, _ = mpi_statistics_scalar(self.cadv_buf)
        self.cadv_buf -= mean_cadv

        core_buffers = [
            self.obs_buf,
            self.act_buf,
            self.adv_buf,
            self.cadv_buf,
            self.ret_buf,
            self.cret_buf,
            self.logp_buf,
        ]
        return core_buffers + values_as_sorted_list(self.pi_info_bufs)
Esempio n. 2
0
 def get_stats(self, key):
     """
     Compute statistics for the values currently stored under ``key``.

     Lets an algorithm query the logger for the mean/std/min/max of a
     diagnostic, as computed by ``mpi_statistics_scalar``.

     Args:
         key: Name of the diagnostic; must be present in ``self.epoch_dict``
             with at least one stored value.

     Returns:
         Whatever ``mpi_statistics_scalar`` returns for the stored values.
     """
     stored = self.epoch_dict[key]
     first = stored[0]
     if isinstance(first, np.ndarray) and len(first.shape) > 0:
         # Entries are non-scalar arrays: join them into one array first.
         samples = np.concatenate(stored)
     else:
         # Scalars (or 0-d arrays) are passed through as the raw list.
         samples = stored
     return mpi_statistics_scalar(samples)
Esempio n. 3
0
    def log_tabular(self,
                    key,
                    val=None,
                    with_min_and_max=False,
                    average_only=False):
        """
        Log either a single value or summary statistics of a diagnostic.

        When ``val`` is given it is forwarded unchanged to the parent
        logger. When ``val`` is ``None`` the values stored under ``key`` in
        ``self.epoch_dict`` are summarized with ``mpi_statistics_scalar``
        and the results are logged under prefixed names. In both cases the
        stored values for ``key`` are reset to an empty list afterwards.

        Args:
            key (string): Name of the diagnostic. For a diagnostic whose
                values were previously saved with ``store``, this must be
                the same key that was used there.

            val: Value to log directly. Leave this as ``None`` when values
                for ``key`` were saved via ``store``.

            with_min_and_max (bool): If true, additionally log
                ``'Max' + key`` and ``'Min' + key``.

            average_only (bool): If true, log the mean under ``key`` itself
                and skip the standard deviation; otherwise log
                ``'Average' + key`` and ``'Std' + key``.
        """
        if val is None:
            stored = self.epoch_dict[key]
            first = stored[0]
            if isinstance(first, np.ndarray) and len(first.shape) > 0:
                # Non-scalar arrays are joined into a single array.
                samples = np.concatenate(stored)
            else:
                samples = stored
            stats = mpi_statistics_scalar(
                samples, with_min_and_max=with_min_and_max)

            write_row = super().log_tabular
            if average_only:
                write_row(key, stats[0])
            else:
                write_row('Average' + key, stats[0])
                write_row('Std' + key, stats[1])
            if with_min_and_max:
                # Max is logged before Min; stats holds min at index 2 and
                # max at index 3.
                write_row('Max' + key, stats[3])
                write_row('Min' + key, stats[2])
        else:
            super().log_tabular(key, val)

        # Start the next round of ``store`` calls with a clean slate.
        self.epoch_dict[key] = []