Exemple #1
0
 def get_stats(self, key):
     """
     Lets an algorithm ask the logger for mean/std/min/max of a diagnostic.
     """
     v = self.epoch_dict[key]
     vals = np.concatenate(v) if isinstance(v[0], np.ndarray) and len(v[0].shape)>0 else v
     return mpi_statistics_scalar(vals)
Exemple #2
0
    def log_tabular(self, key, val=None, with_min_and_max=False, average_only=False):
        """
        Log a value or possibly the mean/std/min/max values of a diagnostic.

        Args:
            key (string): The name of the diagnostic. If you are logging a
                diagnostic whose state has previously been saved with 
                ``store``, the key here has to match the key you used there.

            val: A value for the diagnostic. If you have previously saved
                values for this key via ``store``, do *not* provide a ``val``
                here.

            with_min_and_max (bool): If true, log min and max values of the 
                diagnostic over the epoch.

            average_only (bool): If true, do not log the standard deviation
                of the diagnostic over the epoch.
        """
        if val is not None:
            super().log_tabular(key,val)
        else:
            v = self.epoch_dict[key]
            vals = np.concatenate(v) if isinstance(v[0], np.ndarray) and len(v[0].shape)>0 else v
            stats = mpi_statistics_scalar(vals, with_min_and_max=with_min_and_max)
            super().log_tabular(key if average_only else 'Average' + key, stats[0])
            if not(average_only):
                super().log_tabular('Std'+key, stats[1])
            if with_min_and_max:
                super().log_tabular('Max'+key, stats[3])
                super().log_tabular('Min'+key, stats[2])
        self.epoch_dict[key] = []
Exemple #3
0
 def get(self):
     assert self.ptr == self.max_size
     self.ptr, self.path_start_idx = 0, 0
     # Normalization
     adv_mean, adv_std = mpi_statistics_scalar(self.adv_buf)
     self.adv_buf = (self.adv_buf - adv_mean) / adv_std
     return [
         self.obs_buf, self.act_buf, self.adv_buf, self.ret_buf,
         self.logp_buf
     ]
Exemple #4
0
 def get(self):
     """
     return all the samples in this epoch and ten start training
     :return:
     """
     assert self.ptr == self.max_size
     self.ptr, self.path_start_idx = 0, 0
     # advantage normalization
     adv_mean, adv_std = mpi_statistics_scalar(self.adv_buf)
     self.adv_buf = (self.adv_buf - adv_mean) / adv_std
     return [
         self.obs_buf, self.act_buf, self.adv_buf, self.ret_buf,
         self.logp_buf
     ] + core.values_as_sorted_list(self.info_bufs)
Exemple #5
0
    def get(self):
        """
        call at the end of epoch to get all data from buffer
        :return:
        """
        assert self.ptr == self.max_size
        self.ptr, self.path_start_idx = 0, 0

        # advantage normalization
        adv_mean, adv_std = mpi_statistics_scalar(self.adv_buf)
        self.adv_buf = (self.adv_buf - adv_mean) / adv_std
        return [
            self.obs_buf, self.act_buf, self.adv_buf, self.ret_buf,
            self.logp_buf
        ]