Example #1
    def _after_step(self, rollout, data, losses):
        # Log each loss component under a human-readable key.
        for i, s in enumerate(['Total Loss', 'Policy Loss', 'Value Loss', 'Entropy']):
            logger.logkv(s, losses[i])
        # Flatten value targets and predictions so explained variance is computed over all timesteps.
        vtarg_flat = data['vtarg'].flatten()
        vpred_flat = data['vpreds'].flatten()

        logger.logkv('timesteps', self.t)
        logger.logkv('serial timesteps', self.t / self.nenv)
        logger.logkv('mean episode length', np.mean(self.runner.get_episode_lengths()))
        logger.logkv('mean episode reward', np.mean(self.runner.get_episode_rewards()))
        logger.logkv('explained var. of vtarg', util.explained_variance(vpred_flat, vtarg_flat))
        logger.logkv('fps', int((self.t - self._t_start) / (time.time() - self._time_start)))
        logger.logkv('time_elapsed', time.time() - self._time_start)
        logger.dumpkvs()
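For reference, `util.explained_variance` is not shown on this page; conventionally it reports 1 - Var(y - ŷ) / Var(y), i.e. how much of the variance in the value targets the value predictions account for (1.0 is a perfect fit, 0.0 is no better than predicting the mean). A minimal sketch of such a helper, under that assumption:

    import numpy as np

    def explained_variance(ypred, y):
        # 1 - Var[y - ypred] / Var[y]; returns NaN when the targets have zero variance.
        ypred, y = np.asarray(ypred).flatten(), np.asarray(y).flatten()
        var_y = np.var(y)
        return np.nan if var_y == 0 else 1.0 - np.var(y - ypred) / var_y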
Example #2
    def _after_step(self, rollout, data, outs):
        logger.log(
            "========================|  Timestep: {}  |========================"
            .format(self.t))
        logger.logkv('serial timesteps', self.t / self.nenv)
        logger.logkv('mean episode length',
                     np.mean(self.runner.get_episode_lengths()))
        logger.logkv('mean episode reward',
                     np.mean(self.runner.get_episode_rewards()))
        logger.logkv(
            'fps',
            int((self.t - self._t_start) / (time.time() - self._time_start)))
        logger.logkv('time_elapsed', time.time() - self._time_start)
        # self.actor.eps is presumably the epsilon-greedy exploration rate.
        logger.logkv('time spent exploring', self.actor.eps)
        logger.dumpkvs()
Example #3
    def _after_step(self, rollout, data, losses):
        # Accumulate per-update losses and flattened value targets/predictions for periodic logging.
        self.losses.append([losses['out'], losses['p_loss'], losses['v_loss'], losses['ent_loss']])
        self.vtargs.extend(list(np.array(data['vtarg']).flatten()))
        self.vpreds.extend(list(np.array(data['vpreds']).flatten()))

        self.nsteps += 1
        # nsteps was just incremented, so it is always > 0 here; report every 100 updates.
        if self.nsteps % 100 == 0:
            logger.log("========================|  Timestep: {}  |========================".format(self.t))
            meanlosses = np.mean(np.array(self.losses), axis=0)
            # Logging stats...
            for i, s in enumerate(['Total Loss', 'Policy Loss', 'Value Loss', 'Entropy']):
                logger.logkv(s, meanlosses[i])
            logger.logkv('timesteps', self.t)
            logger.logkv('serial timesteps', self.t / self.nenv)
            logger.logkv('mean episode length', np.mean(self.runner.get_episode_lengths()))
            logger.logkv('mean episode reward', np.mean(self.runner.get_episode_rewards()))
            logger.logkv('explained var. of vtarg', util.explained_variance(np.array(self.vpreds), np.array(self.vtargs)))
            logger.logkv('fps', int((self.t - self._t_start) / (time.time() - self._time_start)))
            logger.logkv('time_elapsed', time.time() - self._time_start)
            logger.dumpkvs()
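Note that in this example the `self.losses`, `self.vtargs`, and `self.vpreds` buffers are never cleared, so the reported means are running averages over the entire run rather than over the last 100 updates. If a per-window report is wanted, one option (a sketch, not part of the original code) is to reset the buffers right after dumping:

            logger.dumpkvs()
            # Reset the accumulators so the next report covers only the next 100 updates.
            self.losses = []
            self.vtargs = []
            self.vpreds = []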
Example #4
    def _after_step(self, rollout, data, outs):
        self.nsteps += 1
        if self.nsteps % 100 == 0:
            logger.log(
                "========================|  Timestep: {}  |========================"
                .format(self.t))
            # Mean of the losses accumulated by the class (self.losses is populated outside this snippet).
            meanloss = np.mean(np.array(self.losses), axis=0)
            # Logging stats...
            logger.logkv('Loss', meanloss)
            logger.logkv('timesteps', self.t)
            logger.logkv('serial timesteps', self.t / self.nenv)
            logger.logkv('mean episode length',
                         np.mean(self.runner.get_episode_lengths()))
            logger.logkv('mean episode reward',
                         np.mean(self.runner.get_episode_rewards()))
            logger.logkv(
                'fps',
                int((self.t - self._t_start) /
                    (time.time() - self._time_start)))
            logger.logkv('time_elapsed', time.time() - self._time_start)
            # self.actor.eps is presumably the epsilon-greedy exploration rate.
            logger.logkv('time spent exploring', self.actor.eps)
            logger.dumpkvs()
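For context, a hook like `_after_step` would typically be invoked once per iteration by the trainer's main loop, after a rollout has been collected and the update applied. A rough sketch of such a loop follows; apart from `_after_step`, `self.t`, and `self.runner`, the names here are assumptions for illustration, not this library's actual API:

    def train(self, total_timesteps):
        # Simplified driver loop: collect experience, update, then hand stats to the logging hook.
        while self.t < total_timesteps:
            rollout = self.runner.rollout()          # gather a batch of experience (assumed method)
            data = self._process_rollout(rollout)    # e.g. advantages and value targets (assumed)
            losses = self._update(data)              # one or more gradient steps (assumed)
            self.t += self.timesteps_this_iter       # advance the global timestep counter (assumed)
            self._after_step(rollout, data, losses)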