# These methods assume module-level imports of numpy (as np) and time, plus the
# project's own `logger` and `util` helpers.
def _after_step(self, rollout, data, outs):
    # Log rollout and throughput diagnostics after every training step.
    logger.log("========================| Timestep: {} |========================".format(self.t))
    logger.logkv('serial timesteps', self.t / self.nenv)
    logger.logkv('mean episode length', np.mean(self.runner.get_episode_lengths()))
    logger.logkv('mean episode reward', np.mean(self.runner.get_episode_rewards()))
    logger.logkv('fps', int((self.t - self._t_start) / (time.time() - self._time_start)))
    logger.logkv('time_elapsed', time.time() - self._time_start)
    logger.logkv('time spent exploring', self.actor.eps)
    logger.dumpkvs()
def _update_model(self, data):
    # Update observation normalization statistics before fitting on the new batch.
    if self.rn is not None:
        self._update_running_norm(data['obs'].reshape(-1, *data['obs'].shape[2:]))
    dataset, _ = util.make_dataset(data)
    for _ in range(self.args.epochs):
        losses = []
        for b in dataset.iterate_once(self.batch_size * self.nenv):
            out = self._update(b)
            losses.append([out[k] for k in self._loss_keys])
        # Log the mean of each loss term for this epoch.
        meanlosses = np.array(losses).mean(axis=0)
        s = 'Losses: '
        for i, ln in enumerate(self._loss_names):
            s += ln + ': {:08f} '.format(meanlosses[i])
        logger.log(s)
    return meanlosses
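# A minimal sketch of the minibatch iteration assumed above, for the non-recurrent
# case only: the dataset returned by util.make_dataset is treated as a dict of
# equal-length arrays that iterate_once shuffles and slices into minibatches.
# The project's actual Dataset/util implementation may differ (the recurrent
# variant below, in particular, has to preserve time ordering per environment).
def iterate_once_sketch(data_map, batch_size):
    # data_map: dict mapping keys like 'obs' or 'vtarg' to arrays sharing a
    # leading dimension; yields dicts of aligned minibatch slices.
    n = len(next(iter(data_map.values())))
    idx = np.random.permutation(n)
    for start in range(0, n, batch_size):
        mb = idx[start:start + batch_size]
        yield {k: v[mb] for k, v in data_map.items()}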
def _update_model(self, data):
    if self.rn is not None:
        self._update_running_norm(data['obs'].reshape(-1, *data['obs'].shape[2:]))
    dataset, state_init = util.make_dataset(data, self.loss.is_recurrent)
    for _ in range(self.args.epochs_per_iter):
        losses = []
        # Reset the recurrent state at the start of each pass and thread it
        # through consecutive minibatches.
        state = state_init
        for batch in dataset.iterate_once(self.batch_size * self.nenv):
            out = self._update(batch, state)
            state = out['state_out']
            losses.append([out['out'], out['p_loss'], out['v_loss'], out['ent_loss']])
        meanlosses = np.array(losses).mean(axis=0)
        s = 'Losses: '
        for i, ln in enumerate(['Total', 'Policy', 'Value', 'Entropy']):
            s += ln + ': {:08f} '.format(meanlosses[i])
        logger.log(s)
    return meanlosses
def _after_step(self, rollout, data, losses):
    self.losses.append([losses['out'], losses['p_loss'], losses['v_loss'], losses['ent_loss']])
    self.vtargs.extend(list(np.array(data['vtarg']).flatten()))
    self.vpreds.extend(list(np.array(data['vpreds']).flatten()))
    self.nsteps += 1
    if self.nsteps % 100 == 0 and self.nsteps > 0:
        logger.log("========================| Timestep: {} |========================".format(self.t))
        meanlosses = np.mean(np.array(self.losses), axis=0)
        # Logging stats...
        for i, s in enumerate(['Total Loss', 'Policy Loss', 'Value Loss', 'Entropy']):
            logger.logkv(s, meanlosses[i])
        logger.logkv('timesteps', self.t)
        logger.logkv('serial timesteps', self.t / self.nenv)
        logger.logkv('mean episode length', np.mean(self.runner.get_episode_lengths()))
        logger.logkv('mean episode reward', np.mean(self.runner.get_episode_rewards()))
        logger.logkv('explained var. of vtarg', util.explained_variance(np.array(self.vpreds), np.array(self.vtargs)))
        logger.logkv('fps', int((self.t - self._t_start) / (time.time() - self._time_start)))
        logger.logkv('time_elapsed', time.time() - self._time_start)
        logger.dumpkvs()
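# A minimal sketch of the explained-variance diagnostic logged above, assuming
# util.explained_variance follows the standard definition
# 1 - Var(vtarg - vpred) / Var(vtarg); the project's helper may differ in edge cases.
def explained_variance_sketch(vpred, vtarg):
    # 1 means the value function predicts the returns perfectly, 0 means it is
    # no better than predicting the mean return, negative means it is worse.
    var_targ = np.var(vtarg)
    return np.nan if var_targ == 0 else 1.0 - np.var(vtarg - vpred) / var_targ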
def _after_step(self, rollout, data, outs):
    self.nsteps += 1
    if self.nsteps % 100 == 0:
        logger.log("========================| Timestep: {} |========================".format(self.t))
        meanloss = np.mean(np.array(self.losses), axis=0)
        # Logging stats...
        logger.logkv('Loss', meanloss)
        logger.logkv('timesteps', self.t)
        logger.logkv('serial timesteps', self.t / self.nenv)
        logger.logkv('mean episode length', np.mean(self.runner.get_episode_lengths()))
        logger.logkv('mean episode reward', np.mean(self.runner.get_episode_rewards()))
        logger.logkv('fps', int((self.t - self._t_start) / (time.time() - self._time_start)))
        logger.logkv('time_elapsed', time.time() - self._time_start)
        logger.logkv('time spent exploring', self.actor.eps)
        logger.dumpkvs()
def _after_step(self, rollout, data, update_outs):
    logger.log("After Step")
def _before_step(self): logger.log("Before Step")
def _before_step(self): logger.log("========================| Iteration: {} |========================".format(self.t // self.timesteps_per_step))