def _setup_stats(self, base):
    """Create the means and stds of the model outputs and losses.

    This method also adds the same means and stds as scalars to tensorboard
    for additional storage.
    """
    ops = []
    names = []

    ops += [tf.reduce_mean(self.critic_tf[0])]
    names += ['{}/reference_Q1_mean'.format(base)]
    ops += [reduce_std(self.critic_tf[0])]
    names += ['{}/reference_Q1_std'.format(base)]

    ops += [tf.reduce_mean(self.critic_tf[1])]
    names += ['{}/reference_Q2_mean'.format(base)]
    ops += [reduce_std(self.critic_tf[1])]
    names += ['{}/reference_Q2_std'.format(base)]

    ops += [tf.reduce_mean(self.critic_with_actor_tf[0])]
    names += ['{}/reference_actor_Q1_mean'.format(base)]
    ops += [reduce_std(self.critic_with_actor_tf[0])]
    names += ['{}/reference_actor_Q1_std'.format(base)]

    ops += [tf.reduce_mean(self.critic_with_actor_tf[1])]
    names += ['{}/reference_actor_Q2_mean'.format(base)]
    ops += [reduce_std(self.critic_with_actor_tf[1])]
    names += ['{}/reference_actor_Q2_std'.format(base)]

    ops += [tf.reduce_mean(self.actor_tf)]
    names += ['{}/reference_action_mean'.format(base)]
    ops += [reduce_std(self.actor_tf)]
    names += ['{}/reference_action_std'.format(base)]

    ops += [tf.reduce_mean(self.rew_ph)]
    names += ['{}/rewards'.format(base)]

    ops += [self.actor_loss]
    names += ['{}/actor_loss'.format(base)]

    ops += [self.critic_loss[0]]
    names += ['{}/Q1_loss'.format(base)]

    ops += [self.critic_loss[1]]
    names += ['{}/Q2_loss'.format(base)]

    # Add all names and ops to the tensorboard summary.
    for op, name in zip(ops, names):
        tf.compat.v1.summary.scalar(name, op)

    return ops, names
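# `reduce_std` is used throughout these methods but is not defined in this
# snippet; it is presumably a small helper from the repository's tensorflow
# utilities. A minimal sketch of such a helper, assuming the usual TF1-style
# pattern (standard deviation via the mean of squared deviations):

import tensorflow as tf


def reduce_var(tensor, axis=None, keepdims=False):
    """Compute the variance of a tensor, analogous to tf.reduce_mean."""
    means = tf.reduce_mean(tensor, axis=axis, keepdims=True)
    devs_squared = tf.square(tensor - means)
    return tf.reduce_mean(devs_squared, axis=axis, keepdims=keepdims)


def reduce_std(tensor, axis=None, keepdims=False):
    """Compute the standard deviation of a tensor, analogous to tf.reduce_mean."""
    return tf.sqrt(reduce_var(tensor, axis=axis, keepdims=keepdims))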
def _setup_stats(rew_ph, actor_loss, critic_loss, actor_tf, critic_tf):
    """Prepare tensorboard logging for attributes of the agent.

    Parameters
    ----------
    rew_ph : tf.compat.v1.placeholder
        a placeholder for the rewards of an agent
    actor_loss : tf.Operation
        the operation that returns the loss of the actor
    critic_loss : list of tf.Operation
        the operations that return the losses of the critics
    actor_tf : tf.Variable
        the output from the actor of the agent
    critic_tf : tf.Variable
        the output from the critics of the agent
    """
    # rewards
    tf.compat.v1.summary.scalar('rewards', tf.reduce_mean(rew_ph))

    # actor and critic losses
    tf.compat.v1.summary.scalar('actor_loss', actor_loss)
    tf.compat.v1.summary.scalar('Q1_loss', critic_loss[0])
    tf.compat.v1.summary.scalar('Q2_loss', critic_loss[1])

    # critic dynamics
    tf.compat.v1.summary.scalar(
        'reference_Q1_mean', tf.reduce_mean(critic_tf[0]))
    tf.compat.v1.summary.scalar(
        'reference_Q1_std', reduce_std(critic_tf[0]))
    tf.compat.v1.summary.scalar(
        'reference_Q2_mean', tf.reduce_mean(critic_tf[1]))
    tf.compat.v1.summary.scalar(
        'reference_Q2_std', reduce_std(critic_tf[1]))

    # actor dynamics
    tf.compat.v1.summary.scalar(
        'reference_action_mean', tf.reduce_mean(actor_tf))
    tf.compat.v1.summary.scalar(
        'reference_action_std', reduce_std(actor_tf))
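# The function above only registers scalars with the default tensorflow
# summary collection; something downstream has to merge and write them. A
# self-contained sketch of that consumption pattern (the placeholder, log
# directory, and step value here are illustrative assumptions, not part of
# the original code):

import tensorflow as tf

tf.compat.v1.disable_eager_execution()

# Register a toy scalar the same way _setup_stats does.
rew_ph = tf.compat.v1.placeholder(tf.float32, shape=(None,), name='rew_ph')
tf.compat.v1.summary.scalar('rewards', tf.reduce_mean(rew_ph))

# Merge every registered summary into a single op, evaluate it, and write the
# result to disk where tensorboard can read it.
merged = tf.compat.v1.summary.merge_all()

with tf.compat.v1.Session() as sess:
    writer = tf.compat.v1.summary.FileWriter('./logs', sess.graph)
    summary = sess.run(merged, feed_dict={rew_ph: [1.0, 2.0, 3.0]})
    writer.add_summary(summary, global_step=0)
    writer.close()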
def _setup_stats(self, base):
    """Create the means and stds of the model outputs and losses.

    This method also adds the same means and stds as scalars to tensorboard
    for additional storage.
    """
    ops = []
    names = []

    ops += [tf.reduce_mean(self.policy)]
    names += ['{}/reference_action_mean'.format(base)]
    ops += [reduce_std(self.policy)]
    names += ['{}/reference_action_std'.format(base)]

    ops += [tf.reduce_mean(self.loss)]
    names += ['{}/reference_loss_mean'.format(base)]
    ops += [reduce_std(self.loss)]
    names += ['{}/reference_loss_std'.format(base)]

    # Add all names and ops to the tensorboard summary.
    for op, name in zip(ops, names):
        tf.compat.v1.summary.scalar(name, op)

    return ops, names
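# Besides registering tensorboard scalars, these methods return the
# (ops, names) pair, which lends itself to periodic console or CSV logging.
# A hypothetical helper showing that usage (`sess` and `feed_dict` would come
# from the training loop; this is a sketch, not the repository's exact code):

def log_stats(sess, ops, names, feed_dict):
    """Evaluate the stats ops and print each name/value pair."""
    values = sess.run(ops, feed_dict=feed_dict)
    for name, value in zip(names, values):
        print('{}: {:.4f}'.format(name, value))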
def _setup_stats(self, base):
    """Create the means and stds of the model outputs and losses.

    This method also adds the same means and stds as scalars to tensorboard
    for additional storage.
    """
    ops = []
    names = []

    ops += [tf.reduce_mean(self.qf1)]
    names += ['{}/reference_Q1_mean'.format(base)]
    ops += [reduce_std(self.qf1)]
    names += ['{}/reference_Q1_std'.format(base)]

    ops += [tf.reduce_mean(self.qf2)]
    names += ['{}/reference_Q2_mean'.format(base)]
    ops += [reduce_std(self.qf2)]
    names += ['{}/reference_Q2_std'.format(base)]

    ops += [tf.reduce_mean(self.qf1_pi)]
    names += ['{}/reference_actor_Q1_mean'.format(base)]
    ops += [reduce_std(self.qf1_pi)]
    names += ['{}/reference_actor_Q1_std'.format(base)]

    ops += [tf.reduce_mean(self.qf2_pi)]
    names += ['{}/reference_actor_Q2_mean'.format(base)]
    ops += [reduce_std(self.qf2_pi)]
    names += ['{}/reference_actor_Q2_std'.format(base)]

    ops += [
        tf.reduce_mean(self._ac_magnitudes * self.policy_out + self._ac_means)
    ]
    names += ['{}/reference_action_mean'.format(base)]
    ops += [
        reduce_std(self._ac_magnitudes * self.policy_out + self._ac_means)
    ]
    names += ['{}/reference_action_std'.format(base)]

    ops += [tf.reduce_mean(self.logp_pi)]
    names += ['{}/reference_log_probability_mean'.format(base)]
    ops += [reduce_std(self.logp_pi)]
    names += ['{}/reference_log_probability_std'.format(base)]

    ops += [tf.reduce_mean(self.rew_ph)]
    names += ['{}/rewards'.format(base)]

    ops += [self.alpha_loss]
    names += ['{}/alpha_loss'.format(base)]

    ops += [self.actor_loss]
    names += ['{}/actor_loss'.format(base)]

    ops += [self.critic_loss[0]]
    names += ['{}/Q1_loss'.format(base)]

    ops += [self.critic_loss[1]]
    names += ['{}/Q2_loss'.format(base)]

    ops += [self.critic_loss[2]]
    names += ['{}/value_loss'.format(base)]

    # Add all names and ops to the tensorboard summary.
    for op, name in zip(ops, names):
        tf.compat.v1.summary.scalar(name, op)

    return ops, names
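# The `self._ac_magnitudes * self.policy_out + self._ac_means` expression
# above maps a squashed policy output (assumed to lie in [-1, 1], e.g. after
# a tanh) back into the environment's action bounds. A sketch of how such
# terms are typically derived from a gym.spaces.Box action space (an
# assumption about this repository's setup, not confirmed by the snippet):

import numpy as np
from gym.spaces import Box

ac_space = Box(low=np.array([-2.0, 0.0]), high=np.array([2.0, 1.0]),
               dtype=np.float32)

ac_means = (ac_space.high + ac_space.low) / 2.0       # center of each dimension
ac_magnitudes = (ac_space.high - ac_space.low) / 2.0  # half-range of each dim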