def postprocess_trajectory(self,
                           sample_batch,
                           other_agent_batches=None,
                           episode=None):
    """Tune the parameter-noise sigma (if enabled), then post-process.

    When ``self.config["parameter_noise"]`` is truthy, the actions stored
    in ``sample_batch`` are compared against actions recomputed for the
    same observations after running ``self.remove_noise_op``. The RMS
    difference is stored in ``self.pi_distance`` and used to scale
    ``self.parameter_noise_sigma_val`` up or down by a factor of 1.01.

    Args:
        sample_batch: Batch providing the ``SampleBatch.CUR_OBS`` and
            ``SampleBatch.ACTIONS`` columns.
        other_agent_batches: Not used by this function.
        episode: Not used by this function.

    Returns:
        The result of ``postprocess_nstep_and_prio(self, sample_batch)``.
    """
    if not self.config["parameter_noise"]:
        return postprocess_nstep_and_prio(self, sample_batch)

    # Adjust the sigma of the parameter space noise.
    observations, perturbed_actions = map(
        list,
        sample_batch.columns([SampleBatch.CUR_OBS, SampleBatch.ACTIONS]))

    # Presumably restores the unperturbed weights so that the actions
    # fetched below are noise-free -- confirm against the op definition.
    self.sess.run(self.remove_noise_op)
    feeds = {
        self.cur_observations: observations,
        self.stochastic: False,
        self.noise_scale: .0,
        self.pure_exploration_phase: False,
    }
    unperturbed_actions = self.sess.run(self.output_actions, feed_dict=feeds)

    # Root-mean-square distance between the two sets of actions.
    action_delta = unperturbed_actions - perturbed_actions
    rms_distance = np.sqrt(np.mean(np.square(action_delta)))
    self.pi_distance = rms_distance

    # Multiplying the sampled OU noise by the noise scale is equivalent
    # to multiplying the sigma of OU by the noise scale.
    target_distance = \
        self.config["exploration_ou_sigma"] * self.cur_noise_scale
    if rms_distance < target_distance:
        self.parameter_noise_sigma_val *= 1.01
    else:
        self.parameter_noise_sigma_val /= 1.01

    self.parameter_noise_sigma.load(
        self.parameter_noise_sigma_val, session=self.sess)

    return postprocess_nstep_and_prio(self, sample_batch)
def postprocess_trajectory(self,
                           sample_batch,
                           other_agent_batches=None,
                           episode=None):
    """Tune the parameter-noise sigma (if enabled), then post-process.

    When ``self.config["parameter_noise"]`` is truthy, the actions stored
    in ``sample_batch`` are compared against actions recomputed for the
    same observations after running ``self.remove_parameter_noise_op``.
    The RMS difference is stored in ``self.pi_distance`` and used to scale
    ``self.parameter_noise_sigma_val`` up or down by a factor of 1.01.

    Args:
        sample_batch: Batch providing the ``SampleBatch.CUR_OBS`` and
            ``SampleBatch.ACTIONS`` columns.
        other_agent_batches: Not used by this function.
        episode: Not used by this function.

    Returns:
        The result of ``postprocess_nstep_and_prio(self, sample_batch)``.
    """
    if not self.config["parameter_noise"]:
        return postprocess_nstep_and_prio(self, sample_batch)

    # Adjust the sigma of the parameter space noise.
    observations, perturbed_actions = map(
        list,
        sample_batch.columns([SampleBatch.CUR_OBS, SampleBatch.ACTIONS]))

    # Presumably restores the unperturbed weights so that the actions
    # fetched below are noise-free -- confirm against the op definition.
    self.sess.run(self.remove_parameter_noise_op)

    # TODO(sven): This won't work if exploration != Noise, which is
    #  probably fine as parameter_noise will soon be its own
    #  Exploration class.
    fetches = [self.output_actions, self.exploration.get_info()]
    feeds = {
        self.cur_observations: observations,
        self._is_exploring: False,
        self._timestep: self.global_timestep,
    }
    unperturbed_actions, cur_noise_scale = self.sess.run(
        fetches, feed_dict=feeds)

    # Root-mean-square distance between the two sets of actions.
    action_delta = unperturbed_actions - perturbed_actions
    rms_distance = np.sqrt(np.mean(np.square(action_delta)))
    self.pi_distance = rms_distance

    # Multiplying the sampled OU noise by the noise scale is equivalent
    # to multiplying the sigma of OU by the noise scale.
    ou_sigma = self.config["exploration_config"].get("ou_sigma", 0.2)
    target_distance = ou_sigma * cur_noise_scale
    if rms_distance < target_distance:
        self.parameter_noise_sigma_val *= 1.01
    else:
        self.parameter_noise_sigma_val /= 1.01

    self.parameter_noise_sigma.load(
        self.parameter_noise_sigma_val, session=self.sess)

    return postprocess_nstep_and_prio(self, sample_batch)
def postprocess_trajectory(policy,
                           sample_batch,
                           other_agent_batches=None,
                           episode=None):
    """Post-process a sampled trajectory for ``policy``.

    Thin wrapper that forwards to ``postprocess_nstep_and_prio``.

    Args:
        policy: Policy object forwarded to ``postprocess_nstep_and_prio``.
        sample_batch: Batch forwarded to ``postprocess_nstep_and_prio``.
        other_agent_batches: Not used by this function.
        episode: Not used by this function.

    Returns:
        The result of ``postprocess_nstep_and_prio(policy, sample_batch)``.
    """
    processed_batch = postprocess_nstep_and_prio(policy, sample_batch)
    return processed_batch