def rollout(self, timestep_limit, add_noise=True):
    """Run one episode with this worker's policy in its environment.

    Args:
        timestep_limit: Maximum number of environment steps for the episode.
        add_noise: Whether the module-level ``rollout`` helper should inject
            exploration noise (defaults to True for training workers).

    Returns:
        Tuple of (per-step rewards, rollout fragment length).
    """
    episode = rollout(
        self.policy,
        self.env,
        timestep_limit=timestep_limit,
        add_noise=add_noise,
    )
    rewards, fragment_length = episode
    return rewards, fragment_length
def rollout(self, timestep_limit):
    """Run one noise-free (evaluation) episode with this worker's policy.

    Args:
        timestep_limit: Maximum number of environment steps for the episode.

    Returns:
        Tuple of (per-step rewards, rollout length).
    """
    # Exploration noise is always disabled for this worker variant.
    rewards, episode_length = rollout(
        self.policy,
        self.env,
        timestep_limit=timestep_limit,
        add_noise=False,
    )
    return rewards, episode_length
def rollout(self, timestep_limit, add_noise=False):
    """Run one episode, forwarding this worker's configured reward offset.

    Args:
        timestep_limit: Maximum number of environment steps for the episode.
        add_noise: Whether the module-level ``rollout`` helper should inject
            exploration noise (defaults to False here).

    Returns:
        Tuple of (per-step rewards, rollout fragment length).
    """
    # ``offset`` comes from this worker's config; its semantics live in the
    # module-level rollout helper.
    rewards, fragment_length = rollout(
        self.policy,
        self.env,
        timestep_limit=timestep_limit,
        add_noise=add_noise,
        offset=self.config["offset"],
    )
    return rewards, fragment_length
def evaluate(self, candidate):
    """Score a perturbation candidate with one noise-free rollout.

    Args:
        candidate: Pair of (noise_index, multiplier) identifying a weight
            perturbation held by the shared model keeper.

    Returns:
        Tuple of (total episode reward, episode length).
    """
    noise_index, multiplier = candidate
    perturbed_weights = self.common.model_keeper.get_perturbed_weights(
        noise_index, multiplier)
    # Load the perturbed parameters into the shared policy before rolling out.
    self.common.policy.set_flat_weights(perturbed_weights)
    episode_rewards, episode_length = rollout(
        self.common.policy,
        self.common.env,
        timestep_limit=self.timestep_limit,
        add_noise=False,
    )
    return episode_rewards.sum(), episode_length
def evaluate(self, candidate):
    """Score an optimizer candidate with one noise-free rollout.

    Expands ``candidate`` into a flat weight vector via the shared
    optimizer, loads it into the shared policy, and runs a single
    evaluation episode without exploration noise.

    Args:
        candidate: Compressed candidate representation understood by
            ``self.common.optimizer.expand``.

    Returns:
        Tuple of (total episode reward, episode length).
    """
    weights = self.common.optimizer.expand(candidate)
    self.common.policy.set_flat_weights(weights)
    rewards, length = rollout(
        self.common.policy,
        self.common.env,
        timestep_limit=self.timestep_limit,
        add_noise=False,
    )
    # Compute the episode total once; it is both logged and returned.
    total_reward = rewards.sum()
    # Lazy %-style args: the message is only formatted if INFO is enabled.
    logger.info(
        'candidate %s %s %s %s %s',
        candidate,
        weights[0],
        self.common.policy.get_flat_weights()[0],
        total_reward,
        length,
    )
    return total_reward, length