def configure_optimizers(self):
    """Build the list of optimizer/scheduler entries for this trainer.

    The returned list holds, in order:

    1. the entry for ``q_network``;
    2. when ``calc_cpe_in_training`` is truthy, the entries for
       ``reward_network`` and ``q_network_cpe``;
    3. one ``SoftUpdate`` entry pairing target-network parameters with
       their source-network parameters, configured with ``self.tau``.

    Returns:
        list: the values returned by each ``make_optimizer_scheduler``
        call, in the order listed above.
    """
    schedulers = [
        self.q_network_optimizer.make_optimizer_scheduler(
            self.q_network.parameters()
        )
    ]
    if self.calc_cpe_in_training:
        reward_entry = self.reward_network_optimizer.make_optimizer_scheduler(
            self.reward_network.parameters()
        )
        cpe_entry = self.q_network_cpe_optimizer.make_optimizer_scheduler(
            self.q_network_cpe.parameters()
        )
        schedulers.extend((reward_entry, cpe_entry))

    # Soft-update: targets and sources are collected in the same order so
    # the two lists stay aligned network by network.
    targets = list(self.q_network_target.parameters())
    sources = list(self.q_network.parameters())
    if self.calc_cpe_in_training:
        targets.extend(self.q_network_cpe_target.parameters())
        sources.extend(self.q_network_cpe.parameters())

    soft_update_entry = SoftUpdate.make_optimizer_scheduler(
        targets, sources, tau=self.tau
    )
    schedulers.append(soft_update_entry)
    return schedulers
def configure_optimizers(self):
    """Build the list of optimizer/scheduler entries for this trainer.

    The returned list holds the entry for ``q_network`` first. When
    ``calc_cpe_in_training`` is truthy, ``_configure_cpe_optimizers()``
    supplies three values: extra target parameters, extra source
    parameters, and extra optimizer entries. These are appended to the
    respective collections. The final element is a ``SoftUpdate`` entry
    built from the accumulated target/source parameters and ``self.tau``.

    Returns:
        list: the optimizer/scheduler entries in the order described.
    """
    # Soft-update parameter lists start with the main Q-network pair.
    targets = list(self.q_network_target.parameters())
    sources = list(self.q_network.parameters())

    schedulers = [
        self.q_network_optimizer.make_optimizer_scheduler(
            self.q_network.parameters()
        )
    ]

    if self.calc_cpe_in_training:
        extra_targets, extra_sources, extra_entries = (
            self._configure_cpe_optimizers()
        )
        targets.extend(extra_targets)
        sources.extend(extra_sources)
        schedulers.extend(extra_entries)

    soft_update_entry = SoftUpdate.make_optimizer_scheduler(
        targets, sources, tau=self.tau
    )
    schedulers.append(soft_update_entry)
    return schedulers
def configure_optimizers(self):
    """Build the list of optimizer/scheduler entries for this trainer.

    The returned list holds, in order:

    1. the entry for ``q1_network``;
    2. the entry for ``q2_network``, only when ``self.q2_network`` is
       truthy (built with the same ``q_network_optimizer``);
    3. the entry for ``actor_network``;
    4. one ``SoftUpdate`` entry pairing the target-network parameters
       (q1, optional q2, actor) with the matching source-network
       parameters, configured with ``self.tau``.

    Returns:
        list: the values returned by each ``make_optimizer_scheduler``
        call, in the order listed above.
    """
    schedulers = [
        self.q_network_optimizer.make_optimizer_scheduler(
            self.q1_network.parameters()
        )
    ]
    if self.q2_network:
        q2_entry = self.q_network_optimizer.make_optimizer_scheduler(
            self.q2_network.parameters()
        )
        schedulers.append(q2_entry)
    actor_entry = self.actor_network_optimizer.make_optimizer_scheduler(
        self.actor_network.parameters()
    )
    schedulers.append(actor_entry)

    # Soft-update: targets and sources are collected in the same network
    # order (q1, optional q2, actor) so the two lists stay aligned.
    targets = list(self.q1_network_target.parameters())
    sources = list(self.q1_network.parameters())
    if self.q2_network:
        targets.extend(self.q2_network_target.parameters())
        sources.extend(self.q2_network.parameters())
    targets.extend(self.actor_network_target.parameters())
    sources.extend(self.actor_network.parameters())

    soft_update_entry = SoftUpdate.make_optimizer_scheduler(
        targets, sources, tau=self.tau
    )
    schedulers.append(soft_update_entry)
    return schedulers
def configure_optimizers(self):
    """Build the list of optimizer/scheduler entries for this trainer.

    The returned list holds, in order:

    1. the entry for ``q1_network``;
    2. the entry for ``q2_network``, only when ``self.q2_network`` is
       truthy (built with the same ``q_network_optimizer``);
    3. the entry for ``actor_network``;
    4. the entry for ``[self.log_alpha]``, only when
       ``self.alpha_optimizer`` is not ``None``;
    5. the entry for ``value_network``, only when ``self.value_network``
       is truthy;
    6. one ``SoftUpdate`` entry configured with ``self.tau``. When a value
       network is present it pairs ``value_network_target`` with
       ``value_network``; otherwise it pairs the q1 (and optional q2)
       target networks with their source networks.

    Returns:
        list: the values returned by each ``make_optimizer_scheduler``
        call, in the order listed above.
    """
    schedulers = [
        self.q_network_optimizer.make_optimizer_scheduler(
            self.q1_network.parameters()
        )
    ]
    if self.q2_network:
        q2_entry = self.q_network_optimizer.make_optimizer_scheduler(
            self.q2_network.parameters()
        )
        schedulers.append(q2_entry)

    actor_entry = self.actor_network_optimizer.make_optimizer_scheduler(
        self.actor_network.parameters()
    )
    schedulers.append(actor_entry)

    if self.alpha_optimizer is not None:
        alpha_entry = self.alpha_optimizer.make_optimizer_scheduler(
            [self.log_alpha]
        )
        schedulers.append(alpha_entry)

    if self.value_network:
        value_entry = self.value_network_optimizer.make_optimizer_scheduler(
            self.value_network.parameters()
        )
        schedulers.append(value_entry)

    # Soft-update: choose which target/source pair(s) to hand to SoftUpdate.
    if self.value_network:
        # The raw ``parameters()`` iterators are passed through unchanged
        # in this branch (they are not materialized into lists here).
        targets = self.value_network_target.parameters()
        sources = self.value_network.parameters()
    else:
        targets = list(self.q1_network_target.parameters())
        sources = list(self.q1_network.parameters())
        if self.q2_network:
            targets.extend(self.q2_network_target.parameters())
            sources.extend(self.q2_network.parameters())

    soft_update_entry = SoftUpdate.make_optimizer_scheduler(
        targets, sources, tau=self.tau
    )
    schedulers.append(soft_update_entry)
    return schedulers