def update_all_targets(self):
    """
    Refresh the target critic and advance the iteration counter.

    Meant to be invoked once the regular updates for every agent are done.
    ``self.agents`` is used here as a single object exposing ``critic`` and
    ``target_critic`` (it is not iterated).  ``soft_update`` is defined
    outside this block and receives the target, the source and ``self.tau``.
    """
    holder = self.agents
    soft_update(holder.target_critic, holder.critic, self.tau)
    self.niter += 1
def update_all_targets(self):
    """
    Refresh every agent's target networks, then bump the iteration counter.

    Meant to be invoked once the regular updates for every agent are done.
    For each entry of ``self.agents`` the critic pair is handed to
    ``soft_update`` first and the policy pair second, both with ``self.tau``.
    ``self.niter`` is incremented by one afterwards.
    """
    # (target attribute, source attribute), in the order they are updated.
    network_pairs = (
        ("target_critic", "critic"),
        ("target_policy", "policy"),
    )
    for agent in self.agents:
        for target_name, source_name in network_pairs:
            soft_update(
                getattr(agent, target_name),
                getattr(agent, source_name),
                self.tau,
            )
    self.niter += 1
def update_all_agent(self):
    """
    Run the soft target update for every agent.

    For each entry of ``self.agents`` the actor pair is passed to
    ``soft_update`` first, then the critic pair, both with ``self.tau``.
    ``self.num_iteration`` is incremented by one once the loop is finished.
    """
    for member in self.agents:
        # Actor before critic, one call per (target, source) pair.
        soft_update(member.target_actor, member.actor, self.tau)
        soft_update(member.target_critic, member.critic, self.tau)
    self.num_iteration = self.num_iteration + 1
def update_all_targets(self):
    """
    Update all target networks (called after normal updates have been
    performed for each agent).

    Walks ``self.agents`` in order.  For the current agent, the critic pair
    and then the policy pair are passed to ``soft_update`` with ``self.tau``;
    afterwards that agent's entry in ``self.alg_types`` is checked against
    ``SHARED_ALGOS``.  ``self.niter`` is incremented by one when the loop is
    left without an exception.

    Raises:
        ValueError: when the checked algorithm type is not in
            ``SHARED_ALGOS``.
    """
    for i, a in enumerate(self.agents):
        soft_update(a.target_critic, a.critic, self.tau)
        soft_update(a.target_policy, a.policy, self.tau)
        # Stop after the first agent whose algorithm is in SHARED_ALGOS
        # (presumably those agents share one set of networks, so a single
        # update is enough -- TODO confirm).
        # NOTE(review): the source layout was flattened; the `else` below is
        # read as pairing with this `if`, which means the loop body runs at
        # most once (it either breaks or raises).  Confirm it was not a
        # `for ... else`.
        if self.alg_types[i] in SHARED_ALGOS:
            break
        else:
            raise ValueError('Only shared algo on this branch')
    self.niter += 1
def update_all_targets(self):
    """
    Synchronise the target critic and every agent's target policy.

    Meant to be invoked once the regular updates for every agent are done.

    * ``self.hard_update_interval is None``: each (target, source) pair is
      passed to ``soft_update`` together with ``self.tau``.
    * otherwise: each pair is passed to ``hard_update``, but only on calls
      where ``self.niter`` is a multiple of the interval; other calls do
      nothing.

    ``self.niter`` is only read here, never modified.
    """
    interval = self.hard_update_interval

    if interval is None:
        soft_update(self.target_critic, self.critic, self.tau)
        for agent in self.agents:
            soft_update(agent.target_policy, agent.policy, self.tau)
        return

    # Between two hard-update steps nothing has to be done.
    if self.niter % interval != 0:
        return

    hard_update(self.target_critic, self.critic)
    for agent in self.agents:
        hard_update(agent.target_policy, agent.policy)
def update_all_targets(self):
    """
    Blend the online networks into their target copies.

    Meant to be invoked once the regular updates for every agent are done.
    The critic is handled in two parameter groups, each with its own rate:

    * ``nonattend_parameters()`` are blended with ``self.tau``
    * ``attend_parameters()`` are blended with ``self.attend_tau``

    For a rate ``r`` every target parameter is overwritten in place with
    ``target * (1 - r) + source * r``.  Finally each agent's policy pair is
    passed to ``soft_update`` with ``self.tau``.
    """

    def blend(target_params, source_params, rate):
        # In-place interpolation of each target parameter towards its source.
        for tgt, src in zip(target_params, source_params):
            tgt.data.copy_(tgt.data * (1.0 - rate) + src.data * rate)

    blend(
        self.target_critic.nonattend_parameters(),
        self.critic.nonattend_parameters(),
        self.tau,
    )
    blend(
        self.target_critic.attend_parameters(),
        self.critic.attend_parameters(),
        self.attend_tau,
    )

    for agent in self.agents:
        soft_update(agent.target_policy, agent.policy, self.tau)
def update_all_targets(self):
    """
    Refresh all target networks and advance the iteration counter.

    Meant to be invoked once the regular updates for every agent are done.

    * When ``self.commonCritic`` is truthy, the single ``self.critic`` /
      ``self.target_critic`` pair is passed to ``soft_update`` once;
      otherwise each agent's own critic pair is.
    * Every agent after the first has ``hard_update(agent.policy,
      self.agents[0].policy)`` called before its policy pair is passed to
      ``soft_update`` (presumably this copies the first agent's policy into
      the others -- confirm against ``hard_update``).

    ``self.niter`` is incremented by one at the end.
    """
    shared_critic = self.commonCritic
    if shared_critic:
        soft_update(self.target_critic, self.critic, self.tau)

    for index, agent in enumerate(self.agents):
        if not shared_critic:
            soft_update(agent.target_critic, agent.critic, self.tau)
        if index != 0:
            hard_update(agent.policy, self.agents[0].policy)
        soft_update(agent.target_policy, agent.policy, self.tau)

    self.niter += 1
def update_target(self):
    """
    Pass the critic pair and then the actor pair to ``soft_update``.

    Each call receives the target network, its source network and
    ``self.tau``.  ``soft_update`` itself is defined outside this block.
    """
    # (target attribute, source attribute), in the order they are updated.
    for target_name, source_name in (
        ("target_critic", "critic"),
        ("target_actor", "actor"),
    ):
        soft_update(
            getattr(self, target_name),
            getattr(self, source_name),
            self.tau,
        )
def update_all_targets(self):
    """
    Refresh the target critic and every agent's target policy.

    The ``self.critic`` / ``self.target_critic`` pair is passed to
    ``soft_update`` first, followed by the policy pair of each entry in
    ``self.agents``; every call uses ``self.tau``.
    """

    def sync(target_net, online_net):
        # Single place where the update rate is supplied.
        soft_update(target_net, online_net, self.tau)

    sync(self.target_critic, self.critic)
    for agent in self.agents:
        sync(agent.target_policy, agent.policy)