def _update_target(self):
    """Push the policy and Q-function variables toward their target copies.

    Calls ``tf_utils.soft_variables_update`` once for the policy pair and
    then once for the Q-function pair, both with
    ``tau=self._target_update_tau``.
    """
    # NOTE(review): presumably the first argument is the source variable
    # list and the second is the target list being updated -- confirm
    # against tf_utils.soft_variables_update.
    tau = self._target_update_tau
    network_pairs = (
        (self._policy, self._target_policy),
        (self._qf, self._target_qf),
    )
    for online_net, target_net in network_pairs:
        tf_utils.soft_variables_update(
            online_net.trainable_variables,
            target_net.trainable_variables,
            tau=tau,
        )
def init_opt(self):
    """Synchronise the target networks and switch the exploration flag on.

    Runs ``tf_utils.soft_variables_update`` with ``tau=1.0`` for the policy
    pair and then the Q-function pair, and finally sets
    ``self._exploration_status`` to ``True``.
    """
    # NOTE(review): tau=1.0 presumably makes the update a full copy of the
    # online variables into the targets -- confirm against
    # tf_utils.soft_variables_update.
    network_pairs = (
        (self._policy, self._target_policy),
        (self._qf, self._target_qf),
    )
    for online_net, target_net in network_pairs:
        tf_utils.soft_variables_update(
            online_net.trainable_variables,
            target_net.trainable_variables,
            tau=1.0,
        )

    self._exploration_status = True
def _update_target(self):
    """Push the value-function variables toward the target value function.

    Calls ``tf_utils.soft_variables_update`` with the trainable variables of
    ``self._vf`` and ``self._target_vf`` and ``tau=self._target_update_tau``.
    """
    # NOTE(review): presumably the first argument is the source variable
    # list and the second is the target list being updated -- confirm
    # against tf_utils.soft_variables_update.
    online_vars = self._vf.trainable_variables
    target_vars = self._target_vf.trainable_variables
    tf_utils.soft_variables_update(
        online_vars,
        target_vars,
        tau=self._target_update_tau,
    )