コード例 #1
0
    def __init__(self,
                 actor_critic,
                 value_loss_coef,
                 entropy_coef,
                 lr=None,
                 eps=None,
                 alpha=None,
                 max_grad_norm=None,
                 acktr=False,
                 gradient_noise=0.0):
        """Record the model and hyper-parameters, then create the optimizer.

        Args:
            actor_critic: Model to optimise. It is handed to ``KFACOptimizer``
                directly, or its ``parameters()`` are handed to RMSprop.
            value_loss_coef: Stored as ``self.value_loss_coef``.
            entropy_coef: Stored as ``self.entropy_coef``.
            lr: Learning rate forwarded to RMSprop (ignored when ``acktr``).
            eps: ``eps`` forwarded to RMSprop (ignored when ``acktr``).
            alpha: ``alpha`` forwarded to RMSprop (ignored when ``acktr``).
            max_grad_norm: Stored as ``self.max_grad_norm``.
            acktr: When truthy, use ``KFACOptimizer`` instead of RMSprop.
            gradient_noise: Stored as ``self.gradient_noise``; it is not used
                inside this constructor.
        """
        # Model handle and optimizer-selection flag.
        self.actor_critic, self.acktr = actor_critic, acktr

        # Loss weights and clipping threshold, kept for later use.
        self.value_loss_coef, self.entropy_coef = value_loss_coef, entropy_coef
        self.max_grad_norm = max_grad_norm

        if not acktr:
            rmsprop_options = {'eps': eps, 'alpha': alpha}
            self.optimizer = optim.RMSprop(actor_critic.parameters(), lr,
                                           **rmsprop_options)
        else:
            self.optimizer = KFACOptimizer(actor_critic)

        self.gradient_noise = gradient_noise
コード例 #2
0
    def __init__(self,
                 actor_critic,
                 value_loss_coef,
                 entropy_coef,
                 lr=None,
                 eps=None,
                 alpha=None,
                 max_grad_norm=None,
                 acktr=False):
        """Keep the model and loss settings and pick the optimizer.

        Args:
            actor_critic: Model to optimise.
            value_loss_coef: Stored as ``self.value_loss_coef``.
            entropy_coef: Stored as ``self.entropy_coef``.
            lr: Learning rate forwarded to RMSprop (ignored when ``acktr``).
            eps: ``eps`` forwarded to RMSprop (ignored when ``acktr``).
            alpha: ``alpha`` forwarded to RMSprop (ignored when ``acktr``).
            max_grad_norm: Stored as ``self.max_grad_norm``.
            acktr: When truthy, use ``KFACOptimizer`` instead of RMSprop.
        """
        self.actor_critic, self.acktr = actor_critic, acktr
        self.value_loss_coef, self.entropy_coef = value_loss_coef, entropy_coef
        self.max_grad_norm = max_grad_norm

        if not acktr:
            # Default path: plain RMSprop over every model parameter.
            self.optimizer = optim.RMSprop(
                actor_critic.parameters(), lr, eps=eps, alpha=alpha)
            return

        # NOTE(review): this variant hands KFACOptimizer a list holding the
        # same model twice -- presumably its KFAC implementation accepts a
        # list of models; confirm against the KFACOptimizer in this project.
        self.optimizer = KFACOptimizer([actor_critic, actor_critic])
コード例 #3
0
    def __init__(self,
                 actor_critic,
                 value_loss_coef,
                 entropy_coef,
                 lr=None,
                 eps=None,
                 alpha=None,
                 max_grad_norm=None,
                 acktr=False,
                 train_selfsup_attention=False):
        """Store settings and build the main (and optional attention) optimizer.

        Args:
            actor_critic: Model to optimise. When ``train_selfsup_attention``
                is truthy it must expose ``base.selfsup_attention``.
            value_loss_coef: Stored as ``self.value_loss_coef``.
            entropy_coef: Stored as ``self.entropy_coef``.
            lr: Learning rate forwarded to RMSprop (ignored when ``acktr``).
            eps: ``eps`` forwarded to RMSprop (ignored when ``acktr``).
            alpha: ``alpha`` forwarded to RMSprop (ignored when ``acktr``).
            max_grad_norm: Stored as ``self.max_grad_norm``.
            acktr: When truthy, use ``KFACOptimizer`` instead of RMSprop.
            train_selfsup_attention: When truthy, also create
                ``self.selfsup_attention_optimizer``.
        """
        self.actor_critic, self.acktr = actor_critic, acktr
        self.value_loss_coef, self.entropy_coef = value_loss_coef, entropy_coef
        self.max_grad_norm = max_grad_norm
        self.train_selfsup_attention = train_selfsup_attention

        # Main optimizer: KFAC when requested, RMSprop otherwise.
        if not acktr:
            self.optimizer = optim.RMSprop(
                actor_critic.parameters(), lr, eps=eps, alpha=alpha)
        else:
            self.optimizer = KFACOptimizer(actor_critic)

        # Optional second optimizer: Adam with a fixed 0.001 learning rate over
        # the self-supervised attention sub-module's parameters only.
        if self.train_selfsup_attention:
            attention_params = actor_critic.base.selfsup_attention.parameters()
            self.selfsup_attention_optimizer = optim.Adam(attention_params,
                                                          0.001)
コード例 #4
0
    def __init__(self,
                 actor_critic,
                 value_loss_coef,
                 entropy_coef,
                 lr=None,
                 lr_beta=None,
                 reg_beta=None,
                 eps=None,
                 alpha=None,
                 max_grad_norm=None,
                 acktr=False):
        """Store settings, split the parameters and build the optimizer.

        Parameters whose name contains ``"base.beta_net_actor"`` are collected
        in ``self.beta_actor_list``; all others go to ``self.param_list``.
        Without ``acktr`` the two lists become separate RMSprop parameter
        groups, so the beta net can have its own learning rate and weight
        decay.

        Args:
            actor_critic: Model to optimise; must expose ``named_parameters()``.
            value_loss_coef: Stored as ``self.value_loss_coef``.
            entropy_coef: Stored as ``self.entropy_coef``.
            lr: Default RMSprop learning rate (ignored when ``acktr``).
            lr_beta: Learning rate for the beta-net group. ``None`` makes the
                group inherit ``lr``.
            reg_beta: Weight decay for the beta-net group. ``None`` makes the
                group inherit the optimizer default.
            eps: ``eps`` forwarded to RMSprop (ignored when ``acktr``).
            alpha: ``alpha`` forwarded to RMSprop (ignored when ``acktr``).
            max_grad_norm: Stored as ``self.max_grad_norm``.
            acktr: When truthy, use ``KFACOptimizer`` instead of RMSprop.
        """
        self.actor_critic = actor_critic
        self.acktr = acktr

        self.value_loss_coef = value_loss_coef
        self.entropy_coef = entropy_coef

        self.max_grad_norm = max_grad_norm

        if acktr:
            self.optimizer = KFACOptimizer(actor_critic)

        # The lists are filled in both modes and, as in the original, after
        # the optional KFAC construction.
        self.beta_actor_list = []
        self.param_list = []
        for name, param in actor_critic.named_parameters():
            if "base.beta_net_actor" in name:
                self.beta_actor_list.append(param)
            else:
                self.param_list.append(param)

        # The original attached this branch to the ``for`` loop (for/else), so
        # it always ran and overwrote the KFAC optimizer. It must only run
        # when ACKTR is off.
        if not acktr:
            # Separate learning rate / regularisation for the beta net. A
            # ``None`` override is omitted so the group inherits the defaults
            # instead of carrying ``None``.
            beta_group = {'params': self.beta_actor_list}
            if lr_beta is not None:
                beta_group['lr'] = lr_beta
            if reg_beta is not None:
                beta_group['weight_decay'] = reg_beta

            self.optimizer = optim.RMSprop(
                [{'params': self.param_list}, beta_group],
                lr,
                eps=eps,
                alpha=alpha)
コード例 #5
0
    def __init__(self,
                 actor_critic,
                 value_loss_coef,
                 entropy_coef,
                 lr=None,
                 eps=None,
                 alpha=None,
                 max_grad_norm=None,
                 acktr=False,
                 path_recorder=None,
                 cost_evaluator=None,
                 arch_loss_coef=0):
        """Record the model, loss weights and helpers, then build the optimizer.

        Args:
            actor_critic: Model to optimise.
            value_loss_coef: Stored as ``self.value_loss_coef``.
            entropy_coef: Stored as ``self.entropy_coef``.
            lr: Learning rate forwarded to RMSprop (ignored when ``acktr``).
            eps: ``eps`` forwarded to RMSprop (ignored when ``acktr``).
            alpha: ``alpha`` forwarded to RMSprop (ignored when ``acktr``).
            max_grad_norm: Stored as ``self.max_grad_norm``.
            acktr: When truthy, use ``KFACOptimizer`` instead of RMSprop.
            path_recorder: Stored as ``self.path_recorder``; not used here.
            cost_evaluator: Stored as ``self.cost_evaluator``; not used here.
            arch_loss_coef: Stored as ``self.arch_loss_coef``; not used here.
        """
        self.actor_critic, self.acktr = actor_critic, acktr
        self.value_loss_coef, self.entropy_coef = value_loss_coef, entropy_coef
        self.max_grad_norm = max_grad_norm

        # Extra collaborators for this variant; only stored at this point.
        self.path_recorder, self.cost_evaluator = path_recorder, cost_evaluator
        self.arch_loss_coef = arch_loss_coef

        if not acktr:
            self.optimizer = optim.RMSprop(
                actor_critic.parameters(), lr, eps=eps, alpha=alpha)
        else:
            self.optimizer = KFACOptimizer(actor_critic)
コード例 #6
0
 def init_optimizer(self):
     """Create ``self.optimizer`` and, for RMSprop, ``self.schedulers``.

     With ``self.acktr`` truthy only a ``KFACOptimizer`` is created and no
     scheduler is set. Otherwise RMSprop is built from ``self.lr``,
     ``self.eps`` and ``self.alpha``, and wrapped in an ``ExponentialLR``
     scheduler using ``self.lr_decay``.
     """
     if self.acktr:
         self.optimizer = KFACOptimizer(self.actor_critic)
         return

     rmsprop = optim.RMSprop(self.actor_critic.parameters(),
                             self.lr,
                             eps=self.eps,
                             alpha=self.alpha)
     self.optimizer = rmsprop
     # The scheduler is attached to the optimizer that was just stored.
     self.schedulers = ExponentialLR(rmsprop, self.lr_decay)
コード例 #7
0
    def __init__(self,
                 actor_critic,
                 value_loss_coef,
                 entropy_coef,
                 lr=None,
                 filter_mem=None,
                 eps=None,
                 alpha=None,
                 max_grad_norm=None,
                 acktr=False):
        """Store settings, split the parameters and build the optimizer.

        Parameters whose name contains ``"base.filter_net"`` are collected in
        ``self.filter_list``; all others go to ``self.param_list``. Without
        ``acktr`` the two lists become separate RMSprop parameter groups that
        share the same hyper-parameters.

        Args:
            actor_critic: Model to optimise; must expose ``named_parameters()``.
            value_loss_coef: Stored as ``self.value_loss_coef``.
            entropy_coef: Stored as ``self.entropy_coef``.
            lr: RMSprop learning rate (ignored when ``acktr``).
            filter_mem: Accepted for interface compatibility; not used in this
                constructor.
            eps: ``eps`` forwarded to RMSprop (ignored when ``acktr``).
            alpha: ``alpha`` forwarded to RMSprop (ignored when ``acktr``).
            max_grad_norm: Stored as ``self.max_grad_norm``.
            acktr: When truthy, use ``KFACOptimizer`` instead of RMSprop.
        """
        self.actor_critic = actor_critic
        self.acktr = acktr

        self.value_loss_coef = value_loss_coef
        self.entropy_coef = entropy_coef

        self.max_grad_norm = max_grad_norm

        if acktr:
            self.optimizer = KFACOptimizer(actor_critic)

        # The lists are filled in both modes and, as in the original, after
        # the optional KFAC construction.
        self.filter_list = []
        self.param_list = []
        for name, param in actor_critic.named_parameters():
            if "base.filter_net" in name:
                self.filter_list.append(param)
            else:
                self.param_list.append(param)

        # The original attached this branch to the ``for`` loop (for/else), so
        # it always ran and overwrote the KFAC optimizer. It must only run
        # when ACKTR is off.
        if not acktr:
            self.optimizer = optim.RMSprop(
                [{'params': self.param_list},
                 {'params': self.filter_list}],
                lr,
                eps=eps,
                alpha=alpha)
コード例 #8
0
    def __init__(self,
                 actor_critic,
                 value_loss_coef,
                 entropy_coef,
                 lr=None,
                 lr_beta=None,
                 reg_beta=None,
                 delib_center=0.5,
                 eps=None,
                 alpha=None,
                 max_grad_norm=None,
                 acktr=False):
        """Store settings, split the parameters and build the optimizer.

        Parameters whose name contains ``"base.beta_value_net"`` are collected
        in ``self.beta_value_list``; all others go to ``self.param_list``.
        Without ``acktr`` the two lists become separate RMSprop parameter
        groups, so the beta value net can have its own learning rate.

        Args:
            actor_critic: Model to optimise; must expose ``named_parameters()``.
            value_loss_coef: Stored as ``self.value_loss_coef``.
            entropy_coef: Stored as ``self.entropy_coef``.
            lr: Default RMSprop learning rate (ignored when ``acktr``).
            lr_beta: Learning rate for the beta-value-net group. ``None``
                makes the group inherit ``lr``.
            reg_beta: Stored as ``self.reg_beta``; it is not passed to the
                optimizer here.
            delib_center: Stored as ``self.delib_center``; not used here.
            eps: ``eps`` forwarded to RMSprop (ignored when ``acktr``).
            alpha: ``alpha`` forwarded to RMSprop (ignored when ``acktr``).
            max_grad_norm: Stored as ``self.max_grad_norm``.
            acktr: When truthy, use ``KFACOptimizer`` instead of RMSprop.
        """
        self.actor_critic = actor_critic
        self.acktr = acktr

        self.value_loss_coef = value_loss_coef
        self.entropy_coef = entropy_coef

        self.max_grad_norm = max_grad_norm
        self.reg_beta = reg_beta

        self.delib_center = delib_center

        if acktr:
            self.optimizer = KFACOptimizer(actor_critic)

        # The lists are filled in both modes and, as in the original, after
        # the optional KFAC construction.
        self.beta_value_list = []
        self.param_list = []
        for name, param in actor_critic.named_parameters():
            if "base.beta_value_net" in name:
                self.beta_value_list.append(param)
            else:
                self.param_list.append(param)

        # The original attached this branch to the ``for`` loop (for/else), so
        # it always ran and overwrote the KFAC optimizer. It must only run
        # when ACKTR is off.
        if not acktr:
            # A ``None`` override is omitted so the group inherits ``lr``
            # instead of carrying ``None`` as its learning rate.
            beta_group = {'params': self.beta_value_list}
            if lr_beta is not None:
                beta_group['lr'] = lr_beta

            self.optimizer = optim.RMSprop(
                [{'params': self.param_list}, beta_group],
                lr,
                eps=eps,
                alpha=alpha)