def __init__(self):
    AgentBase.__init__(self)
    self.ClassAct = ShareBiConv
    self.ClassCri = self.ClassAct
    self.if_use_cri_target = True
    self.if_use_act_target = True
    self.obj_critic = (-np.log(0.5)) ** 0.5  # for reliable_lambda
def init(self, net_dim=256, state_dim=8, action_dim=2, reward_scale=1.0, gamma=0.99,
         learning_rate=1e-4, if_per_or_gae=False, env_num=1, gpu_id=0):
    AgentBase.init(self, net_dim=net_dim, state_dim=state_dim, action_dim=action_dim,
                   reward_scale=reward_scale, gamma=gamma, learning_rate=learning_rate,
                   if_per_or_gae=if_per_or_gae, env_num=env_num, gpu_id=gpu_id)
    if if_per_or_gae:  # if_use_per
        self.criterion = torch.nn.MSELoss(reduction='none')
        self.get_obj_critic = self.get_obj_critic_per
    else:
        self.criterion = torch.nn.MSELoss(reduction='mean')
        self.get_obj_critic = self.get_obj_critic_raw
def init(self, net_dim=256, state_dim=8, action_dim=2, reward_scale=1.0, gamma=0.99,
         learning_rate=1e-4, if_per_or_gae=False, env_num=1, gpu_id=0):
    """
    Explicitly call ``self.init()`` to overwrite the ``self.object`` in ``__init__()`` for multiprocessing.
    """
    AgentBase.init(self, net_dim=net_dim, state_dim=state_dim, action_dim=action_dim,
                   reward_scale=reward_scale, gamma=gamma, learning_rate=learning_rate,
                   if_per_or_gae=if_per_or_gae, env_num=env_num, gpu_id=gpu_id)
    if if_per_or_gae:  # if_use_per
        self.criterion = torch.nn.SmoothL1Loss(reduction='none')
        self.get_obj_critic = self.get_obj_critic_per
    else:
        self.criterion = torch.nn.SmoothL1Loss(reduction='mean')
        self.get_obj_critic = self.get_obj_critic_raw
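# The two init() variants above differ only in the loss reduction: prioritized replay (PER)
# needs per-sample TD errors so importance-sampling weights can be applied and priorities
# refreshed, while plain replay can reduce to a scalar directly. A minimal sketch of the two
# critic objectives, assuming a buffer interface (sample_batch, td_error_update) and target
# networks that may differ from the actual ReplayBuffer/agent API:
def get_obj_critic_raw_sketch(self, buffer, batch_size):
    with torch.no_grad():
        reward, mask, action, state, next_s = buffer.sample_batch(batch_size)
        next_q = self.cri_target(next_s, self.act_target(next_s))
        q_label = reward + mask * next_q  # mask is assumed to hold gamma * (1 - done)
    q_value = self.cri(state, action)
    obj_critic = self.criterion(q_value, q_label)  # reduction='mean' -> scalar loss
    return obj_critic, state

def get_obj_critic_per_sketch(self, buffer, batch_size):
    with torch.no_grad():
        reward, mask, action, state, next_s, is_weights = buffer.sample_batch(batch_size)
        next_q = self.cri_target(next_s, self.act_target(next_s))
        q_label = reward + mask * next_q
    q_value = self.cri(state, action)
    td_error = self.criterion(q_value, q_label)  # reduction='none' -> per-sample errors
    obj_critic = (td_error * is_weights).mean()  # importance-sampling correction
    buffer.td_error_update(td_error.detach())    # refresh the sampling priorities
    return obj_critic, state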
def __init__(self):
    AgentBase.__init__(self)
    self.ClassAct = ActorBiConv
    self.ClassCri = CriticBiConv
    self.if_use_cri_target = False
    self.if_use_act_target = False
    self.explore_noise = 2 ** -8
    self.obj_critic = (-np.log(0.5)) ** 0.5  # for reliable_lambda
def init(self, net_dim=256, state_dim=8, action_dim=2, reward_scale=1.0, gamma=0.99,
         learning_rate=1e-4, if_per_or_gae=False, env_num=1, gpu_id=0):
    AgentBase.init(self, net_dim=net_dim, state_dim=state_dim, action_dim=action_dim,
                   reward_scale=reward_scale, gamma=gamma, learning_rate=learning_rate,
                   if_per_or_gae=if_per_or_gae, env_num=env_num, gpu_id=gpu_id)
    self.act = self.cri = self.ClassAct(net_dim, state_dim, action_dim).to(self.device)
    if self.if_use_act_target:
        self.act_target = self.cri_target = deepcopy(self.act)
    else:
        self.act_target = self.cri_target = self.act

    self.cri_optim = torch.optim.Adam([
        {'params': self.act.enc_s.parameters(), 'lr': learning_rate * 1.25},
        {'params': self.act.enc_a.parameters()},
        {'params': self.act.mid_n.parameters(), 'lr': learning_rate * 1.25},
        {'params': self.act.dec_a.parameters()},
        {'params': self.act.dec_q.parameters()},
    ], lr=learning_rate)
    self.act_optim = self.cri_optim

    if if_per_or_gae:  # if_use_per
        self.criterion = torch.nn.MSELoss(reduction='none')
        self.get_obj_critic = self.get_obj_critic_per
    else:
        self.criterion = torch.nn.MSELoss(reduction='mean')
        self.get_obj_critic = self.get_obj_critic_raw
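# In the Adam call above, parameter groups with an explicit 'lr' (state encoder and middle
# network) train at 1.25x the base learning rate, while the remaining groups inherit the lr
# passed to Adam. A minimal standalone sketch of this PyTorch behaviour; the Linear modules
# are placeholders, not the actual BiConv sub-networks:
def _param_group_lr_sketch(learning_rate=1e-4):
    enc = torch.nn.Linear(8, 32)  # stands in for act.enc_s / act.mid_n
    dec = torch.nn.Linear(32, 2)  # stands in for act.enc_a / act.dec_a / act.dec_q
    optim = torch.optim.Adam([
        {'params': enc.parameters(), 'lr': learning_rate * 1.25},  # explicit per-group lr
        {'params': dec.parameters()},                              # falls back to default lr
    ], lr=learning_rate)
    assert optim.param_groups[0]['lr'] == learning_rate * 1.25
    assert optim.param_groups[1]['lr'] == learning_rate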
def __init__(self):
    AgentBase.__init__(self)
    self.ClassAct = Actor
    self.ClassCri = Critic
    self.if_use_cri_target = True
    self.if_use_act_target = True
    self.explore_noise = 0.3  # explore noise of action (OrnsteinUhlenbeckNoise)
    self.ou_noise = None
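# DDPG explores with temporally correlated Ornstein-Uhlenbeck noise rather than i.i.d.
# Gaussian noise, which helps in environments with momentum. A minimal sketch of such a
# process; theta and dt are illustrative defaults, and sigma is where explore_noise (0.3)
# would plug in. This is not necessarily the actual OrnsteinUhlenbeckNoise implementation:
class OrnsteinUhlenbeckNoiseSketch:
    def __init__(self, size, theta=0.15, sigma=0.3, dt=1e-2):
        self.size, self.theta, self.sigma, self.dt = size, theta, sigma, dt
        self.x = np.zeros(size)

    def __call__(self):
        # x_{t+1} = x_t + theta * (mu - x_t) * dt + sigma * sqrt(dt) * N(0, I), with mu = 0
        gauss = self.sigma * np.sqrt(self.dt) * np.random.normal(size=self.size)
        self.x = self.x + self.theta * (0.0 - self.x) * self.dt + gauss
        return self.x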
def __init__(self):
    AgentBase.__init__(self)
    self.ClassAct = Actor
    self.ClassCri = CriticTwin
    self.if_use_cri_target = True
    self.if_use_act_target = True
    self.explore_noise = 0.1  # standard deviation of exploration noise
    self.policy_noise = 0.2  # standard deviation of target policy smoothing noise
    self.update_freq = 2  # delayed update frequency for the actor and target networks
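# In TD3, policy_noise is the clipped Gaussian noise used for target policy smoothing and
# update_freq delays actor/target updates relative to the critic. A minimal sketch of the
# target Q computation; the noise clip (0.5), the action range [-1, 1], and the twin-critic
# interface get_q1_q2 are assumptions, not necessarily this class's exact update_net:
def td3_target_q_sketch(self, reward, mask, next_s):
    with torch.no_grad():
        next_a = self.act_target(next_s)
        noise = (torch.randn_like(next_a) * self.policy_noise).clamp(-0.5, 0.5)
        next_a = (next_a + noise).clamp(-1.0, 1.0)          # target policy smoothing
        q1, q2 = self.cri_target.get_q1_q2(next_s, next_a)  # assumed twin-critic interface
        return reward + mask * torch.min(q1, q2)            # clipped double-Q target
# The actor and the target networks would then be updated only once every
# self.update_freq critic updates (delayed policy update).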
def __init__(self):
    AgentBase.__init__(self)
    self.ClassAct = ActorPPO
    self.ClassCri = CriticPPO
    self.if_off_policy = False
    self.ratio_clip = 0.2  # could be 0.00~0.50; ratio.clamp(1 - clip, 1 + clip)
    self.lambda_entropy = 0.02  # could be 0.00~0.10
    self.lambda_a_value = 1.00  # could be 0.25~8.00, the lambda of the advantage value
    self.lambda_gae_adv = 0.98  # could be 0.95~0.99, GAE (Generalized Advantage Estimation, ICLR 2016)
    self.get_reward_sum = None  # self.get_reward_sum_gae if if_use_gae else self.get_reward_sum_raw
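# ratio_clip and lambda_entropy enter PPO's clipped surrogate objective for the actor.
# A minimal sketch; logprob_new/logprob_old, the advantage `adv`, and the policy entropy
# are assumed to come from the rollout buffer and the actor network:
def ppo_actor_objective_sketch(self, logprob_new, logprob_old, adv, entropy):
    ratio = (logprob_new - logprob_old).exp()
    surrogate1 = adv * ratio
    surrogate2 = adv * ratio.clamp(1.0 - self.ratio_clip, 1.0 + self.ratio_clip)
    obj_surrogate = torch.min(surrogate1, surrogate2).mean()  # pessimistic clipped objective
    return -(obj_surrogate + entropy.mean() * self.lambda_entropy)  # minimize the negative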
def __init__(self):
    AgentBase.__init__(self)
    self.ClassCri = CriticTwin
    self.ClassAct = ActorSAC
    self.if_use_cri_target = True
    self.if_use_act_target = False
    self.alpha_log = None
    self.alpha_optim = None
    self.target_entropy = None
    self.obj_critic = (-np.log(0.5)) ** 0.5  # for reliable_lambda
def __init__(self):
    AgentBase.__init__(self)
    self.ClassCri = DiscreteCriSAC
    self.ClassAct = DiscreteActSAC
    self.train_reward = []
    self.if_use_cri_target = True
    self.if_use_act_target = False
    self.trajectory_list = []
    self.alpha_log = None
    self.alpha_optim = None
    self.target_entropy = None
    self.obj_critic = (-np.log(0.5)) ** 0.5  # for reliable_lambda
    self.train_iteraion = 0
def init(self, net_dim=256, state_dim=8, action_dim=2, reward_scale=1.0, gamma=0.99,
         learning_rate=1e-4, if_per_or_gae=False, env_num=1, gpu_id=0):
    """
    Explicitly call ``self.init()`` to overwrite the ``self.object`` in ``__init__()`` for multiprocessing.
    """
    AgentBase.init(self, net_dim=net_dim, state_dim=state_dim, action_dim=action_dim,
                   reward_scale=reward_scale, gamma=gamma, learning_rate=learning_rate,
                   if_per_or_gae=if_per_or_gae, env_num=env_num, gpu_id=gpu_id)
    self.traj_list = [list() for _ in range(env_num)]
    self.env_num = env_num

    if if_per_or_gae:  # if_use_gae
        self.get_reward_sum = self.get_reward_sum_gae
    else:
        self.get_reward_sum = self.get_reward_sum_raw
    if env_num == 1:
        self.explore_env = self.explore_one_env
    else:
        self.explore_env = self.explore_vec_env
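# if_per_or_gae selects between plain discounted returns (get_reward_sum_raw) and GAE
# (get_reward_sum_gae). A minimal sketch of both, assuming 1-D tensors of length buf_len,
# buf_mask holding gamma * (1 - done), and lambda_gae_adv defined on the agent; the real
# buffer layout and method signatures may differ:
def get_reward_sum_raw_sketch(self, buf_len, buf_reward, buf_mask, buf_value):
    buf_r_sum = torch.empty(buf_len, dtype=torch.float32, device=self.device)
    pre_r_sum = 0.0
    for i in range(buf_len - 1, -1, -1):  # accumulate discounted returns backwards in time
        buf_r_sum[i] = buf_reward[i] + buf_mask[i] * pre_r_sum
        pre_r_sum = buf_r_sum[i]
    buf_adv = buf_r_sum - buf_value  # advantage = return minus value baseline
    return buf_r_sum, buf_adv

def get_reward_sum_gae_sketch(self, buf_len, buf_reward, buf_mask, buf_value):
    buf_r_sum = torch.empty(buf_len, dtype=torch.float32, device=self.device)
    buf_adv = torch.empty(buf_len, dtype=torch.float32, device=self.device)
    pre_r_sum, pre_adv = 0.0, 0.0
    for i in range(buf_len - 1, -1, -1):
        buf_r_sum[i] = buf_reward[i] + buf_mask[i] * pre_r_sum
        pre_r_sum = buf_r_sum[i]
        buf_adv[i] = buf_reward[i] + buf_mask[i] * pre_adv - buf_value[i]  # TD error + decayed advantage
        pre_adv = buf_value[i] + buf_adv[i] * self.lambda_gae_adv
    return buf_r_sum, buf_adv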
def init(self, net_dim=256, state_dim=8, action_dim=2, reward_scale=1.0, gamma=0.99,
         learning_rate=1e-4, if_per_or_gae=False, env_num=1, gpu_id=0):
    """
    Explicitly call ``self.init()`` to overwrite the ``self.object`` in ``__init__()`` for multiprocessing.
    """
    AgentBase.init(self, net_dim=net_dim, state_dim=state_dim, action_dim=action_dim,
                   reward_scale=reward_scale, gamma=gamma, learning_rate=learning_rate,
                   if_per_or_gae=if_per_or_gae, env_num=env_num, gpu_id=gpu_id)
    self.alpha_log = torch.tensor((-np.log(action_dim) * np.e,), dtype=torch.float32,
                                  requires_grad=True, device=self.device)  # trainable parameter
    self.alpha_optim = torch.optim.Adam((self.alpha_log,), lr=learning_rate)
    self.target_entropy = np.log(action_dim)

    if if_per_or_gae:  # if_use_per
        self.criterion = torch.nn.SmoothL1Loss(reduction='none')
        self.get_obj_critic = self.get_obj_critic_per
    else:
        self.criterion = torch.nn.SmoothL1Loss(reduction='mean')
        self.get_obj_critic = self.get_obj_critic_raw
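# alpha_log parameterizes the SAC entropy temperature alpha = exp(alpha_log); it is trained
# so the policy entropy tracks target_entropy. A minimal sketch of the temperature update in
# the standard SAC convention (log_pi = log-probability of the freshly sampled action); the
# sign conventions used by this code base's actor may differ, so treat this as illustrative:
def update_alpha_sketch(self, log_pi):
    obj_alpha = -(self.alpha_log * (log_pi + self.target_entropy).detach()).mean()
    self.alpha_optim.zero_grad()
    obj_alpha.backward()
    self.alpha_optim.step()
    return self.alpha_log.exp().detach()  # alpha used to weight the entropy bonus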
def init(self, net_dim=256, state_dim=8, action_dim=2, reward_scale=1.0, gamma=0.99,
         learning_rate=1e-4, if_per_or_gae=False, env_num=1, gpu_id=0):
    AgentBase.init(self, net_dim=net_dim, state_dim=state_dim, action_dim=action_dim,
                   reward_scale=reward_scale, gamma=gamma, learning_rate=learning_rate,
                   if_per_or_gae=if_per_or_gae, env_num=env_num, gpu_id=gpu_id)
    # self.alpha_log = torch.tensor((-np.log(action_dim) * np.e,), dtype=torch.float32,
    #                               requires_grad=True, device=self.device)  # trainable parameter
    self.alpha_log = torch.zeros(1, dtype=torch.float32, requires_grad=True, device=self.device)
    self.alpha_optim = torch.optim.Adam((self.alpha_log,), lr=learning_rate)
    self.target_entropy = np.log(action_dim)
    self.alpha = self.alpha_log.cpu().exp().item()
    self.trajectory_list = list()

    if if_per_or_gae:  # if_use_per
        self.criterion = torch.nn.SmoothL1Loss(reduction='none')
        self.get_obj_critic = self.get_obj_critic_per
    else:
        self.criterion = torch.nn.MSELoss(reduction='mean')
        self.get_obj_critic = self.get_obj_critic_raw
def __init__(self):
    AgentBase.__init__(self)
    self.ClassCri = None  # self.ClassCri = QNetDuel if self.if_use_dueling else QNet
    self.if_use_dueling = True  # whether to use the dueling Q-network (QNetDuel)
    self.explore_rate = 0.25  # the probability of choosing an action randomly (epsilon-greedy)
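# explore_rate is the epsilon of epsilon-greedy exploration: with probability explore_rate a
# random action is taken, otherwise the argmax of the Q-values. A minimal sketch for a single
# state; self.action_dim is assumed to be stored by AgentBase.init, and the actual action
# selection method may operate on batches instead:
def select_action_sketch(self, state):
    if np.random.rand() < self.explore_rate:
        return np.random.randint(self.action_dim)  # uniform random action
    state = torch.as_tensor(state, dtype=torch.float32, device=self.device).unsqueeze(0)
    return self.cri(state).argmax(dim=1).item()    # greedy action from the Q-network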