def __init__(self):
    AgentBase.__init__(self)
    self.ClassAct = ShareBiConv
    self.ClassCri = self.ClassAct
    self.if_use_cri_target = True
    self.if_use_act_target = True
    self.obj_critic = (-np.log(0.5))**0.5  # for reliable_lambda
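
# A minimal sketch of how `obj_critic` can feed a "reliable lambda" weighting term
# (as hinted by the comment above); the smoothing coefficient and the exact update rule
# below are assumptions for illustration, not taken from this file.
import numpy as np

def reliable_lambda_sketch(obj_critic):
    # exp(-x ** 2) maps the running critic-loss estimate into (0, 1];
    # the init value (-np.log(0.5)) ** 0.5 makes this equal exactly 0.5 at the start.
    return np.exp(-obj_critic ** 2)

def smooth_obj_critic_sketch(obj_critic, critic_loss, tau=0.995):
    # hypothetical exponential smoothing of the critic loss before computing reliable_lambda
    return tau * obj_critic + (1.0 - tau) * critic_loss

assert abs(reliable_lambda_sketch((-np.log(0.5)) ** 0.5) - 0.5) < 1e-12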
def __init__(self):
    AgentBase.__init__(self)
    self.ClassAct = ActorBiConv
    self.ClassCri = CriticBiConv
    self.if_use_cri_target = False
    self.if_use_act_target = False
    self.explore_noise = 2**-8
    self.obj_critic = (-np.log(0.5))**0.5  # for reliable_lambda
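
# A minimal sketch of how a small Gaussian `explore_noise` (2**-8 above) might perturb the
# deterministic action during exploration; `actor` and the [-1, 1] action range are assumptions.
import torch

def select_action_gaussian_sketch(actor, state, explore_noise=2 ** -8):
    action = actor(state)                             # deterministic action from the policy net
    noise = torch.randn_like(action) * explore_noise  # zero-mean Gaussian exploration noise
    return (action + noise).clamp(-1.0, 1.0)          # keep the perturbed action in range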
def __init__(self):
    AgentBase.__init__(self)
    self.ClassAct = Actor
    self.ClassCri = Critic
    self.if_use_cri_target = True
    self.if_use_act_target = True
    self.explore_noise = 0.3  # explore noise of action (OrnsteinUhlenbeckNoise)
    self.ou_noise = None
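
# A minimal sketch of the Ornstein-Uhlenbeck process referenced by the comment above
# (sigma matches explore_noise = 0.3); theta, dt, and the zero mean are assumptions.
import numpy as np

class OrnsteinUhlenbeckNoiseSketch:
    def __init__(self, size, theta=0.15, sigma=0.3, dt=1e-2):
        self.theta, self.sigma, self.dt = theta, sigma, dt
        self.x = np.zeros(size)

    def __call__(self):
        # temporally correlated noise: dx = -theta * x * dt + sigma * sqrt(dt) * N(0, 1)
        dx = -self.theta * self.x * self.dt + self.sigma * np.sqrt(self.dt) * np.random.randn(*self.x.shape)
        self.x = self.x + dx
        return self.x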
def __init__(self):
    AgentBase.__init__(self)
    self.ClassAct = Actor
    self.ClassCri = CriticTwin
    self.if_use_cri_target = True
    self.if_use_act_target = True
    self.explore_noise = 0.1  # standard deviation of exploration noise
    self.policy_noise = 0.2  # standard deviation of policy noise
    self.update_freq = 2  # delay update frequency
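
# A minimal sketch of how `policy_noise` (target policy smoothing) and `update_freq`
# (delayed actor update) are typically used in TD3; `act_target`, `cri_target.get_q1_q2`,
# and `noise_clip` are assumptions for illustration.
import torch

def td3_target_q_sketch(cri_target, act_target, next_state, policy_noise=0.2, noise_clip=0.5):
    next_action = act_target(next_state)
    noise = (torch.randn_like(next_action) * policy_noise).clamp(-noise_clip, noise_clip)
    next_action = (next_action + noise).clamp(-1.0, 1.0)    # smoothed target action
    q1, q2 = cri_target.get_q1_q2(next_state, next_action)  # twin critics
    return torch.min(q1, q2)                                 # pessimistic target value

# delayed update: refresh the actor (and soft-update the targets) only every `update_freq`
# critic steps, e.g. `if update_step % self.update_freq == 0: ...`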
def __init__(self):
    AgentBase.__init__(self)
    self.ClassAct = ActorPPO
    self.ClassCri = CriticPPO
    self.if_off_policy = False
    self.ratio_clip = 0.2  # could be 0.00 ~ 0.50, ratio.clamp(1 - clip, 1 + clip)
    self.lambda_entropy = 0.02  # could be 0.00 ~ 0.10
    self.lambda_a_value = 1.00  # could be 0.25 ~ 8.00, the lambda of advantage value
    self.lambda_gae_adv = 0.98  # could be 0.95 ~ 0.99, GAE (Generalized Advantage Estimation, ICLR 2016)
    self.get_reward_sum = None  # self.get_reward_sum_gae if if_use_gae else self.get_reward_sum_raw
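
# A minimal sketch of the two reward-sum options hinted at by `self.get_reward_sum` and of the
# clipped surrogate that uses `ratio_clip`; the buffer layout (rewards, masks = gamma * (1 - done),
# values) and the function names are assumptions.
import torch

def get_reward_sum_raw_sketch(rewards, masks):
    # plain discounted return, computed backwards through the buffer
    returns = torch.empty_like(rewards)
    running = 0.0
    for i in range(rewards.shape[0] - 1, -1, -1):
        running = rewards[i] + masks[i] * running
        returns[i] = running
    return returns

def get_reward_sum_gae_sketch(rewards, masks, values, lambda_gae_adv=0.98):
    # GAE: advantage[i] = delta[i] + gamma * lambda * (1 - done) * advantage[i + 1]
    returns = torch.empty_like(rewards)
    advantages = torch.empty_like(rewards)
    next_value, next_adv = 0.0, 0.0
    for i in range(rewards.shape[0] - 1, -1, -1):
        delta = rewards[i] + masks[i] * next_value - values[i]
        next_adv = delta + masks[i] * lambda_gae_adv * next_adv
        advantages[i] = next_adv
        returns[i] = next_adv + values[i]
        next_value = values[i]
    return returns, advantages

def ppo_surrogate_sketch(new_logprob, old_logprob, advantage, ratio_clip=0.2):
    ratio = (new_logprob - old_logprob).exp()
    surrogate = torch.min(ratio * advantage, ratio.clamp(1 - ratio_clip, 1 + ratio_clip) * advantage)
    return -surrogate.mean()  # maximize the clipped objective by minimizing its negative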
def __init__(self):
    AgentBase.__init__(self)
    self.ClassCri = CriticTwin
    self.ClassAct = ActorSAC
    self.if_use_cri_target = True
    self.if_use_act_target = False
    self.alpha_log = None
    self.alpha_optim = None
    self.target_entropy = None
    self.obj_critic = (-np.log(0.5))**0.5  # for reliable_lambda
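
# A minimal sketch of how `alpha_log`, `alpha_optim`, and `target_entropy` are typically
# wired together for SAC's automatic entropy-temperature tuning; the target-entropy choice,
# initial alpha, and learning rate are assumptions.
import numpy as np
import torch

def init_alpha_sketch(action_dim, learning_rate=1e-4, device="cpu"):
    alpha_log = torch.tensor(np.log(0.2), dtype=torch.float32, requires_grad=True, device=device)
    alpha_optim = torch.optim.Adam((alpha_log,), lr=learning_rate)
    target_entropy = float(-action_dim)  # a common heuristic; some codebases use np.log(action_dim)
    return alpha_log, alpha_optim, target_entropy

def update_alpha_sketch(alpha_log, alpha_optim, log_prob, target_entropy):
    # raise alpha when the policy entropy (-log_prob) falls below target_entropy, lower it otherwise
    obj_alpha = (alpha_log * (-log_prob - target_entropy).detach()).mean()
    alpha_optim.zero_grad()
    obj_alpha.backward()
    alpha_optim.step()
    return alpha_log.exp().detach()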
def __init__(self):
    AgentBase.__init__(self)
    self.ClassCri = DiscreteCriSAC
    self.ClassAct = DiscreteActSAC
    self.train_reward = []
    self.if_use_cri_target = True
    self.if_use_act_target = False
    self.trajectory_list = []
    self.alpha_log = None
    self.alpha_optim = None
    self.target_entropy = None
    self.obj_critic = (-np.log(0.5))**0.5  # for reliable_lambda
    self.train_iteraion = 0
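
# For the discrete-action variant, `target_entropy` is often set to a fraction of the maximum
# possible policy entropy rather than -action_dim; the 0.98 factor below is an assumed convention.
# self.target_entropy = 0.98 * np.log(action_dim)  # 98% of the uniform policy's entropy, log|A|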
def __init__(self):
    AgentBase.__init__(self)
    self.ClassCri = None  # self.ClassCri = QNetDuel if self.if_use_dueling else QNet
    self.if_use_dueling = True  # self.ClassCri = QNetDuel if self.if_use_dueling else QNet
    self.explore_rate = 0.25  # the probability of choosing action randomly in epsilon-greedy
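
# A minimal sketch of epsilon-greedy action selection driven by `explore_rate`;
# `q_net`, the single-state input shape, and `action_dim` are assumptions.
import torch

def select_action_epsilon_greedy_sketch(q_net, state, action_dim, explore_rate=0.25):
    if torch.rand(1).item() < explore_rate:
        return torch.randint(action_dim, size=(1,)).item()  # random action with probability explore_rate
    return int(q_net(state).argmax(dim=-1).item())           # otherwise the greedy (highest-Q) action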