Example #1
    def __init__(self):
        AgentBase.__init__(self)
        self.ClassAct = ShareBiConv
        self.ClassCri = self.ClassAct  # actor and critic share the same network class
        self.if_use_cri_target = True  # keep a target network for the critic
        self.if_use_act_target = True  # keep a target network for the actor
        self.obj_critic = (-np.log(0.5))**0.5  # for reliable_lambda
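The initial value (-np.log(0.5))**0.5 is chosen so that a critic-loss-based weight of the form exp(-obj_critic**2) starts at 0.5. A minimal sketch of how such a reliable_lambda could be maintained during training (the smoothing factor, the helper name, and the update rule are assumptions, not taken from this snippet):

    import numpy as np

    def update_reliable_lambda(obj_critic, critic_loss, tau=0.995):
        # Exponentially smooth the critic loss, then map it to a weight in (0, 1].
        # With the initial value (-np.log(0.5))**0.5, exp(-obj_critic**2) equals 0.5.
        obj_critic = tau * obj_critic + (1.0 - tau) * critic_loss
        reliable_lambda = np.exp(-obj_critic ** 2)  # small critic loss -> weight near 1
        return obj_critic, reliable_lambda

    obj_critic = (-np.log(0.5)) ** 0.5               # same initialisation as above
    obj_critic, lamb = update_reliable_lambda(obj_critic, critic_loss=0.3)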
Example #2
    def __init__(self):
        AgentBase.__init__(self)
        self.ClassAct = ActorBiConv
        self.ClassCri = CriticBiConv
        self.if_use_cri_target = False
        self.if_use_act_target = False
        self.explore_noise = 2**-8  # standard deviation of exploration noise
        self.obj_critic = (-np.log(0.5))**0.5  # for reliable_lambda
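Here explore_noise = 2**-8 is the standard deviation of Gaussian noise added to the deterministic action during exploration. A minimal sketch, assuming a continuous action space in [-1, 1] and a PyTorch actor (the function name and the clamping are assumptions):

    import torch

    def select_action_with_noise(actor, state, explore_noise=2 ** -8):
        # Deterministic action from the actor, perturbed by small Gaussian noise.
        with torch.no_grad():
            action = actor(state.unsqueeze(0))[0]
        noise = torch.randn_like(action) * explore_noise
        return (action + noise).clamp(-1.0, 1.0)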
Example #3
    def __init__(self):
        AgentBase.__init__(self)
        self.ClassAct = Actor
        self.ClassCri = Critic
        self.if_use_cri_target = True
        self.if_use_act_target = True

        self.explore_noise = 0.3  # explore noise of action (OrnsteinUhlenbeckNoise)
        self.ou_noise = None
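The comment points to Ornstein-Uhlenbeck noise; ou_noise is left as None here because it can only be built once the action dimension is known. A minimal sketch of such a noise process (the theta and dt defaults are assumptions; sigma matches the explore_noise above):

    import numpy as np

    class OrnsteinUhlenbeckNoise:
        def __init__(self, size, theta=0.15, sigma=0.3, dt=1e-2):
            self.size, self.theta, self.sigma, self.dt = size, theta, sigma, dt
            self.x = np.zeros(size)  # internal state, mean-reverting towards 0

        def __call__(self):
            # dx = -theta * x * dt + sigma * sqrt(dt) * N(0, 1)
            dx = -self.theta * self.x * self.dt
            dx += self.sigma * np.sqrt(self.dt) * np.random.normal(size=self.size)
            self.x = self.x + dx
            return self.x

    ou_noise = OrnsteinUhlenbeckNoise(size=4, sigma=0.3)  # size = action_dim (assumed)
    action_offset = ou_noise()  # temporally correlated noise added to the actor's action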
Example #4
    def __init__(self):
        AgentBase.__init__(self)
        self.ClassAct = Actor
        self.ClassCri = CriticTwin
        self.if_use_cri_target = True
        self.if_use_act_target = True

        self.explore_noise = 0.1  # standard deviation of exploration noise
        self.policy_noise = 0.2  # standard deviation of policy noise
        self.update_freq = 2  # delay update frequency
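These three settings are the TD3 ingredients: exploration noise, target-policy smoothing noise, and a delayed actor update. A minimal sketch of one update step that uses policy_noise and update_freq (the get_q1_q2 twin-critic interface and the tensor shapes are assumptions):

    import torch
    import torch.nn.functional as F

    def td3_update_step(step, act, act_target, cri, act_optim, cri_optim,
                        state, action, reward, next_state, mask,
                        policy_noise=0.2, update_freq=2):
        with torch.no_grad():
            # Target-policy smoothing: perturb the target action with clipped noise.
            noise = (torch.randn_like(action) * policy_noise).clamp(-0.5, 0.5)
            next_action = (act_target(next_state) + noise).clamp(-1.0, 1.0)
            next_q = torch.min(*cri.get_q1_q2(next_state, next_action))  # twin critics
            q_label = reward + mask * next_q
        q1, q2 = cri.get_q1_q2(state, action)
        obj_critic = F.mse_loss(q1, q_label) + F.mse_loss(q2, q_label)
        cri_optim.zero_grad(); obj_critic.backward(); cri_optim.step()

        if step % update_freq == 0:  # delayed actor update
            obj_actor = -cri(state, act(state)).mean()
            act_optim.zero_grad(); obj_actor.backward(); act_optim.step()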
Example #5
    def __init__(self):
        AgentBase.__init__(self)
        self.ClassAct = ActorPPO
        self.ClassCri = CriticPPO

        self.if_off_policy = False
        self.ratio_clip = 0.2  # could be 0.00~0.50; used as ratio.clamp(1 - clip, 1 + clip)
        self.lambda_entropy = 0.02  # could be 0.00~0.10
        self.lambda_a_value = 1.00  # could be 0.25~8.00, the lambda of advantage value
        self.lambda_gae_adv = 0.98  # could be 0.95~0.99, GAE (Generalized Advantage Estimation, ICLR 2016)
        self.get_reward_sum = None  # self.get_reward_sum_gae if if_use_gae else self.get_reward_sum_raw
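ratio_clip bounds the importance-sampling ratio, lambda_entropy weights the entropy bonus, and lambda_gae_adv is the lambda used when get_reward_sum is bound to the GAE variant. A minimal sketch of the clipped surrogate objective these settings feed into (variable names are assumptions):

    import torch

    def ppo_actor_objective(new_logprob, old_logprob, advantage, entropy,
                            ratio_clip=0.2, lambda_entropy=0.02):
        ratio = (new_logprob - old_logprob).exp()
        surrogate1 = advantage * ratio
        surrogate2 = advantage * ratio.clamp(1.0 - ratio_clip, 1.0 + ratio_clip)
        obj_surrogate = torch.min(surrogate1, surrogate2).mean()  # pessimistic clipped objective
        return -(obj_surrogate + lambda_entropy * entropy.mean())  # loss to minimise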
Example #6
    def __init__(self):
        AgentBase.__init__(self)
        self.ClassCri = CriticTwin
        self.ClassAct = ActorSAC
        self.if_use_cri_target = True
        self.if_use_act_target = False

        self.alpha_log = None  # log of the SAC entropy temperature alpha
        self.alpha_optim = None  # optimizer for alpha_log
        self.target_entropy = None  # target policy entropy for automatic alpha tuning
        self.obj_critic = (-np.log(0.5))**0.5  # for reliable_lambda
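alpha_log, alpha_optim and target_entropy are placeholders for SAC's automatic entropy-temperature tuning and are filled in once the action dimension is known. A minimal sketch of the standard temperature update, written against the same attribute names (the initial values, the -action_dim heuristic, and the loss form are assumptions, not taken from this snippet):

    import torch

    action_dim, learning_rate = 4, 2 ** -14                 # assumed values
    target_entropy = -float(action_dim)                     # common heuristic: -|A|
    alpha_log = torch.zeros(1, requires_grad=True)
    alpha_optim = torch.optim.Adam([alpha_log], lr=learning_rate)

    def update_alpha(logprob):
        # Standard SAC temperature loss: raise alpha when entropy drops below target.
        obj_alpha = -(alpha_log * (logprob + target_entropy).detach()).mean()
        alpha_optim.zero_grad(); obj_alpha.backward(); alpha_optim.step()
        return alpha_log.exp().detach()  # alpha used to weight the entropy term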
Example #7
    def __init__(self):
        AgentBase.__init__(self)
        self.ClassCri = DiscreteCriSAC
        self.ClassAct = DiscreteActSAC
        self.train_reward = []
        self.if_use_cri_target = True
        self.if_use_act_target = False
        self.trajectory_list = []
        self.alpha_log = None
        self.alpha_optim = None
        self.target_entropy = None
        self.obj_critic = (-np.log(0.5))**0.5  # for reliable_lambda
        self.train_iteraion = 0
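For this discrete-action SAC variant the three None placeholders are also filled in later; with a discrete action space, target_entropy is usually derived from the number of actions rather than their dimension. A minimal sketch of one common initialisation (the 0.98 * log|A| heuristic, the learning rate, and the function name are assumptions):

    import numpy as np
    import torch

    def init_entropy_tuning(num_actions, learning_rate=2 ** -14):
        # Aim for a fixed fraction of the maximum possible entropy log|A|.
        target_entropy = 0.98 * float(np.log(num_actions))
        alpha_log = torch.zeros(1, requires_grad=True)
        alpha_optim = torch.optim.Adam([alpha_log], lr=learning_rate)
        return target_entropy, alpha_log, alpha_optim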
Example #8
    def __init__(self):
        AgentBase.__init__(self)
        self.ClassCri = None  # set later: QNetDuel if self.if_use_dueling else QNet
        self.if_use_dueling = True  # use the dueling Q-network architecture
        self.explore_rate = 0.25  # the probability of choosing action randomly in epsilon-greedy
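explore_rate = 0.25 is the epsilon of epsilon-greedy exploration, and ClassCri is resolved to QNetDuel or QNet later, depending on if_use_dueling. A minimal sketch of the epsilon-greedy action selection (the Q-network interface is an assumption):

    import torch
    import numpy.random as rd

    def select_action_epsilon_greedy(q_net, state, action_dim, explore_rate=0.25):
        if rd.rand() < explore_rate:             # explore: uniform random discrete action
            return rd.randint(action_dim)
        with torch.no_grad():                    # exploit: greedy action from the Q-network
            q_values = q_net(state.unsqueeze(0))
        return int(q_values.argmax(dim=1).item())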