def __init__(self):
    """Assemble the default parameter bundle for a TD3 agent."""
    algorithm_params = TD3AlgorithmParameters()
    # The critic bundle must be told how many Q networks the algorithm uses.
    network_params = OrderedDict([
        ("actor", TD3ActorNetworkParameters()),
        ("critic", TD3CriticNetworkParameters(algorithm_params.num_q_networks)),
    ])
    super().__init__(
        algorithm=algorithm_params,
        exploration=TD3AgentExplorationParameters(),
        memory=EpisodicExperienceReplayParameters(),
        networks=network_params,
    )
def __init__(self):
    """Assemble the default parameter bundle for a DDPG agent."""
    network_params = OrderedDict()
    network_params["actor"] = DDPGActorNetworkParameters()
    network_params["critic"] = DDPGCriticNetworkParameters()
    super().__init__(
        algorithm=DDPGAlgorithmParameters(),
        exploration=OUProcessParameters(),
        memory=EpisodicExperienceReplayParameters(),
        networks=network_params,
    )
def __init__(self):
    """Assemble the default parameter bundle for a Clipped PPO agent."""
    # Exploration policy is keyed by the action-space type of the environment.
    exploration_per_space = {
        DiscreteActionSpace: CategoricalParameters(),
        BoxActionSpace: AdditiveNoiseParameters(),
    }
    super().__init__(
        algorithm=ClippedPPOAlgorithmParameters(),
        exploration=exploration_per_space,
        memory=EpisodicExperienceReplayParameters(),
        networks={"main": ClippedPPONetworkParameters()},
    )
def __init__(self):
    """Assemble the default parameter bundle for a DDPG agent."""
    network_params = {
        "actor": DDPGActorNetworkParameters(),
        "critic": DDPGCriticNetworkParameters(),
    }
    super().__init__(
        algorithm=DDPGAlgorithmParameters(),
        exploration=OUProcessParameters(),
        memory=EpisodicExperienceReplayParameters(),
        networks=network_params,
    )
def __init__(self):
    """Assemble the default parameter bundle for a PPO agent."""
    network_params = {
        "critic": PPOCriticNetworkParameters(),
        "actor": PPOActorNetworkParameters(),
    }
    super().__init__(
        algorithm=PPOAlgorithmParameters(),
        exploration=AdditiveNoiseParameters(),
        memory=EpisodicExperienceReplayParameters(),
        networks=network_params,
    )
def __init__(self):
    """Assemble the default parameter bundle for a PPO agent."""
    # Exploration policy is keyed by the action-space type of the environment.
    exploration_per_space = {
        DiscreteActionSpace: CategoricalParameters(),
        BoxActionSpace: AdditiveNoiseParameters(),
    }
    network_params = {
        "critic": PPOCriticNetworkParameters(),
        "actor": PPOActorNetworkParameters(),
    }
    super().__init__(
        algorithm=PPOAlgorithmParameters(),
        exploration=exploration_per_space,
        memory=EpisodicExperienceReplayParameters(),
        networks=network_params,
    )
def __init__(self, agent_index, use_batchnorm=False):
    """Assemble the default parameter bundle for one MADDPG agent.

    :param agent_index: index of this agent among all agents in the
        multi-agent setup; appended to the network names so each agent's
        actor/critic are registered under unique keys.
    :param use_batchnorm: forwarded to both the actor and critic network
        parameters to toggle batch normalization.
    """
    # Removed the obsolete commented-out signature and dead assignment that
    # previously cluttered this constructor.
    super().__init__(
        algorithm=MADDPGAlgorithmParameters(),
        exploration=OUProcessParameters(),
        memory=EpisodicExperienceReplayParameters(),
        networks=OrderedDict([
            (f"actor{agent_index}",
             MADDPGActorNetworkParameters(use_batchnorm=use_batchnorm)),
            (f"critic{agent_index}",
             MADDPGCriticNetworkParameters(use_batchnorm=use_batchnorm)),
        ]))
def __init__(self):
    """PAL agent defaults: base parameters with a PAL algorithm and episodic replay memory."""
    super().__init__()
    # Override only what differs from the base defaults.
    self.memory = EpisodicExperienceReplayParameters()
    self.algorithm = PALAlgorithmParameters()
def __init__(self):
    """Assemble the default parameter bundle for an ACER agent."""
    # ACER only supports discrete action spaces here, hence a single entry.
    exploration_per_space = {DiscreteActionSpace: CategoricalParameters()}
    super().__init__(
        algorithm=ACERAlgorithmParameters(),
        exploration=exploration_per_space,
        memory=EpisodicExperienceReplayParameters(),
        networks={"main": ACERNetworkParameters()},
    )
def __init__(self):
    """Assemble the default parameter bundle for a NAF agent."""
    network_params = {"main": NAFNetworkParameters()}
    super().__init__(
        algorithm=NAFAlgorithmParameters(),
        exploration=OUProcessParameters(),
        memory=EpisodicExperienceReplayParameters(),
        networks=network_params,
    )
def __init__(self):
    """Assemble the default parameter bundle for a human-controlled agent."""
    # NOTE(review): the network parameters reuse the BC (behavioral cloning)
    # network definition — presumably intentional; confirm against callers.
    network_params = {"main": BCNetworkParameters()}
    super().__init__(
        algorithm=HumanAlgorithmParameters(),
        exploration=EGreedyParameters(),
        memory=EpisodicExperienceReplayParameters(),
        networks=network_params,
    )
def __init__(self):
    """Mixed Monte Carlo agent defaults: base parameters with an MMC algorithm and episodic replay memory."""
    super().__init__()
    # Override only what differs from the base defaults.
    self.memory = EpisodicExperienceReplayParameters()
    self.algorithm = MixedMonteCarloAlgorithmParameters()
def __init__(self):
    """Assemble the default parameter bundle for a Clipped PPO agent with additive-noise exploration."""
    network_params = {"main": ClippedPPONetworkParameters()}
    super().__init__(
        algorithm=ClippedPPOAlgorithmParameters(),
        exploration=AdditiveNoiseParameters(),
        memory=EpisodicExperienceReplayParameters(),
        networks=network_params,
    )