Example #1
def __init__(self):
    # Default DDPG agent configuration: Ornstein-Uhlenbeck exploration noise,
    # episodic experience replay, and separate actor/critic networks.
    super().__init__(algorithm=DDPGAlgorithmParameters(),
                     exploration=OUProcessParameters(),
                     memory=EpisodicExperienceReplayParameters(),
                     networks=OrderedDict([
                         ("actor", DDPGActorNetworkParameters()),
                         ("critic", DDPGCriticNetworkParameters())
                     ]))
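These snippets appear to come from Intel's Reinforcement Learning Coach (rl_coach). As a rough illustration, the resulting agent parameters would typically be handed to a graph manager together with an environment; the import paths, the BasicRLGraphManager usage, and the Pendulum-v0 level below are assumptions, not part of the listed example.

from rl_coach.agents.ddpg_agent import DDPGAgentParameters
from rl_coach.environments.gym_environment import GymVectorEnvironment
from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
from rl_coach.graph_managers.graph_manager import SimpleSchedule

# Sketch: wire the default DDPG agent parameters into a simple training run
# on a continuous-control task (the environment choice is illustrative).
graph_manager = BasicRLGraphManager(
    agent_params=DDPGAgentParameters(),
    env_params=GymVectorEnvironment(level='Pendulum-v0'),
    schedule_params=SimpleSchedule())
graph_manager.improve()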
Example #2
def __init__(self):
    # Same DDPG defaults as Example #1, with the networks passed as a plain
    # dict instead of an OrderedDict.
    super().__init__(algorithm=DDPGAlgorithmParameters(),
                     exploration=OUProcessParameters(),
                     memory=EpisodicExperienceReplayParameters(),
                     networks={
                         "actor": DDPGActorNetworkParameters(),
                         "critic": DDPGCriticNetworkParameters()
                     })
Example #3
def __init__(self, agent_index, use_batchnorm=False):
    # MADDPG agent configuration: the actor/critic networks are keyed by
    # agent_index so each agent in a multi-agent setup owns its own pair.
    # def __init__(self, use_batchnorm=False, name, model, obs_shape_n, act_space_n, agent_index, local_q_func=False):
    #     self.agent_index = agent_index
    super().__init__(
        algorithm=MADDPGAlgorithmParameters(),
        exploration=OUProcessParameters(),
        memory=EpisodicExperienceReplayParameters(),
        networks=OrderedDict([
            ("actor" + str(agent_index),
             MADDPGActorNetworkParameters(use_batchnorm=use_batchnorm)),
            ("critic" + str(agent_index),
             MADDPGCriticNetworkParameters(use_batchnorm=use_batchnorm))
        ]))
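Because the networks are named per agent_index, a multi-agent run would typically build one parameter object per agent. A minimal sketch, assuming this __init__ belongs to a class named MADDPGAgentParameters (the class name and the agent count are illustrative):

# Hypothetical: one parameter object per agent, each owning uniquely named
# "actor<i>" / "critic<i>" networks as constructed in the __init__ above.
num_agents = 3
all_agent_params = [MADDPGAgentParameters(agent_index=i) for i in range(num_agents)]
# e.g. the first agent's networks would be keyed 'actor0' and 'critic0'.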
Example #4
def __init__(self):
    # NAF agent configuration: a single "main" network combined with
    # OU exploration noise and episodic experience replay.
    super().__init__(algorithm=NAFAlgorithmParameters(),
                     exploration=OUProcessParameters(),
                     memory=EpisodicExperienceReplayParameters(),
                     networks={"main": NAFNetworkParameters()})
Example #5
# top agent
top_agent_params = HACDDPGAgentParameters()

top_agent_params.memory = EpisodicHRLHindsightExperienceReplayParameters()
top_agent_params.memory.max_size = (MemoryGranularity.Transitions, 10000000)
top_agent_params.memory.hindsight_transitions_per_regular_transition = 3
top_agent_params.memory.hindsight_goal_selection_method = HindsightGoalSelectionMethod.Future
top_agent_params.memory.goals_space = goals_space
top_agent_params.algorithm.num_consecutive_playing_steps = EnvironmentEpisodes(32)
top_agent_params.algorithm.num_consecutive_training_steps = 40
top_agent_params.algorithm.num_steps_between_copying_online_weights_to_target = TrainingSteps(40)

# exploration - OU process
top_agent_params.exploration = OUProcessParameters()
top_agent_params.exploration.theta = 0.1

# actor
top_actor = top_agent_params.network_wrappers['actor']
top_actor.input_embedders_parameters = {
    'observation': InputEmbedderParameters(scheme=EmbedderScheme.Empty),
    'desired_goal': InputEmbedderParameters(scheme=EmbedderScheme.Empty)
}
top_actor.middleware_parameters.scheme = [Dense(64)] * 3
top_actor.learning_rate = 0.001
top_actor.batch_size = 4096

# critic
top_critic = top_agent_params.network_wrappers['critic']
top_critic.input_embedders_parameters = {