def __init__(self, state_size, action_size, num_agents, lr_actor=1.0e-4, lr_critic=1.0e-4):
    super(DDPGAgent, self).__init__()
    self.actor = Actor(state_size, action_size).to(device)
    self.critic = Critic(state_size, action_size, num_agents, seed=0).to(device)
    self.target_actor = Actor(state_size, action_size).to(device)
    self.target_critic = Critic(state_size, action_size, num_agents, seed=0).to(device)
    self.noise = OUNoise(action_size, scale=1.0)

    # initialize targets same as original networks
    hard_update(self.target_actor, self.actor)
    hard_update(self.target_critic, self.critic)

    self.actor_optimizer = Adam(self.actor.parameters(), lr=lr_actor)
    self.critic_optimizer = Adam(self.critic.parameters(), lr=lr_critic)
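# A minimal sketch of the hard_update helper that every snippet in this
# collection calls to make the target networks start identical to the local
# ones. The (target, source) argument order matches the calls here; the body
# is an assumption based on the standard implementation, not taken verbatim
# from any of the original repositories.
def hard_update(target, source):
    """Copy every source network parameter into the target network (tau = 1)."""
    for target_param, source_param in zip(target.parameters(), source.parameters()):
        target_param.data.copy_(source_param.data)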
def __init__(self, in_actor, hidden_in_actor, hidden_out_actor, out_actor,
             in_critic, hidden_in_critic, hidden_out_critic,
             lr_actor=1.0e-2, lr_critic=1.0e-2, weight_decay=1.0e-5, device='cuda:0'):
    super(DDPGAgent, self).__init__()
    hidden_gat_dim = 64
    self.actor = ActorNetwork(in_actor, hidden_in_actor, hidden_out_actor, out_actor, actor=True).to(device)
    self.critic = CriticNetwork(in_critic, hidden_gat_dim, hidden_in_critic, hidden_out_critic, 1).to(device)
    # print("actor parameters are: " + str(self.count_parameters(self.actor)))
    # print("critic parameters are: " + str(self.count_parameters(self.critic)))
    self.target_actor = ActorNetwork(in_actor, hidden_in_actor, hidden_out_actor, out_actor, actor=True).to(device)
    self.target_critic = CriticNetwork(in_critic, hidden_gat_dim, hidden_in_critic, hidden_out_critic, 1).to(device)
    self.noise = OUNoise(out_actor, scale=1.0)
    self.device = device

    # initialize targets same as original networks
    hard_update(self.target_actor, self.actor)
    hard_update(self.target_critic, self.critic)

    self.actor_optimizer = Adam(self.actor.parameters(), lr=lr_actor)
    self.critic_optimizer = Adam(self.critic.parameters(), lr=lr_critic, weight_decay=weight_decay)
def __init__(self, state_size, action_size, random_seed, num_agents=2,
             lr_actor=1e-3, lr_critic=1e-3, gamma=0.99, tau=1e-3,
             batch_size=512, buffer_size=int(1e5), update_every=20, num_updates=10):
    super(DDPGAgent, self).__init__()
    self.state_size = state_size
    self.action_size = action_size
    self.n_agents = num_agents
    # seed both RNGs; random.seed() returns None, so it is called for its
    # side effect rather than assigned
    random.seed(random_seed)
    self.seed = torch.manual_seed(random_seed)
    self.update_every = update_every
    self.num_updates = num_updates
    self.batch_size = batch_size
    self.gamma = gamma
    self.tau = tau

    # Actor and Critic with their respective targets
    self.actor = Actor(state_size, action_size, random_seed).to(device)
    self.target_actor = Actor(state_size, action_size, random_seed).to(device)
    self.critic = Critic(state_size, action_size, random_seed).to(device)
    self.target_critic = Critic(state_size, action_size, random_seed).to(device)

    # Optimizers
    self.actor_optimizer = Adam(self.actor.parameters(), lr=lr_actor)
    self.critic_optimizer = Adam(self.critic.parameters(), lr=lr_critic)

    hard_update(self.target_actor, self.actor)
    hard_update(self.target_critic, self.critic)

    # Noise process
    self.noise = OUNoise((num_agents, action_size), random_seed)

    self.memory = ReplayBuffer(buffer_size, batch_size, random_seed)
    self.t_step = 0
def __init__(self, in_actor, hidden_in_actor, hidden_out_actor, out_actor,
             in_critic, hidden_in_critic, hidden_out_critic,
             lr_actor=1.0e-3, lr_critic=1.0e-3,
             noise_dist: str = 'normal', checkpoint_path=None) -> None:
    super(DDPGAgent, self).__init__()
    self.actor = Network(in_actor, hidden_in_actor, hidden_out_actor, out_actor, actor=True).to(device)
    self.critic = Network(in_critic, hidden_in_critic, hidden_out_critic, 1).to(device)
    self.target_actor = Network(in_actor, hidden_in_actor, hidden_out_actor, out_actor, actor=True).to(device)
    self.target_critic = Network(in_critic, hidden_in_critic, hidden_out_critic, 1).to(device)
    self.noise = OUNoise(out_actor, scale=1.0, noise_dist=noise_dist)

    # initialize targets same as original networks
    hard_update(self.target_actor, self.actor)
    hard_update(self.target_critic, self.critic)

    self.actor_optimizer = Adam(self.actor.parameters(), lr=lr_actor)
    self.critic_optimizer = Adam(self.critic.parameters(), lr=lr_critic, weight_decay=1.e-5)

    # optionally restore both local and target networks from a saved checkpoint
    if checkpoint_path:
        checkpoint = torch.load(checkpoint_path)
        self.actor.load_state_dict(checkpoint[0]['actor_params'])
        self.target_actor.load_state_dict(checkpoint[0]['actor_params'])
        self.critic.load_state_dict(checkpoint[0]['critic_params'])
        self.target_critic.load_state_dict(checkpoint[0]['critic_params'])
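# The loading code above implies the checkpoint is a per-agent sequence of
# dicts keyed by 'actor_params' and 'critic_params'. A hedged sketch of the
# matching save side, assumed rather than taken from the original repository:
import torch

def save_checkpoint(agents, path):
    """Save every agent's actor/critic weights as a list of dicts."""
    torch.save([{'actor_params': a.actor.state_dict(),
                 'critic_params': a.critic.state_dict()} for a in agents], path)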
def __init__(self, in_actor, out_actor, in_critic, lr_actor=1.0e-4, lr_critic=1.0e-3):
    super(DDPGAgent, self).__init__()
    self.actor = Actor(in_actor, out_actor).to(device)
    self.critic = Critic(in_critic, out_actor * 2).to(device)
    self.target_actor = Actor(in_actor, out_actor).to(device)
    self.target_critic = Critic(in_critic, out_actor * 2).to(device)
    self.noise = OUNoise(out_actor, scale=1.0)

    # initialize targets same as original networks
    hard_update(self.target_actor, self.actor)
    hard_update(self.target_critic, self.critic)

    self.actor_optimizer = Adam(self.actor.parameters(), lr=lr_actor)
    self.critic_optimizer = Adam(self.critic.parameters(), lr=lr_critic)
def __init__(self, in_actor, out_actor, hidden_in_actor, hidden_out_actor,
             state_dim_in_critic, action_dim_inp_critic, hidden_in_critic, hidden_out_critic,
             lr_actor=1.0e-4, lr_critic=1.0e-3):
    super(DDPGAgent, self).__init__()
    self.actor = Actor(in_actor, out_actor, hidden_in_actor, hidden_out_actor).to(device)
    self.critic = Critic(state_dim_in_critic, action_dim_inp_critic, hidden_in_critic, hidden_out_critic).to(device)
    self.target_actor = Actor(in_actor, out_actor, hidden_in_actor, hidden_out_actor).to(device)
    self.target_critic = Critic(state_dim_in_critic, action_dim_inp_critic, hidden_in_critic, hidden_out_critic).to(device)
    self.noise = OUNoise(out_actor, scale=1.0)
    self.tau = TAU

    # initialize targets same as original networks
    hard_update(self.target_actor, self.actor)
    hard_update(self.target_critic, self.critic)

    self.actor_optimizer = Adam(self.actor.parameters(), lr=lr_actor)
    self.critic_optimizer = Adam(self.critic.parameters(), lr=lr_critic, weight_decay=1.e-5)
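# A minimal sketch of the OUNoise helper most of these agents use for
# exploration: an Ornstein-Uhlenbeck process whose samples are temporally
# correlated, which suits continuous-control DDPG. The (action_dimension,
# scale) signature matches the calls in this collection; the mu/theta/sigma
# defaults and the body are assumptions based on the common implementation,
# not taken from any of the original repositories.
import numpy as np

class OUNoise:
    def __init__(self, action_dimension, scale=0.1, mu=0.0, theta=0.15, sigma=0.2):
        self.action_dimension = action_dimension
        self.scale = scale
        self.mu = mu
        self.theta = theta
        self.sigma = sigma
        self.state = np.ones(self.action_dimension) * self.mu

    def reset(self):
        # restart the process at its mean between episodes
        self.state = np.ones(self.action_dimension) * self.mu

    def noise(self):
        # dx = theta * (mu - x) + sigma * N(0, 1): a mean-reverting random walk
        x = self.state
        dx = self.theta * (self.mu - x) + self.sigma * np.random.randn(len(x))
        self.state = x + dx
        return self.state * self.scale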
def __init__(self, state_size, action_size, num_agents,
             hidden_in_actor=512, hidden_out_actor=256, lr_actor=1e-4,
             hidden_in_critic=512, hidden_out_critic=256, lr_critic=3e-4,
             weight_decay_critic=0, seed=1, device='cpu'):
    super(DDPGAgent, self).__init__()
    self.device = device

    # Actor
    self.actor = ActorNetwork(state_size, hidden_in_actor, hidden_out_actor, action_size, seed).to(device)
    self.target_actor = ActorNetwork(state_size, hidden_in_actor, hidden_out_actor, action_size, seed).to(device)
    self.actor_optimizer = Adam(self.actor.parameters(), lr=lr_actor)

    # Critic
    self.critic = CriticNetwork(state_size, action_size, num_agents, hidden_in_critic, hidden_out_critic, seed).to(device)
    self.target_critic = CriticNetwork(state_size, action_size, num_agents, hidden_in_critic, hidden_out_critic, seed).to(device)
    self.critic_optimizer = Adam(self.critic.parameters(), lr=lr_critic, weight_decay=weight_decay_critic)

    # Noise
    self.noise = OUNoise(action_size, seed, scale=1.0)

    # initialize targets same as original networks
    hard_update(self.target_actor, self.actor)
    hard_update(self.target_critic, self.critic)
def __init__(self, in_actor, hidden_in_actor, hidden_out_actor, out_actor,
             in_critic, hidden_in_critic, hidden_out_critic,
             lr_actor=1.0e-2, lr_critic=1.0e-2):
    super(DDPGAgent, self).__init__()
    self.actor = Network(in_actor, hidden_in_actor, hidden_out_actor, out_actor, actor=True).to(device)
    self.critic = Network(in_critic, hidden_in_critic, hidden_out_critic, 1).to(device)
    self.target_actor = Network(in_actor, hidden_in_actor, hidden_out_actor, out_actor, actor=True).to(device)
    self.target_critic = Network(in_critic, hidden_in_critic, hidden_out_critic, 1).to(device)

    # RNoise in place of the usual Ornstein-Uhlenbeck process
    # self.noise = OUNoise(out_actor, scale=1.0)
    self.noise = RNoise(out_actor, 0.5)

    # epsilon schedule for decaying exploration noise over training
    self.epsilon = 1.
    self.epsilon_decay_rate = 0.999
    self.epsilon_min = 0.2

    # initialize targets same as original networks
    hard_update(self.target_actor, self.actor)
    hard_update(self.target_critic, self.critic)

    self.actor_optimizer = Adam(self.actor.parameters(), lr=lr_actor, weight_decay=0.0)
    self.critic_optimizer = Adam(self.critic.parameters(), lr=lr_critic, weight_decay=0.0)
def __init__(self, in_actor, hidden_in_actor, hidden_out_actor, out_actor,
             in_critic, hidden_in_critic, hidden_out_critic,
             lr_actor=1.0e-4, lr_critic=1.0e-4):
    super(DDPGAgent, self).__init__()
    self.state_size = in_actor
    self.action_size = out_actor
    self.actor = Network(in_actor, hidden_in_actor, hidden_out_actor, out_actor, actor=True).to(device)
    self.critic = Network(in_critic, hidden_in_critic, hidden_out_critic, 1).to(device)
    self.target_actor = Network(in_actor, hidden_in_actor, hidden_out_actor, out_actor, actor=True).to(device)
    self.target_critic = Network(in_critic, hidden_in_critic, hidden_out_critic, 1).to(device)
    self.noise = OUNoise(out_actor, scale=1.0)
    # self.noise = OUNoise(action_size)  # single-agent only
    self.noise_scale = NOISE_START

    # initialize targets same as original networks
    hard_update(self.target_actor, self.actor)
    hard_update(self.target_critic, self.critic)

    self.actor_optimizer = Adam(self.actor.parameters(), lr=lr_actor)
    self.critic_optimizer = Adam(self.critic.parameters(), lr=lr_critic, weight_decay=1.e-5)
def __init__(self, state_size, action_size, num_agents, hidden_actor, hidden_critic,
             lr_actor, lr_critic, buffer_size, agent_id, use_PER=False, seed=0):
    super(DDPGAgent, self).__init__()
    self.seed = torch.manual_seed(seed)
    self.agent_id = agent_id

    # centralized critic sees all agents: num_agents*state_size observations
    # and num_agents*action_size actions
    self.actor_local = ActorNet(state_size, hidden_actor, action_size, seed=seed).to(device)
    self.critic_local = CriticNet(num_agents*state_size, num_agents*action_size, hidden_critic, 1, seed=seed).to(device)
    self.actor_target = ActorNet(state_size, hidden_actor, action_size, seed=seed).to(device)
    self.critic_target = CriticNet(num_agents*state_size, num_agents*action_size, hidden_critic, 1, seed=seed).to(device)

    self.actor_optimizer = Adam(self.actor_local.parameters(), lr=lr_actor)
    self.critic_optimizer = Adam(self.critic_local.parameters(), lr=lr_critic, weight_decay=0.)  # weight_decay=1.e-5

    self.memory = ReplayBuffer(buffer_size, num_agents, state_size, action_size, use_PER)

    # initialize targets same as original networks
    hard_update(self.actor_target, self.actor_local)
    hard_update(self.critic_target, self.critic_local)
def __init__(self, in_actor, hidden_in_actor, hidden_out_actor, out_actor,
             in_critic, hidden_in_critic, hidden_out_critic,
             lr_actor=3.0e-5, lr_critic=1.0e-5):
    super(DDPGAgent, self).__init__()
    self.actor = Network(in_actor, hidden_in_actor, hidden_out_actor, out_actor, actor=True).to(device)
    self.critic = Network(in_critic, hidden_in_critic, hidden_out_critic, 1).to(device)
    self.target_actor = Network(in_actor, hidden_in_actor, hidden_out_actor, out_actor, actor=True).to(device)
    self.target_critic = Network(in_critic, hidden_in_critic, hidden_out_critic, 1).to(device)
    self.noise = OUNoise(out_actor, scale=1.0)

    # initialize targets same as original networks
    hard_update(self.target_actor, self.actor)
    hard_update(self.target_critic, self.critic)

    self.actor_optimizer = Adam(self.actor.parameters(), lr=lr_actor, weight_decay=1.e-5)
    self.critic_optimizer = Adam(self.critic.parameters(), lr=lr_critic, weight_decay=1.e-5)
def __init__(self, discount_factor=0.95, tau=0.1):
    super(MADDPG, self).__init__()

    # DDPGAgent used only to train independent actors
    self.maddpg_agent = [
        DDPGAgent(24, 256, 128, 2),
        DDPGAgent(24, 256, 128, 2)
    ]

    # Shared critic trained for both agents
    # critic input = obs_full + actions = 48 + 2 + 2 = 52
    self.critic = Network(52, 256, 128, 1).to(device)
    self.target_critic = Network(52, 256, 128, 1).to(device)

    # initialize targets same as original networks
    hard_update(self.target_critic, self.critic)

    self.critic_optimizer = Adam(self.critic.parameters(), lr=1.0e-3, weight_decay=0.0)

    self.discount_factor = discount_factor
    self.tau = tau
    self.iter = 0
def __init__(self, state_size, action_size, num_agents, lr_actor=1.0e-4, lr_critic=1.0e-3):
    super(DDPGAgent, self).__init__()
    self.actor = networkforall.Actor(state_size, action_size).to(device)
    self.critic = networkforall.Critic(state_size, action_size, num_agents).to(device)
    self.target_actor = networkforall.Actor(state_size, action_size).to(device)
    self.target_critic = networkforall.Critic(state_size, action_size, num_agents).to(device)
    self.noise = OUNoise(action_size, scale=1.0)

    # initialize targets same as original networks
    hard_update(self.target_actor, self.actor)
    hard_update(self.target_critic, self.critic)

    self.actor_optimizer = Adam(self.actor.parameters(), lr=lr_actor)
    self.critic_optimizer = Adam(self.critic.parameters(), lr=lr_critic)
def hard_update_targets(self, agent_num):
    """Hard-update one agent's target networks to match its local networks."""
    self.iter += 1
    ddpg_agent = self.maddpg_agent[agent_num]
    hard_update(ddpg_agent.target_actor, ddpg_agent.actor)
    hard_update(ddpg_agent.target_critic, ddpg_agent.critic)
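# Several of these agents also carry a tau hyperparameter for the tau-weighted
# soft update that usually accompanies hard_update. A minimal sketch of that
# companion helper, assumed rather than taken from any of the original
# repositories:
def soft_update(target, source, tau):
    """Polyak-average source parameters into target: theta_t <- tau*theta_s + (1-tau)*theta_t."""
    for target_param, source_param in zip(target.parameters(), source.parameters()):
        target_param.data.copy_(tau * source_param.data + (1.0 - tau) * target_param.data)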
def __init__(self, beta, pi, v_fn, fvp):
    super(TRPOmp, self).__init__(beta, v_fn)
    self.name = "TRPO"
    self.pi = pi
    self.fvp = fvp
    # initialize pi with beta's parameters
    utils.hard_update(self.pi, self.beta)
def __init__(self, beta, q_fn, q_fn_targ, replay_memory):
    super(TD3, self).__init__(beta, q_fn, None, replay_memory, None)
    self.name = "TD3"
    self.q_fn_targ = q_fn_targ
    # initialize the target Q-network with the online Q-network's parameters
    utils.hard_update(self.q_fn_targ, self.q_fn)