Ejemplo n.º 1
0
    def __init__(self, state_size, action_size, num_agents, lr_actor=1.0e-4, lr_critic=1.0e-4):
        """Set up one DDPG agent: local/target actor-critic networks, noise, optimizers.

        state_size / action_size: per-agent observation and action dimensions.
        num_agents: forwarded to the Critic constructor.
        lr_actor / lr_critic: Adam learning rates for actor and critic.
        """
        super(DDPGAgent, self).__init__()

        # Local networks and their target copies (hard-initialised below).
        self.actor = Actor(state_size, action_size).to(device)
        self.critic = Critic(state_size, action_size, num_agents, seed=0).to(device)
        self.target_actor = Actor(state_size, action_size).to(device)
        self.target_critic = Critic(state_size, action_size, num_agents, seed=0).to(device)

        # Exploration noise over the action vector (OUNoise).
        self.noise = OUNoise(action_size, scale=1.0 )

        
        # initialize targets same as original networks
        hard_update(self.target_actor, self.actor)
        hard_update(self.target_critic, self.critic)

        self.actor_optimizer = Adam(self.actor.parameters(), lr=lr_actor)
        self.critic_optimizer = Adam(self.critic.parameters(), lr=lr_critic)
Ejemplo n.º 2
0
    def __init__(self,
                 in_actor,
                 hidden_in_actor,
                 hidden_out_actor,
                 out_actor,
                 in_critic,
                 hidden_in_critic,
                 hidden_out_critic,
                 lr_actor=1.0e-2,
                 lr_critic=1.0e-2,
                 weight_decay=1.0e-5,
                 device='cuda:0'):
        """Build local/target actor-critic networks, OU noise and optimizers.

        The critic takes an extra hidden width (named "gat" in this codebase);
        the critic optimizer applies L2 regularisation via `weight_decay`.
        """
        super(DDPGAgent, self).__init__()

        # Extra hidden size handed to the critic (GAT layer, per its name).
        gat_hidden = 64

        # Construction order kept as-is so weight init consumes the global
        # RNG in the same sequence.
        self.actor = ActorNetwork(
            in_actor, hidden_in_actor, hidden_out_actor, out_actor,
            actor=True).to(device)
        self.critic = CriticNetwork(
            in_critic, gat_hidden, hidden_in_critic, hidden_out_critic,
            1).to(device)
        self.target_actor = ActorNetwork(
            in_actor, hidden_in_actor, hidden_out_actor, out_actor,
            actor=True).to(device)
        self.target_critic = CriticNetwork(
            in_critic, gat_hidden, hidden_in_critic, hidden_out_critic,
            1).to(device)

        self.noise = OUNoise(out_actor, scale=1.0)
        self.device = device

        # Targets start as exact copies of the local networks.
        hard_update(self.target_actor, self.actor)
        hard_update(self.target_critic, self.critic)

        self.actor_optimizer = Adam(self.actor.parameters(), lr=lr_actor)
        self.critic_optimizer = Adam(
            self.critic.parameters(), lr=lr_critic, weight_decay=weight_decay)
Ejemplo n.º 3
0
    def __init__(self,
                 state_size,
                 action_size,
                 random_seed,
                 num_agents=2,
                 lr_actor=1e-3,
                 lr_critic=1e-3,
                 gamma=0.99,
                 tau=1e-3,
                 batch_size=512,
                 buffer_size=int(1e5),
                 update_every=20,
                 num_updates=10):
        """DDPG agent with a replay buffer and a periodic update schedule.

        Args:
            state_size: observation dimension.
            action_size: action dimension.
            random_seed: seed applied to Python's and torch's RNGs.
            num_agents: number of agents sharing the noise process.
            lr_actor / lr_critic: Adam learning rates.
            gamma: stored discount hyperparameter.
            tau: stored interpolation hyperparameter for target updates.
            batch_size / buffer_size: replay sampling size and capacity.
            update_every / num_updates: stored scheduling hyperparameters.
        """
        super(DDPGAgent, self).__init__()
        self.state_size = state_size
        self.action_size = action_size
        self.n_agents = num_agents
        # Fix: the original assigned self.seed twice, so the first value was
        # discarded (random.seed returns None anyway). Seed both RNGs once
        # and keep only the torch generator handle.
        random.seed(random_seed)
        self.seed = torch.manual_seed(random_seed)
        self.update_every = update_every
        self.num_updates = num_updates
        self.batch_size = batch_size
        self.gamma = gamma
        self.tau = tau

        # Actor and Critic with their respective target copies
        self.actor = Actor(state_size, action_size, random_seed).to(device)
        self.target_actor = Actor(state_size, action_size,
                                  random_seed).to(device)
        self.critic = Critic(state_size, action_size, random_seed).to(device)
        self.target_critic = Critic(state_size, action_size,
                                    random_seed).to(device)

        # Optimizers
        self.actor_optimizer = Adam(self.actor.parameters(), lr=lr_actor)
        self.critic_optimizer = Adam(self.critic.parameters(), lr=lr_critic)

        # Targets start as exact copies of the local networks.
        hard_update(self.target_actor, self.actor)
        hard_update(self.target_critic, self.critic)

        # Noise process and replay memory
        self.noise = OUNoise((num_agents, action_size), random_seed)
        self.memory = ReplayBuffer(buffer_size, batch_size, random_seed)
        self.t_step = 0  # step counter (presumably advanced elsewhere with update_every)
Ejemplo n.º 4
0
    def __init__(self,
                 in_actor,
                 hidden_in_actor,
                 hidden_out_actor,
                 out_actor,
                 in_critic,
                 hidden_in_critic,
                 hidden_out_critic,
                 lr_actor=1.0e-3,
                 lr_critic=1.0e-3,
                 noise_dist: str = 'normal',
                 checkpoint_path=None) -> None:
        """Build actor/critic pairs, noise process and optimizers.

        When `checkpoint_path` is given, actor and critic weights (and their
        target copies) are restored from the first entry of the checkpoint.
        """
        super(DDPGAgent, self).__init__()

        # Construction order kept as-is so weight init consumes the global
        # RNG in the same sequence.
        self.actor = Network(in_actor, hidden_in_actor, hidden_out_actor,
                             out_actor, actor=True).to(device)
        self.critic = Network(in_critic, hidden_in_critic, hidden_out_critic,
                              1).to(device)
        self.target_actor = Network(in_actor, hidden_in_actor,
                                    hidden_out_actor, out_actor,
                                    actor=True).to(device)
        self.target_critic = Network(in_critic, hidden_in_critic,
                                     hidden_out_critic, 1).to(device)

        self.noise = OUNoise(out_actor, scale=1.0, noise_dist=noise_dist)

        # Targets start as exact copies of the local networks.
        hard_update(self.target_actor, self.actor)
        hard_update(self.target_critic, self.critic)

        self.actor_optimizer = Adam(self.actor.parameters(), lr=lr_actor)
        self.critic_optimizer = Adam(self.critic.parameters(),
                                     lr=lr_critic,
                                     weight_decay=1.e-5)

        if checkpoint_path:
            saved = torch.load(checkpoint_path)
            actor_state = saved[0]['actor_params']
            critic_state = saved[0]['critic_params']
            self.actor.load_state_dict(actor_state)
            self.target_actor.load_state_dict(actor_state)
            self.critic.load_state_dict(critic_state)
            self.target_critic.load_state_dict(critic_state)
Ejemplo n.º 5
0
    def __init__(self,
                 in_actor,
                 out_actor,
                 in_critic,
                 lr_actor=1.0e-4,
                 lr_critic=1.0e-3):
        """Minimal DDPG agent: local/target nets, OU noise, Adam optimizers.

        The critic receives `out_actor * 2` as its action input width
        (presumably both agents' actions — confirm against Critic).
        """
        super(DDPGAgent, self).__init__()
        self.actor = Actor(in_actor, out_actor).to(device)
        self.critic = Critic(in_critic, out_actor * 2).to(device)
        self.target_actor = Actor(in_actor, out_actor).to(device)
        self.target_critic = Critic(in_critic, out_actor * 2).to(device)
        self.noise = OUNoise(out_actor, scale=1.0)

        # initialize targets same as original networks
        hard_update(self.target_actor, self.actor)
        hard_update(self.target_critic, self.critic)

        self.actor_optimizer = Adam(self.actor.parameters(), lr=lr_actor)
        self.critic_optimizer = Adam(self.critic.parameters(), lr=lr_critic)
Ejemplo n.º 6
0
    def __init__(self,
                 in_actor,
                 out_actor,
                 hidden_in_actor,
                 hidden_out_actor,
                 state_dim_in_critic,
                 action_dim_inp_critic,
                 hidden_in_critic,
                 hidden_out_critic,
                 lr_actor=1.0e-4,
                 lr_critic=1.0e-3):
        """Create local/target actor-critic networks, OU noise and optimizers."""
        super(DDPGAgent, self).__init__()

        # Construction order kept as-is so weight init consumes the global
        # RNG in the same sequence.
        self.actor = Actor(in_actor, out_actor, hidden_in_actor,
                           hidden_out_actor).to(device)
        self.critic = Critic(state_dim_in_critic, action_dim_inp_critic,
                             hidden_in_critic, hidden_out_critic).to(device)
        self.target_actor = Actor(in_actor, out_actor, hidden_in_actor,
                                  hidden_out_actor).to(device)
        self.target_critic = Critic(state_dim_in_critic, action_dim_inp_critic,
                                    hidden_in_critic,
                                    hidden_out_critic).to(device)

        self.noise = OUNoise(out_actor, scale=1.0)

        self.tau = TAU  # module-level target-update coefficient

        # Targets start as exact copies of the local networks.
        hard_update(self.target_actor, self.actor)
        hard_update(self.target_critic, self.critic)

        self.actor_optimizer = Adam(self.actor.parameters(), lr=lr_actor)
        self.critic_optimizer = Adam(self.critic.parameters(), lr=lr_critic,
                                     weight_decay=1.e-5)
Ejemplo n.º 7
0
    def __init__(self,
                 state_size,
                 action_size,
                 num_agents,
                 hidden_in_actor=512,
                 hidden_out_actor=256,
                 lr_actor=1e-4,
                 hidden_in_critic=512,
                 hidden_out_critic=256,
                 lr_critic=3e-4,
                 weight_decay_critic=0,
                 seed=1,
                 device='cpu'):
        """Build actor/critic networks (with targets), optimizers and noise."""
        super(DDPGAgent, self).__init__()

        self.device = device

        # Actor network, its target copy, and its optimizer.
        self.actor = ActorNetwork(
            state_size, hidden_in_actor, hidden_out_actor, action_size,
            seed).to(device)
        self.target_actor = ActorNetwork(
            state_size, hidden_in_actor, hidden_out_actor, action_size,
            seed).to(device)
        self.actor_optimizer = Adam(self.actor.parameters(), lr=lr_actor)

        # Critic network (takes num_agents), its target copy, and optimizer.
        self.critic = CriticNetwork(
            state_size, action_size, num_agents, hidden_in_critic,
            hidden_out_critic, seed).to(device)
        self.target_critic = CriticNetwork(
            state_size, action_size, num_agents, hidden_in_critic,
            hidden_out_critic, seed).to(device)
        self.critic_optimizer = Adam(
            self.critic.parameters(), lr=lr_critic,
            weight_decay=weight_decay_critic)

        # Exploration noise over the action vector.
        self.noise = OUNoise(action_size, seed, scale=1.0)

        # Targets start as exact copies of the local networks.
        hard_update(self.target_actor, self.actor)
        hard_update(self.target_critic, self.critic)
Ejemplo n.º 8
0
    def __init__(self,
                 in_actor,
                 hidden_in_actor,
                 hidden_out_actor,
                 out_actor,
                 in_critic,
                 hidden_in_critic,
                 hidden_out_critic,
                 lr_actor=1.0e-2,
                 lr_critic=1.0e-2):
        """DDPG agent variant using RNoise with an epsilon schedule."""
        super(DDPGAgent, self).__init__()

        # Construction order kept as-is so weight init consumes the global
        # RNG in the same sequence.
        self.actor = Network(in_actor, hidden_in_actor, hidden_out_actor,
                             out_actor, actor=True).to(device)
        self.critic = Network(in_critic, hidden_in_critic, hidden_out_critic,
                              1).to(device)
        self.target_actor = Network(in_actor, hidden_in_actor,
                                    hidden_out_actor, out_actor,
                                    actor=True).to(device)
        self.target_critic = Network(in_critic, hidden_in_critic,
                                     hidden_out_critic, 1).to(device)

        # RNoise is used in place of the usual OU process here.
        self.noise = RNoise(out_actor, 0.5)

        # Epsilon schedule parameters (presumably consumed elsewhere in the
        # class to scale exploration — confirm against act()).
        self.epsilon = 1.
        self.epsilon_decay_rate = 0.999
        self.epsilon_min = 0.2

        # Targets start as exact copies of the local networks.
        hard_update(self.target_actor, self.actor)
        hard_update(self.target_critic, self.critic)

        self.actor_optimizer = Adam(self.actor.parameters(), lr=lr_actor,
                                    weight_decay=0.0)
        self.critic_optimizer = Adam(self.critic.parameters(), lr=lr_critic,
                                     weight_decay=0.0)
    def __init__(self,
                 in_actor,
                 hidden_in_actor,
                 hidden_out_actor,
                 out_actor,
                 in_critic,
                 hidden_in_critic,
                 hidden_out_critic,
                 lr_actor=1.0e-4,
                 lr_critic=1.0e-4):
        """Actor-critic pair; noise amplitude tracked via module-level NOISE_START."""
        super(DDPGAgent, self).__init__()

        self.state_size = in_actor
        self.action_size = out_actor

        # Construction order kept as-is so weight init consumes the global
        # RNG in the same sequence.
        self.actor = Network(in_actor, hidden_in_actor, hidden_out_actor,
                             out_actor, actor=True).to(device)
        self.critic = Network(in_critic, hidden_in_critic, hidden_out_critic,
                              1).to(device)
        self.target_actor = Network(in_actor, hidden_in_actor,
                                    hidden_out_actor, out_actor,
                                    actor=True).to(device)
        self.target_critic = Network(in_critic, hidden_in_critic,
                                     hidden_out_critic, 1).to(device)

        self.noise = OUNoise(out_actor, scale=1.0)
        self.noise_scale = NOISE_START  # module-level initial noise amplitude

        # Targets start as exact copies of the local networks.
        hard_update(self.target_actor, self.actor)
        hard_update(self.target_critic, self.critic)

        self.actor_optimizer = Adam(self.actor.parameters(), lr=lr_actor)
        self.critic_optimizer = Adam(self.critic.parameters(), lr=lr_critic,
                                     weight_decay=1.e-5)
Ejemplo n.º 10
0
    def __init__(self, state_size, action_size, num_agents,
                 hidden_actor, hidden_critic, lr_actor, lr_critic,
                 buffer_size, agent_id, use_PER=False, seed=0):
        """Per-agent DDPG networks with a centralised-input critic.

        The critic consumes all agents' observations and actions
        (num_agents * state_size, num_agents * action_size).
        """
        super(DDPGAgent, self).__init__()
        self.seed = torch.manual_seed(seed)
        self.agent_id = agent_id

        # Construction order kept as-is so weight init consumes the global
        # RNG in the same sequence.
        self.actor_local = ActorNet(
            state_size, hidden_actor, action_size, seed=seed).to(device)
        self.critic_local = CriticNet(
            num_agents * state_size, num_agents * action_size,
            hidden_critic, 1, seed=seed).to(device)
        self.actor_target = ActorNet(
            state_size, hidden_actor, action_size, seed=seed).to(device)
        self.critic_target = CriticNet(
            num_agents * state_size, num_agents * action_size,
            hidden_critic, 1, seed=seed).to(device)

        self.actor_optimizer = Adam(self.actor_local.parameters(),
                                    lr=lr_actor)
        self.critic_optimizer = Adam(self.critic_local.parameters(),
                                     lr=lr_critic, weight_decay=0.)

        # Replay buffer; use_PER toggles prioritised replay in ReplayBuffer.
        self.memory = ReplayBuffer(buffer_size, num_agents, state_size,
                                   action_size, use_PER)

        # Targets start as exact copies of the local networks.
        hard_update(self.actor_target, self.actor_local)
        hard_update(self.critic_target, self.critic_local)
    def __init__(self,
                 in_actor,
                 hidden_in_actor,
                 hidden_out_actor,
                 out_actor,
                 in_critic,
                 hidden_in_critic,
                 hidden_out_critic,
                 lr_actor=3.0e-5,
                 lr_critic=1.0e-5):
        """Actor-critic pair where both optimizers apply weight decay."""
        super(DDPGAgent, self).__init__()

        # Construction order kept as-is so weight init consumes the global
        # RNG in the same sequence.
        self.actor = Network(in_actor, hidden_in_actor, hidden_out_actor,
                             out_actor, actor=True).to(device)
        self.critic = Network(in_critic, hidden_in_critic, hidden_out_critic,
                              1).to(device)
        self.target_actor = Network(in_actor, hidden_in_actor,
                                    hidden_out_actor, out_actor,
                                    actor=True).to(device)
        self.target_critic = Network(in_critic, hidden_in_critic,
                                     hidden_out_critic, 1).to(device)

        self.noise = OUNoise(out_actor, scale=1.0)

        # Targets start as exact copies of the local networks.
        hard_update(self.target_actor, self.actor)
        hard_update(self.target_critic, self.critic)

        # Both optimizers use L2 regularisation (weight_decay).
        self.actor_optimizer = Adam(self.actor.parameters(), lr=lr_actor,
                                    weight_decay=1.e-5)
        self.critic_optimizer = Adam(self.critic.parameters(), lr=lr_critic,
                                     weight_decay=1.e-5)
Ejemplo n.º 12
0
    def __init__(self, discount_factor=0.95, tau=0.1):
        """Two-agent MADDPG wrapper: independent per-agent actors, one shared critic.

        discount_factor: stored reward-discount hyperparameter.
        tau: stored target-update coefficient (used elsewhere in the class).
        """
        super(MADDPG, self).__init__()

        # DDGAgent used only to train independent actors
        self.maddpg_agent = [
            DDPGAgent(24, 256, 128, 2),
            DDPGAgent(24, 256, 128, 2)
        ]

        # Shared critic trained for both agents
        # critic input = obs_full + actions = 48+2+2=52
        self.critic = Network(52, 256, 128, 1).to(device)
        self.target_critic = Network(52, 256, 128, 1).to(device)

        # initialize targets same as original networks
        hard_update(self.target_critic, self.critic)

        self.critic_optimizer = Adam(self.critic.parameters(),
                                     lr=1.0e-3,
                                     weight_decay=0.0)

        self.discount_factor = discount_factor
        self.tau = tau
        self.iter = 0  # update-step counter
Ejemplo n.º 13
0
    def __init__(self,
                 state_size,
                 action_size,
                 num_agents,
                 lr_actor=1.0e-4,
                 lr_critic=1.0e-3):
        """Actor-critic pair built from the `networkforall` module."""
        super(DDPGAgent, self).__init__()

        # Construction order kept as-is so weight init consumes the global
        # RNG in the same sequence.
        self.actor = networkforall.Actor(state_size, action_size).to(device)
        self.critic = networkforall.Critic(
            state_size, action_size, num_agents).to(device)
        self.target_actor = networkforall.Actor(
            state_size, action_size).to(device)
        self.target_critic = networkforall.Critic(
            state_size, action_size, num_agents).to(device)

        self.noise = OUNoise(action_size, scale=1.0)

        # Targets start as exact copies of the local networks.
        hard_update(self.target_actor, self.actor)
        hard_update(self.target_critic, self.critic)

        self.actor_optimizer = Adam(self.actor.parameters(), lr=lr_actor)
        self.critic_optimizer = Adam(self.critic.parameters(), lr=lr_critic)
 def hard_update_targets(self,agent_num):
     """Hard-copy the selected agent's local actor/critic weights into its targets.

     NOTE(review): the original docstring said "soft update", but this calls
     hard_update, which copies parameters outright.
     """
     self.iter += 1
     ddpg_agent = self.maddpg_agent[agent_num]
     hard_update(ddpg_agent.target_actor, ddpg_agent.actor)
     hard_update(ddpg_agent.target_critic, ddpg_agent.critic)
Ejemplo n.º 15
0
 def __init__(self, beta, pi, v_fn, fvp):
     """TRPO variant: stores the policy `pi` and helper `fvp`.

     `fvp` is presumably a Fisher-vector-product callable — TODO confirm.
     """
     super(TRPOmp, self).__init__(beta, v_fn)
     self.name = "TRPO"
     self.pi = pi
     self.fvp = fvp
     # Initialise pi from beta via a hard parameter copy.
     utils.hard_update(self.pi, self.beta)
Ejemplo n.º 16
0
 def __init__(self, beta, q_fn, q_fn_targ, replay_memory):
     """TD3 agent: adds a target Q-network on top of the base-class setup."""
     super(TD3, self).__init__(beta, q_fn, None, replay_memory, None)
     self.name = "TD3"
     self.q_fn_targ = q_fn_targ
     # Target Q starts as an exact copy of the online Q-network.
     utils.hard_update(self.q_fn_targ, self.q_fn)