Example #1
0
    def __init__(self, n_agents, state_size, action_size, random_seed):
        """Initialize an Agent object.

        Params
        ======
            n_agents (int): number of agents
            state_size (int): dimension of each state
            action_size (int): dimension of each action
            random_seed (int): random seed
        """
        self.n_agents = n_agents
        self.state_size = state_size
        self.action_size = action_size
        # Store the seed value itself. The original wrote
        # `self.seed = np.random.seed(random_seed)`, which always stored
        # None because np.random.seed() returns None.
        self.seed = random_seed
        np.random.seed(random_seed)
        random.seed(random_seed)

        # Actor Network (w/ Target Network)
        self.actor_local = Actor(state_size, action_size,
                                 seed=random_seed).to(device)
        self.actor_target = Actor(state_size, action_size,
                                  seed=random_seed).to(device)
        self.actor_optimizer = optim.Adam(self.actor_local.parameters(),
                                          lr=LR_ACTOR)

        # Critic Network (w/ Target Network)
        self.critic_local = Critic(state_size, action_size,
                                   seed=random_seed).to(device)
        self.critic_target = Critic(state_size, action_size,
                                    seed=random_seed).to(device)
        self.critic_optimizer = optim.Adam(self.critic_local.parameters(),
                                           lr=LR_CRITIC,
                                           weight_decay=WEIGHT_DECAY)

        # Copy the local networks' weights into the targets so both start
        # from identical parameters.
        self.hard_update(self.actor_target, self.actor_local)
        self.hard_update(self.critic_target, self.critic_local)

        # Ornstein-Uhlenbeck noise process for action exploration.
        self.noise = OUNoise(action_size, random_seed)

        # Replay memory (uniform sampling).
        self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE,
                                   random_seed)

        # Time step counter (for updating every UPDATE_EVERY steps).
        self.t_step = 0
    def __init__(self,
                 n_agents,
                 state_size,
                 action_size,
                 random_seed,
                 prioritized_reply=False):
        """Initialize an Agent object.

        Params
        ======
            n_agents (int): number of agents
            state_size (int): dimension of each state
            action_size (int): dimension of each action
            random_seed (int): random seed
            prioritized_reply (bool): True to use a prioritized replay
                buffer, False (default) for a uniform one
        """
        self.n_agents = n_agents
        self.state_size = state_size
        self.action_size = action_size
        # Store the seed value itself. The original wrote
        # `self.seed = np.random.seed(random_seed)`, which always stored
        # None because np.random.seed() returns None.
        self.seed = random_seed
        np.random.seed(random_seed)
        random.seed(random_seed)
        self.prioritized_reply = prioritized_reply

        # Actor Network (w/ Target Network)
        self.actor_local = Actor(state_size,
                                 action_size,
                                 seed=random_seed,
                                 leak=LEAKINESS).to(device)
        self.actor_target = Actor(state_size,
                                  action_size,
                                  seed=random_seed,
                                  leak=LEAKINESS).to(device)
        self.actor_optimizer = optim.Adam(self.actor_local.parameters(),
                                          lr=LR_ACTOR)

        # Critic Network (w/ Target Network)
        self.critic_local = Critic(state_size,
                                   action_size,
                                   seed=random_seed,
                                   leak=LEAKINESS).to(device)
        self.critic_target = Critic(state_size,
                                    action_size,
                                    seed=random_seed,
                                    leak=LEAKINESS).to(device)
        self.critic_optimizer = optim.Adam(self.critic_local.parameters(),
                                           lr=LR_CRITIC)

        # Noise process: one noise stream per agent. The agent count was
        # previously hard-coded to 20, which broke for any other n_agents.
        self.noise = OUNoise((n_agents, action_size), random_seed)

        # Replay memory
        if self.prioritized_reply:
            self.memory = PreReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE,
                                          random_seed, ALPHA)
            # Learning step counter, used to anneal beta for importance
            # sampling in the prioritized buffer.
            self.learn_step = 0
        else:
            self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE,
                                       random_seed)

        # Time step counter (for updating every UPDATE_EVERY steps).
        self.t_step = 0