def __init__(self, n_agents, state_size, action_size, random_seed):
    """Initialize an Agent object.

    Params
    ======
        n_agents (int): number of agents
        state_size (int): dimension of each state
        action_size (int): dimension of each action
        random_seed (int): random seed
    """
    self.n_agents = n_agents
    self.state_size = state_size
    self.action_size = action_size
    self.seed = random_seed
    np.random.seed(random_seed)
    random.seed(random_seed)

    # Actor Network (w/ Target Network)
    self.actor_local = Actor(state_size, action_size, seed=random_seed).to(device)
    self.actor_target = Actor(state_size, action_size, seed=random_seed).to(device)
    self.actor_optimizer = optim.Adam(self.actor_local.parameters(), lr=LR_ACTOR)

    # Critic Network (w/ Target Network)
    self.critic_local = Critic(state_size, action_size, seed=random_seed).to(device)
    self.critic_target = Critic(state_size, action_size, seed=random_seed).to(device)
    self.critic_optimizer = optim.Adam(self.critic_local.parameters(), lr=LR_CRITIC,
                                       weight_decay=WEIGHT_DECAY)

    # Initialize targets with the same weights as the local networks
    self.hard_update(self.actor_target, self.actor_local)
    self.hard_update(self.critic_target, self.critic_local)

    # Noise process
    self.noise = OUNoise(action_size, random_seed)

    # Replay memory
    self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, random_seed)

    # Initialize time step (for updating every UPDATE_EVERY steps)
    self.t_step = 0
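# The constructor above calls self.hard_update(...) to copy the freshly initialized local
# weights into the target networks ("initialize targets same as original networks").
# A minimal sketch of such a helper, assuming it is defined as a method on this Agent
# class (the actual implementation is not shown in this section):
def hard_update(self, target, source):
    """Copy every parameter of the source network into the target network."""
    for target_param, source_param in zip(target.parameters(), source.parameters()):
        target_param.data.copy_(source_param.data)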
def __init__(self, n_agents, state_size, action_size, random_seed, prioritized_reply=False):
    """Initialize an Agent object.

    Params
    ======
        n_agents (int): number of agents
        state_size (int): dimension of each state
        action_size (int): dimension of each action
        random_seed (int): random seed
        prioritized_reply (bool): whether to use a prioritized replay buffer (default: False)
    """
    self.n_agents = n_agents
    self.state_size = state_size
    self.action_size = action_size
    self.seed = random_seed
    np.random.seed(random_seed)
    random.seed(random_seed)
    self.prioritized_reply = prioritized_reply

    # Actor Network (w/ Target Network), using leaky ReLU activations
    self.actor_local = Actor(state_size, action_size, seed=random_seed, leak=LEAKINESS).to(device)
    self.actor_target = Actor(state_size, action_size, seed=random_seed, leak=LEAKINESS).to(device)
    self.actor_optimizer = optim.Adam(self.actor_local.parameters(), lr=LR_ACTOR)

    # Critic Network (w/ Target Network), using leaky ReLU activations
    self.critic_local = Critic(state_size, action_size, seed=random_seed, leak=LEAKINESS).to(device)
    self.critic_target = Critic(state_size, action_size, seed=random_seed, leak=LEAKINESS).to(device)
    self.critic_optimizer = optim.Adam(self.critic_local.parameters(), lr=LR_CRITIC)

    # Noise process, one noise stream per agent
    self.noise = OUNoise((n_agents, action_size), random_seed)

    # Replay memory
    if self.prioritized_reply:
        self.memory = PreReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, random_seed, ALPHA)
        # Initialize learning step for annealing beta
        self.learn_step = 0
    else:
        self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, random_seed)

    # Initialize time step (for updating every UPDATE_EVERY steps)
    self.t_step = 0
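# Both constructors build an OUNoise object for exploration noise, either with a single
# action-sized state or with one row per agent. A minimal sketch of an Ornstein-Uhlenbeck
# process matching that call signature; the mu, theta, and sigma defaults are assumed here
# and the project's actual class may differ:
import copy

import numpy as np


class OUNoise:
    """Ornstein-Uhlenbeck process for temporally correlated exploration noise."""

    def __init__(self, size, seed, mu=0.0, theta=0.15, sigma=0.2):
        # size may be an int (action_size) or a tuple such as (n_agents, action_size)
        self.mu = mu * np.ones(size)
        self.theta = theta
        self.sigma = sigma
        self.rng = np.random.RandomState(seed)
        self.reset()

    def reset(self):
        """Reset the internal state to the mean."""
        self.state = copy.copy(self.mu)

    def sample(self):
        """Update the internal state and return it as a noise sample."""
        dx = self.theta * (self.mu - self.state) + self.sigma * self.rng.standard_normal(self.mu.shape)
        self.state = self.state + dx
        return self.state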