def __init__(self, state_size, action_size, random_seed):
    """Initialize an Agent object.

    Builds the local/target actor and critic networks, their Adam
    optimizers, the exploration-noise process and the replay buffer.

    Params
    ======
        state_size (int): dimension of each state
        action_size (int): dimension of each action
        random_seed (int): random seed
    """
    self.state_size = state_size
    self.action_size = action_size

    # random.seed() returns None, so assigning its result would leave
    # self.seed as None. Seed the RNG, then keep the seed value itself.
    random.seed(random_seed)
    self.seed = random_seed

    # Actor Network (w/ Target Network).
    # Local and target networks are constructed with the same seed; only the
    # local network's parameters are handed to the optimizer.
    self.actor_local = Actor(state_size, action_size, random_seed).to(device)
    self.actor_target = Actor(state_size, action_size, random_seed).to(device)
    self.actor_optimizer = optim.Adam(
        self.actor_local.parameters(),
        lr=LR_ACTOR,
    )

    # Critic Network (w/ Target Network).
    # The critic optimizer additionally applies weight decay.
    self.critic_local = Critic(state_size, action_size, random_seed).to(device)
    self.critic_target = Critic(state_size, action_size, random_seed).to(device)
    self.critic_optimizer = optim.Adam(
        self.critic_local.parameters(),
        lr=LR_CRITIC,
        weight_decay=WEIGHT_DECAY,
    )

    # Noise process
    self.noise = OUNoise(action_size, random_seed)

    # Replay memory
    self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, random_seed)
def __init__(self, state_size, action_size, random_seed, memory=None):
    """Initialize an Agent object.

    Builds the local/target actor and critic networks, their Adam
    optimizers and the exploration-noise process, and attaches a replay
    buffer (either the one supplied by the caller or a freshly created one).

    Params
    ======
        state_size (int): dimension of each state
        action_size (int): dimension of each action
        random_seed (int): random seed
        memory: optional replay buffer to use as-is; when None, a new
            ReplayBuffer is created for this agent
    """
    self.state_size = state_size
    self.action_size = action_size

    # random.seed() returns None, so assigning its result would leave
    # self.seed as None. Seed the RNG, then keep the seed value itself.
    random.seed(random_seed)
    self.seed = random_seed

    # Actor Network.
    # Local and target networks are constructed with the same seed; only the
    # local network's parameters are handed to the optimizer.
    self.actor_local = Actor(state_size, action_size, random_seed).to(device)
    self.actor_target = Actor(state_size, action_size, random_seed).to(device)
    self.actor_optimizer = optim.Adam(
        self.actor_local.parameters(),
        lr=LR_ACTOR,
    )

    # Critic Network.
    # The critic optimizer additionally applies weight decay.
    self.critic_local = Critic(state_size, action_size, random_seed).to(device)
    self.critic_target = Critic(state_size, action_size, random_seed).to(device)
    self.critic_optimizer = optim.Adam(
        self.critic_local.parameters(),
        lr=LR_CRITIC,
        weight_decay=WEIGHT_DECAY,
    )

    # Noise
    self.noise = OUNoise(action_size, random_seed)

    # Replay memory: reuse the caller-provided buffer when one is given,
    # otherwise create a private one.
    if memory is None:
        self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, random_seed)
    else:
        self.memory = memory
def __init__(self, args, state_size, action_size):
    """Initialize an Agent object.

    Builds the local/target actor and critic networks, their Adam
    optimizers and the exploration-noise process, all configured from
    ``args``.

    Params
    ======
        args: configuration object; this constructor reads the attributes
            seed, hidden_1_size, hidden_2_size, hidden_3_size, lr_Actor,
            lr_Critic, adam_eps, weight_decay, noise_std, memory_capacity,
            learning_starts_ratio and learning_frequency
        state_size (int): dimension of each state
        action_size (int): dimension of each action
    """
    self.args = args
    self.state_size = state_size
    self.action_size = action_size

    # random.seed() returns None, so assigning its result would leave
    # self.seed as None. Seed the RNG, then keep the seed value itself.
    seed = self.args.seed
    random.seed(seed)
    self.seed = seed

    # All four networks share the same hidden-layer sizes.
    hidden_units = {
        "fc1_units": self.args.hidden_1_size,
        "fc2_units": self.args.hidden_2_size,
        "fc3_units": self.args.hidden_3_size,
    }

    # Actor Network (w/ Target Network).
    # Only the local network's parameters are handed to the optimizer.
    self.actor_local = Actor(state_size, action_size, seed, **hidden_units).to(device)
    self.actor_target = Actor(state_size, action_size, seed, **hidden_units).to(device)
    self.actor_optimizer = optim.Adam(
        self.actor_local.parameters(),
        lr=self.args.lr_Actor,
        eps=self.args.adam_eps,
    )

    # Critic Network (w/ Target Network).
    # The critic optimizer additionally applies weight decay.
    self.critic_local = Critic(state_size, action_size, seed, **hidden_units).to(device)
    self.critic_target = Critic(state_size, action_size, seed, **hidden_units).to(device)
    self.critic_optimizer = optim.Adam(
        self.critic_local.parameters(),
        lr=self.args.lr_Critic,
        eps=self.args.adam_eps,
        weight_decay=self.args.weight_decay,
    )

    # Noise process
    self.noise = OUNoise(action_size, seed, sigma=self.args.noise_std)

    # memory_capacity scaled by learning_starts_ratio, truncated to an int.
    # NOTE(review): presumably the number of stored transitions required
    # before learning begins -- confirm against the code that reads it.
    self.learning_starts = int(args.memory_capacity * args.learning_starts_ratio)
    self.learning_frequency = args.learning_frequency
def __init__(self, state_size, action_size, n_agents=1, seed=0):
    """Initialize an Agent object.

    Builds the local/target actor and critic networks, their Adam
    optimizers, the exploration-noise process and the replay buffer.

    Params
    ======
        state_size (int): dimension of each state
        action_size (int): dimension of each action
        n_agents (int): number of agents it will control in the environment
        seed (int): random seed
    """
    self.state_size = state_size
    self.action_size = action_size

    # Both np.random.seed() and random.seed() return None, so assigning
    # either result would leave self.seed as None. Seed both RNGs, then
    # keep the seed value itself.
    np.random.seed(seed)
    random.seed(seed)
    self.seed = seed

    self.n_agents = n_agents

    # Actor Network (w/ Target Network).
    # Local and target networks are constructed with the same seed; only the
    # local network's parameters are handed to the optimizer.
    self.actor_local = Actor(state_size, action_size, leak=LEAKINESS, seed=seed).to(device)
    self.actor_target = Actor(state_size, action_size, leak=LEAKINESS, seed=seed).to(device)
    self.actor_optimizer = optim.Adam(
        self.actor_local.parameters(),
        lr=LR_ACTOR,
    )

    # Critic Network (w/ Target Network).
    self.critic_local = Critic(state_size, action_size, leak=LEAKINESS, seed=seed).to(device)
    self.critic_target = Critic(state_size, action_size, leak=LEAKINESS, seed=seed).to(device)
    # NOTE: no weight_decay is passed to the critic optimizer; the variant
    # using weight_decay=WEIGHT_DECAY was left disabled in the original.
    self.critic_optimizer = optim.Adam(
        self.critic_local.parameters(),
        lr=LR_CRITIC,
    )

    # Noise process
    self.noise = OUNoise(action_size, seed)

    # Replay memory
    self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, seed)

    # Counter initialised to zero.
    # NOTE(review): presumably incremented per environment step elsewhere.
    self.timesteps = 0