def __init__(self, seed=0, noise_start=1.0, gamma=0.99, t_stop_noise=30000,
             action_size=2, n_agents=2, buffer_size=100000, batch_size=256,
             update_every=2, noise_decay=1.0):
    """
    Params
    ======
        action_size (int): dimension of each action
        seed (int): random seed
        n_agents (int): number of distinct agents
        buffer_size (int): replay buffer size
        batch_size (int): minibatch size
        gamma (float): discount factor
        noise_start (float): initial noise weighting factor
        noise_decay (float): noise decay rate
        update_every (int): how often to update the network
        t_stop_noise (int): max number of timesteps with noise applied in training
    """
    self.seed = seed
    self.buffer_size = buffer_size
    self.batch_size = batch_size
    self.update_every = update_every
    self.gamma = gamma
    self.n_agents = n_agents
    self.noise_weight = noise_start
    self.noise_decay = noise_decay
    self.t_step = 0
    self.noise_on = True
    self.t_stop_noise = t_stop_noise

    # create two agents, each with their own actor and critic
    models = [model.Actor_Critic_Models(n_agents=n_agents) for _ in range(n_agents)]
    self.agents = [DDPG(i, models[i], self.seed) for i in range(n_agents)]

    # create shared replay buffer
    self.memory = ReplayBuffer(action_size, self.buffer_size, self.batch_size, self.seed)
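# A minimal sketch (not part of the original class) of how the noise
# scheduling state initialized above could be consumed when acting:
# noise_weight starts at noise_start, is multiplied by noise_decay on
# every noisy action, and noise is switched off entirely once t_step
# passes t_stop_noise. The act() method itself and the keyword
# arguments accepted by each DDPG agent's act() are assumptions for
# illustration; t_step is assumed to be incremented elsewhere (e.g. in
# a step() method).
def act(self, all_states):
    # permanently disable exploration noise once the timestep cap is reached
    if self.t_step > self.t_stop_noise:
        self.noise_on = False
    all_actions = []
    for agent, state in zip(self.agents, all_states):
        # assumed per-agent signature: act(state, noise_weight, add_noise)
        action = agent.act(state, noise_weight=self.noise_weight, add_noise=self.noise_on)
        all_actions.append(action)
    # decay the noise contribution for the next call
    self.noise_weight *= self.noise_decay
    return all_actions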
def __init__(self, action_size=2, n_agents=2, seed=0):
    """
    Params
    ======
        action_size (int): dimension of each action
        seed (int): random seed
        n_agents (int): number of agents
    """
    self.n_agents = n_agents
    self.t_step = 0
    self.noise_on = True

    # create two agents, each with their own actor and critic
    models = [model.Actor_Critic_Models(n_agents=n_agents) for _ in range(n_agents)]
    self.agents = [DDPG(i, models[i]) for i in range(n_agents)]

    # create shared replay buffer
    self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, seed)
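# Unlike the first variant, this one reads BUFFER_SIZE and BATCH_SIZE
# from module scope instead of taking them as constructor arguments, so
# both constants must be defined before the class is instantiated. A
# sketch of the assumed module-level definitions; the exact values are
# assumptions chosen to mirror the other variants' defaults:
BUFFER_SIZE = int(1e5)  # replay buffer size (100000)
BATCH_SIZE = 256        # minibatch size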
def __init__(self, action_size=2, seed=0, n_agents=2, buffer_size=10000,
             batch_size=256, gamma=0.99, update_every=2):
    """
    Params
    ======
        action_size (int): dimension of each action
        seed (int): random seed
        n_agents (int): number of distinct agents
        buffer_size (int): replay buffer size
        batch_size (int): minibatch size
        gamma (float): discount factor
        update_every (int): how often to update the network
    """
    self.buffer_size = buffer_size
    self.batch_size = batch_size
    self.update_every = update_every
    self.gamma = gamma
    self.n_agents = n_agents
    self.t_step = 0
    self.noise_on = True

    # create two agents, each with their own actor and critic
    models = [model.Actor_Critic_Models(n_agents=n_agents) for _ in range(n_agents)]
    self.agents = [DDPG(i, models[i]) for i in range(n_agents)]

    # create shared replay buffer
    self.memory = ReplayBuffer(action_size, self.buffer_size, self.batch_size, seed)
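# All three variants construct the shared buffer with the same call,
# ReplayBuffer(action_size, buffer_size, batch_size, seed). A minimal
# sketch of a buffer matching that signature, assuming experiences are
# stored as namedtuples in a bounded deque and sampled uniformly; the
# implementation details are assumptions, not the original class.
import random
from collections import deque, namedtuple

class ReplayBuffer:
    """Fixed-size buffer to store experience tuples shared by all agents."""

    def __init__(self, action_size, buffer_size, batch_size, seed):
        self.action_size = action_size
        self.memory = deque(maxlen=buffer_size)  # oldest experiences drop out when full
        self.batch_size = batch_size
        self.experience = namedtuple(
            "Experience", ["state", "action", "reward", "next_state", "done"])
        random.seed(seed)

    def add(self, state, action, reward, next_state, done):
        """Add a new experience to memory."""
        self.memory.append(self.experience(state, action, reward, next_state, done))

    def sample(self):
        """Uniformly sample a minibatch of experiences from memory."""
        return random.sample(self.memory, k=self.batch_size)

    def __len__(self):
        """Current number of stored experiences."""
        return len(self.memory)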