def __init__(self, args, env):
    self.args = args
    self.env = env
    self.evolver = utils_ne.SSNE(self.args)
    self.best_r = 0
    self.best_state = []

    # Init population
    self.pop = []
    for _ in range(args.pop_size):
        self.pop.append(ddpg.Actor(args))

    # Turn off gradients and put in eval mode
    for actor in self.pop:
        actor.eval()

    # Init RL Agent
    self.rl_agent = ddpg.DDPG(args)
    self.replay_buffer = replay_memory.ReplayMemory(args.buffer_size)
    self.ounoise = ddpg.OUNoise(args.action_dim)

    # Trackers
    self.num_games = 0
    self.num_frames = 0
    self.gen_frames = None
def __init__(self, args: Parameters, env):
    self.args = args
    self.env = env

    # Init population
    self.pop = []
    self.buffers = []
    for _ in range(args.pop_size):
        self.pop.append(ddpg.GeneticAgent(args))

    # Init RL Agent
    self.rl_agent = ddpg.DDPG(args)
    if args.per:
        self.replay_buffer = replay_memory.PrioritizedReplayMemory(
            args.buffer_size, args.device, beta_frames=self.args.num_frames)
    else:
        self.replay_buffer = replay_memory.ReplayMemory(args.buffer_size, args.device)
    self.ounoise = ddpg.OUNoise(args.action_dim)
    self.evolver = utils_ne.SSNE(self.args, self.rl_agent.critic, self.evaluate)

    # Population novelty
    self.ns_r = 1.0
    self.ns_delta = 0.1
    self.best_train_reward = 0.0
    self.time_since_improv = 0
    self.step = 1

    # Trackers
    self.num_games = 0
    self.num_frames = 0
    self.iterations = 0
    self.gen_frames = None
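
# A minimal sketch of what ddpg.OUNoise could look like: the standard
# Ornstein-Uhlenbeck exploration noise commonly paired with DDPG. This is an
# assumed implementation; the mu/theta/sigma defaults below are illustrative
# and not necessarily the values used in this codebase.
import numpy as np

class OUNoise:
    def __init__(self, action_dim, mu=0.0, theta=0.15, sigma=0.2):
        self.action_dim = action_dim
        self.mu = mu
        self.theta = theta
        self.sigma = sigma
        self.state = np.ones(self.action_dim) * self.mu

    def reset(self):
        # Restart the process at the mean (typically called at episode start)
        self.state = np.ones(self.action_dim) * self.mu

    def noise(self):
        # dx = theta * (mu - x) + sigma * N(0, 1); returns temporally correlated noise
        x = self.state
        dx = self.theta * (self.mu - x) + self.sigma * np.random.randn(len(x))
        self.state = x + dx
        return self.state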
def __init__(self, args: Parameters):
    self.args = args

    self.actor = Actor(args)
    self.actor_optim = Adam(self.actor.parameters(), lr=1e-3)

    self.buffer = replay_memory.ReplayMemory(self.args.individual_bs, args.device)
    self.loss = nn.MSELoss()
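
# A hedged sketch (an assumption, not code from the snippet above) of how the
# actor, its Adam optimizer, and the MSE loss could be combined for one
# supervised fitting step on (state, action) pairs drawn from the individual's
# buffer. `states` and `target_actions` are torch tensors of shape
# (batch, state_dim) and (batch, action_dim).
def clone_step(agent, states, target_actions):
    agent.actor_optim.zero_grad()
    loss = agent.loss(agent.actor(states), target_actions)
    loss.backward()
    agent.actor_optim.step()
    return loss.item()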
def __init__(self, args, env):
    self.args = args
    self.env = env

    # Create the Agent
    self.agent = magent.MetaAgent(args.state_dim, args.action_dim, args.num_subs,
                                  args.master_lr, args.sub_lr)

    # Init Replay Buffer
    self.replay_buffer = replay_memory.ReplayMemory(args.buffer_size)
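
# A minimal sketch of the interface a replay buffer like the one constructed
# above typically exposes (push / sample / __len__). This is an assumed,
# standard implementation for illustration, not necessarily the one defined
# in replay_memory.py.
import random
from collections import deque

class SimpleReplayMemory:
    def __init__(self, capacity):
        self.memory = deque(maxlen=capacity)  # oldest transitions evicted first

    def push(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def sample(self, batch_size):
        # Uniformly sample a batch of stored transitions
        return random.sample(self.memory, batch_size)

    def __len__(self):
        return len(self.memory)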
def __init__(self, args):
    self.args = args
    self.buffer = replay_memory.ReplayMemory(args.individual_bs, args.device)

    self.actor = Actor(args, init=True)
    self.actor_target = Actor(args, init=True)
    self.actor_optim = Adam(self.actor.parameters(), lr=0.5e-4)

    self.critic = Critic(args)
    self.critic_target = Critic(args)
    self.critic_optim = Adam(self.critic.parameters(), lr=0.5e-3)

    self.gamma = args.gamma
    self.tau = self.args.tau
    self.loss = nn.MSELoss()

    # Make sure the targets start with the same weights as the live networks
    hard_update(self.actor_target, self.actor)
    hard_update(self.critic_target, self.critic)
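
# hard_update (called above) and soft_update are the usual DDPG target-network
# helpers; a minimal sketch is given below in case they are not shown
# elsewhere. hard_update copies the source weights verbatim, while soft_update
# blends them toward the source with rate tau (Polyak averaging).
def hard_update(target, source):
    for target_param, param in zip(target.parameters(), source.parameters()):
        target_param.data.copy_(param.data)

def soft_update(target, source, tau):
    for target_param, param in zip(target.parameters(), source.parameters()):
        target_param.data.copy_(target_param.data * (1.0 - tau) + param.data * tau)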