def __init__(self, args, env):
    self.args = args
    self.env = env
    self.evolver = utils_ne.SSNE(self.args)
    self.best_r = 0
    self.best_state = []

    # Init population
    self.pop = []
    for _ in range(args.pop_size):
        self.pop.append(ddpg.Actor(args))

    # Turn off gradients and put in eval mode
    for actor in self.pop:
        actor.eval()

    # Init RL Agent
    self.rl_agent = ddpg.DDPG(args)
    self.replay_buffer = replay_memory.ReplayMemory(args.buffer_size)
    self.ounoise = ddpg.OUNoise(args.action_dim)

    # Trackers
    self.num_games = 0
    self.num_frames = 0
    self.gen_frames = None
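# A minimal sketch of the uniform replay buffer that replay_memory.ReplayMemory
# presumably provides (the class name, Transition tuple, and method names below are
# assumptions for illustration, not the repo's actual API): a bounded deque with
# uniform random sampling, as used by the DDPG agent above.
import random
from collections import deque, namedtuple

Transition = namedtuple('Transition', ('state', 'action', 'next_state', 'reward', 'done'))

class SimpleReplayMemory:
    def __init__(self, capacity):
        self.memory = deque(maxlen=capacity)  # oldest transitions are evicted first

    def push(self, *transition):
        self.memory.append(Transition(*transition))

    def sample(self, batch_size):
        return random.sample(self.memory, batch_size)

    def __len__(self):
        return len(self.memory)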
def __init__(self, args: Parameters, env):
    self.args = args
    self.env = env

    # Init population
    self.pop = []
    self.buffers = []
    for _ in range(args.pop_size):
        self.pop.append(ddpg.GeneticAgent(args))

    # Init RL Agent
    self.rl_agent = ddpg.DDPG(args)
    if args.per:
        self.replay_buffer = replay_memory.PrioritizedReplayMemory(args.buffer_size, args.device,
                                                                    beta_frames=self.args.num_frames)
    else:
        self.replay_buffer = replay_memory.ReplayMemory(args.buffer_size, args.device)
    self.ounoise = ddpg.OUNoise(args.action_dim)
    self.evolver = utils_ne.SSNE(self.args, self.rl_agent.critic, self.evaluate)

    # Population novelty
    self.ns_r = 1.0
    self.ns_delta = 0.1
    self.best_train_reward = 0.0
    self.time_since_improv = 0
    self.step = 1

    # Trackers
    self.num_games = 0
    self.num_frames = 0
    self.iterations = 0
    self.gen_frames = None
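# A minimal sketch of proportional prioritized replay with linear beta annealing,
# the mechanism PrioritizedReplayMemory(buffer_size, device, beta_frames=...)
# presumably implements. The class, argument names, and default hyperparameters
# here are assumptions; a real implementation would typically use a sum-tree
# rather than this O(N) numpy version.
import numpy as np

class SimplePrioritizedReplay:
    def __init__(self, capacity, alpha=0.6, beta_start=0.4, beta_frames=100000):
        self.capacity = capacity
        self.alpha = alpha
        self.beta_start = beta_start
        self.beta_frames = beta_frames
        self.frame = 1
        self.buffer = []
        self.pos = 0
        self.priorities = np.zeros(capacity, dtype=np.float32)

    def beta(self):
        # Anneal beta linearly from beta_start to 1.0 over beta_frames steps.
        return min(1.0, self.beta_start + (1.0 - self.beta_start) * self.frame / self.beta_frames)

    def push(self, transition):
        max_prio = self.priorities.max() if self.buffer else 1.0
        if len(self.buffer) < self.capacity:
            self.buffer.append(transition)
        else:
            self.buffer[self.pos] = transition
        self.priorities[self.pos] = max_prio  # new samples start at max priority
        self.pos = (self.pos + 1) % self.capacity

    def sample(self, batch_size):
        prios = self.priorities[:len(self.buffer)]
        probs = prios ** self.alpha
        probs /= probs.sum()
        idx = np.random.choice(len(self.buffer), batch_size, p=probs)
        # Importance-sampling weights correct for the non-uniform sampling.
        weights = (len(self.buffer) * probs[idx]) ** (-self.beta())
        weights /= weights.max()
        self.frame += 1
        return [self.buffer[i] for i in idx], idx, weights

    def update_priorities(self, idx, td_errors, eps=1e-5):
        self.priorities[idx] = np.abs(td_errors) + eps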
def __init__(self, args, env):
    self.args = args
    self.evolver = utils_ne.SSNE(self.args)
    # self.replay_buffer = replay_memory.ReplayMemory(args.buffer_size)

    # Init population; gradients are never taken through these actors, so keep them in eval mode
    self.pop = []
    for _ in range(args.pop_size):
        self.pop.append(ddpg.Actor(args))
    for actor in self.pop:
        actor.eval()

    # self.workers = [Worker.remote(args) for _ in range(self.args.pop_size + 1)]
    # args.is_cuda = True; args.is_memory_cuda = True

    # Init RL Agent
    self.rl_agent = ddpg.DDPG(args)
    # self.rl_agent.share_memory()
    self.ounoise = ddpg.OUNoise(args.action_dim)

    # Shared-replay alternatives that were tried and left disabled:
    # self.replay_queue = mp.Manager().Queue()  # mp.Manager().list()
    # self.replay_queue = mp.Queue()
    # self.replay_memory = mp.Manager().list()
    # self.replay_memory = mp.Array()
    # self.replay_memory = mp.Queue()

    # Workers: the evolutionary population plus the RL actor.
    # Note: list.append() returns None, so the original
    # `self.workers = self.pop.append(...)` left self.workers set to None.
    self.pop.append(self.rl_agent.actor)
    self.workers = self.pop

    # for key in range(self.args.pop_size):
    #     self.replay_memory[key] = replay_memory.ReplayMemory(self.args.buffer_size)
    # self.learner = LearnerThread(self.replay_memory, self.rl_agent)
    # self.learner.start()

    # Stats
    # self.timers = {
    #     k: TimerStat()
    #     for k in [
    #         "put_weights", "get_samples", "sample_processing",
    #         "replay_processing", "update_priorities", "train", "sample"
    #     ]
    # }

    # Trackers
    self.num_games = 0
    self.num_frames = 0
    self.gen_frames = 0
    self.len_replay = 0
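# A minimal sketch of the Ornstein-Uhlenbeck exploration noise that
# ddpg.OUNoise(action_dim) presumably implements; the mu/theta/sigma defaults and
# method names are assumptions. OU noise is temporally correlated, which tends to
# explore continuous action spaces more smoothly than i.i.d. Gaussian noise.
import numpy as np

class SimpleOUNoise:
    def __init__(self, action_dim, mu=0.0, theta=0.15, sigma=0.2):
        self.mu = mu * np.ones(action_dim)
        self.theta = theta
        self.sigma = sigma
        self.state = self.mu.copy()

    def reset(self):
        # Reset the process at the start of each episode.
        self.state = self.mu.copy()

    def noise(self):
        # dx = theta * (mu - x) + sigma * N(0, 1); x <- x + dx
        dx = self.theta * (self.mu - self.state) + self.sigma * np.random.randn(len(self.state))
        self.state = self.state + dx
        return self.state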