def __init__(self, args, env):
    """Build the hybrid agent: an evolutionary actor population plus a DDPG learner.

    Args:
        args: experiment/hyper-parameter namespace (expects ``pop_size``,
            ``buffer_size``, ``action_dim``; passed through to sub-modules).
        env: the environment handle, kept for later rollouts.
    """
    self.args = args
    self.env = env
    self.evolver = utils_ne.SSNE(self.args)

    # Best score / trajectory seen so far across all generations.
    self.best_r = 0
    self.best_state = []

    # Evolutionary population of actor networks; eval() turns off
    # dropout/batch-norm updates since these are mutated, not trained.
    self.pop = [ddpg.Actor(args) for _ in range(args.pop_size)]
    for actor in self.pop:
        actor.eval()

    # Gradient-based RL agent with its replay buffer and exploration noise.
    self.rl_agent = ddpg.DDPG(args)
    self.replay_buffer = replay_memory.ReplayMemory(args.buffer_size)
    self.ounoise = ddpg.OUNoise(args.action_dim)

    # Progress trackers (gen_frames is reset per generation by the caller —
    # presumably; it starts unset here. TODO confirm against the train loop).
    self.num_games = 0
    self.num_frames = 0
    self.gen_frames = None
def __init__(self, args, env):
    """Build the parallel-worker variant of the hybrid agent.

    Creates the evolutionary actor population, the DDPG learner, and a
    ``workers`` list holding every actor that will run rollouts
    (the population plus the RL agent's own actor — ``pop_size + 1`` total).

    Args:
        args: experiment/hyper-parameter namespace (expects ``pop_size``,
            ``action_dim``; passed through to sub-modules).
        env: the environment handle (accepted for interface parity;
            not stored here).
    """
    self.args = args
    self.evolver = utils_ne.SSNE(self.args)

    # Evolutionary population of actor networks; eval() because these
    # are perturbed by the evolver, not trained by gradient descent.
    self.pop = [ddpg.Actor(args) for _ in range(args.pop_size)]
    for actor in self.pop:
        actor.eval()

    # Gradient-based RL agent and its exploration noise.
    self.rl_agent = ddpg.DDPG(args)
    self.ounoise = ddpg.OUNoise(args.action_dim)

    # BUG FIX: the original did `self.workers = self.pop.append(...)`,
    # but list.append() mutates in place and returns None, so workers
    # was always None. Append first, then bind the full list: the
    # population plus the RL actor (pop_size + 1 rollout workers).
    self.pop.append(self.rl_agent.actor)
    self.workers = self.pop

    # Progress trackers.
    self.num_games = 0
    self.num_frames = 0
    self.gen_frames = 0
    self.len_replay = 0
# print() if not self.replay_memory.empty(): print(self.replay_memory.qsize()) time.sleep(1) print(self.replay_memory.get_nowait()) if __name__ == "__main__": args = Parameters() env = utils.NormalizedActions(gym.make(env_tag)) args.action_dim = env.action_space.shape[0] args.state_dim = env.observation_space.shape[0] pop = [] for _ in range(1): pop.append(ddpg.Actor(args)) for actor in pop: actor.eval() mp.set_start_method('spawn') replay_memory = mp.Queue() learner = LearnerThread(replay_memory) learner.start() # dict_all_returns = mp.Manager().dict() processes = [] time_start = time.time() for key, pop in enumerate(pop): pop.share_memory()