def __init__(self, args, env):
    self.args = args
    self.env = env
    self.evolver = utils_ne.SSNE(self.args)
    self.best_r = 0
    self.best_state = []

    # Init population
    self.pop = []
    for _ in range(args.pop_size):
        self.pop.append(ddpg.Actor(args))

    # Turn off gradients and put in eval mode
    for actor in self.pop:
        actor.eval()

    # Init RL Agent
    self.rl_agent = ddpg.DDPG(args)
    self.replay_buffer = replay_memory.ReplayMemory(args.buffer_size)
    self.ounoise = ddpg.OUNoise(args.action_dim)

    # Trackers
    self.num_games = 0
    self.num_frames = 0
    self.gen_frames = None
def __init__(self, args: Parameters, env):
    self.args = args
    self.env = env

    # Init population
    self.pop = []
    self.buffers = []
    for _ in range(args.pop_size):
        self.pop.append(ddpg.GeneticAgent(args))

    # Init RL Agent
    self.rl_agent = ddpg.DDPG(args)
    if args.per:
        self.replay_buffer = replay_memory.PrioritizedReplayMemory(args.buffer_size, args.device,
                                                                   beta_frames=self.args.num_frames)
    else:
        self.replay_buffer = replay_memory.ReplayMemory(args.buffer_size, args.device)
    self.ounoise = ddpg.OUNoise(args.action_dim)
    self.evolver = utils_ne.SSNE(self.args, self.rl_agent.critic, self.evaluate)

    # Population novelty
    self.ns_r = 1.0
    self.ns_delta = 0.1
    self.best_train_reward = 0.0
    self.time_since_improv = 0
    self.step = 1

    # Trackers
    self.num_games = 0
    self.num_frames = 0
    self.iterations = 0
    self.gen_frames = None
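# A minimal sketch (an assumption, not the repo's actual loop) of how the fields
# initialised above interact during one ERL/PDERL-style generation: the population
# is evaluated and evolved, then the DDPG agent takes gradient steps on the shared
# replay buffer. Method names such as `evaluate`, `epoch` and `update_parameters`
# follow the surrounding code, but their exact signatures are assumptions.
def train_one_generation_sketch(agent):
    agent.gen_frames = 0
    # Evaluate every genetic actor; rollouts fill the shared replay buffer
    # and advance agent.gen_frames / agent.num_frames.
    fitness = [agent.evaluate(genetic_actor) for genetic_actor in agent.pop]
    # Selection, crossover and mutation on the population.
    agent.evolver.epoch(agent.pop, fitness)
    # One DDPG update per frame collected this generation.
    for _ in range(agent.gen_frames):
        batch = agent.replay_buffer.sample(agent.args.batch_size)
        agent.rl_agent.update_parameters(batch)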
def test_mutation(self):
    models = [800, 1400, 1600, 1800, 2200]
    source_dir = 'exp/cheetah_sm0.1_distil_save_20/models/'
    pr, nmr, smr = [], [], []
    ps, nms, sms = [], [], []
    ssne = mod_neuro_evo.SSNE(self.args, None, None)
    for i, model in enumerate(models):
        print("========== Mutation for {} ==============".format(model))
        agent = self.load_genetic_agent(source_dir, model)
        p_reward, p_states = self.evaluate(agent)
        pr.append(p_reward)
        ps.append(p_states)

        # Normal (Gaussian) mutation on a clone of the parent
        nchild = ddpg.GeneticAgent(self.args)
        ssne.clone(agent, nchild)
        ssne.mutate_inplace(nchild)
        nm_reward, nm_states = self.evaluate(nchild)
        nmr.append(nm_reward)
        nms.append(nm_states)

        # Proximal (safe) mutation on another clone of the parent
        dchild = ddpg.GeneticAgent(self.args)
        ssne.clone(agent, dchild)
        ssne.proximal_mutate(dchild, 0.05)
        sm_reward, sm_states = self.evaluate(dchild)
        smr.append(sm_reward)
        sms.append(sm_states)

        print("Parent", pr[-1])
        print("Normal", nmr[-1])
        print("Safe", smr[-1])

    # Ablation for safe mutation
    ablation_mag = [0.0, 0.01, 0.05, 0.1, 0.2]
    agent = self.load_genetic_agent(source_dir, 2200)
    ablr = []
    abls = []
    for mag in ablation_mag:
        dchild = ddpg.GeneticAgent(self.args)
        ssne.clone(agent, dchild)
        ssne.proximal_mutate(dchild, mag)
        sm_reward, sm_states = self.evaluate(dchild)
        ablr.append(sm_reward)
        abls.append(sm_states)

    save_file = 'visualise/mutation'
    np.savez(save_file, pr=pr, nmr=nmr, smr=smr, ps=ps, nms=nms, sms=sms,
             ablr=ablr, abls=abls, abl_mag=ablation_mag)
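# A minimal sketch (an assumption, not the repo's proximal_mutate) of the idea
# being tested above: Gaussian parameter noise is scaled down on weights to which
# the policy output is most sensitive, so the behavioural shift stays bounded.
# `policy` (any torch policy network) and `states` (a batch of visited states)
# are placeholders.
import torch

def proximal_mutation_sketch(policy, states, mag=0.05):
    actions = policy(states)                              # actions on a batch of states
    params = [p for p in policy.parameters() if p.requires_grad]
    # Sensitivity of the mean action magnitude w.r.t. each parameter.
    grads = torch.autograd.grad(actions.abs().mean(), params)
    with torch.no_grad():
        for p, g in zip(params, grads):
            scale = g.abs() + 1e-8                        # avoid division by zero
            p.add_(mag * torch.randn_like(p) / scale)     # damp noise on sensitive weights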
def __init__(self, args, env):
    self.args = args
    self.evolver = utils_ne.SSNE(self.args)
    # self.replay_buffer = replay_memory.ReplayMemory(args.buffer_size)

    # Init population and put the actors in eval mode
    self.pop = []
    for _ in range(args.pop_size):
        self.pop.append(ddpg.Actor(args))
    for actor in self.pop:
        actor.eval()

    # self.workers = [Worker.remote(args) for _ in range(self.args.pop_size+1)]
    # args.is_cuda = True; args.is_memory_cuda = True

    # Init RL Agent
    self.rl_agent = ddpg.DDPG(args)
    # self.rl_agent.share_memory()
    self.ounoise = ddpg.OUNoise(args.action_dim)

    # self.replay_queue = mp.Manager().Queue()  # mp.Manager().list()
    # self.replay_queue = mp.Queue()
    # self.replay_memory = mp.Manager().list()
    # self.replay_memory = mp.Array()
    # self.replay_memory = mp.Queue()

    # Workers: the evolutionary population plus the RL actor
    self.pop.append(self.rl_agent.actor)
    self.workers = self.pop

    # for key in range(self.args.pop_size):
    #     self.replay_memory[key] = replay_memory.ReplayMemory(self.args.buffer_size)
    # self.learner = LearnerThread(self.replay_memory, self.rl_agent)
    # self.learner.start()

    # Stats
    # self.timers = {
    #     k: TimerStat()
    #     for k in [
    #         "put_weights", "get_samples", "sample_processing",
    #         "replay_processing", "update_priorities", "train", "sample"
    #     ]
    # }

    # Trackers
    self.num_games = 0
    self.num_frames = 0
    self.gen_frames = 0
    self.len_replay = 0
def test_crossover(self):
    source_dir = 'exp/cheetah_sm0.1_distil_save_20/models/'
    models = [1400, 1600, 1800, 2200]
    parent1 = []
    parent2 = []
    normal_cro = []
    distil_cro = []
    p1s, p2s, ncs, dcs = [], [], [], []
    for i, model1 in enumerate(models):
        for j, model2 in enumerate(models):
            if j > i:
                print("========== Crossover between {} and {} ==============".format(model1, model2))
                critic = ddpg.Critic(self.args)
                critic_path = os.path.join(source_dir, 'evo_net_critic_{}.pkl'.format(model2))
                critic.load_state_dict(torch.load(critic_path))

                # Evaluate both parents
                agent1 = self.load_genetic_agent(source_dir, model1)
                agent2 = self.load_genetic_agent(source_dir, model2)
                p1_reward, p1_states = self.evaluate(agent1)
                p2_reward, p2_states = self.evaluate(agent2)
                parent1.append(p1_reward)
                parent2.append(p2_reward)
                p1s.append(p1_states)
                p2s.append(p2_states)

                # Standard crossover between clones of the parents
                ssne = mod_neuro_evo.SSNE(self.args, critic, None)
                child1 = ddpg.GeneticAgent(self.args)
                child2 = ddpg.GeneticAgent(self.args)
                ssne.clone(agent1, child1)
                ssne.clone(agent2, child2)
                ssne.crossover_inplace(child1, child2)
                c1_reward, c1_states = self.evaluate(child1)
                normal_cro.append(c1_reward)
                ncs.append(c1_states)

                # Distillation crossover (uses the critic passed to SSNE)
                child = ssne.distilation_crossover(agent1, agent2)
                c_reward, c_states = self.evaluate(child)
                distil_cro.append(c_reward)
                dcs.append(c_states)

                print(parent1[-1])
                print(parent2[-1])
                print(normal_cro[-1])
                print(distil_cro[-1])
                print()

    save_file = 'visualise/crossover'
    np.savez(save_file, p1=parent1, p2=parent2, nc=normal_cro, dc=distil_cro,
             p1s=p1s, p2s=p2s, ncs=ncs, dcs=dcs)
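# A minimal sketch (an assumption, not the repo's distilation_crossover) of the
# idea compared above: a fresh child policy is trained by behaviour cloning to
# imitate, state by state, whichever parent the critic scores higher. `child`,
# `parent1`, `parent2`, `critic` and `states` are placeholder torch modules/tensors.
import torch
import torch.nn.functional as F

def distillation_crossover_sketch(child, parent1, parent2, critic, states, epochs=100, lr=1e-3):
    opt = torch.optim.Adam(child.parameters(), lr=lr)
    with torch.no_grad():
        a1, a2 = parent1(states), parent2(states)
        # Per-state mask: 1 where parent1's action gets the higher Q-value.
        better = (critic(states, a1) >= critic(states, a2)).float()
        targets = better * a1 + (1.0 - better) * a2
    for _ in range(epochs):
        loss = F.mse_loss(child(states), targets)         # behaviour-cloning loss
        opt.zero_grad()
        loss.backward()
        opt.step()
    return child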