def evolve(self, pop, net_inds, fitness_evals, migration, states):
    """Method to implement a round of selection and mutation operation

        Parameters:
            pop (shared_list): Population of models
            net_inds (list): Indices of individuals evaluated this generation
            fitness_evals (list of lists): Fitness values for evaluated individuals
            migration (object): Policies from learners to be synced into population
            states (list): States used by get_anchors for diversity-based anchor selection

        Returns:
            int: Index of the leading anchor ('multipoint') or of the first new elitist ('standard')
    """

    self.gen += 1

    # Convert the list of fitness values corresponding to each individual into a float [CCEA Reduction]
    if isinstance(fitness_evals[0], list):
        for i in range(len(fitness_evals)):
            if self.ccea_reduction == "mean":
                fitness_evals[i] = sum(fitness_evals[i]) / len(fitness_evals[i])
            elif self.ccea_reduction == "leniency":
                fitness_evals[i] = max(fitness_evals[i])
            elif self.ccea_reduction == "min":
                fitness_evals[i] = min(fitness_evals[i])
            else:
                sys.exit('Incorrect CCEA Reduction scheme')

    # Append new fitness to lineage
    lineage_scores = []  # Tracks the average lineage score for the generation
    for ind, fitness in zip(net_inds, fitness_evals):
        self.lineage[ind].append(fitness)
        lineage_scores.append(
            0.75 * sum(self.lineage[ind]) / len(self.lineage[ind]) +
            0.25 * fitness)  # Current fitness is weighted higher than lineage info
        if len(self.lineage[ind]) > self.lineage_depth:
            self.lineage[ind].pop(0)  # Housekeeping

    # Entire epoch is handled with indices; rank nets by fitness evaluation (0 is the best after reversing)
    index_rank = self.list_argsort(fitness_evals)
    index_rank.reverse()
    elitist_index = index_rank[:self.num_elites]  # Elitist indexes safeguard

    # Lineage rankings to elitists
    lineage_rank = self.list_argsort(lineage_scores[:])
    lineage_rank.reverse()
    elitist_index = elitist_index + lineage_rank[:int(self.num_elites)]

    # Take out copies in elitist indices
    elitist_index = list(set(elitist_index))

    ########## MULTI_POINT SEARCH WITH ANCHORS/PROBES/BLENDS AND EXPLICIT DIVERSITY-BASED SEPARATION ##########
    if self.scheme == 'multipoint':

        # Compute anchors
        anchor_inds = self.get_anchors(states, pop, net_inds[:], np.array(lineage_rank[:]))

        # Remove duplicates between anchors and elitists
        elitist_index = [elite for elite in elitist_index if elite not in anchor_inds]

        ########## TRANSFER INDICES BACK TO POP INDICES: change from positions within net_inds to the actual indices in pop ##########
        elites = [net_inds[i] for i in elitist_index]
        anchors = [net_inds[i] for i in anchor_inds]
        anchor_fitnesses = [fitness_evals[i] for i in anchor_inds]
        anchor_index_ranks = [index_rank.index(i) for i in anchor_inds]

        # Unselects are the individuals left in the population
        unselects = [ind for ind in net_inds if ind not in elites and ind not in anchors]

        # Inheritance step (sync learners to population)
        for policy in migration:
            replacee = unselects.pop(0)
            utils.hard_update(target=pop[replacee], source=policy)
            self.lineage[replacee] = []  # Reinitialize as empty

        # Sample anchors from a probability distribution formed of their relative fitnesses using a roulette wheel
        probe_allocation_inds = self.roulette_wheel(anchor_fitnesses, len(unselects) - self.num_blends)
        sampled_anchors = [anchors[i] for i in probe_allocation_inds]

        # Mutate the sampled anchors to form probes
        for anchor_ind in sampled_anchors:
            replacee = unselects.pop(0)
            utils.hard_update(target=pop[replacee], source=pop[anchor_ind])
            self.lineage[replacee] = [utils.list_mean(self.lineage[anchor_ind])]  # Inherit lineage from the anchor
            self.mutate_inplace(pop[replacee])

        if random.random() < 0.1:
            print('Evo_Info #Anchors', len(anchors),
                  '#Probes_allocation', [sampled_anchors.count(i) for i in anchors],
                  '#elites', len(elites),
                  '#Blends', len(unselects),
                  '#Migration', len(migration),
                  'Nets', len(net_inds),
                  'Anchor fitness Ranks', anchor_index_ranks)

        ########## Create the blends to fill the rest of the unselects by crossovers ##########
        # Number of unselects left should be even
        if len(unselects) % 2 != 0:
            unselects.append(unselects[random.randint(0, len(unselects) - 1)])

        for i, j in zip(unselects[0::2], unselects[1::2]):
            off_i = random.choice(anchors)
            while True:
                off_j = random.choice(anchors)
                if off_j != off_i:
                    break
            utils.hard_update(target=pop[i], source=pop[off_i])
            utils.hard_update(target=pop[j], source=pop[off_j])
            self.crossover_inplace(pop[i], pop[j])
            self.lineage[i] = [0.5 * utils.list_mean(self.lineage[off_i]) + 0.5 * utils.list_mean(self.lineage[off_j])]
            self.lineage[j] = [0.5 * utils.list_mean(self.lineage[off_i]) + 0.5 * utils.list_mean(self.lineage[off_j])]

        return anchors[0]

    ########## OLD EVOLVER WITHOUT MULTI_POINT SEARCH ##########
    elif self.scheme == 'standard':

        # Selection step
        offsprings = self.selection_tournament(
            index_rank,
            num_offsprings=len(index_rank) - len(elitist_index) - len(migration),
            tournament_size=3)

        # Transcribe ranked indexes from now on to refer to net indexes
        elitist_index = [net_inds[i] for i in elitist_index]
        offsprings = [net_inds[i] for i in offsprings]

        # Figure out unselected candidates
        unselects = []
        new_elitists = []
        for i in range(len(pop)):
            if i in offsprings or i in elitist_index:
                continue
            else:
                unselects.append(i)
        random.shuffle(unselects)

        # Check how the migrated policies performed
        for ind in self.migrating_inds:
            if ind in offsprings or ind in elitist_index:
                self.rl_res['selects'] += 1
            else:
                self.rl_res['discarded'] += 1
        self.migrating_inds = []

        # Inheritance step (sync learners to population)
        for policy in migration:
            replacee = unselects.pop(0)
            utils.hard_update(target=pop[replacee], source=policy)
            self.migrating_inds.append(replacee)
            self.lineage[replacee] = [sum(lineage_scores) / len(lineage_scores)]  # Initialize as average

        # Elitism step, assigning elite candidates to some unselects
        for i in elitist_index:
            if len(unselects) >= 1:
                replacee = unselects.pop(0)
            elif len(offsprings) >= 1:
                replacee = offsprings.pop(0)
            else:
                continue
            new_elitists.append(replacee)
            utils.hard_update(target=pop[replacee], source=pop[i])
            self.lineage[replacee] = self.lineage[i][:]

        # Crossover for unselected genes with 100 percent probability
        if len(unselects) % 2 != 0:  # Number of unselects left should be even
            unselects.append(unselects[random.randint(0, len(unselects) - 1)])
        for i, j in zip(unselects[0::2], unselects[1::2]):
            off_i = random.choice(new_elitists)
            off_j = random.choice(offsprings)
            utils.hard_update(target=pop[i], source=pop[off_i])
            utils.hard_update(target=pop[j], source=pop[off_j])
            self.crossover_inplace(pop[i], pop[j])
            self.lineage[i] = [0.5 * utils.list_mean(self.lineage[off_i]) + 0.5 * utils.list_mean(self.lineage[off_j])]
            self.lineage[j] = [0.5 * utils.list_mean(self.lineage[off_i]) + 0.5 * utils.list_mean(self.lineage[off_j])]

        # Crossover for selected offsprings
        for i, j in zip(offsprings[0::2], offsprings[1::2]):
            if random.random() < self.crossover_prob:
                self.crossover_inplace(pop[i], pop[j])
                self.lineage[i] = [0.5 * utils.list_mean(self.lineage[i]) + 0.5 * utils.list_mean(self.lineage[j])]
                self.lineage[j] = [0.5 * utils.list_mean(self.lineage[i]) + 0.5 * utils.list_mean(self.lineage[j])]

        # Mutate all genes in the population except the new elitists
        for i in range(len(pop)):
            if i not in new_elitists:  # Spare the new elitists
                if random.random() < self.mutation_prob:
                    self.mutate_inplace(pop[i])

        self.all_offs[:] = offsprings[:]
        return new_elitists[0]

    else:
        sys.exit('Incorrect Evolution Scheme')
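# ----------------------------------------------------------------------------------------------
# Illustrative sketch (not the repository's implementation) of two helpers the evolver above
# relies on: list_argsort, which ranks individuals by fitness, and roulette_wheel, which
# allocates probes to anchors in proportion to their relative fitness. The signatures are
# assumptions inferred from the call sites in evolve().
# ----------------------------------------------------------------------------------------------
import random


def list_argsort(seq):
    """Return indices that would sort seq in ascending order (best index is last before reversing)."""
    return sorted(range(len(seq)), key=seq.__getitem__)


def roulette_wheel(fitnesses, num_samples):
    """Sample anchor positions with probability proportional to (shifted) fitness."""
    low = min(fitnesses)
    weights = [f - low + 1e-8 for f in fitnesses]  # Shift so all weights are positive
    total = sum(weights)
    probs = [w / total for w in weights]

    picks = []
    for _ in range(num_samples):
        r = random.random()
        cumulative = 0.0
        for idx, p in enumerate(probs):
            cumulative += p
            if r <= cumulative:
                picks.append(idx)
                break
        else:
            picks.append(len(probs) - 1)  # Guard against floating-point round-off
    return picks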
def train(self, gen, test_tracker):
    """Main training loop to do rollouts and run policy gradients

        Parameters:
            gen (int): Current epoch of training

        Returns:
            tuple: all_fits, pg_fits, test_fits
    """

    # Test Rollout
    if gen % self.args.test_gap == 0:
        self.test_agent.make_champ_team(self.agents)  # Sync the champ policies into the TestAgent
        self.test_task_pipes[0].send("START")

    # Figure out teams for Coevolution
    if self.args.ps == 'full' or self.args.ps == 'trunk':
        teams = [[i] for i in list(range(args.popn_size))]  # Homogeneous case is just the popn as a list of lists to maintain compatibility
    else:
        teams = self.make_teams(args.config.num_agents, args.popn_size, args.num_evals)  # Heterogeneous case

    ########## START EVO ROLLOUT ##########
    if self.args.popn_size > 0:
        for pipe, team in zip(self.evo_task_pipes, teams):
            pipe[0].send(team)

    ########## START POLICY GRADIENT ROLLOUT ##########
    if self.args.rollout_size > 0 and not RANDOM_BASELINE:
        # Sync pg_actors to their corresponding rollout_bucket
        for agent in self.agents:
            agent.update_rollout_actor()

        # Start rollouts using the rollout actors
        self.pg_task_pipes[0].send('START')  # Index 0 for the Rollout bucket

    ############ POLICY GRADIENT UPDATES #############
    # Spin up threads for each agent
    threads = [threading.Thread(target=agent.update_parameters, args=()) for agent in self.agents]

    # Start threads
    for thread in threads:
        thread.start()

    # Join threads
    for thread in threads:
        thread.join()

    all_fits = []
    ####### JOIN EVO ROLLOUTS ########
    if self.args.popn_size > 0:
        for pipe in self.evo_result_pipes:
            entry = pipe[1].recv()
            team = entry[0]
            fitness = entry[1][0]
            frames = entry[2]

            for agent_id, popn_id in enumerate(team):
                self.agents[agent_id].fitnesses[popn_id].append(utils.list_mean(fitness))  # Assign
            all_fits.append(utils.list_mean(fitness))

            self.total_frames += frames

    ####### JOIN PG ROLLOUTS ########
    pg_fits = []
    if self.args.rollout_size > 0 and not RANDOM_BASELINE:
        entry = self.pg_result_pipes[1].recv()
        pg_fits = entry[1][0]
        self.total_frames += entry[2]

    ####### JOIN TEST ROLLOUTS ########
    test_fits = []
    if gen % self.args.test_gap == 0:
        entry = self.test_result_pipes[1].recv()
        test_fits = entry[1][0]
        test_tracker.update([mod.list_mean(test_fits)], self.total_frames)
        self.test_trace.append(mod.list_mean(test_fits))

    # Evolution Step
    for agent in self.agents:
        agent.evolve()

    # Save models periodically
    if gen % 20 == 0:
        for id, test_actor in enumerate(self.test_agent.rollout_actor):
            torch.save(test_actor.state_dict(), self.args.model_save + str(id) + '_' + self.args.actor_fname)
        print("Models Saved")

    return all_fits, pg_fits, test_fits
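# ----------------------------------------------------------------------------------------------
# Minimal sketch of the rollout worker protocol implied by the pipe handshakes above: the trainer
# sends a team over the task pipe, the worker runs its evaluations, and replies with
# (team, [fitness_list], frames). The worker body (env, run_evaluations) is hypothetical; only
# the message shapes mirror how train() unpacks each result entry.
# ----------------------------------------------------------------------------------------------
def evo_rollout_worker(task_pipe, result_pipe, env, run_evaluations):
    """Service loop for one evolutionary rollout worker (illustrative only)."""
    while True:
        team = task_pipe.recv()                          # Population ids, one per agent slot
        fitness_list, frames = run_evaluations(env, team)  # Hypothetical episode runner
        result_pipe.send([team, [fitness_list], frames])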
agent.train(epoch)

# PRINT PROGRESS
print('Ep:', epoch,
      'Score cur/best:', [pprint(score) for score in agent.test_score], pprint(agent.best_score),
      'Time:', pprint(time.time() - gen_time),
      'Len', pprint(agent.test_len),
      'Best_action_noise_score', pprint(agent.best_action_noise_score),
      'Best_Agent_scores', [pprint(score) for score in agent.best_agent_scores])

# PRINT MORE DETAILED STATS PERIODICALLY
if epoch % 5 == 0:  # Special Stats
    print()
    print('#Data_Created', agent.buffer_added,
          'Q_Val Stats', pprint(list_mean(agent.rl_agent.q['min'])),
          pprint(list_mean(agent.rl_agent.q['max'])),
          pprint(list_mean(agent.rl_agent.q['mean'])),
          'Val Stats', pprint(list_mean(agent.rl_agent.val['min'])),
          pprint(list_mean(agent.rl_agent.val['max'])),
          pprint(list_mean(agent.rl_agent.val['mean'])))
    print()
    print('Memory_size/mil', pprint(agent.memory.num_entries / 1000000.0),
          'Algo:', parameters.best_fname,
          'Gamma', parameters.gamma,
          'RS_PROP', parameters.rs_proportional_shape,
          'ADVANTAGE', parameters.use_advantage)
    print('Action Noise Rollouts: ', [pprint(score) for score in agent.action_noise_scores])
    print()
    print(
def train(self, gen, test_tracker, prey_tracker):
    """Main training loop to do rollouts and run policy gradients

        Parameters:
            gen (int): Current epoch of training

        Returns:
            tuple: all_fits, pg_fits, test_fits, prey_score
    """

    # Test Rollout
    if gen % self.args.test_gap == 0:
        self.test_agent.make_champ_team(self.agents, self.prey_agent)  # Sync the champ policies into the TestAgent
        self.test_task_pipes[0].send("START")

    # Figure out teams for Coevolution
    teams = [[i] for i in list(range(args.popn_size))]  # Homogeneous case is just the popn as a list of lists to maintain compatibility

    ########## START EVO ROLLOUT ##########
    if self.args.popn_size > 0:
        for pipe, team in zip(self.evo_task_pipes, teams):
            pipe[0].send(team)

    ########## START POLICY GRADIENT ROLLOUT ##########
    if self.args.rollout_size > 0 and not RANDOM_BASELINE:
        # Sync pg_actors to their corresponding rollout_bucket
        self.agents.update_rollout_actor()
        self.prey_agent.update_rollout_actor()

        # Start rollouts using the rollout actors
        self.pg_task_pipes[0].send('START')  # Index 0 for the Rollout bucket

    ############ POLICY GRADIENT UPDATES #############
    # Update the predator learner, then the prey learner (no threading here)
    self.agents.update_parameters()
    self.prey_agent.update_parameters()

    all_fits = []
    ####### JOIN EVO ROLLOUTS ########
    if self.args.popn_size > 0:
        for pipe in self.evo_result_pipes:
            entry = pipe[1].recv()
            team = entry[0]
            fitness = entry[1][0]
            frames = entry[2]

            for agent_id, popn_id in enumerate(team):
                self.agents.fitnesses[popn_id].append(utils.list_mean(fitness))  # Assign
            all_fits.append(utils.list_mean(fitness))

            self.total_frames += frames

    ####### JOIN PG ROLLOUTS ########
    pg_fits = []
    if self.args.rollout_size > 0 and not RANDOM_BASELINE:
        entry = self.pg_result_pipes[1].recv()
        pg_fits = entry[1][0]
        self.total_frames += entry[2]

    ####### JOIN TEST ROLLOUTS ########
    test_fits = []
    prey_score = 0.0
    if gen % self.args.test_gap == 0:
        entry = self.test_result_pipes[1].recv()
        test_fits = entry[1][0]
        prey_score = mod.list_mean(entry[1][1])
        prey_tracker.update([prey_score], self.total_frames)
        test_tracker.update([mod.list_mean(test_fits)], self.total_frames)
        self.test_trace.append(mod.list_mean(test_fits))

    # Evolution Step
    self.agents.evolve()

    # Save models periodically
    if gen % 20 == 0:
        torch.save(self.test_agent.predator[0].state_dict(), self.args.model_save + 'predator_' + self.args.savetag)
        torch.save(self.test_agent.prey[0].state_dict(), self.args.model_save + 'prey_' + self.args.savetag)
        print("Models Saved")

    return all_fits, pg_fits, test_fits, prey_score
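# ----------------------------------------------------------------------------------------------
# Illustrative sketch of the score-tracker interface assumed by test_tracker / prey_tracker above:
# an update([score], frames) call that logs a running trace. This is an assumption based on the
# call sites, not the repository's actual tracker class.
# ----------------------------------------------------------------------------------------------
class SimpleTracker:
    """Keeps (frames, score) pairs so learning progress can be plotted later (sketch)."""

    def __init__(self):
        self.traces = []

    def update(self, scores, frames):
        for score in scores:
            self.traces.append((frames, score))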
def evolve(self, pop, net_inds, fitness_evals, migration):
    """Method to implement a round of selection and mutation operation

        Parameters:
            pop (shared_list): Population of models
            net_inds (list): Indices of individuals evaluated this generation
            fitness_evals (list of lists): Fitness values for evaluated individuals
            migration (object): Policies from learners to be synced into population

        Returns:
            int: Index of the first new elitist in pop
    """

    self.gen += 1

    # Convert the list of fitness values corresponding to each individual into a float [CCEA Reduction]
    if isinstance(fitness_evals[0], list):
        for i in range(len(fitness_evals)):
            if self.ccea_reduction == "mean":
                fitness_evals[i] = sum(fitness_evals[i]) / len(fitness_evals[i])
            elif self.ccea_reduction == "leniency":
                fitness_evals[i] = max(fitness_evals[i])
            elif self.ccea_reduction == "min":
                fitness_evals[i] = min(fitness_evals[i])
            else:
                sys.exit('Incorrect CCEA Reduction scheme')

    # Append new fitness to lineage
    lineage_scores = []  # Tracks the average lineage score for the generation
    for ind, fitness in zip(net_inds, fitness_evals):
        self.lineage[ind].append(fitness)
        lineage_scores.append(
            0.75 * sum(self.lineage[ind]) / len(self.lineage[ind]) +
            0.25 * fitness)  # Current fitness is weighted higher than lineage info
        if len(self.lineage[ind]) > self.lineage_depth:
            self.lineage[ind].pop(0)  # Housekeeping

    # Entire epoch is handled with indices; rank nets by fitness evaluation (0 is the best after reversing)
    index_rank = self.list_argsort(fitness_evals)
    index_rank.reverse()
    elitist_index = index_rank[:self.num_elites]  # Elitist indexes safeguard

    # Lineage rankings to elitists
    lineage_rank = self.list_argsort(lineage_scores[:])
    lineage_rank.reverse()
    elitist_index = elitist_index + lineage_rank[:int(self.num_elites)]

    # Take out copies in elitist indices
    elitist_index = list(set(elitist_index))

    # Selection step
    offsprings = self.selection_tournament(
        index_rank,
        num_offsprings=len(index_rank) - len(elitist_index) - len(migration),
        tournament_size=3)

    # Transcribe ranked indexes from now on to refer to net indexes
    elitist_index = [net_inds[i] for i in elitist_index]
    offsprings = [net_inds[i] for i in offsprings]

    # Figure out unselected candidates
    unselects = []
    new_elitists = []
    for i in range(len(pop)):
        if i in offsprings or i in elitist_index:
            continue
        else:
            unselects.append(i)
    random.shuffle(unselects)

    # Inheritance step (sync learners to population)
    for policy in migration:
        replacee = unselects.pop(0)
        utils.hard_update(target=pop[replacee], source=policy)
        self.lineage[replacee] = [sum(lineage_scores) / len(lineage_scores)]  # Initialize as average

    # Elitism step, assigning elite candidates to some unselects
    for i in elitist_index:
        if len(unselects) >= 1:
            replacee = unselects.pop(0)
        elif len(offsprings) >= 1:
            replacee = offsprings.pop(0)
        else:
            continue
        new_elitists.append(replacee)
        utils.hard_update(target=pop[replacee], source=pop[i])
        self.lineage[replacee] = self.lineage[i][:]

    # Crossover for unselected genes with 100 percent probability
    if len(unselects) % 2 != 0:  # Number of unselects left should be even
        unselects.append(unselects[random.randint(0, len(unselects) - 1)])
    for i, j in zip(unselects[0::2], unselects[1::2]):
        off_i = random.choice(new_elitists)
        off_j = random.choice(offsprings)
        utils.hard_update(target=pop[i], source=pop[off_i])
        utils.hard_update(target=pop[j], source=pop[off_j])
        self.crossover_inplace(pop[i], pop[j])
        self.lineage[i] = [0.5 * utils.list_mean(self.lineage[off_i]) + 0.5 * utils.list_mean(self.lineage[off_j])]
        self.lineage[j] = [0.5 * utils.list_mean(self.lineage[off_i]) + 0.5 * utils.list_mean(self.lineage[off_j])]

    # Crossover for selected offsprings
    for i, j in zip(offsprings[0::2], offsprings[1::2]):
        if random.random() < self.crossover_prob:
            self.crossover_inplace(pop[i], pop[j])
            self.lineage[i] = [0.5 * utils.list_mean(self.lineage[i]) + 0.5 * utils.list_mean(self.lineage[j])]
            self.lineage[j] = [0.5 * utils.list_mean(self.lineage[i]) + 0.5 * utils.list_mean(self.lineage[j])]

    # Mutate all genes in the population except the new elitists
    for i in range(len(pop)):
        if i not in new_elitists:  # Spare the new elitists
            if random.random() < self.mutation_prob:
                self.mutate_inplace(pop[i])

    self.all_offs[:] = offsprings[:]
    return new_elitists[0]
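# ----------------------------------------------------------------------------------------------
# Illustrative sketch (assumed, not the repository's code) of the tournament selection used
# above: repeatedly sample tournament_size positions from the best-first ranking and keep the
# fittest one, until num_offsprings winners have been chosen. Winners may repeat across
# tournaments here; the real evolver may deduplicate them.
# ----------------------------------------------------------------------------------------------
import random


def selection_tournament(index_rank, num_offsprings, tournament_size):
    """Pick num_offsprings entries from index_rank via size-k tournaments (sketch)."""
    offsprings = []
    for _ in range(num_offsprings):
        contestants = random.sample(range(len(index_rank)), tournament_size)
        winner = min(contestants)  # index_rank is sorted best-first, so the lowest position wins
        offsprings.append(index_rank[winner])
    return offsprings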
def train(self, gen, test_tracker):
    """Main training loop to do rollouts and run policy gradients

        Parameters:
            gen (int): Current epoch of training

        Returns:
            tuple: all_fits, pg_fits, test_fits
    """

    # Test Rollout
    if gen % self.args.test_gap == 0:
        self.test_agent.make_champ_team(self.agents)  # Sync the champ policies into the TestAgent
        self.test_task_pipes[0].send("START")  # Send the START signal

    # Figure out teams for Coevolution
    if self.args.ps == 'full' or self.args.ps == 'trunk':
        # Returns [[0], [1], [2], ...]: the homogeneous case is just the popn as a list of lists to maintain compatibility
        teams = [[i] for i in list(range(args.popn_size))]
    else:
        # Heterogeneous case: returns shuffled teams such as [[0, 1, 2, 3, ...], [2, 3, 1, 0, ...], ...],
        # where entry i is the population index of the individual assigned to agent i
        teams = self.make_teams(args.config.num_agents, args.popn_size, args.num_evals)

    ########## START EVO ROLLOUT ##########
    if self.args.popn_size > 0:
        for pipe, team in zip(self.evo_task_pipes, teams):
            pipe[0].send(team)  # Send the team signal

    ########## START POLICY GRADIENT ROLLOUT ##########
    if self.args.rollout_size > 0 and not RANDOM_BASELINE:
        # Sync pg_actors to their corresponding rollout_bucket
        for agent in self.agents:
            agent.update_rollout_actor()  # Each agent wraps a different neural network

        # Start rollouts using the rollout actors
        self.pg_task_pipes[0].send('START')  # Index 0 for the Rollout bucket

    ############ POLICY GRADIENT UPDATES #############
    # Spin up threads for each agent; only the PG learners update here, the evolutionary step comes later
    threads = [threading.Thread(target=agent.update_parameters, args=()) for agent in self.agents]

    # Start threads
    for thread in threads:
        thread.start()

    # Join threads
    for thread in threads:
        thread.join()

    all_fits = []
    ####### JOIN EVO ROLLOUTS ########
    if self.args.popn_size > 0:
        for pipe in self.evo_result_pipes:  # One result pipe per team rollout
            entry = pipe[1].recv()
            team = entry[0]        # Team members: population ids, one per agent slot
            fitness = entry[1][0]  # List of fitness values, one per evaluation of this team
            frames = entry[2]

            # Credit assignment: record the mean fitness of this rollout against the population id
            # each agent slot used, so every individual accumulates scores from the teams it joined
            for agent_id, popn_id in enumerate(team):
                self.agents[agent_id].fitnesses[popn_id].append(utils.list_mean(fitness))
            all_fits.append(utils.list_mean(fitness))

            self.total_frames += frames

    ####### JOIN PG ROLLOUTS ########
    pg_fits = []
    if self.args.rollout_size > 0 and not RANDOM_BASELINE:
        entry = self.pg_result_pipes[1].recv()  # Fitness values aggregated across all PG rollouts (50 in this configuration)
        pg_fits = entry[1][0]
        self.total_frames += entry[2]

    ####### JOIN TEST ROLLOUTS ########
    test_fits = []
    if gen % self.args.test_gap == 0:
        entry = self.test_result_pipes[1].recv()
        test_fits = entry[1][0]
        test_tracker.update([mod.list_mean(test_fits)], self.total_frames)
        self.test_trace.append(mod.list_mean(test_fits))

    # Evolution Step (selection and mutation)
    for agent in self.agents:
        agent.evolve()

    # Save models periodically
    if gen % 20 == 0:
        for id, test_actor in enumerate(self.test_agent.rollout_actor):
            torch.save(test_actor.state_dict(), self.args.model_save + str(id) + '_' + self.args.actor_fname)
        print("Models Saved")

    return all_fits, pg_fits, test_fits
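# ----------------------------------------------------------------------------------------------
# Illustrative snippet for loading one of the periodically saved actors back for evaluation. The
# actor object and argument names are placeholders; only the path convention
# (model_save + str(id) + '_' + actor_fname) mirrors the torch.save call above.
# ----------------------------------------------------------------------------------------------
import torch


def load_saved_actor(actor, model_save, agent_id, actor_fname):
    """Restore a saved actor's weights in place (sketch)."""
    path = model_save + str(agent_id) + '_' + actor_fname
    actor.load_state_dict(torch.load(path))
    actor.eval()  # Inference mode for evaluation rollouts
    return actor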