Example #1
    def evolve(self, pop, net_inds, fitness_evals, migration, states):
        """Method to implement a round of selection and mutation operation
			Parameters:
				  pop (shared_list): Population of models
				  net_inds (list): Indices of individuals evaluated this generation
				  fitness_evals (list of lists): Fitness values for evaluated individuals
				  migration (object): Policies from learners to be synced into population
				  states (object): States passed to get_anchors for anchor selection (multipoint scheme)
			Returns:
				int: Population index of the first anchor (multipoint) or the first new elitist (standard)
		"""

        self.gen += 1

        # Convert the list of fitness values corresponding to each individual into a float [CCEA Reduction]
        if isinstance(fitness_evals[0], list):
            for i in range(len(fitness_evals)):
                if self.ccea_reduction == "mean":
                    fitness_evals[i] = sum(fitness_evals[i]) / len(
                        fitness_evals[i])
                elif self.ccea_reduction == "leniency":
                    fitness_evals[i] = max(fitness_evals[i])
                elif self.ccea_reduction == "min":
                    fitness_evals[i] = min(fitness_evals[i])
                else:
                    sys.exit('Incorrect CCEA Reduction scheme')

        # Append new fitness to lineage
        lineage_scores = []  # Tracks the average lineage score for the generation
        for ind, fitness in zip(net_inds, fitness_evals):
            self.lineage[ind].append(fitness)
            lineage_scores.append(
                0.75 * sum(self.lineage[ind]) / len(self.lineage[ind]) +
                0.25 * fitness
            )  # Current fitness is weighted higher than lineage info
            if len(self.lineage[ind]) > self.lineage_depth:
                self.lineage[ind].pop(0)  # Housekeeping

        # Entire epoch is handled with indices; Index rank nets by fitness evaluation (0 is the best after reversing)
        index_rank = self.list_argsort(fitness_evals)
        index_rank.reverse()
        elitist_index = index_rank[:self.num_elites]  # Elitist indexes safeguard

        # Add the top lineage-ranked individuals to the elitists
        lineage_rank = self.list_argsort(lineage_scores[:])
        lineage_rank.reverse()
        elitist_index = elitist_index + lineage_rank[:int(self.num_elites)]

        # Remove duplicates from the elitist indices
        elitist_index = list(set(elitist_index))

        #################### MULTI_POINT SEARCH WITH ANCHORS/PROBES/BLENDS AND EXPLICIT DIVERSITY-BASED SEPARATION
        if self.scheme == 'multipoint':

            # Compute anchors
            anchor_inds = self.get_anchors(states, pop, net_inds[:],
                                           np.array(lineage_rank[:]))

            # Remove duplicates between anchors and elitists
            elitist_index = [ind for ind in elitist_index if ind not in anchor_inds]

            ##################### MAP INDICES BACK TO POP INDICES: from positions within net_inds to the real indices in pop ###############################
            elites = [net_inds[i] for i in elitist_index]
            anchors = [net_inds[i] for i in anchor_inds]
            anchor_fitnesses = [fitness_evals[i] for i in anchor_inds]
            anchor_index_ranks = [index_rank.index(i) for i in anchor_inds]
            #######################################################################################################################################################

            # Unselects are the individuals left in the population
            unselects = [
                ind for ind in net_inds
                if ind not in elites and ind not in anchors
            ]

            # Inheritance step (sync learners to population)
            for policy in migration:
                replacee = unselects.pop(0)
                utils.hard_update(target=pop[replacee], source=policy)
                # wwid = genealogy.asexual(int(policy.wwid.item()))
                # pop[replacee].wwid[0] = wwid
                self.lineage[replacee] = []  # Reinitialize as empty

            # Sample anchors from a probability distribution formed from their relative fitnesses (roulette wheel)
            probe_allocation_inds = self.roulette_wheel(
                anchor_fitnesses,
                len(unselects) - self.num_blends)
            sampled_anchors = [anchors[i] for i in probe_allocation_inds]

            # Mutate the anchors to form probes
            for anchor_ind in sampled_anchors:
                # Mutate to form probes from anchors
                replacee = unselects.pop(0)
                utils.hard_update(target=pop[replacee], source=pop[anchor_ind])
                self.lineage[replacee] = [
                    utils.list_mean(self.lineage[anchor_ind])
                ]  # Inherit lineage from the anchor
                self.mutate_inplace(pop[replacee])
            # genealogy.mutation(int(pop[replacee].wwid.item()), gen)

            if random.random() < 0.1:
                print('Evo_Info #Anchors', len(anchors), '#Probes_allocation',
                      [sampled_anchors.count(i) for i in anchors], '#elites',
                      len(elites), '#Blends', len(unselects), '#Migration',
                      len(migration), 'Nets', len(net_inds),
                      'Anchor fitness Ranks', anchor_index_ranks)

            ###### Create the blends to fill the rest of the unselects by crossovers #########
            # Number of unselects left should be even
            if len(unselects) % 2 != 0:
                unselects.append(unselects[random.randint(0, len(unselects) - 1)])

            for i, j in zip(unselects[0::2], unselects[1::2]):
                off_i = random.choice(anchors)
                while True:
                    off_j = random.choice(anchors)
                    if off_j != off_i: break

                utils.hard_update(target=pop[i], source=pop[off_i])
                utils.hard_update(target=pop[j], source=pop[off_j])
                self.crossover_inplace(pop[i], pop[j])
                # wwid1 = genealogy.crossover(int(pop[off_i].wwid.item()), int(pop[off_j].wwid.item()), gen)
                # wwid2 = genealogy.crossover(int(pop[off_i].wwid.item()), int(pop[off_j].wwid.item()), gen)
                # pop[i].wwid[0] = wwid1; pop[j].wwid[0] = wwid2
                self.lineage[i] = [
                    0.5 * utils.list_mean(self.lineage[off_i]) +
                    0.5 * utils.list_mean(self.lineage[off_j])
                ]
                self.lineage[j] = [
                    0.5 * utils.list_mean(self.lineage[off_i]) +
                    0.5 * utils.list_mean(self.lineage[off_j])
                ]

            return anchors[0]

        ####################### OLD EVOLVER WITHOUT MULTI_POINT SEARCH ###########
        elif self.scheme == 'standard':

            # Selection step
            offsprings = self.selection_tournament(
                index_rank,
                num_offsprings=len(index_rank) - len(elitist_index) - len(migration),
                tournament_size=3)

            # From here on, ranked indexes refer to net indexes
            elitist_index = [net_inds[i] for i in elitist_index]
            offsprings = [net_inds[i] for i in offsprings]

            # Figure out unselected candidates
            unselects = []
            new_elitists = []
            for i in range(len(pop)):
                if i in offsprings or i in elitist_index:
                    continue
                else:
                    unselects.append(i)
            random.shuffle(unselects)

            # Check for migration's performance
            for ind in self.migrating_inds:
                if ind in offsprings or ind in elitist_index:
                    self.rl_res['selects'] += 1
                else:
                    self.rl_res['discarded'] += 1
            self.migrating_inds = []

            # Inheritance step (sync learners to population)
            for policy in migration:
                replacee = unselects.pop(0)
                utils.hard_update(target=pop[replacee], source=policy)
                self.migrating_inds.append(replacee)
                self.lineage[replacee] = [
                    sum(lineage_scores) / len(lineage_scores)
                ]  # Initialize as average

            # Elitism step, assigning elite candidates to some unselects
            for i in elitist_index:
                if len(unselects) >= 1:
                    replacee = unselects.pop(0)
                elif len(offsprings) >= 1:
                    replacee = offsprings.pop(0)
                else:
                    continue
                new_elitists.append(replacee)
                utils.hard_update(target=pop[replacee], source=pop[i])
                # wwid = genealogy.asexual(int(pop[i].wwid.item()))
                # pop[replacee].wwid[0] = wwid
                # genealogy.elite(wwid, gen)

                self.lineage[replacee] = self.lineage[i][:]

            # Crossover for unselected genes with 100 percent probability
            if len(unselects) % 2 != 0:  # Number of unselects left should be even
                unselects.append(unselects[random.randint(0, len(unselects) - 1)])
            for i, j in zip(unselects[0::2], unselects[1::2]):
                off_i = random.choice(new_elitists)
                off_j = random.choice(offsprings)
                utils.hard_update(target=pop[i], source=pop[off_i])
                utils.hard_update(target=pop[j], source=pop[off_j])
                self.crossover_inplace(pop[i], pop[j])
                # wwid1 = genealogy.crossover(int(pop[off_i].wwid.item()), int(pop[off_j].wwid.item()), gen)
                # wwid2 = genealogy.crossover(int(pop[off_i].wwid.item()), int(pop[off_j].wwid.item()), gen)
                # pop[i].wwid[0] = wwid1; pop[j].wwid[0] = wwid2

                self.lineage[i] = [
                    0.5 * utils.list_mean(self.lineage[off_i]) +
                    0.5 * utils.list_mean(self.lineage[off_j])
                ]
                self.lineage[j] = [
                    0.5 * utils.list_mean(self.lineage[off_i]) +
                    0.5 * utils.list_mean(self.lineage[off_j])
                ]

            # Crossover for selected offsprings
            for i, j in zip(offsprings[0::2], offsprings[1::2]):
                if random.random() < self.crossover_prob:
                    self.crossover_inplace(pop[i], pop[j])
                    # wwid1 = genealogy.crossover(int(pop[i].wwid.item()), int(pop[j].wwid.item()), gen)
                    # wwid2 = genealogy.crossover(int(pop[i].wwid.item()), int(pop[j].wwid.item()), gen)
                    # pop[i].wwid[0] = wwid1; pop[j].wwid[0] = wwid2
                    self.lineage[i] = [
                        0.5 * utils.list_mean(self.lineage[i]) +
                        0.5 * utils.list_mean(self.lineage[j])
                    ]
                    self.lineage[j] = [
                        0.5 * utils.list_mean(self.lineage[i]) +
                        0.5 * utils.list_mean(self.lineage[j])
                    ]

            # Mutate all genes in the population except the new elitists
            for i in range(len(pop)):
                if i not in new_elitists:  # Spare the new elitists
                    if random.random() < self.mutation_prob:
                        self.mutate_inplace(pop[i])
            # genealogy.mutation(int(pop[net_i].wwid.item()), gen)

            self.all_offs[:] = offsprings[:]
            return new_elitists[0]

        else:
            sys.exit('Incorrect Evolution Scheme')
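
The evolve() above leans on a few small helpers (self.list_argsort, self.roulette_wheel, utils.list_mean, utils.hard_update). A minimal, self-contained sketch of plausible stand-ins, inferred only from how they are called here; the repository's actual utilities may be implemented differently.

import random

def list_argsort(seq):
    # Indices that would sort seq in ascending order (reversed by the caller so 0 is best)
    return sorted(range(len(seq)), key=seq.__getitem__)

def list_mean(seq):
    # Mean of a non-empty list of floats
    return sum(seq) / len(seq)

def hard_update(target, source):
    # Copy parameters from the source network into the target network
    for t_param, s_param in zip(target.parameters(), source.parameters()):
        t_param.data.copy_(s_param.data)

def roulette_wheel(fitnesses, num_samples):
    # Draw num_samples indices with probability proportional to shifted fitness
    shifted = [f - min(fitnesses) + 1e-8 for f in fitnesses]
    return random.choices(range(len(fitnesses)), weights=shifted, k=num_samples)
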
Example #2
File: train.py  Project: wsg1873/MERL
	def train(self, gen, test_tracker):
		"""Main training loop to do rollouts and run policy gradients

			Parameters:
				gen (int): Current epoch of training
				test_tracker (object): Tracker used to log test-rollout scores

			Returns:
				all_fits, pg_fits, test_fits (lists): Fitness values from evo, PG and test rollouts
		"""

		# Test Rollout
		if gen % self.args.test_gap == 0:
			self.test_agent.make_champ_team(self.agents)  # Sync the champ policies into the TestAgent
			self.test_task_pipes[0].send("START")

		# Figure out teams for Coevolution
		if self.args.ps == 'full' or self.args.ps == 'trunk':
			teams = [[i] for i in list(range(args.popn_size))]  # Homogeneous case is just the popn as a list of lists to maintain compatibility
		else:
			teams = self.make_teams(args.config.num_agents, args.popn_size, args.num_evals)  # Heterogeneous Case

		########## START EVO ROLLOUT ##########
		if self.args.popn_size > 0:
			for pipe, team in zip(self.evo_task_pipes, teams):
				pipe[0].send(team)

		########## START POLICY GRADIENT ROLLOUT ##########
		if self.args.rollout_size > 0 and not RANDOM_BASELINE:
			# Sync pg_actors to their corresponding rollout_bucket
			for agent in self.agents: agent.update_rollout_actor()

			# Start rollouts using the rollout actors
			self.pg_task_pipes[0].send('START')  # Index 0 for the Rollout bucket

			############ POLICY GRADIENT UPDATES #########
			# Spin up threads for each agent
			threads = [threading.Thread(target=agent.update_parameters, args=()) for agent in self.agents]

			# Start threads
			for thread in threads: thread.start()

			# Join threads
			for thread in threads: thread.join()

		all_fits = []
		####### JOIN EVO ROLLOUTS ########
		if self.args.popn_size > 0:
			for pipe in self.evo_result_pipes:
				entry = pipe[1].recv()
				team = entry[0]
				fitness = entry[1][0]
				frames = entry[2]

				for agent_id, popn_id in enumerate(team):
					self.agents[agent_id].fitnesses[popn_id].append(utils.list_mean(fitness))  # Assign
				all_fits.append(utils.list_mean(fitness))
				self.total_frames += frames

		####### JOIN PG ROLLOUTS ########
		pg_fits = []
		if self.args.rollout_size > 0 and not RANDOM_BASELINE:
			entry = self.pg_result_pipes[1].recv()
			pg_fits = entry[1][0]
			self.total_frames += entry[2]

		####### JOIN TEST ROLLOUTS ########
		test_fits = []
		if gen % self.args.test_gap == 0:
			entry = self.test_result_pipes[1].recv()
			test_fits = entry[1][0]
			test_tracker.update([mod.list_mean(test_fits)], self.total_frames)
			self.test_trace.append(mod.list_mean(test_fits))

		# Evolution Step
		for agent in self.agents:
			agent.evolve()

		#Save models periodically
		if gen % 20 == 0:
			for id, test_actor in enumerate(self.test_agent.rollout_actor):
				torch.save(test_actor.state_dict(), self.args.model_save + str(id) + '_' + self.args.actor_fname)
			print("Models Saved")

		return all_fits, pg_fits, test_fits
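
train() above dispatches teams through self.evo_task_pipes and collects results from self.evo_result_pipes. A minimal sketch of that pipe-based dispatch/collect pattern, assuming a placeholder worker and the entry layout [team, [fitness_list], frames] that the unpacking above implies; it is not the project's actual rollout worker.

import random
from multiprocessing import Pipe, Process

def rollout_worker(task_conn, result_conn):
    # Worker loop: receive a team, run a placeholder rollout, send results back
    while True:
        team = task_conn.recv()
        if team == 'TERMINATE':
            break
        fitness = [random.random()]               # stand-in for an episode's returns
        result_conn.send([team, [fitness], 100])  # [team, [fitness_list], frames]

if __name__ == '__main__':
    evo_task_pipes = [Pipe() for _ in range(4)]
    evo_result_pipes = [Pipe() for _ in range(4)]
    workers = [Process(target=rollout_worker, args=(t[1], r[0]))
               for t, r in zip(evo_task_pipes, evo_result_pipes)]
    for w in workers:
        w.start()

    for pipe, team in zip(evo_task_pipes, [[0], [1], [2], [3]]):
        pipe[0].send(team)            # same send pattern as train()
    for pipe in evo_result_pipes:
        entry = pipe[1].recv()        # same recv pattern as train()
        print(entry[0], entry[1][0], entry[2])

    for pipe in evo_task_pipes:
        pipe[0].send('TERMINATE')
    for w in workers:
        w.join()
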
Example #3
        agent.train(epoch)

        #PRINT PROGRESS
        print('Ep:', epoch, 'Score cur/best:',
              [pprint(score) for score in agent.test_score],
              pprint(agent.best_score),
              'Time:', pprint(time.time() - gen_time), 'Len',
              pprint(agent.test_len), 'Best_action_noise_score',
              pprint(agent.best_action_noise_score), 'Best_Agent_scores',
              [pprint(score) for score in agent.best_agent_scores])

        #PRINT MORE DETAILED STATS PERIODICALLY
        if epoch % 5 == 0:  #Special Stats
            print()
            print('#Data_Created', agent.buffer_added, 'Q_Val Stats',
                  pprint(list_mean(agent.rl_agent.q['min'])),
                  pprint(list_mean(agent.rl_agent.q['max'])),
                  pprint(list_mean(agent.rl_agent.q['mean'])), 'Val Stats',
                  pprint(list_mean(agent.rl_agent.val['min'])),
                  pprint(list_mean(agent.rl_agent.val['max'])),
                  pprint(list_mean(agent.rl_agent.val['mean'])))
            print()
            print('Memory_size/mil',
                  pprint(agent.memory.num_entries / 1000000.0), 'Algo:',
                  parameters.best_fname, 'Gamma', parameters.gamma, 'RS_PROP',
                  parameters.rs_proportional_shape, 'ADVANTAGE',
                  parameters.use_advantage)
            print('Action Noise Rollouts: ',
                  [pprint(score) for score in agent.action_noise_scores])
            print()
            print(
Example #4
	def train(self, gen, test_tracker, prey_tracker):
		"""Main training loop to do rollouts and run policy gradients

			Parameters:
				gen (int): Current epoch of training
				test_tracker (object): Tracker used to log predator test scores
				prey_tracker (object): Tracker used to log prey test scores

			Returns:
				all_fits, pg_fits, test_fits, prey_score
		"""

		# Test Rollout
		if gen % self.args.test_gap == 0:
			self.test_agent.make_champ_team(self.agents, self.prey_agent)  # Sync the champ policies into the TestAgent
			self.test_task_pipes[0].send("START")

		# Figure out teams for Coevolution
		teams = [[i] for i in list(range(args.popn_size))]  # Homogeneous case is just the popn as a list of lists to maintain compatibility

		########## START EVO ROLLOUT ##########
		if self.args.popn_size > 0:
			for pipe, team in zip(self.evo_task_pipes, teams):
				pipe[0].send(team)

		########## START POLICY GRADIENT ROLLOUT ##########
		if self.args.rollout_size > 0 and not RANDOM_BASELINE:
			# Sync pg_actors to their corresponding rollout_bucket
			self.agents.update_rollout_actor()
			self.prey_agent.update_rollout_actor()

			# Start rollouts using the rollout actors
			self.pg_task_pipes[0].send('START')  # Index 0 for the Rollout bucket

			############ POLICY GRADIENT UPDATES #########
			# Run the policy gradient updates for the predator agents
			self.agents.update_parameters()

		# PREY
		self.prey_agent.update_parameters()

		all_fits = []
		####### JOIN EVO ROLLOUTS ########
		if self.args.popn_size > 0:
			for pipe in self.evo_result_pipes:
				entry = pipe[1].recv()
				team = entry[0]
				fitness = entry[1][0]
				frames = entry[2]

				for agent_id, popn_id in enumerate(team):
					self.agents.fitnesses[popn_id].append(utils.list_mean(fitness))  ##Assign
				all_fits.append(utils.list_mean(fitness))
				self.total_frames += frames

		####### JOIN PG ROLLOUTS ########
		pg_fits = []
		if self.args.rollout_size > 0 and not RANDOM_BASELINE:
			entry = self.pg_result_pipes[1].recv()
			pg_fits = entry[1][0]
			self.total_frames += entry[2]

		####### JOIN TEST ROLLOUTS ########
		test_fits = []; prey_score = 0.0
		if gen % self.args.test_gap == 0:
			entry = self.test_result_pipes[1].recv()
			test_fits = entry[1][0]
			prey_score = mod.list_mean(entry[1][1])
			prey_tracker.update([prey_score], self.total_frames)
			test_tracker.update([mod.list_mean(test_fits)], self.total_frames)
			self.test_trace.append(mod.list_mean(test_fits))

		# Evolution Step
		self.agents.evolve()

		#Save models periodically
		if gen % 20 == 0:
			torch.save(self.test_agent.predator[0].state_dict(), self.args.model_save + 'predator_' + self.args.savetag)
			torch.save(self.test_agent.prey[0].state_dict(), self.args.model_save + 'prey_' + self.args.savetag)
			print("Models Saved")

		return all_fits, pg_fits, test_fits, prey_score
Example #5
    def evolve(self, pop, net_inds, fitness_evals, migration):
        """Method to implement a round of selection and mutation operation

			Parameters:
				  pop (shared_list): Population of models
				  net_inds (list): Indices of individuals evaluated this generation
				  fitness_evals (list of lists): Fitness values for evaluated individuals
				  migration (object): Policies from learners to be synced into population

			Returns:
				int: Population index of the first new elitist

		"""

        self.gen += 1

        #Convert the list of fitness values corresponding to each individual into a float [CCEA Reduction]
        if isinstance(fitness_evals[0], list):
            for i in range(len(fitness_evals)):
                if self.ccea_reduction == "mean":
                    fitness_evals[i] = sum(fitness_evals[i]) / len(
                        fitness_evals[i])
                elif self.ccea_reduction == "leniency":
                    fitness_evals[i] = max(fitness_evals[i])
                elif self.ccea_reduction == "min":
                    fitness_evals[i] = min(fitness_evals[i])
                else:
                    sys.exit('Incorrect CCEA Reduction scheme')

        #Append new fitness to lineage
        lineage_scores = []  # Tracks the average lineage score for the generation
        for ind, fitness in zip(net_inds, fitness_evals):
            self.lineage[ind].append(fitness)
            lineage_scores.append(
                0.75 * sum(self.lineage[ind]) / len(self.lineage[ind]) + 0.25 *
                fitness)  #Current fitness is weighted higher than lineage info
            if len(self.lineage[ind]) > self.lineage_depth:
                self.lineage[ind].pop(0)  #Housekeeping

        # Entire epoch is handled with indices; Index rank nets by fitness evaluation (0 is the best after reversing)
        index_rank = self.list_argsort(fitness_evals)
        index_rank.reverse()
        elitist_index = index_rank[:self.num_elites]  # Elitist indexes safeguard

        # Add the top lineage-ranked individuals to the elitists
        lineage_rank = self.list_argsort(lineage_scores[:])
        lineage_rank.reverse()
        elitist_index = elitist_index + lineage_rank[:int(self.num_elites)]

        # Remove duplicates from the elitist indices
        elitist_index = list(set(elitist_index))

        # Selection step
        offsprings = self.selection_tournament(
            index_rank,
            num_offsprings=len(index_rank) - len(elitist_index) - len(migration),
            tournament_size=3)

        # From here on, ranked indexes refer to net indexes
        elitist_index = [net_inds[i] for i in elitist_index]
        offsprings = [net_inds[i] for i in offsprings]

        # Figure out unselected candidates
        unselects = []
        new_elitists = []
        for i in range(len(pop)):
            if i in offsprings or i in elitist_index:
                continue
            else:
                unselects.append(i)
        random.shuffle(unselects)

        # Inheritance step (sync learners to population)
        for policy in migration:
            replacee = unselects.pop(0)
            utils.hard_update(target=pop[replacee], source=policy)
            # wwid = genealogy.asexual(int(policy.wwid.item()))
            # pop[replacee].wwid[0] = wwid
            self.lineage[replacee] = [
                sum(lineage_scores) / len(lineage_scores)
            ]  # Initialize as average

        # Elitism step, assigning elite candidates to some unselects
        for i in elitist_index:
            if len(unselects) >= 1: replacee = unselects.pop(0)
            elif len(offsprings) >= 1: replacee = offsprings.pop(0)
            else: continue
            new_elitists.append(replacee)
            utils.hard_update(target=pop[replacee], source=pop[i])
            # wwid = genealogy.asexual(int(pop[i].wwid.item()))
            # pop[replacee].wwid[0] = wwid
            # genealogy.elite(wwid, gen)

            self.lineage[replacee] = self.lineage[i][:]

        # Crossover for unselected genes with 100 percent probability
        if len(unselects) % 2 != 0:  # Number of unselects left should be even
            unselects.append(unselects[random.randint(0, len(unselects) - 1)])
        for i, j in zip(unselects[0::2], unselects[1::2]):
            off_i = random.choice(new_elitists)
            off_j = random.choice(offsprings)
            utils.hard_update(target=pop[i], source=pop[off_i])
            utils.hard_update(target=pop[j], source=pop[off_j])
            self.crossover_inplace(pop[i], pop[j])
            # wwid1 = genealogy.crossover(int(pop[off_i].wwid.item()), int(pop[off_j].wwid.item()), gen)
            # wwid2 = genealogy.crossover(int(pop[off_i].wwid.item()), int(pop[off_j].wwid.item()), gen)
            # pop[i].wwid[0] = wwid1; pop[j].wwid[0] = wwid2

            self.lineage[i] = [
                0.5 * utils.list_mean(self.lineage[off_i]) +
                0.5 * utils.list_mean(self.lineage[off_j])
            ]
            self.lineage[j] = [
                0.5 * utils.list_mean(self.lineage[off_i]) +
                0.5 * utils.list_mean(self.lineage[off_j])
            ]

        # Crossover for selected offsprings
        for i, j in zip(offsprings[0::2], offsprings[1::2]):
            if random.random() < self.crossover_prob:
                self.crossover_inplace(pop[i], pop[j])
                # wwid1 = genealogy.crossover(int(pop[i].wwid.item()), int(pop[j].wwid.item()), gen)
                # wwid2 = genealogy.crossover(int(pop[i].wwid.item()), int(pop[j].wwid.item()), gen)
                # pop[i].wwid[0] = wwid1; pop[j].wwid[0] = wwid2
                self.lineage[i] = [
                    0.5 * utils.list_mean(self.lineage[i]) +
                    0.5 * utils.list_mean(self.lineage[j])
                ]
                self.lineage[j] = [
                    0.5 * utils.list_mean(self.lineage[i]) +
                    0.5 * utils.list_mean(self.lineage[j])
                ]

        # Mutate all genes in the population except the new elitists
        for i in range(len(pop)):
            if i not in new_elitists:  # Spare the new elitists
                if random.random() < self.mutation_prob:
                    self.mutate_inplace(pop[i])
            # genealogy.mutation(int(pop[net_i].wwid.item()), gen)

        self.all_offs[:] = offsprings[:]
        return new_elitists[0]
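
The standard scheme above calls self.selection_tournament(index_rank, num_offsprings, tournament_size). A minimal sketch of what such a tournament could look like, inferred from the call site (earlier positions in index_rank are fitter); the repository's method may differ, for example by de-duplicating winners.

import random

def selection_tournament(index_rank, num_offsprings, tournament_size):
    # index_rank is ordered best-first, so a lower position means a fitter individual
    offsprings = []
    for _ in range(num_offsprings):
        entrants = random.sample(range(len(index_rank)), tournament_size)
        winner_pos = min(entrants)            # best-ranked entrant wins
        offsprings.append(index_rank[winner_pos])
    return offsprings
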
Example #6
    def train(self, gen, test_tracker):
        """Main training loop to do rollouts and run policy gradients

			Parameters:
				gen (int): Current epoch of training
				test_tracker (object): Tracker used to log test-rollout scores

			Returns:
				all_fits, pg_fits, test_fits (lists): Fitness values from evo, PG and test rollouts
		"""

        # Test Rollout
        if gen % self.args.test_gap == 0:
            self.test_agent.make_champ_team(
                self.agents)  # Sync the champ policies into the TestAgent
            self.test_task_pipes[0].send("START")  # sending START signal

        # Figure out teams for Coevolution

        if self.args.ps == 'full' or self.args.ps == 'trunk':
            teams = [
                [i] for i in list(range(args.popn_size))
            ]  # returns [[0], [1], [2]..] Homogeneous case is just the popn as a list of lists to maintain compatibility
        else:
            teams = self.make_teams(args.config.num_agents, args.popn_size,
                                    args.num_evals)  # Heterogeneous Case
            # returns [[0,1,2,3..], [2,3,1,0...], ...] shuffled teams of agents, like 1st agent is from pop k, and so on....

        #teams = self.make_teams(args.config.num_agents, args.popn_size, args.num_evals)  # Heterogeneous Case

        ########## START EVO ROLLOUT ##########
        if self.args.popn_size > 0:
            for pipe, team in zip(self.evo_task_pipes, teams):
                pipe[0].send(team)  # sending team signal

        ########## START POLICY GRADIENT ROLLOUT ##########
        if self.args.rollout_size > 0 and not RANDOM_BASELINE:
            # Sync each pg_actor to its corresponding rollout_bucket
            for agent in self.agents:
                agent.update_rollout_actor()  # each agent wraps its own neural network

            # Start rollouts using the rollout actors
            self.pg_task_pipes[0].send('START')  # Index 0 for the Rollout bucket

            ############ POLICY GRADIENT UPDATES #########
            # Spin up threads for each agent
            # Only PG will update, evolutionary will evolve
            threads = [
                threading.Thread(target=agent.update_parameters, args=())
                for agent in self.agents
            ]

            # Start threads
            for thread in threads:
                thread.start()

            # Join threads
            for thread in threads:
                thread.join()

        all_fits = []
        ####### JOIN EVO ROLLOUTS ########
        if self.args.popn_size > 0:
            for pipe in self.evo_result_pipes:  # for each population
                entry = pipe[1].recv()
                team = entry[0]  # team members
                fitness = entry[1][0]  # list of fitness values from each evaluation of the team
                frames = entry[2]

                # Track which population member filled each agent slot and what average
                # fitness it earned; each member accumulates one fitness entry per team
                # formed with randomly drawn members of the other populations.
                for agent_id, popn_id in enumerate(team):
                    self.agents[agent_id].fitnesses[popn_id].append(
                        utils.list_mean(fitness)
                    )  # Assign the mean over this team's evaluations
                #print("##########", fitness)
                all_fits.append(utils.list_mean(fitness))
                self.total_frames += frames

        ####### JOIN PG ROLLOUTS ########
        pg_fits = []
        if self.args.rollout_size > 0 and not RANDOM_BASELINE:
            entry = self.pg_result_pipes[1].recv()  # one fitness value per PG rollout (50 in this case)
            pg_fits = entry[1][0]
            self.total_frames += entry[2]

        ####### JOIN TEST ROLLOUTS ########
        test_fits = []
        if gen % self.args.test_gap == 0:
            entry = self.test_result_pipes[1].recv()
            test_fits = entry[1][0]
            test_tracker.update([mod.list_mean(test_fits)], self.total_frames)
            self.test_trace.append(mod.list_mean(test_fits))

        # Evolution Step
        for agent in self.agents:
            agent.evolve()  # selection, mutation happens

        #Save models periodically
        if gen % 20 == 0:
            for id, test_actor in enumerate(self.test_agent.rollout_actor):
                torch.save(
                    test_actor.state_dict(), self.args.model_save + str(id) +
                    '_' + self.args.actor_fname)
            print("Models Saved")

        return all_fits, pg_fits, test_fits
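
For the heterogeneous case, train() above calls self.make_teams(args.config.num_agents, args.popn_size, args.num_evals), described in the comment as returning shuffled teams like [[0,1,2,3..], [2,3,1,0...], ...]. A minimal sketch consistent with that description; the shuffling strategy is an assumption, not the project's implementation.

import random

def make_teams(num_agents, popn_size, num_evals):
    # One independently shuffled copy of the population indices per agent slot,
    # repeated num_evals times; team k pairs the k-th entry of each shuffled list.
    teams = []
    for _ in range(num_evals):
        shuffled = [random.sample(range(popn_size), popn_size)
                    for _ in range(num_agents)]
        teams += [[shuffled[slot][k] for slot in range(num_agents)]
                  for k in range(popn_size)]
    return teams

# Example: 3 agent slots, a population of 4 per slot, 1 evaluation round -> 4 teams of 3
print(make_teams(3, 4, 1))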