class PopulationObserver:
    """Replay the individuals of a saved population in a rendered gym environment.

    The population is loaded through ``Serializer(path).load(ckpt)``. One
    ``Individual`` (``self.player``) is reused for every replay: the weights
    of the individual being observed are copied into it before playing.
    """

    def __init__(self, ckpt, path='', env_id='Boxing-ramNoFrameskip-v4', slow_factor=0.04):
        """Load the population and build the environment and the player.

        :param ckpt: checkpoint identifier handed to ``Serializer.load``
        :param path: argument handed to the ``Serializer`` constructor
        :param env_id: gym environment id, also the key into ``name2class``
        :param slow_factor: seconds slept after each rendered decision step
        """
        self.slow_factor = slow_factor
        # Looked up once; the original assigned the same value twice.
        self.util = name2class[env_id]
        self.serializer = Serializer(path)
        self.to_observe: Population = self.serializer.load(ckpt)
        self.env = gym.make(env_id)
        # The policy input is four preprocessed observations concatenated.
        self.state_shape = (self.util.state_dim * 4, )
        self.action_dim = self.util.action_space_dim
        self.player = Individual(self.state_shape, self.action_dim, self.util.goal_dim)

    def observe(self):
        """Replay the best individual per objective, then every individual."""
        print('--------Population Observation Stared--------')
        # NOTE(review): `self.util` is used with attribute access everywhere
        # else; confirm that it also supports subscripting by 'objectives'.
        extremes = [
            (goal, max(self.to_observe.individuals, key=lambda x, goal=goal: goal.make(x)))
            for goal in self.util['objectives']
        ]
        print('--------Observing extreme individuals--------')
        for goal, individual in extremes:
            print('Best regarding', goal, ':')
            pprint(individual.behavior_stats)
            # -1 marks "not a population index" in the skip message.
            self.play(-1, individual.get_weights())
        for index, individual in enumerate(self.to_observe.individuals):
            print('Individual %d stats :' % index)
            pprint(individual.behavior_stats)
            self.play(index, individual.get_weights())

    def play(self, index, player_weights):
        """Play one rendered episode with the given weights.

        Ctrl-C (``KeyboardInterrupt``) skips the current episode instead of
        aborting the whole observation run.

        :param index: index printed in the "skipped" message
        :param player_weights: weights loaded into ``self.player``
        """
        self.player.set_weights(player_weights)
        try:
            done = False
            first = self.util.preprocess(self.env.reset())
            # Start from a stack made of four copies of the first observation.
            observation = np.concatenate([first, first, first, first])
            while not done:
                self.env.render()
                action = self.player.pi.policy.get_action(observation, eval=True)
                # NOTE(review): the collapsed source does not show whether the
                # preprocess/concatenate steps sit inside this 4-frame loop;
                # they are assumed to. Confirm against the training code.
                for _frame in range(4):
                    frame, _, done, _ = self.env.step(action)
                    frame = self.util.preprocess(frame)
                    # Drop the oldest quarter of the stack, append the new frame.
                    observation = np.concatenate(
                        [observation[len(observation) // 4:], frame])
                    if done:
                        # Do not step an environment that reported done.
                        break
                sleep(self.slow_factor)
        except KeyboardInterrupt:
            print('individual %d skipped' % index)
def mutate(self, dna: Individual):
    '''
    Bit-flip mutation: with probability ``self.mutation_rate`` flip one
    randomly chosen gene of the individual's chromosome, in place.

    When a gene is flipped the fitness is recomputed against ``self.target``.
    An individual that is not mutated is left untouched. Returns ``None``.
    '''
    if random() < self.mutation_rate:
        position = randint(0, len(dna.chromosome) - 1)
        gene = dna.chromosome[position]
        # Maps 0 -> 1 and 1 -> 0.
        dna.chromosome[position] = abs(gene - 1)
        # The original read a bare `target` name; the sibling population
        # generator evaluates fitness against `self.target`.
        dna.fitness = self.get_fitness(self.target, dna.chromosome)
def init_population(pop_size, bounds, mode="minimization"):
    """
    Build a population of vectors made of a binary half and a real half.

    The first ``len(bounds) // 2`` entries are 0/1 flags: a uniform draw
    within the matching bound yields 0 when below 0.5, otherwise 1. The next
    ``len(bounds) // 2`` entries are uniform draws within the bounds of the
    second half. With an odd number of bounds the last bound is unused.

    :param pop_size: population size
    :param bounds: sequence of (lower, upper) pairs
    :param mode: passed through to ``Population``
    :return: the populated ``Population``
    """
    population = Population(pop_size, mode)
    half = len(bounds) // 2
    for _ in range(pop_size):
        # interaction structure (binary flags)
        structure = [
            0 if random.uniform(bounds[k][0], bounds[k][1]) < 0.5 else 1
            for k in range(half)
        ]
        # interaction values (real numbers)
        strengths = [
            random.uniform(bounds[half + k][0], bounds[half + k][1])
            for k in range(half)
        ]
        population.add_individual(Individual(structure + strengths))
    return population
def __init__(self, ckpt, path='', env_id='Boxing-ramNoFrameskip-v4', slow_factor=0.04):
    """Load the population to observe and build the environment and player.

    :param ckpt: checkpoint identifier handed to ``Serializer.load``
    :param path: argument handed to the ``Serializer`` constructor
    :param env_id: gym environment id, also the key into ``name2class``
    :param slow_factor: stored as ``self.slow_factor``
    """
    self.slow_factor = slow_factor
    # Looked up once; the original assigned the same value twice.
    self.util = name2class[env_id]
    self.serializer = Serializer(path)
    self.to_observe: Population = self.serializer.load(ckpt)
    self.env = gym.make(env_id)
    # State is four times the per-observation dimension.
    self.state_shape = (self.util.state_dim * 4, )
    self.action_dim = self.util.action_space_dim
    self.player = Individual(self.state_shape, self.action_dim, self.util.goal_dim)
def uniform_random(pop_size, bounds, mode="minimization"):
    """
    Build a population whose vectors are drawn uniformly within ``bounds``.

    :param pop_size: number of individuals to create
    :param bounds: sequence of (lower, upper) pairs, one per dimension
    :param mode: passed through to ``Population``
    :return: the populated ``Population``
    """
    population = Population(pop_size, mode)
    dimensions = range(len(bounds))
    for _ in range(pop_size):
        candidate = [random.uniform(bounds[d][0], bounds[d][1]) for d in dimensions]
        population.add_individual(Individual(candidate))
    return population
def mgbde_run(benchmark, bounds, pop_size, maxFE, F):
    """Run the differential-evolution loop until the evaluation budget is used.

    Each individual carries its own crossover rate (``cr[j]``) and a mutation
    strategy flag: 0 selects ``gaussian_mutation``, anything else selects
    ``de_best_1_mutation``. The loop stops when ``benchmark.numFE`` reaches
    ``maxFE`` or when ``benchmark.is_solution`` accepts the current best.

    :param benchmark: object exposing ``eval``, ``numFE`` and ``is_solution``
    :param bounds: search-space bounds handed to initialisation and mutation
    :param pop_size: number of individuals
    :param maxFE: upper limit for ``benchmark.numFE``
    :param F: scale factor handed to ``de_best_1_mutation``
    :return: the final population
    """
    #--- INITIALIZE A POPULATION (step #1) ----------------+
    population = init.uniform_random(pop_size, bounds)
    cr = init_cr(pop_size)
    mutation_strategy = init_mutation_strategies(pop_size)

    #--- SOLVE --------------------------------------------+
    # cycle through each generation (step #2)
    while benchmark.numFE < maxFE:
        # NOTE(review): presumably (re-)evaluates the population and returns
        # its best member; confirm against init_eval.
        best = init_eval(population, benchmark.eval)

        # cycle through each individual in the population
        for j in range(pop_size):
            x_t = population.get(j).vector

            #--- MUTATION (step #3.A) ---------------------+
            if mutation_strategy[j] == 0:
                v_donor = gaussian_mutation(best, population.get(j), bounds)
            else:
                v_donor = de_best_1_mutation(best, population, j, bounds, F)

            #--- RECOMBINATION (step #3.B) ----------------+
            v_trial = crossover(v_donor, x_t, cr[j])

            #--- GREEDY SELECTION (step #3.C) -------------+
            score_trial, score_target = selection(population, j, benchmark.eval, Individual(v_trial))

            # --- UPDATE CR (step #3.C) -------------+
            # The per-individual crossover rate is adapted from the two scores.
            cr[j] = update_cr(score_trial, score_target, cr[j])

        #--- SCORE KEEPING --------------------------------+
        # gen_avg = sum(gen_scores) / pop_size # current generation avg. fitness
        # gen_best = min(gen_scores) # fitness of best individual
        # gen_sol = population.get(gen_scores.index(min(gen_scores))) # solution of best individual
        best = population.get_best()

        # Progress is reported only when the evaluation counter lands exactly
        # on a multiple of 10000.
        if benchmark.numFE % 10000 == 0:
            print('numFE:', benchmark.numFE)
            # print(' > GENERATION AVERAGE:', gen_avg)
            # print(' > GENERATION BEST:', gen_best)
            # print(' > BEST SOLUTION:', gen_sol)
            print('----------------------------------------------')
            print(' > GENERATION BEST:', best.score)
            print(' > GENERATION BEST:', best.vector)
            print('----------------------------------------------')

        # Early exit once the benchmark accepts the best individual.
        if benchmark.is_solution(best):
            print('Find a solution! numFE:{0}'.format(benchmark.numFE))
            break

    return population
def generate_population(self, max_population: int) -> Population:
    '''
    Generate a completely random initial population.

    A population is made of different candidate solutions (individuals);
    here each solution represents an image. Every individual is built from
    ``random_image(len(self.target))`` and scored with ``self.get_fitness``
    against ``self.target``.

    :param max_population: number of individuals to create; when ``None``
        the instance attribute ``self.max_population`` is used instead
    :return: a ``Population`` wrapping the generated individuals
    '''
    # The original ignored this argument and always read self.max_population.
    count = self.max_population if max_population is None else max_population
    population = []
    for _ in range(count):
        points = random_image(len(self.target))
        fitness = self.get_fitness(self.target, points)
        population.append(Individual(points, fitness))
    return Population(population)
def generate_offspring(self, parent_population, crossover, repair,
                       max_exchanges, problem, fitness, mutation):
    """Grow ``parent_population`` in place with repaired, mutated children.

    Pairs of parents are picked by two binary tournaments over four random
    indices, recombined, repaired, wrapped in ``Individual`` and mutated.
    Children are appended to ``parent_population`` until its length reaches
    at least ``2 * original_size - 1``. Only the original members can be
    selected as parents.

    :param parent_population: list of individuals, extended in place
    :param crossover: object exposing ``perform_crossover(genotype_a, genotype_b)``
    :param repair: object exposing ``get_new_child(...)``
    :param max_exchanges: forwarded to ``repair.get_new_child``
    :param problem: problem instance forwarded to repair, fitness and mutation
    :param fitness: object exposing ``calculate_fitness_genotype``
    :param mutation: object exposing ``mutate(individual, fitness, problem)``
    :return: ``None``; the result is the mutated ``parent_population``
    """
    size_population = len(parent_population)
    while size_population * 2 - 1 > len(parent_population):
        # select 4 random potential parents (indices into the original members)
        picks = [random.randint(0, size_population - 1) for _ in range(4)]

        # perform tournament selection to keep 2
        parentA = self.tournament_selection(
            parent_population[picks[0]], parent_population[picks[1]])
        parentB = self.tournament_selection(
            parent_population[picks[2]], parent_population[picks[3]])

        # perform crossover
        [childA, childB] = crossover.perform_crossover(parentA.genotype, parentB.genotype)

        # repair both children
        childA = repair.get_new_child(
            max_exchanges, problem, childA,
            fitness.calculate_fitness_genotype(childA, problem), fitness)
        childB = repair.get_new_child(
            max_exchanges, problem, childB,
            fitness.calculate_fitness_genotype(childB, problem), fitness)

        # mutate, then add to the population
        o_childA = Individual(childA[0], childA[1])
        mutatedA = mutation.mutate(o_childA, fitness, problem)
        o_childB = Individual(childB[0], childB[1])
        mutatedB = mutation.mutate(o_childB, fitness, problem)

        # The original appended fresh, unmutated individuals here, silently
        # discarding the mutation. Fall back to the (possibly in-place
        # mutated) child if the operator returns nothing.
        parent_population.append(o_childA if mutatedA is None else mutatedA)
        parent_population.append(o_childB if mutatedB is None else mutatedB)
def generate_population(self, max_population: int) -> Population:
    '''
    Generate a random initial population.

    Each individual is a set of cities to be visited; different individuals
    are obtained by permuting the visiting order with ``self.permut`` applied
    to ``self.cities``.

    :param max_population: number of individuals to create
    :return: a ``Population`` wrapping the generated individuals
    '''
    permut_rate = 1.0
    individuals = []
    while len(individuals) < max_population:
        # A draw is consumed on every iteration, exactly as before.
        if random() >= permut_rate:
            continue
        route = self.permut(self.cities)
        individuals.append(Individual(route))
    return Population(individuals)
def crossover(self, dna1: Individual, dna2: Individual) -> Individual:
    '''
    One-point crossover: combine two chromosomes into a child chromosome.

    A random cut point splits the parents; the child takes the genes before
    the cut from ``dna1`` and the genes from the cut onwards from ``dna2``.
    When a uniform draw exceeds ``self.crossover_rate`` no crossover happens
    and ``dna1`` itself is returned.

    :param dna1: first parent (supplies the head of the child)
    :param dna2: second parent (supplies the tail of the child)
    :return: ``dna1`` unchanged, or a new ``Individual`` with its fitness set
    '''
    if random() > self.crossover_rate:
        return dna1

    cutoff = randint(0, len(dna1.chromosome) - 1)
    chromosome = dna1.chromosome[:cutoff] + dna2.chromosome[cutoff:]
    # The original read a bare `target` name; the sibling population
    # generator evaluates fitness against `self.target`.
    fitness = self.get_fitness(self.target, chromosome)
    return Individual(points=chromosome, fitness=fitness)
def run_train():
    """Train on ``Pong-ram-v0`` with the genetic algorithm and save the result.

    Reads the observation and action sizes from the environment, stores them
    in ``conf``, wires up the GA components, runs the GA and dumps the best
    individual to ``conf.save_path``. The environment is closed even when
    training raises.
    """
    env = gym.make("Pong-ram-v0").env
    try:
        observation_space = env.observation_space.shape[0]  # Box(128,)
        action_space = env.action_space.n  # Discrete(6)
        conf.set_params(observation_space=observation_space, action_space=action_space)

        individual = Individual(conf)
        population = Population(individual, conf)
        selection = RouletteWheelSelection()
        crossover = Crossover(conf)
        mutation = Mutation(conf.mutation_rate)
        ga = GA(population, selection, crossover, mutation)

        best_individual = ga.run(env, num_games=conf.num_games_per_individual,
                                 visualization=False)
        dump_model(best_individual, conf.save_path)
    finally:
        # Previously skipped whenever anything above raised.
        env.close()
def run(benchmark, bounds, pop_size, F, CR, maxFE):
    """Run the differential-evolution loop until the evaluation budget is used.

    Every generation applies mutation, crossover and selection to each
    individual in turn. The loop stops when ``benchmark.numFE`` reaches
    ``maxFE`` or when ``benchmark.is_solution`` accepts the current best.

    :param benchmark: object exposing ``eval``, ``numFE`` and ``is_solution``
    :param bounds: search-space bounds handed to initialisation and mutation
    :param pop_size: number of individuals
    :param F: scale factor handed to ``mutation``
    :param CR: crossover rate handed to ``crossover``
    :param maxFE: upper limit for ``benchmark.numFE``
    :return: the final population
    """
    #--- INITIALIZE A POPULATION (step #1) ----------------+
    population = init.uniform_random(pop_size, bounds)

    #--- SOLVE --------------------------------------------+
    # cycle through each generation (step #2)
    while benchmark.numFE < maxFE:

        # cycle through each individual in the population
        for j in range(pop_size):

            #--- MUTATION (step #3.A) ---------------------+
            v_donor, x_t = mutation(population, j, bounds, F)

            #--- RECOMBINATION (step #3.B) ----------------+
            v_trial = crossover(v_donor, x_t, CR)

            #--- GREEDY SELECTION (step #3.C) -------------+
            selection(population, j, benchmark.eval, Individual(v_trial))

        #--- SCORE KEEPING --------------------------------+
        # gen_avg = sum(gen_scores) / pop_size # current generation avg. fitness
        # gen_best = min(gen_scores) # fitness of best individual
        # gen_sol = population.get(gen_scores.index(min(gen_scores))) # solution of best individual
        gen_best = population.get_best()

        # Progress is reported only when the evaluation counter lands exactly
        # on a multiple of 10000.
        if benchmark.numFE % 10000 == 0:
            print('numFE:', benchmark.numFE)
            # print(' > GENERATION AVERAGE:', gen_avg)
            # print(' > GENERATION BEST:', gen_best)
            # print(' > BEST SOLUTION:', gen_sol)
            print('----------------------------------------------')
            print(' > GENERATION BEST:', gen_best.score)
            print('----------------------------------------------')

        # Early exit once the benchmark accepts the best individual.
        if benchmark.is_solution(gen_best):
            print('Find a solution! numFE:{0}'.format(benchmark.numFE))
            break

    return population
def crossover(self, dna1: Individual, dna2: Individual) -> Individual:
    '''
    Order-based crossover driven by a random bit mask.

    Genes of ``dna1`` are copied into the child at the positions where the
    mask is 1. The remaining positions are filled, left to right, with the
    genes of ``dna2`` that are not yet in the child, in the order they appear
    in ``dna2``. When a uniform draw exceeds ``self.crossover_rate`` no
    crossover happens and ``dna1`` itself is returned.

    :param dna1: first parent (supplies the masked-in genes)
    :param dna2: second parent (supplies the order of the remaining genes)
    :return: ``dna1`` unchanged, or a new ``Individual`` built from the child
    :raises IndexError: if ``dna2`` does not supply enough missing genes
    '''
    if random() > self.crossover_rate:
        return dna1

    genes1, genes2 = dna1.chromosome, dna2.chromosome
    mask = random_bits(len(genes1))
    child = [gene if bit == 1 else None for gene, bit in zip(genes1, mask)]

    # Genes still missing from the child, kept in dna2 order. Reversed so
    # that pop() hands them out front-to-back.
    missing = [gene for gene in genes2 if gene not in child]
    missing.reverse()

    for position, slot in enumerate(child):
        if slot is None:
            child[position] = missing.pop()

    return Individual(child)
def __init__(self, alpha=0.001, gamma=0.993, traj=10, batch=16,
             env_id='Tennis-ramNoFrameskip-v4', plot_freq=100):
    """Set up the environments, the player and the buffers used for training.

    :param alpha: stored and forwarded to the ``Individual`` constructor
    :param gamma: stored and forwarded to the ``Individual`` constructor
    :param traj: second dimension of every trajectory buffer
    :param batch: first dimension of every trajectory buffer
    :param env_id: key into ``name2class`` selecting the environment helper
    :param plot_freq: stored as ``self.plot_freq``
    """
    # Hyper-parameters kept on the instance.
    self.alpha, self.gamma = alpha, gamma
    self.traj, self.batch = traj, batch
    self.plot_freq = plot_freq

    # Environments: N_ENVS instances of the environment named by the helper.
    self.N_ENVS = 10
    self.util = name2class[env_id]
    self.envs = [gym.make(self.util.name) for _ in range(self.N_ENVS)]
    self.observations = [None] * self.N_ENVS

    # Player.
    self.state_shape = (self.util.full_state_dim, )
    self.action_dim = self.util.action_space_dim
    self.player = Individual(self.state_shape, self.action_dim, 3, 0.01,
                             alpha, gamma, 0.0005, 1, traj, batch, 1)

    # Trajectory buffers, all shaped (batch, traj[, state...]).
    window = (batch, traj)
    self.trajectory = {
        'state': np.zeros(window + self.state_shape, dtype=np.float32),
        'action': np.zeros(window, dtype=np.int32),
        'rew': np.zeros(window, dtype=np.float32),
        'base_rew': np.zeros(window, dtype=np.float32),
    }
    self.last_obs = None

    # Plotting state: a NaN-filled (plot_max, 2) array and its x-axis.
    self.plot_max = 50000
    self.reward = np.full((self.plot_max, 2), np.nan)
    self.range = np.arange(self.plot_max)
    self.plot_index = 0
def bnde_run(benchmark, bounds, pop_size, neighborhoodSize, maxFE, d0=1.0E-16):
    """Run the neighbourhood-based differential-evolution loop.

    The population is wrapped in ``MultiPopulationsWithSameSize`` and each
    sub-population is evolved separately with ``gaussian_mutation_lv_bnde``,
    ``crossover`` and ``selection``. ``diversity_preserving`` is called once
    per generation with an ``archive`` list. The loop runs until
    ``benchmark.numFE`` reaches ``maxFE``.

    :param benchmark: object exposing ``eval``, ``error`` and ``numFE``
    :param bounds: search-space bounds
    :param pop_size: total population size
    :param neighborhoodSize: size parameter handed to the sub-population
        container and to the ``pe``/``cr`` updates
    :param maxFE: upper limit for ``benchmark.numFE``
    :param d0: threshold forwarded to ``diversity_preserving``
    :return: the sub-population container (``neighbors``)
    """
    #--- INITIALIZE A POPULATION (step #1) ----------------+
    archive = []  # an archive to store all the local best
    population = init_population(pop_size, bounds)
    # print(str(population))
    best = init_eval(population, benchmark.eval)
    # print(str(population))
    neighbors = MultiPopulationsWithSameSize(pop_size, neighborhoodSize, population)
    # print(str(neighbors))
    # Starting values fed to update_pe / update_cr_bnde, which hand back
    # updated values every generation.
    mu_pe = 0.5
    mu_cr = 0.5

    #--- SOLVE --------------------------------------------+
    # cycle through each generation (step #2)
    while benchmark.numFE < maxFE:
        # best = init_eval(population, benchmark.eval)
        pe, mu_pe = update_pe(neighbors.size, neighborhoodSize, mu_pe, q=0.1)
        cr, mu_cr = update_cr_bnde(neighbors.size, neighborhoodSize, mu_cr, q=0.1)
        diversity_preserving(neighbors, archive, bounds, benchmark.eval, d0=d0)
        # Decreases as the consumed share of the evaluation budget grows.
        chi = np.exp(-4.0 * (benchmark.numFE / maxFE + 0.4))

        # cycle through each individual in the population
        best_scores = []
        for i in range(neighbors.size):
            sub_pop_i = neighbors.get_subpopulation(i)
            best_i, worst_i = sub_pop_i.get_best_worst(redo=True)
            best_scores.append(benchmark.error(best_i))
            # print(best_i)
            for j in range(sub_pop_i.size):
                x_t = sub_pop_i.get(j).vector

                #--- MUTATION (step #3.A) ---------------------+
                v_donor = gaussian_mutation_lv_bnde(sub_pop_i.best_index, j, sub_pop_i, bounds, pe[i][j], chi)
                # print("v_donor", v_donor)
                # if mutation_strategy[j] == 0:
                #     v_donor = gaussian_mutation(best, population.get(j), bounds)
                # else:
                #     v_donor = de_best_1_mutation(best, population, j, bounds, F)

                #--- RECOMBINATION (step #3.B) ----------------+
                v_trial = crossover(v_donor, x_t, cr[i][j])
                # print("v_trial", v_trial)

                #--- GREEDY SELECTION (step #3.C) -------------+
                score_trial, score_target = selection(sub_pop_i, j, benchmark.eval, Individual(v_trial))
                # print("numFE:", benchmark.numFE)

        #--- SCORE KEEPING --------------------------------+
        # gen_avg = sum(gen_scores) / pop_size # current generation avg. fitness
        # gen_best = min(gen_scores) # fitness of best individual
        # gen_sol = population.get(gen_scores.index(min(gen_scores))) # solution of best individual
        # print("benchmark.numFE")
        # Carriage return keeps the running counter on a single console line.
        print(str(benchmark.numFE) + "\r", end="")

        # Detailed progress only when the counter lands exactly on a multiple
        # of 10000.
        if benchmark.numFE % 10000 == 0:
            print('numFE:', benchmark.numFE)
            # print(' > GENERATION AVERAGE:', gen_avg)
            # print(' > GENERATION BEST:', gen_best)
            # print(' > BEST SOLUTION:', gen_sol)
            print('----------------------------------------------')
            # print(' > GENERATION AVG BEST:', np.mean(best_scores))
            # print(' > GENERATION STD BEST:', np.std(best_scores))
            print(' > GENERATION BEST of BEST:', np.min(best_scores))
            # print(' > GENERATION WORST of BEST:', np.max(best_scores))
            # historical solutions in archive
            if len(archive) > 0:
                print(' > HISTORICAL BEST of BEST:', np.min([ind.score for ind in archive]))
            print('----------------------------------------------')

    # Final report: every archived solution, then the sub-populations.
    print('----------------------------------------------')
    for solution in archive:
        print(' > SOL in archive:', solution)
    print('----------------------------------------------')
    print(neighbors)
    return neighbors