def add_seeded_individuals(toolbox: base.Toolbox, options: dict, ccl_objects: dict,
                           primitive_set: gp.PrimitiveSetTyped) -> List[gp.PrimitiveTree]:
    """Add individuals specified by user and their mutations.

    Every non-blank line of the file named by ``options['seeded_individuals']``
    is parsed into an individual. Each seed is appended to the result, followed
    by ``options['initial_seed_mutations']`` accepted mutants of that seed.

    A mutant is accepted only if it passes ``check_symbol_counts``, converts to
    a sympy expression without ``RuntimeError``, has a string form not seen
    before (across all seeds processed so far), contains none of ``zoo``,
    ``oo``, ``nan`` or ``I``, and - when ``options['max_constant_allowed']`` is
    not ``None`` - passes ``check_max_constant``.

    :param toolbox: toolbox providing ``clone`` and ``mutate``
    :param options: configuration dictionary (keys read: ``seeded_individuals``,
        ``initial_seed_mutations``, ``max_constant_allowed``)
    :param ccl_objects: passed through to ``generate_sympy_expr``
    :param primitive_set: primitive set used to parse the seed strings
    :return: list of seeds and their accepted mutants, each carrying a
        ``sympy_code`` attribute
    :raises RuntimeError: if a seed cannot be parsed, cannot be converted to a
        sympy expression, or cannot be mutated
    """
    with open(options['seeded_individuals']) as f:
        # Blank lines (e.g. an empty line at the end of the file) do not
        # describe an individual, so they are skipped instead of being parsed.
        raw_codes = [stripped for stripped in (line.strip() for line in f) if stripped]

    pop = []
    codes = set()  # string forms already present, shared across all seeds

    for no, ind in enumerate(raw_codes):
        try:
            x = creator.Individual(gp.PrimitiveTree.from_string(ind, primitive_set))
        except TypeError as err:
            raise RuntimeError(
                f'Incorrect seeded individual (probably incorrect symbol): {ind}') from err

        try:
            sympy_code = str(generate_sympy_expr(x, ccl_objects))
        except RuntimeError as err:
            raise RuntimeError(f'Initial individual causes problem: {ind}') from err

        print(f'[Seed {no:2d} No mutation]: {sympy_code}')
        x.sympy_code = sympy_code
        pop.append(x)
        codes.add(sympy_code)

        # NOTE: only accepted mutants advance the counter, so this loop keeps
        # retrying until enough distinct valid mutants have been produced.
        i = 0
        while i < options['initial_seed_mutations']:
            y = toolbox.clone(x)
            try:
                y, = toolbox.mutate(y)
            except IndexError as err:
                raise RuntimeError(
                    f'Incorrect seeded individual (probably wrong arity): {ind}') from err

            if not check_symbol_counts(y, options):
                continue

            try:
                mut_sympy_expr = generate_sympy_expr(y, ccl_objects)
                mut_sympy_code = str(mut_sympy_expr)
            except RuntimeError:
                continue

            if mut_sympy_code in codes:
                continue

            if mut_sympy_expr.has(sympy.zoo, sympy.oo, sympy.nan, sympy.I):
                continue

            if options['max_constant_allowed'] is not None \
                    and not check_max_constant(mut_sympy_expr, options):
                continue

            codes.add(mut_sympy_code)
            i += 1
            print(f'[Seed {no:2d} Mutation {i:2d}]: {mut_sympy_code}')
            y.sympy_code = mut_sympy_code
            pop.append(y)

    return pop
def run_opd_ga(toolbox: base.Toolbox, popsize: int, gens: int, cxpb: float, mutpb: float,
               elitism_k: int, new_inds_per_gen: int, target_lambda: int,
               verbose: bool = False):
    """Run a generational GA until the best fitness reaches a target or gens run out.

    Each generation is built from ``len(pop) - elitism_k - new_inds_per_gen``
    selected individuals plus the ``elitism_k`` best ones (all cloned), plus
    ``new_inds_per_gen`` freshly generated individuals. Crossover and mutation
    are then applied to that whole list, and only individuals whose fitness was
    invalidated (or never set) are evaluated.

    :param toolbox: toolbox providing ``generate_population``, ``parallel_map``,
        ``evaluate``, ``select``, ``clone``, ``mate`` and ``mutate``
    :param popsize: size of the initial population
    :param gens: maximum number of generations
    :param cxpb: probability of mating each consecutive pair
    :param mutpb: probability of mutating each individual
    :param elitism_k: number of best individuals copied into the offspring
    :param new_inds_per_gen: number of fresh individuals added per generation
    :param target_lambda: the loop stops once the minimum fitness is no longer
        greater than this value
    :param verbose: truthy prints progress; the value ``2`` additionally
        prints min/max/mean/variance of the fitness values
    :return: dict with ``fitness_history`` (one list of first fitness values
        per evaluated generation, including the initial one) and ``timeout``
    """
    if verbose:
        print('Starting GA...')

    fitness_history = []

    pop = toolbox.generate_population(n=popsize)
    fitnesses = toolbox.parallel_map(toolbox.evaluate, pop)
    for ind, fit in zip(pop, fitnesses):
        ind.fitness.values = fit

    if verbose:
        print('Initial evaluation done.')

    fits = [ind.fitness.values[0] for ind in pop]
    fitness_history.append(fits.copy())

    # Progress is reported roughly ten times per run. The interval is clamped
    # to 1 because ``gens // 10`` is 0 for gens < 10, which would make the
    # modulo below raise ZeroDivisionError.
    report_every = max(1, gens // 10)

    g = 0
    min_fits, max_fits, avg_fits, var_fits = [], [], [], []
    while min(fits) > target_lambda and g < gens:
        g += 1

        elite = tools.selBest(pop, elitism_k)
        offspring = toolbox.select(pop, len(pop) - elitism_k - new_inds_per_gen) + elite
        offspring = list(map(toolbox.clone, offspring)) \
            + toolbox.generate_population(n=new_inds_per_gen)

        # Mate consecutive pairs; a mated child's fitness is no longer valid.
        for child1, child2 in zip(offspring[::2], offspring[1::2]):
            if random.random() < cxpb:
                toolbox.mate(child1, child2)
                del child1.fitness.values
                del child2.fitness.values

        for mutant in offspring:
            if random.random() < mutpb:
                toolbox.mutate(mutant)
                del mutant.fitness.values

        # Only individuals without a valid fitness need (re-)evaluation.
        invalid_ind = [ind for ind in offspring if not ind.fitness.valid]
        fitnesses = toolbox.parallel_map(toolbox.evaluate, invalid_ind)
        for ind, fit in zip(invalid_ind, fitnesses):
            ind.fitness.values = fit

        pop[:] = offspring

        fits = [ind.fitness.values[0] for ind in pop]
        fitness_history.append(fits.copy())

        if verbose == 2:
            min_fits.append(min(fits))
            max_fits.append(max(fits))
            avg_fits.append(statistics.mean(fits))
            var_fits.append(statistics.variance(fits))

        if verbose and (g % report_every == 0 or g == 1):
            print(f'Generation {g}')
            if verbose == 2:
                print(' Min %s' % min_fits[-1])
                print(' Max %s' % max_fits[-1])
                print(' Avg %s' % avg_fits[-1])
                print(' Var %s' % var_fits[-1])

    # NOTE(review): despite its name, this flag is True when the best fitness
    # equals target_lambda exactly. Kept unchanged for callers - confirm intent.
    timeout = min(fits) == target_lambda
    return {'fitness_history': fitness_history, 'timeout': timeout}
def execute(toolbox: base.Toolbox, cases: int = 100) -> List[str]:
    """Evolve a population and return the best individuals joined into strings.

    The loop runs for at most ``MAX_GENERATIONS`` generations and stops early
    once the summed first fitness value of the ``cases`` best individuals is
    the same as in the previous generation (unless that sum is infinite).

    :param toolbox: toolbox providing ``population``, ``evaluate``, ``select``,
        ``map``, ``clone``, ``mate`` and ``mutate``
    :param cases: number of best individuals to keep as generated cases
    :return: one string per generated case, built with ``''.join(case)``;
        empty if no generation was run
    """
    population = toolbox.population(n=POPULATION_SIZE)
    hall_of_fame = tools.ParetoFront()

    stats = tools.Statistics(lambda i: i.fitness.values)
    stats.register("avg", numpy.mean, axis=0)
    stats.register("std", numpy.std, axis=0)
    stats.register("min", numpy.min, axis=0)
    stats.register("max", numpy.max, axis=0)

    logbook = tools.Logbook()
    logbook.header = "gen", "evals", "std", "min", "avg", "max", "best"

    # Evaluate every individual of the initial population
    for individual in population:
        individual.fitness.values = toolbox.evaluate(individual)

    hall_of_fame.update(population)
    record = stats.compile(population)
    logbook.record(gen=0, evals=len(population), **record)
    print(logbook.stream)

    # Must be an actual (empty) list: if the loop below never runs, the final
    # comprehension iterates over this value.
    generated_cases = []
    last_fitness = float('inf')
    current_fitness = None
    generation_count = 1

    while generation_count <= MAX_GENERATIONS and (
            last_fitness != current_fitness or current_fitness == float('inf')):
        last_fitness = current_fitness

        # Select the next generation individuals
        offspring = toolbox.select(population, floor(POPULATION_SIZE * 0.9))
        # Clone the selected individuals
        offspring = list(toolbox.map(toolbox.clone, offspring))
        # Top up with freshly generated individuals
        offspring += toolbox.population(n=POPULATION_SIZE - len(offspring))

        # Apply crossover and mutation on the offspring
        for child1, child2 in zip(offspring[::2], offspring[1::2]):
            if random() < MATE_RATIO:
                toolbox.mate(child1, child2)
                del child1.fitness.values
                del child2.fitness.values

        for mutant in offspring:
            if random() < MUTATION_RATIO:
                toolbox.mutate(mutant)
                del mutant.fitness.values

        # Evaluate only the individuals with an invalid fitness; this is also
        # the count reported as ``evals`` in the logbook.
        invalid_ind = [
            individual for individual in offspring
            if not individual.fitness.valid
        ]
        for individual in invalid_ind:
            individual.fitness.values = toolbox.evaluate(individual)

        generated_cases = tools.selBest(population, k=cases)
        current_fitness = sum(
            toolbox.map(op.itemgetter(0),
                        toolbox.map(toolbox.evaluate, generated_cases)))

        best = choice(generated_cases)
        word = "".join(best)

        # Select the next generation population
        population = toolbox.select(population + offspring, POPULATION_SIZE)
        record = stats.compile(population)
        logbook.record(gen=generation_count, evals=len(invalid_ind),
                       best=word, **record)
        print(logbook.stream)

        generation_count += 1

    return [''.join(case) for case in generated_cases]
def learn_causal_structure(
    toolbox: base.Toolbox,
    pop_size: int = 10,
    crossover_pr: float = 1,
    mutation_pr: float = 0.2,
    num_elites: int = 1,
    max_gens: int = 50,
):
    """
    Perform the structure learning task using a genetic algorithm

    :param toolbox: registry of tools provided by DEAP
    :param pop_size: the number of individuals per generation
    :param crossover_pr: the crossover rate for every (monogamous) couple
    :param mutation_pr: the mutation rate for every individual
    :param num_elites: the number of fittest individuals of the current
        population that are carried over unchanged into the next one
    :param max_gens: the maximum number of generations
    :return: a tuple ``(population, instrumentation)`` with the final
        population and the instrumentation utilities used for logging
    """
    # initialize a collection of instrumentation utilities to facilitate later analysis
    instrumentation = initialize_instrumentation()

    # ====== 0. Initialize population ======
    population = toolbox.population(n=pop_size)

    # ====== 1. Evaluate the entire population ======
    n_evals = evaluate_population(population, toolbox)

    # Log initial stats for later analysis
    log_generation_stats(0, population, n_evals, **instrumentation)

    # ====== 2. The loop bound is the only termination criterion ======
    for gen in range(max_gens):
        elites = get_fittest_individuals(population, num_elites)

        # ====== 3. Parent selection ======
        # Select the next generation individuals
        offspring = toolbox.select(population, len(population))
        # Clone the selected individuals so variation does not touch the parents
        offspring = list(map(toolbox.clone, offspring))

        # ====== 4. Apply crossover and mutation on the offspring ======
        # crossover probability applies to every couple
        for child1, child2 in zip(offspring[::2], offspring[1::2]):
            if random.random() < crossover_pr:
                toolbox.mate(child1, child2)
                # NaN marks the fitness as needing re-evaluation
                child1.fitness = np.nan
                child2.fitness = np.nan

        # mutation probability applies to every individual
        for mutant in offspring:
            if random.random() < mutation_pr:
                toolbox.mutate(mutant)
                mutant.fitness = np.nan

        # ====== 5. Evaluate the individuals with an invalid fitness ======
        n_evals = evaluate_population(offspring, toolbox)

        # ====== 6. Replacement ======
        # The population is entirely replaced by the offspring, except for the top elites
        fittest_offsprings = get_fittest_individuals(offspring, pop_size - num_elites)
        population[:] = elites + fittest_offsprings

        # Log stats only after replacement, so that generation ``gen + 1``
        # describes the new population. Logging before replacement would
        # repeat the previous generation's population and never record the
        # final one.
        log_generation_stats(gen + 1, population, n_evals, **instrumentation)

    # ====== 7. Return final population ======
    return population, instrumentation