def main():
    """Run the evolutionary search, logging its console output to a file.

    Everything printed while the run is in progress (the ``'stats'`` marker,
    whatever ``algorithms.eaSimple`` prints, and a sample expression tree) is
    written to ``ga_history.txt`` instead of the console.

    Returns:
        tuple: ``(pop, hof, stats)`` -- the population object passed to
        ``eaSimple``, the hall of fame and the statistics object.
    """
    # Local import so the function does not depend on the file's import block.
    from contextlib import redirect_stdout

    pop = toolbox.population(n=300)
    hof = tools.HallOfFame(1)

    # The context managers guarantee that the file is closed and sys.stdout
    # is restored even when the evolutionary run raises.
    with open('ga_history.txt', 'w') as f, redirect_stdout(f):
        print('stats')
        stats = tools.Statistics(lambda ind: ind.fitness.values)
        stats.register("avg", numpy.mean)
        stats.register("std", numpy.std)
        stats.register("min", numpy.min)
        stats.register("max", numpy.max)
        algorithms.eaSimple(pop, toolbox, 0.5, 0.2, 40, stats, halloffame=hof)

        # NOTE(review): this prints a freshly generated random tree, not the
        # best individual stored in ``hof`` -- confirm that is intended.
        expr = gp.genFull(pset, min_=1, max_=3)
        tree = gp.PrimitiveTree(expr)
        print('Tree')
        print(str(tree))

    print("done")
    return pop, hof, stats
def main():
    """Evolve a seeded population and show the best individual found.

    Fitness and size statistics (avg/std/min/max) are collected for every
    generation, the best individual is printed as an expression tree and the
    logbook is passed to ``draw_logbook``.

    Returns:
        tuple: ``(pop, logbook, hof)`` as produced by the run.
    """
    random.seed(318)
    initial_pop = toolbox.population(n=300)
    hof = tools.HallOfFame(1)

    mstats = tools.MultiStatistics(
        fitness=tools.Statistics(lambda ind: ind.fitness.values),
        size=tools.Statistics(len),
    )
    reducers = (
        ("avg", numpy.mean),
        ("std", numpy.std),
        ("min", numpy.min),
        ("max", numpy.max),
    )
    for label, reducer in reducers:
        mstats.register(label, reducer)

    pop, logbook = algorithms.eaSimple(
        initial_pop, toolbox, 0.5, 0.1, 50,
        stats=mstats, halloffame=hof, verbose=True)

    # The hall of fame keeps its best entry at index 0.
    print(gp.PrimitiveTree(hof[0]))
    draw_logbook(logbook)
    return pop, logbook, hof
def test_example(self, ex, optimal=False, cats=1):
    """Build a regex individual for one example and print how it performs.

    Args:
        ex: example mapping; its ``'string'`` entry is the text searched.
        optimal: when true the individual comes from
            ``gen_best_individual``; otherwise it is generated from
            ``PATTERN1`` without collapsing.
        cats: forwarded to ``generate_individual_from_example`` in the
            non-optimal case.
    """
    print("\nExample:")
    print(ex)
    expected = regex.compile(self.regex_target).findall(ex['string'])
    print("Target:")
    print(expected)

    if optimal:
        expr = self.gen_best_individual(ex)
    else:
        expr = self.generate_individual_from_example(
            ex, self.PATTERN1, collapse=False, cats=cats)

    # Nothing to compile when the generator produced an empty expression.
    if not len(expr) > 0:
        print("No match found.")
        return

    pattern_text = gp.compile(gp.PrimitiveTree(expr), self.pset).s
    compiled = regex.compile(pattern_text)
    print(compiled)
    print(compiled.findall(ex['string']))
    print(f'Expression Length: {len(pattern_text)}')
def generate_initial_population_impl(toolbox):
    """Create the initial population of unique, size-limited individuals.

    Individuals are generated with ``gp.genHalfAndHalf`` until
    ``toolbox.pop_size[0]`` of them have been accepted. A candidate is
    rejected when its ``make_pp_str`` string has already been seen or when it
    has more than ``toolbox.max_individual_size`` nodes. Generation stops
    early once ``toolbox.child_creation_retries`` consecutive candidates have
    been rejected.

    Side effects: resets ``toolbox.ind_str_set``, evaluates each accepted
    individual and writes log lines to ``toolbox.f``.

    Returns:
        list: the accepted individuals (possibly fewer than requested).
    """
    toolbox.ind_str_set = set()
    population = []
    retry_count = 0
    while len(population) < toolbox.pop_size[0]:
        ind = gp.PrimitiveTree(
            gp.genHalfAndHalf(pset=toolbox.pset, min_=2, max_=4))
        ind.age = 0
        ind.id = toolbox.get_unique_id()
        pp_str = make_pp_str(ind)
        # Reject duplicates and oversized trees.
        if pp_str in toolbox.ind_str_set or len(
                ind) > toolbox.max_individual_size:
            if retry_count < toolbox.child_creation_retries:
                retry_count += 1
                continue
            else:
                # Too many consecutive rejections: give up.
                break
        # A candidate was accepted, so the consecutive-rejection count restarts.
        retry_count = 0
        toolbox.ind_str_set.add(pp_str)
        evaluate_individual(toolbox, ind, pp_str, 0)
        if ind:
            toolbox.f.write(
                f"at gen {toolbox.real_gen}, [{ind.id}] = {get_ind_info(ind)} = init\n"
            )
            toolbox.f.write(
                f"at gen {toolbox.real_gen}, [{ind.id}] = {str(ind)}\n")
        # NOTE(review): assumes the individual is appended regardless of the
        # truthiness check above (which then only guards logging) -- confirm.
        population.append(ind)
    return population
def save_results(self, fname):
    '''
    Save the results of running the DEAP GP to a text file.

    The report holds the run log followed by a summary of the best
    individual: the required sequence, the sequence actually produced, the
    resulting expression and whether the two sequences match.

    Params:
        fname - string of the filename.

    Returns:
        N/A
    '''
    if not self.rlist:
        print("No results available to write to file.")
        return

    print("Writing results file: {}".format(fname))

    # Build the whole report first, so a failure while formatting does not
    # leave a truncated file behind.
    report = [
        str(self.log),
        "\n" + "-" * 80 + "\n",
        "\nBest individual:",
        "\nRequired sequence: {}".format(", ".join(map(str, self.slist))),
        "\nActual sequence: {}".format(", ".join(map(str, self.rlist))),
        "\n\n",
        # The resultant equation from the best individual.
        "\nBest algorithm: {}".format(str(gp.PrimitiveTree(self.expr))),
        # Whether the individual is a success or not.
        "\n\nSuccess!" if self.slist == self.rlist else "\n\nFailed!",
    ]

    with open(fname, "w") as fout:
        # write(), not writelines(): the payload is a single string.
        fout.write("".join(report))
def create_reduced_dataset(individual, pset, X):
    """Transform ``X`` with the distinct sub-expressions found in ``individual``.

    Every position in the individual's string form where the character
    ``'F'`` occurs is passed to ``fitness.get_subtree``; each distinct
    sub-expression is compiled once and used as one output column.
    (Presumably ``'F'`` marks feature-constructor primitives -- confirm
    against the primitive set.) If no feature is collected, the whole
    individual is compiled and used as the single column.

    Args:
        individual: the GP individual to decompose.
        pset: primitive set used for parsing and compiling.
        X: iterable of argument tuples; each is unpacked into every feature.

    Returns:
        list[list]: one row per element of ``X``, one value per feature.
    """
    exp = gp.PrimitiveTree(individual)
    string = str(exp)

    # With no 'F' present, index 0 is the only start position.
    starts = [pos for pos, char in enumerate(string) if char == 'F'] or [0]

    features = []
    seen = set()  # sub-expression strings that were already compiled
    for start in starts:
        subtree = fitness.get_subtree(start, string)
        key = str(subtree)
        if key in seen:
            continue
        seen.add(key)
        newtree = exp.from_string(subtree, pset)
        features.append(gp.compile(newtree, pset))

    if not features:
        features.append(gp.compile(individual, pset))

    # Rows are built from ``features`` alone, so the fallback feature is
    # applied too (it was dropped when zipping against a parallel list).
    return [[feature(*x) for feature in features] for x in X]
def evalInvalid(atlas, toolbox):
    """Evaluate every individual of ``atlas`` whose ``charac`` is still None.

    Returns whatever ``assignValues`` returns for the pending individuals and
    their computed results.
    """
    pending = []
    stripped = []
    for candidate in atlas:
        if candidate.charac is None:
            pending.append(candidate)
            # Reduce individuals to plain PrimitiveTrees to cut the
            # parallelisation overhead.
            stripped.append(gp.PrimitiveTree(candidate))

    results = list(toolbox.multiMap(toolbox.evalMapping, stripped))
    return assignValues(pending, results)
def pruebaIndividuo():
    """Generate one random individual and print it as an expression tree."""
    primitives = configuraIndividuo()
    # Full initialisation with a depth between 1 and 3; wrapping the result
    # in a PrimitiveTree makes it print as a sequence of operations.
    print(gp.PrimitiveTree(gp.genFull(primitives, min_=1, max_=3)))
def evaluate_final_pop(arg):
    """Compile an individual through its string form.

    ``arg`` is wrapped in a ``PrimitiveTree``, rendered to a string and that
    string is compiled against the module-level ``pset``. The compiled result
    is returned.
    """
    source = str(gp.PrimitiveTree(arg))
    return gp.compile(source, pset)
def evalSymbReg(individual):
    """Score an individual by how close its markup is to ``RESULT_HTML``.

    The individual is compiled into a callable, invoked with ``x='text'`` and
    the resulting markup is compared with ``RESULT_HTML`` through
    ``calc_similarity``.

    Returns:
        tuple: one-element tuple holding the negated similarity.
    """
    # Transform the tree expression into a callable function.
    func = toolbox.compile(expr=individual)
    markup = func(x='text')
    return -calc_similarity(RESULT_HTML, markup),
def subtreeGenerator(subtreeSlices, treeBestInd):
    """Compile each sub-tree of an individual.

    Args:
        subtreeSlices: sequence of slice objects (``start``/``stop`` are
            read) delimiting sub-trees inside ``treeBestInd``.
        treeBestInd: the individual the slices refer to.

    Returns:
        list: one ``gp.compile`` result per slice, in the same order,
        compiled against the module-level ``pset``.
    """
    listOfPrimSubTF = []
    for subtree_slice in subtreeSlices:
        nodes = [
            treeBestInd[i]
            for i in range(subtree_slice.start, subtree_slice.stop)
        ]
        # Calling gp.compile directly avoids building and registering a new
        # Toolbox on every iteration.
        listOfPrimSubTF.append(
            gp.compile(expr=gp.PrimitiveTree(nodes), pset=pset))
    return listOfPrimSubTF
def txt_to_individual(file_path, pset):
    """Load an expression from a text file and parse it into a PrimitiveTree.

    The bare names ``div``, ``sqrt`` and ``log`` are rewritten to ``pdiv``,
    ``psqrt`` and ``plog`` when the primitive set only knows the prefixed
    name. Names that are already prefixed, or that ``pset`` defines itself,
    are left untouched.

    Args:
        file_path: path of the text file holding the expression.
        pset: primitive set used to resolve the tokens.

    Returns:
        The tree built by ``gp.PrimitiveTree.from_string``.
    """
    import re

    with open(file_path, 'r') as handle:
        string = handle.read()

    def _alias(match):
        name = match.group(0)
        protected = 'p' + name
        if name not in pset.mapping and protected in pset.mapping:
            return protected
        return name

    # str.replace returns a new string; the earlier code discarded it, so no
    # renaming ever happened. Word boundaries keep 'pdiv' from turning into
    # 'ppdiv'.
    string = re.sub(r'\b(?:div|sqrt|log)\b', _alias, string)

    # from_string is a classmethod, so no throwaway random tree is needed.
    return gp.PrimitiveTree.from_string(string, pset)
def pruebaIndividuo():
    """Build a single random individual and display it."""
    # Performs a full initialisation with depth between 1 and 3
    random_nodes = gp.genFull(configuraIndividuo(), min_=1, max_=3)
    # Extracts the corresponding tree
    individual_tree = gp.PrimitiveTree(random_nodes)
    # Displayed as a list of operations
    print(individual_tree)
def pruebaPoblacion():
    """Create a population of 10 individuals and print each one."""
    toolbox = base.Toolbox()
    configuraPoblacion(toolbox)

    # The population is initialised with a total of 10 individuals.
    population = toolbox.population(n=10)

    # Print the population: 10 individuals as expression trees.
    for tree in map(gp.PrimitiveTree, population):
        print(tree)
def get_optimal_drops(self, example, pattern, debug=False):
    """Search for the shortest collapsed expression that still matches the target.

    Starts from the non-collapsed individual for ``example`` and then tries
    collapsed variants over a range of ``drop_limit`` / ``skip_count``
    settings. A variant replaces the current best when its compiled regex
    returns the same ``findall`` result as ``self.regex_target`` on
    ``example['string']`` and its regex string is shorter.

    Args:
        example: example mapping; its ``'string'`` entry is the text searched.
        pattern: forwarded to ``generate_individual_from_example``.
        debug: when true, print every improvement that is found.

    Returns:
        The best expression found (the non-collapsed one if nothing shorter
        matched).
    """
    # Baseline: the expression generated without collapsing.
    non_opt_expr = self.generate_individual_from_example(example,
                                                         pattern,
                                                         collapse=False)
    tree_non_opt = gp.PrimitiveTree(non_opt_expr)
    ts = gp.compile(tree_non_opt, self.pset).s
    best_len, best_expr = len(ts), non_opt_expr
    target_test = regex.compile(self.regex_target).findall(
        example['string'])
    upper = 15
    while upper > 2:
        skip = 2
        while skip < 12:
            temp_expr = self.generate_individual_from_example(
                example,
                pattern,
                collapse=True,
                drop_limit=upper,
                skip_count=skip)
            tree_test = gp.PrimitiveTree(temp_expr)
            ts = gp.compile(tree_test, self.pset).s
            drop_len = len(ts)
            indr = regex.compile(ts)
            test = indr.findall(example['string'])
            if test == target_test:
                # NOTE(review): both counters advance on every successful
                # match, so the two settings are varied together rather than
                # as a full grid -- confirm this is intended.
                skip += 2
                upper -= 1
                if drop_len < best_len:
                    if debug:
                        print(
                            f'Regex: {indr}, length: {drop_len}, test: {test}'
                        )
                    best_expr = temp_expr
                    best_len = drop_len
            else:
                # Setting both counters to their limits ends both loops.
                skip = 12  # no use trying to skip more if skipping this many doesn't work
                upper = 2  # no use trying a smaller limit if a larger doesn't work
    return best_expr
def show_results(self, gtitle):
    '''
    Display the results of executing the DEAP GP object; best individual
    is displayed.

    Prints the required and calculated sequences, whether they match, the
    best individual and its equation. On Linux the individual is also drawn
    as a tree and saved to ``<gtitle without extension>.png``.

    Params:
        gtitle - string of the graph title.

    Returns:
        N/A
    '''
    if not self.rlist:
        print("\nError: hof variable is emtpy.")
        return

    # Show the sequence required
    print("\nRequired sequence: {}".format(", ".join(map(str, self.slist))))
    # Show the resultant integer sequence
    print("\nCalculated result: {}".format(", ".join(map(str, self.rlist))))

    # Let the user know how it went.
    print("-" * 80)
    if self.rlist == self.slist:
        print("\nSuccessfully calculated the Integer Sequence.")
    else:
        print("\nUnsuccessfull in calculating the Integer Sequence.")

    # Display the individual
    print('\nBest individual : ', self.expr)

    # Display the resultant equation from the best individual. It used to be
    # rendered with a bare ``str(tree)`` whose result was thrown away.
    print("\nBest algorithm: {}".format(gp.PrimitiveTree(self.expr)))

    # Display the best individual => graph and equation.
    # Only works reliably on Linux Ubuntu.
    if not sys.platform.startswith('linux'):
        print("Graphical output only available on Linux.")
        return

    print("Running on Linux OS.")
    nodes, edges, labels = gp.graph(self.expr)

    # Create tree diagram
    g = nx.Graph()
    g.add_nodes_from(nodes)
    g.add_edges_from(edges)
    pos = graphviz_layout(g, prog="dot")
    nx.draw_networkx_nodes(g, pos)
    nx.draw_networkx_edges(g, pos)
    nx.draw_networkx_labels(g, pos, labels)

    # Remove the filename extension
    gtitle = os.path.splitext(gtitle)[0]
    plt.title(gtitle, None, 'center', None)
    plt.savefig(gtitle + '.png')
def view_results(self, results):
    """Plot the fitness averages and draw the best individual as a tree.

    Args:
        results: ``(hof, log)`` pair. The first entry of ``hof.items`` is
            taken as the best individual; ``log`` is iterated and each
            record's ``'avg'`` entry is plotted.
    """
    # Short display labels for the primitive names.
    lab_dict = {
        'cat_prim': '+',
        'q_constr': '?',
        'plus_constr': '+',
        'or_prim': '|',
        'star_constr': 'x',
        'looka_constr': '(?=)',
        'lookb_constr': '(?<=)',
        'nch_constr': '[^ ]',
        'ch_constr': '[ ]',
        'term_constr': 't()'
    }
    hof, log = results

    plt.plot([record['avg'] for record in log])
    plt.xlabel('Generations')
    plt.ylabel('Fitness Averages')
    plt.title('Capture Fitness Avgs over Generations (0 is perfect)')
    plt.show()

    expr = hof.items[0]
    tree = gp.PrimitiveTree(expr)
    # This "tree = regex" text used to be built and then discarded.
    print(str(tree) + ' = ' + gp.compile(tree, pset=self.pset).s)

    nodes, edges, labels = gp.graph(expr)
    print(labels.values())
    new_labs = {}
    for i, lab in enumerate(labels.values()):
        if self.has_digit(lab):
            # Labels containing a digit are looked up in the pset context
            # and shown by their ``.s`` text.
            text = self.pset.context[lab].s
            # Quote a lone space so it stays visible in the drawing.
            new_labs[i] = '\' \'' if text == ' ' else text
        else:
            new_labs[i] = lab_dict[lab]
    print(labels)
    print(new_labs)

    graph = networkx.Graph()
    graph.add_nodes_from(nodes)
    graph.add_edges_from(edges)
    pos = graphviz_layout(graph, prog="dot")
    plt.figure(figsize=(10, 10))
    networkx.draw_networkx_nodes(graph, pos, node_size=400, node_color='w')
    networkx.draw_networkx_edges(graph, pos, edge_color="blue")
    networkx.draw_networkx_labels(graph, pos, new_labs)
    plt.axis("off")
    plt.show()
def visit_FuncDef(self, node):
    """Translate one function definition node into a PrimitiveTree.

    Each parameter is registered in ``self.vars`` under the type that
    ``type_map`` gives for its declared type name (``Val`` when the name is
    not mapped). The body is then visited with a fresh ``self.tree`` and
    ``(name, params, tree)`` is appended to ``self.results``.

    Args:
        node: function-definition AST node.
    """
    name = node.decl.name

    # NOTE(review): assumes ``args`` is None when the function is declared
    # with an empty parameter list (as pycparser does) -- such functions
    # previously raised AttributeError here.
    arg_list = node.decl.type.args
    param_decls = arg_list.params if arg_list is not None else []
    params = [(decl.name, decl.type.type.names[0]) for decl in param_decls]

    for param_name, type_name in params:
        if type_name in type_map:
            real_type, _ = type_map[type_name]
        else:
            real_type = Val
        self.vars[param_name] = real_type

    self.tree = gp.PrimitiveTree([])
    self.visit(node.body)
    self.results.append((name, params, self.tree))
    # Cleared so that no tree is attached outside a function definition.
    self.tree = None
def main():
    """Evolve an HTML-generating individual and report the best one.

    Prints the best individual's loss, code and generated markup, draws its
    graph and the logbook, and saves a rendering of the markup to
    ``output/result_html.png``.

    Returns:
        tuple: ``(pop, log, hof)`` as produced by the run.
    """
    import os

    random.seed(318)
    pop = toolbox.population(n=300)
    hof = tools.HallOfFame(1)

    stats_fit = tools.Statistics(lambda ind: ind.fitness.values)
    stats_size = tools.Statistics(len)
    mstats = tools.MultiStatistics(fitness=stats_fit, size=stats_size)
    mstats.register("min", np.min)
    mstats.register("avg", np.mean)

    pop, log = algorithms.eaSimple(pop, toolbox, 0.5, 0.2, 20,
                                   stats=mstats, halloffame=hof,
                                   verbose=True)

    expr = hof[0]
    tree = gp.PrimitiveTree(expr)
    print('LOSS: {}'.format(evalByColorProportion(expr)))
    print()
    print('CODE: {}'.format(tree))
    print()

    # Compile and run the best individual once; the markup is reused below
    # for rendering instead of compiling and calling it a second time.
    func = toolbox.compile(expr=expr)
    markup = func(x='text')
    print('HTML: \n')
    print(markup)
    print()

    draw_graph(expr)
    draw_logbook(log)

    result_img = renderer.render_html(body(markup))
    # Make sure the target directory exists so the save cannot fail at the
    # very end of the run.
    os.makedirs('output', exist_ok=True)
    result_img.save('output/result_html.png')
    return pop, log, hof
def gen_best_individual(self,
                        ex,
                        w=True,
                        collapse=False,
                        cats=1,
                        drop_limit=10,
                        skip_counts=2):
    """Pick the match pattern that suits ``ex`` and return its optimised expression.

    ``PATTERN1`` is tried first because it produces shorter results (it
    combines punctuation). If the regex compiled from it does not reproduce
    the target matches on ``ex['string']``, ``PATTERN2`` is used instead. The
    chosen pattern is handed to ``get_optimal_drops``.

    ``w``, ``collapse``, ``cats``, ``drop_limit`` and ``skip_counts`` are
    accepted but not used by this method.
    """
    candidate = self.generate_individual_from_example(ex, self.PATTERN1)
    target = regex.compile(self.regex_target).findall(ex['string'])

    candidate_source = gp.compile(gp.PrimitiveTree(candidate), self.pset).s
    candidate_regex = regex.compile(candidate_source)
    reproduces_target = candidate_regex.findall(ex['string']) == target

    best_pattern = self.PATTERN1 if reproduces_target else self.PATTERN2
    return self.get_optimal_drops(ex, best_pattern)
def from_string2(cls, string, pset):
    """Parse an expression string into a PrimitiveTree.

    The string is split on whitespace, parentheses and commas. Tokens found
    in ``pset.mapping`` are replaced by the mapped object; every other token
    is evaluated and wrapped in a ``Terminal`` typed after the resulting
    value.

    Raises:
        TypeError: when a token is not in the mapping and evaluating it
            raises ``NameError``.
    """
    expr = []
    for token in re.split("[ \t\n\r\f\v(),]", string):
        if token == '':
            continue
        if token in pset.mapping:
            expr.append(pset.mapping[token])
            continue
        try:
            # SECURITY NOTE: eval() executes arbitrary code; only feed this
            # function strings from a trusted source.
            value = eval(token)
        except NameError:
            raise TypeError(
                "Unable to evaluate terminal: {}.".format(token))
        expr.append(Terminal(value, False, type(value)))
    return gp.PrimitiveTree(expr)
def main():
    """Evolve a seeded population and print the best individual's score and code.

    Returns:
        tuple: ``(pop, log, hof)`` as produced by the run.
    """
    random.seed(318)
    pop = toolbox.population(n=300)
    hof = tools.HallOfFame(1)

    stats_fit = tools.Statistics(lambda ind: ind.fitness.values)
    stats_size = tools.Statistics(len)
    mstats = tools.MultiStatistics(fitness=stats_fit, size=stats_size)
    # The "max" column must hold the maximum; it was computed with
    # numpy.mean, so the logbook reported averages under the wrong label.
    mstats.register("max", numpy.max)

    pop, log = algorithms.eaSimple(pop, toolbox, 0.5, 0.1, 20,
                                   stats=mstats, halloffame=hof,
                                   verbose=True)

    expr = hof[0]
    tree = gp.PrimitiveTree(expr)
    print('SCORE: \n')
    print(evalSymbReg(expr))
    print()
    print('CODE: \n')
    print(tree)
    print()
    return pop, log, hof
def __init__(self,
             model,
             dataset_meta_features_df,
             primitive_to_hash_dic,
             le,
             knowledge_base_path,
             json_path,
             longest_pipeline_size,
             rank_size=100,
             use_meta_model_flag=True,
             boosting=False,
             boosting_percent=0.1,
             meta_selection_window_size=10,
             meta_selection=False,
             meta_selection_type='offspring',
             meta_fitness_flag=False,
             **kwargs):
    """Store the meta-learning configuration on top of the parent's setup.

    The two flags below are assigned before the parent initialiser runs;
    every other attribute is set afterwards. ``boosting_size`` is derived
    from ``self.population_size``, which this method does not assign itself.

    Raises:
        ValueError: if boosting is requested while ``knowledge_base_path``
            or ``json_path`` is ``None``.
    """
    self.meta_fitness_flag = meta_fitness_flag
    self.use_boosting_flag = boosting
    super().__init__(**kwargs)

    self.knowledge_base_path = knowledge_base_path
    self.json_path = json_path
    if self.use_boosting_flag:
        # Boosting needs both locations.
        if self.knowledge_base_path is None or self.json_path is None:
            raise ValueError(
                "Cannot use boosting without knowledge-base or Json path")

    # Meta-model inputs.
    self.meta_features_df = dataset_meta_features_df
    self.longest_pipeline_size = longest_pipeline_size
    self.use_metamodel_flag = use_meta_model_flag

    # Selection / boosting settings.
    self.meta_selection_type = meta_selection_type
    self.boosting_size = int(self.population_size * boosting_percent)
    self.meta_selection_size = meta_selection_window_size
    self.meta_selection_flag = meta_selection
    self.rank_size = rank_size

    # Model, lookup tables and working state.
    self.meta_model = model
    self.primitives_to_hash_dic = primitive_to_hash_dic
    self.gptree = gp.PrimitiveTree([])
    self.le = le
    self.gen_results_dic = {}
def prueba():
    '''Tool for storing the population configuration'''
    toolbox = base.Toolbox()
    ConfiguracionProblema.configuraPoblacion(toolbox)

    # Instantiate one (random) individual.
    # Students are advised to try several individuals under different
    # optimality conditions, to check that the function is well defined
    # over the whole search space.
    candidate = toolbox.individual()

    # Print the individual BEFORE evaluating it
    print(gp.PrimitiveTree(candidate))
    print(candidate.fitness.valid)  # False

    candidate.fitness.values = evalEcuacion(toolbox, candidate)

    # Print the individual AFTER evaluating it
    for shown in (candidate.fitness.valid, candidate.fitness):  # valid: True
        print(shown)
def evaluate_fitness(arg):
    """Score a generated song; higher is better.

    The individual is compiled (via its string form) into a note sequence,
    which is scored on three criteria: how GP-like the discriminator judges
    it, its length, and how evenly its notes are distributed.

    Returns:
        tuple: one-element tuple with the fitness value.
    """
    song = list(gp.compile(str(gp.PrimitiveTree(arg)), pset))
    length = len(song)

    # Convert the sequence into its latent embedding form.
    counts = count_vect.transform(["".join(song)])
    embedded = tfidf_transformer.transform(counts)
    # The discriminator predicts the probability of the sample having been
    # generated by the DNN or by GP.
    _, gp_prob = discriminator.predict_proba(embedded)[0]

    fitness = 0
    # Higher penalty the more the sample looks like GP output.
    fitness -= 200 * gp_prob

    # Length bands: up to 3 notes, 4..100 notes, more than 100.
    if length <= 3:
        fitness -= 10
    elif length <= 100:
        fitness += 9
    else:
        fitness -= 70

    # Reward notes that fill at most a third of the song; penalise notes
    # that fill more than half of it.
    for occurrences in Counter(song).values():
        if occurrences <= length / 3:
            fitness += 10
        if occurrences > length / 2:
            fitness -= 20
    return fitness,
def mutUniform(toolbox, parent, expr, pset):
    """Create a mutated copy of ``parent`` by replacing one random subtree.

    A node is picked uniformly at random in a copy of ``parent`` and the
    subtree rooted there is replaced by ``expr(pset=..., type_=...)`` with
    the same return type. The child is rejected when its pretty-printed
    string is already known or it exceeds ``toolbox.max_individual_size``.

    Returns:
        tuple: ``(child, pp_str)``, or ``(None, None)`` when rejected.
    """
    child = copy_individual(toolbox, parent)
    index = random.randrange(len(child))
    subtree = child.searchSubtree(index)
    mutation = expr(pset=pset, type_=child[index].ret)
    child[subtree] = mutation

    pp_str = make_pp_str(child)
    rejected = (pp_str in toolbox.ind_str_set
                or len(child) > toolbox.max_individual_size)
    if rejected:
        return None, None

    evaluate_individual(toolbox, child, pp_str, 0)
    if child:
        # Log the child's lineage, the inserted expression and the result.
        expr_str = str(gp.PrimitiveTree(mutation))
        f1 = parent.fam.family_index
        log = toolbox.f
        log.write(
            f"at gen {toolbox.real_gen}, [{child.id}] = {get_ind_info(child)} = mut [{parent.id}]<{f1}>\n"
        )
        log.write(
            f"at gen {toolbox.real_gen}, [{child.id}] = mut expr {expr_str}\n")
        log.write(
            f"at gen {toolbox.real_gen}, [{child.id}] = {str(child)}\n")
    return child, pp_str
def create_attribute(pset):
    """Grow a random expression from ``pset`` and wrap it in a PrimitiveTree.

    The depth bounds come from ``settings.MINIMAL_TREE_DEPTH`` and
    ``settings.MAXIMUM_TREE_DEPTH``.
    """
    depth_bounds = {
        "min_": settings.MINIMAL_TREE_DEPTH,
        "max_": settings.MAXIMUM_TREE_DEPTH,
    }
    return gp.PrimitiveTree(gp.genGrow(pset=pset, **depth_bounds))
def eaParetosSVD(atlases,
                 toolbox,
                 cxpb,
                 mutpb,
                 ngen,
                 minDiv,
                 minSize=20,
                 stats=None,
                 halloffame=None,
                 checkpoint=None,
                 freq=50,
                 verbose=__debug__):
    """Evolve several atlases of mappings with SVD-based selection.

    Structure of this algorithm, per generation:

    1. Assign the queued evaluation results to the individuals whose
       ``charac`` is still None.
    2. Filter out individuals whose values are invalid, so that the SVD can
       be done.
    3. Select individuals from every atlas with ``selectSVD``.
    4. Top up the migrant pool, swap the selected individuals into it and
       vary it with ``varAnd``.
    5. Build the next atlases from the selected individuals plus a random
       sample of migrants, and queue their evaluation.
    6. Record statistics and, every ``freq`` generations, pickle a
       checkpoint into the ``checkpoint`` directory.

    Args:
        atlases: list of lists of individuals.
        toolbox: provides ``map``, ``multiMap``, ``evalMapping`` and
            ``individual``.
        cxpb, mutpb: forwarded to ``varAnd``.
        ngen: number of generations.
        minDiv: minimum diversity passed to ``selectSVD``; a tuple ``(a, b)``
            switches to an adaptive value computed from the number of
            selected individuals.
        minSize: lower bound used when sizing the migrant pool and the
            offspring samples.
        stats: optional statistics object compiled over the selection.
        halloffame: optional hall of fame updated with each selection.
        checkpoint: optional directory for checkpoint files. Note that the
            process working directory is changed to it.
        freq: checkpoint interval, in generations.
        verbose: print the logbook stream and atlas sizes each generation.

    NOTE(review): the function returns ``paretos``, which is never assigned
    inside it -- unless a global of that name exists this raises NameError.
    """
    invalid_ind = [ind for ind in sum(atlases, []) if ind.charac is None]
    invalid_primTree = [gp.PrimitiveTree(ind) for ind in invalid_ind]
    calculations = toolbox.multiMap(toolbox.evalMapping, invalid_primTree)
    widgets = [
        'Evolving: ',
        Percentage(), ' ',
        Bar(marker='#', left='[', right=']'), ' ',
        ETA()
    ]
    nevals = 0
    paretoN = 0
    adaptiveMinDiv = False
    lastcheckpoint = None
    logbook = tools.Logbook()
    logbook.header = (stats.fields
                      if stats else []) + ['gen', 'nevals', 'natlas']
    #Generate population of migrants to allow transfer between atlases
    migrants = []
    pbar = ProgressBar(widgets=widgets, maxval=ngen + 1)
    pbar.start()
    # Begin the generational process
    for gen in range(1, ngen + 1):
        #Evaluate fitness and pareto fronts for the generation
        #nevals += sum(toolbox.map(toolbox.evalCRS, atlases))
        print 'calc'
        nevals += assignValues(invalid_ind, list(calculations))
        print 'calc end'

        #filter invalid mappings out so that SVD can be done:
        def filterInd(individual):
            validCharac = np.isfinite(
                individual.charac['characteristic'].sum())
            validNorm = not np.isclose(
                individual.domainSD + np.abs(individual.domainAv), 0)
            validRobust = individual.robustness < 1e10
            return validCharac and validNorm and validRobust

        atlases = toolbox.map(lambda x: filter(filterInd, x), atlases)
        # A tuple is only seen on the first pass: minDiv is overwritten with
        # a number below, while a and b keep their values.
        if type(minDiv) is tuple:
            a, b = minDiv
            adaptiveMinDiv = True
        else:
            minDivList = [minDiv for atlas in atlases]
        if adaptiveMinDiv:
            minDiv = 1 - 10**(-paretoN**a / b)
            minDivList = [1 - 10**(-paretoN**a / b) for atlas in atlases]
        print 'filter'
        selectedAtlases = list(
            toolbox.map(lambda atlas, minD: selectSVD(atlas, minD), atlases,
                        minDivList))
        print 'selected'
        # Match size of migrants to pareto fronts
        paretoN = sum(map(len, selectedAtlases))
        while paretoN > len(migrants) or len(atlases) * minSize > len(
                migrants):
            ind = toolbox.individual()
            ind.charac = None
            ind.robustness = None
            ind.shift = None
            migrants.append(ind)
        #Place selected individuals in migrant population
        selected = sum(selectedAtlases, [])
        swap(selected, migrants)
        # Vary the pool of individuals
        migrants = varAnd(migrants, toolbox, cxpb, mutpb)
        print 'migrants'

        #Invalidate mutated individuals
        def newInd(ind):
            ind.charac = None
            ind.robustness = None
            ind.shift = None
            del ind.fitness.values
            return ind

        # The list is discarded; the comprehension is used for its side effect.
        [newInd(ind) for ind in migrants if not ind.fitness.valid]

        #create new atlases and mutate migrants:
        def nextGen(selected):
            if len(selected) > minSize:
                offspring = random.sample(migrants, len(selected))
            else:
                offspring = random.sample(migrants, 2 * minSize)
            # Update the hall of fame with the generated individuals
            if halloffame is not None:
                halloffame.update(selected)
            return removeDuplicates(selected + offspring)

        atlases = toolbox.map(nextGen, selectedAtlases)
        # Queue the evaluation that is consumed at the top of the next pass.
        invalid_ind = [ind for ind in sum(atlases, []) if ind.charac is None]
        invalid_primTree = [gp.PrimitiveTree(ind) for ind in invalid_ind]
        calculations = toolbox.multiMap(toolbox.evalMapping, invalid_primTree)
        record = stats.compile(sum(selectedAtlases, [])) if stats else {}
        logbook.record(gen=gen,
                       nevals=nevals,
                       natlas=map(len, selectedAtlases),
                       **record)
        if verbose:
            print logbook.stream
            print 'new Atlases', map(len, atlases)
        if checkpoint is not None and gen % freq == 0:
            os.chdir(checkpoint)
            filename = str(gen) + '-' + datetime.now().time().replace(
                microsecond=0).isoformat() + '.pkl'
            cp = dict(
                atlases=selectedAtlases,
                generation=gen,
                halloffame=halloffame,
                logbook=logbook,
                migrants=migrants,
                rndstate=random.getstate(),
            )
            with open(filename, 'wb') as f:
                pickle.dump(cp, f, -1)
            print "Checkpointed at generation %.i" % gen + ' in ' + checkpoint + '/' + filename
            lastcheckpoint = gen
        elif lastcheckpoint is not None:
            # ``filename`` still holds the name written at the last checkpoint.
            print "Last checkpointed at generation %.i" % lastcheckpoint + ' in ' + checkpoint + '/' + filename
        print ''
        pbar.update(gen)
    print ''
    print ''
    pbar.finish()
    return paretos, logbook
def run(self, verbose=False):
    """Run the regex GP loop for ``self.ngen`` generations.

    Each generation evaluates the whole population, records statistics,
    updates the hall of fame, orders the population by non-dominated fronts
    and builds the next population: about 90% through crossover, mutation or
    plain tournament selection, the rest as freshly generated individuals.

    Args:
        verbose: print the logbook stream after every generation.

    Returns:
        tuple: ``(hof, logbook)``.
    """
    creator.create("FitnessMulti", base.Fitness, weights=(-1.0, -1.0, -1.0))
    creator.create("Individual", gp.PrimitiveTree,
                   fitness=creator.FitnessMulti)

    self.population = self.init_pop()
    self.population = [creator.Individual(indv) for indv in self.population]

    toolbox = base.Toolbox()
    toolbox.register("expr", self.genRampedRegexTree,
                     min_=self.min_ht, max_=self.max_ht,
                     ratio=self.term_ratio, classes=self.classes,
                     pset=self.pset)
    toolbox.register("individual", tools.initIterate, creator.Individual,
                     toolbox.expr)
    toolbox.register("population", tools.initRepeat, list,
                     toolbox.individual)
    toolbox.register("mutate", gp.mutUniform, expr=toolbox.expr,
                     pset=self.pset)

    hof = tools.HallOfFame(1)
    stats = tools.Statistics(lambda ind: ind.fitness.values)
    stats.register("avg", np.mean)
    stats.register("std", np.std)
    stats.register("min", np.min)
    stats.register("max", np.max)
    logbook = tools.Logbook()
    logbook.header = ['gen'] + stats.fields

    def pick():
        # One tournament winner from the current population.
        return tools.selTournament(self.population, k=1, tournsize=7)[0]

    for g in range(self.ngen):
        start_time = time.time()

        # get the fitnesses for every individual in the population
        fitnesses = futures.map(self.evaluate_regex, self.population)
        for indv, fits in zip(self.population, fitnesses):
            indv.fitness.values = fits

        # log and record progress
        record = stats.compile(self.population)
        logbook.record(gen=g, **record)
        if verbose:
            print(logbook.stream)
        hof.update(self.population)

        # sort by Pareto-fronts (NSGA-II, Deb, et)
        self.population = [
            indv
            for front in tools.sortNondominated(self.population,
                                                self.pop_size)
            for indv in front
        ]

        keep_num = int(self.pop_size * 0.9)  # keep 90% of old gen
        new_pop = []
        # NOTE(review): tournament winners are varied directly, without
        # cloning, so an individual still referenced by self.population may
        # be modified -- confirm this is intended.
        while len(new_pop) < keep_num:
            rnum = random.random()
            if rnum < self.CXPB:
                cx_indv1 = pick()
                cx_indv2 = pick()
                cx_indv1, cx_indv2 = self.cxLeafOrSubTree(
                    cx_indv1, cx_indv2, self.term_ratio)
                new_pop.append(cx_indv1)
                new_pop.append(cx_indv2)
            elif rnum < self.CXPB + self.MUTPB:
                new_pop.append(toolbox.mutate(pick())[0])
            else:
                new_pop.append(pick())

        self.population = new_pop + toolbox.population(
            n=self.pop_size - keep_num)

        best = tools.selBest(self.population, k=1)[0]
        tree = gp.PrimitiveTree(best)
        print('Best of that gen:')
        # The values are printed directly; they used to be wrapped in a set
        # literal, which added stray braces to the output.
        print(gp.compile(tree, pset=self.pset).s + '\nFitness: ' +
              str(best.fitness.values))

        elapsed_time = time.time() - start_time
        # Generation g has just finished, so ngen - g - 1 remain.
        remaining_min = (elapsed_time * (self.ngen - g - 1)) / 60
        remaining_hours = remaining_min / 60
        print(
            f"Time for last gen: {elapsed_time} secs, Remaining: {remaining_min} minutes, {remaining_hours} hours."
        )

        # 100-character progress bar. ``g // self.ngen`` was always 0, so
        # the bar never advanced.
        done = 100 * (g + 1) // self.ngen
        print('[' + ('*' * done) + ((100 - done) * ' ') + ']')

    return hof, logbook
def demo_test(self):
    """Manual smoke test for the regex tree machinery; prints its findings.

    Steps:
      1. Generate 10 random regex trees and compile each of them.
      2. Build one tree by hand and evaluate it.
      3. Try a hand-written regex against a small example.
      4. Run ``test_example`` on two fixed examples, with and without the
         optimal-individual search.
      5. For every example, compare the PATTERN1 individual with the one
         from ``gen_best_individual``: count target misses and accumulate
         the regex length differences.
    """
    # TESTING ...
    # Generate 10 random trees
    for i in range(10):
        pset = self.pset
        terminals = self.terminals
        expr = self.genRampedRegexTree(min_=7,
                                       max_=20,
                                       ratio=0.7,
                                       pset=self.pset,
                                       classes=self.classes)
        tree = gp.PrimitiveTree(expr)
        indr = gp.compile(tree, self.pset).s
        print(f"Tree: {tree}")
        print(f"Random Regex: {repr(indr)}")
        # Compiling checks that the generated pattern is a valid regex.
        indr = regex.compile(indr)
    # an example of building a tree manually for a regular expression representation as a tree,
    # where the nodes alternate from bottom up- right branch, then left closer to primitive
    expr = [
        self.get_primitive(class_precedence(LookAheadClass, LookBehindClass),
                           [LookAheadClass, LookBehindClass],
                           key='cat'),
        self.get_primitive(LookBehindClass, [TermClass]),
        self.get_primitive(TermClass, [TermClass, TermClass], key='cat'),
        self.get_term(TermClass, "\w")[0],
        self.get_term(TermClass, "title")[0],
        self.get_primitive(class_precedence(LookAheadClass, TermClass),
                           [LookAheadClass, TermClass],
                           key='cat'),
        self.get_term(TermClass, "target ={stuff}")[0],
        self.get_primitive(class_precedence(LookAheadClass, TermClass),
                           [LookAheadClass, TermClass],
                           key='cat'),
        self.get_term(TermClass, "\w")[0],
        self.get_primitive(LookAheadClass, [TermClass]),
        self.get_primitive(TermClass, [TermClass, TermClass], key='cat'),
        self.get_term(TermClass, "\s")[0],
        self.get_term(TermClass, "year")[0]
    ]
    tree = gp.PrimitiveTree(expr)
    indr = regex.compile(gp.compile(tree, pset).s)
    print("\nManually Constructed INDR:")
    print(indr)
    eval_test = self.evaluate_regex(tree, fit_cache=None)
    ex_mod = {
        "string": "\ntitle a={La puerta}, 1 2 a b c\n",
        "match": [{
            "start": 10,
            "end": 19
        }],
        "unmatch": [{
            "start": 0,
            "end": 10
        }, {
            "start": 19,
            "end": 32
        }]
    }
    rexp = "(?<=\ntitle \w+={)\w+ \w+(?=}, \d+ \d+(?:\s)|(?:\w)\n)"
    #rexp = "(?<=\ntitle \w={)\w+ \w+(?=}, \d+ \d+( \w+)+\n)"
    rexp2 = "(?:(?<=\ntitle \w={))\w+ \w+(?=}, \d+ \d+( \w+)+\n)"
    indr = regex.compile(rexp)
    print(indr)
    test = indr.findall(ex_mod['string'])
    print(test)
    # examples with punctuation at end of target requiring different match pattern
    for e in [8, 35]:
        self.test_example(self.data['examples'][e], optimal=False)
        self.test_example(self.data['examples'][e], optimal=True)
    drop_misses, non_drop_misses, length_diffs = 0, 0, 0
    for i, ex in enumerate(self.data['examples']):
        print(f"EXAMPLE {i}:")
        #print(ex['string'][ex['match'][0]['start']:ex['match'][0]['end']])
        expr1 = self.generate_individual_from_example(ex, self.PATTERN1)
        expr2 = self.gen_best_individual(ex)
        # NOTE(review): ``i`` is the loop index while random.choice returns
        # an element of the examples list, so this looks like it can never
        # be true -- confirm and compare ``ex`` or an index instead.
        if i == random.choice(self.data['examples']):
            print(f"Random Eval, Ex {i}:")
            self.evaluate_regex(expr1)
            self.evaluate_regex(expr2)
        target = regex.compile(self.regex_target).findall(ex['string'])
        ex1_rstring = gp.compile(gp.PrimitiveTree(expr1), self.pset).s
        ex2_rstring = gp.compile(gp.PrimitiveTree(expr2), self.pset).s
        expr1_regex = regex.compile(ex1_rstring)
        expr2_regex = regex.compile(ex2_rstring)
        test1 = expr1_regex.findall(ex['string'])
        test2 = expr2_regex.findall(ex['string'])
        print(test1)
        print(test2)
        len1 = len(ex1_rstring)
        len2 = len(ex2_rstring)
        if test1 != target:
            non_drop_misses += 1
        if test2 != target:
            drop_misses += 1
        # Lengths are only compared when both variants hit the target.
        if (test1 == target) and (test2 == target):
            print(len1, len2)
            length_diffs += len1 - len2
    print(
        f'Non-Drop Misses: {non_drop_misses}, Drop Misses: {drop_misses}')
    print(f'Length Differences: {length_diffs}')