def __init__(self,
             dataset,
             metric="nmse",
             population_size=1000,
             generations=1000,
             n_samples=None,
             tournament_size=3,
             p_crossover=0.5,
             p_mutate=0.1,
             const_range=(-1, 1),  # fix: tuple default (list was a shared mutable default)
             const_optimizer="scipy",
             const_params=None,
             seed=0,
             early_stopping=False,
             threshold=1e-12,
             verbose=True,
             protected=True,
             pareto_front=False,
             # Constraint hyperparameters
             constrain_const=True,
             constrain_trig=True,
             constrain_inv=True,
             constrain_min_len=True,
             constrain_max_len=True,
             constrain_num_const=True,
             min_length=4,
             max_length=30,
             max_const=3):
    """Configure a DEAP-based GP regressor.

    Builds fitness closures over the dataset's train/test splits, the
    primitive set (optionally with protected operators and a constant
    terminal), the evolutionary toolbox, and structural constraints that
    are applied as decorators on the variation operators.

    Parameters
    ----------
    dataset : object
        Provides X_train/y_train, X_test/y_test, y_test_noiseless,
        n_input_var and function_set (assumed interface — confirm against
        the project's dataset class).
    metric : str
        Training fitness metric name, resolved via self.make_fitness.
    population_size, generations, n_samples : int or None
        At most one of n_samples/generations may be set; when generations
        is None it is derived as n_samples // population_size.
    const_range : tuple
        NOTE(review): currently unused by this implementation; retained
        for interface compatibility.
    protected : bool
        When True, operators are swapped for their "protected_" variants
        if such a variant exists in function_map.
    pareto_front : bool
        When True, fitness is two-dimensional (error, complexity) and
        compared lexicographically.

    Raises
    ------
    ValueError
        If both 'generations' and 'n_samples' are None.
    """
    self.dataset = dataset
    self.fitted = False

    assert n_samples is None or generations is None, "At least one of 'n_samples' or 'generations' must be None."
    if generations is None:
        # Bug fix: previously int(None / population_size) raised an opaque
        # TypeError when both hyperparameters were None.
        if n_samples is None:
            raise ValueError("Either 'n_samples' or 'generations' must be given.")
        generations = n_samples // population_size

    # Set hyperparameters
    self.population_size = population_size
    self.generations = generations
    self.tournament_size = tournament_size
    self.p_mutate = p_mutate
    self.p_crossover = p_crossover
    self.seed = seed
    self.early_stopping = early_stopping
    self.threshold = threshold
    self.verbose = verbose
    self.pareto_front = pareto_front

    # Fitness function used during training.
    # The closure binds the metric and training data; the outer partial
    # turns it into a function of an individual (with constant optimization).
    fitness = partial(self.make_fitness(metric),
                      y=dataset.y_train,
                      var_y=np.var(dataset.y_train))  # Function of y_hat
    self.fitness = partial(self.compute_fitness,
                           optimize=True,
                           fitness=fitness,
                           X=dataset.X_train.T)  # Function of individual

    # Test NMSE, used as the final performance metric.
    nmse_test = partial(self.make_fitness("nmse"),
                        y=dataset.y_test,
                        var_y=np.var(dataset.y_test))  # Function of y_hat
    self.nmse_test = partial(self.compute_fitness,
                             optimize=False,
                             fitness=nmse_test,
                             X=dataset.X_test.T)  # Function of individual

    # Noiseless test NMSE, only used to determine success.
    nmse_test_noiseless = partial(self.make_fitness("nmse"),
                                  y=dataset.y_test_noiseless,
                                  var_y=np.var(dataset.y_test_noiseless))  # Function of y_hat
    self.nmse_test_noiseless = partial(self.compute_fitness,
                                       optimize=False,
                                       fitness=nmse_test_noiseless,
                                       X=dataset.X_test.T)  # Function of individual
    # Success criterion: noiseless test NMSE below threshold.
    self.success = lambda ind: self.nmse_test_noiseless(ind)[0] < self.threshold

    # Create the primitive set
    pset = gp.PrimitiveSet("MAIN", dataset.X_train.shape[1])

    # Add input variables, renaming ARG0 -> x1, ARG1 -> x2, ...
    rename_kwargs = {
        "ARG{}".format(i): "x{}".format(i + 1)
        for i in range(dataset.n_input_var)
    }
    pset.renameArguments(**rename_kwargs)

    # Add primitives
    for op_name in dataset.function_set:
        if op_name == "const":
            continue  # handled below as a terminal
        assert op_name in function_map, "Operation {} not recognized.".format(op_name)
        # Prefer the "protected_" variant of the operator when available
        if protected and not op_name.startswith("protected_"):
            protected_op_name = "protected_{}".format(op_name)
            if protected_op_name in function_map:
                op_name = protected_op_name
        op = function_map[op_name]
        pset.addPrimitive(op.function, op.arity, name=op.name)

    # Add constant terminal. The 1.0 is a placeholder value; actual
    # constant values are fit by the constant optimizer.
    const = "const" in dataset.function_set
    if const:
        const_params = const_params if const_params is not None else {}
        self.const_opt = make_const_optimizer(const_optimizer, **const_params)
        pset.addTerminal(1.0, name="const")

    # Create custom fitness and individual classes
    if self.pareto_front:
        # Fitness is compared lexicographically, so the second dimension
        # (complexity) is only used in selection when the first dimension
        # (error) is the same.
        creator.create("FitnessMin", base.Fitness, weights=(-1.0, -1.0))
    else:
        creator.create("FitnessMin", base.Fitness, weights=(-1.0,))
    creator.create("Individual", gp.PrimitiveTree, fitness=creator.FitnessMin)

    # Define the evolutionary operators
    self.toolbox = base.Toolbox()
    self.toolbox.register("expr", gp.genHalfAndHalf, pset=pset, min_=1, max_=2)
    self.toolbox.register("individual", tools.initIterate, creator.Individual, self.toolbox.expr)
    self.toolbox.register("population", tools.initRepeat, list, self.toolbox.individual)
    self.toolbox.register("compile", gp.compile, pset=pset)
    self.toolbox.register("evaluate", self.fitness)
    self.toolbox.register("select", tools.selTournament, tournsize=tournament_size)
    self.toolbox.register("mate", gp.cxOnePoint)
    self.toolbox.register("expr_mut", gp.genFull, min_=0, max_=2)
    self.toolbox.register("mutate", gp.mutUniform, expr=self.toolbox.expr_mut, pset=pset)

    # Define constraints, each defined by a func : gp.Individual -> bool.
    # We decorate mutation/crossover operators with constrain, which
    # replaces a child with a random parent if func(ind) is True.
    constrain = partial(gp.staticLimit, max_value=0)  # Constraint decorator
    funcs = []
    if constrain_min_len:
        funcs.append(constraints.make_check_min_len(min_length))  # Minimum length
    if constrain_max_len:
        funcs.append(constraints.make_check_max_len(max_length))  # Maximum length
    if constrain_inv:
        funcs.append(constraints.check_inv)  # Subsequence inverse unary operators
    if constrain_trig:
        funcs.append(constraints.check_trig)  # Nested trig operators
    if constrain_const and const:
        funcs.append(constraints.check_const)  # All children are constants
    if constrain_num_const and const:
        funcs.append(constraints.make_check_num_const(max_const))  # Number of constants
    for func in funcs:
        for variation in ["mate", "mutate"]:
            self.toolbox.decorate(variation, constrain(func))

    # Create the training function
    self.algorithm = algorithms.eaSimple
def set_const_optimizer(cls, name, **kwargs):
    """Install a constant optimizer, shared class-wide via Program.

    Resolves *name* (with any extra keyword options) through
    make_const_optimizer and stores the result on Program.const_optimizer.
    """
    Program.const_optimizer = make_const_optimizer(name, **kwargs)
def __init__(self,
             dataset,
             metric="nmse",
             population_size=1000,
             generations=1000,
             n_samples=None,
             tournament_size=3,
             p_crossover=0.5,
             p_mutate=0.1,
             max_depth=17,
             max_len=None,
             max_const=None,
             const_range=(-1, 1),  # fix: tuple default (list was a shared mutable default)
             const_optimizer="scipy",
             const_params=None,
             seed=0,
             early_stopping=False,
             verbose=True,
             threshold=1e-12):
    """Configure a DEAP-based GP regressor with static structural limits.

    Builds train/test fitness closures, the primitive set (optionally
    with a constant terminal), the evolutionary toolbox, and static
    limits on tree depth, tree length, and number of constants.

    Parameters
    ----------
    dataset : object
        Provides X_train/y_train, y_train_noiseless, X_test/y_test,
        y_test_noiseless, n_input_var and function_set (assumed
        interface — confirm against the project's dataset class).
    metric : str
        Fitness metric name, resolved via self.make_fitness.
    population_size, generations, n_samples : int or None
        At most one of n_samples/generations may be set; when generations
        is None it is derived as n_samples // population_size.
    max_depth, max_len, max_const : int or None
        Static limits enforced on mate/mutate via gp.staticLimit;
        None disables the corresponding limit.
    const_range : tuple
        NOTE(review): currently unused by this implementation; retained
        for interface compatibility.

    Raises
    ------
    ValueError
        If both 'generations' and 'n_samples' are None.
    """
    self.dataset = dataset
    self.fitted = False

    assert n_samples is None or generations is None, "At least one of 'n_samples' or 'generations' must be None."
    if generations is None:
        # Bug fix: previously int(None / population_size) raised an opaque
        # TypeError when both hyperparameters were None.
        if n_samples is None:
            raise ValueError("Either 'n_samples' or 'generations' must be given.")
        generations = n_samples // population_size

    # Set hyperparameters
    self.population_size = population_size
    self.generations = generations
    self.tournament_size = tournament_size
    self.p_mutate = p_mutate
    self.p_crossover = p_crossover
    self.max_depth = max_depth
    self.seed = seed
    self.early_stopping = early_stopping
    self.threshold = threshold
    self.verbose = verbose

    # Making train/test fitness functions (each a function of y_hat)
    fitness = self.make_fitness(metric)
    fitness_train = partial(fitness,
                            y=dataset.y_train,
                            var_y=np.var(dataset.y_train))
    fitness_test = partial(fitness,
                           y=dataset.y_test,
                           var_y=np.var(dataset.y_test))
    # NOTE(review): the noiseless variants normalize by the variance of
    # the *noisy* targets (y_train / y_test), not the noiseless ones —
    # confirm this is intentional before changing.
    fitness_train_noiseless = partial(fitness,
                                      y=dataset.y_train_noiseless,
                                      var_y=np.var(dataset.y_train))
    fitness_test_noiseless = partial(fitness,
                                     y=dataset.y_test_noiseless,
                                     var_y=np.var(dataset.y_test))

    # Evaluators over individuals; only training optimizes constants.
    self.eval_train = partial(self.evaluate,
                              optimize=True,
                              fitness=fitness_train,
                              X=dataset.X_train.T)  # Function of individual
    self.eval_test = partial(self.evaluate,
                             optimize=False,
                             fitness=fitness_test,
                             X=dataset.X_test.T)  # Function of individual
    self.eval_train_noiseless = partial(self.evaluate,
                                        optimize=False,
                                        fitness=fitness_train_noiseless,
                                        X=dataset.X_train.T)  # Function of individual
    self.eval_test_noiseless = partial(self.evaluate,
                                       optimize=False,
                                       fitness=fitness_test_noiseless,
                                       X=dataset.X_test.T)  # Function of individual

    # Test NMSE, used as the final performance metric.
    nmse = partial(self.make_fitness("nmse"),
                   y=dataset.y_test,
                   var_y=np.var(dataset.y_test))  # Function of y_hat
    self.nmse = partial(self.evaluate,
                        optimize=False,
                        fitness=nmse,
                        X=dataset.X_test.T)  # Function of individual

    # Create the primitive set
    pset = gp.PrimitiveSet("MAIN", dataset.X_train.shape[1])

    # Add input variables, renaming ARG0 -> x1, ARG1 -> x2, ...
    rename_kwargs = {
        "ARG{}".format(i): "x{}".format(i + 1)
        for i in range(dataset.n_input_var)
    }
    pset.renameArguments(**rename_kwargs)

    # Add primitives
    for k, v in _function_map.items():
        if k in dataset.function_set:
            pset.addPrimitive(v.function, v.arity, name=v.name)

    # Add constant terminal. The 1.0 is a placeholder value; actual
    # constant values are fit by the constant optimizer.
    const = "const" in dataset.function_set
    if const:
        const_params = const_params if const_params is not None else {}
        self.const_opt = make_const_optimizer(const_optimizer, **const_params)
        pset.addTerminal(1.0, name="const")

    # Create custom fitness and individual classes
    creator.create("FitnessMin", base.Fitness, weights=(-1.0,))
    creator.create("Individual", gp.PrimitiveTree, fitness=creator.FitnessMin)

    # Define the evolutionary operators
    self.toolbox = base.Toolbox()
    self.toolbox.register("expr", gp.genHalfAndHalf, pset=pset, min_=1, max_=2)
    self.toolbox.register("individual", tools.initIterate, creator.Individual, self.toolbox.expr)
    self.toolbox.register("population", tools.initRepeat, list, self.toolbox.individual)
    self.toolbox.register("compile", gp.compile, pset=pset)
    self.toolbox.register("evaluate", self.eval_train)
    self.toolbox.register("select", tools.selTournament, tournsize=tournament_size)
    self.toolbox.register("mate", gp.cxOnePoint)
    self.toolbox.register("expr_mut", gp.genFull, min_=0, max_=2)
    self.toolbox.register("mutate", gp.mutUniform, expr=self.toolbox.expr_mut, pset=pset)

    # Static limits: a violating child is replaced by a random parent.
    if max_depth is not None:
        for variation in ("mate", "mutate"):
            self.toolbox.decorate(
                variation,
                gp.staticLimit(key=operator.attrgetter("height"), max_value=max_depth))
    if max_len is not None:
        for variation in ("mate", "mutate"):
            self.toolbox.decorate(
                variation,
                gp.staticLimit(key=len, max_value=max_len))
    if const and max_const is not None:
        num_const = lambda ind: len([node for node in ind if node.name == "const"])
        for variation in ("mate", "mutate"):
            self.toolbox.decorate(
                variation,
                gp.staticLimit(key=num_const, max_value=max_const))

    # Create the training function
    self.algorithm = algorithms.eaSimple