Example #1
0
    def __init__(
            self,
            dataset,
            metric="nmse",
            population_size=1000,
            generations=1000,
            n_samples=None,
            tournament_size=3,
            p_crossover=0.5,
            p_mutate=0.1,
            const_range=[-1, 1],  # NOTE(review): mutable default; only read here (in commented-out code), a tuple would be safer
            const_optimizer="scipy",
            const_params=None,
            seed=0,
            early_stopping=False,
            threshold=1e-12,
            verbose=True,
            protected=True,
            pareto_front=False,
            # Constraint hyperparameters
            constrain_const=True,
            constrain_trig=True,
            constrain_inv=True,
            constrain_min_len=True,
            constrain_max_len=True,
            constrain_num_const=True,
            min_length=4,
            max_length=30,
            max_const=3):
        """Configure a DEAP-based genetic-programming symbolic regressor.

        Builds (in order): fitness closures over the dataset's train/test
        splits, a DEAP primitive set derived from ``dataset.function_set``,
        the global ``creator`` fitness/individual classes, the evolutionary
        toolbox (init, selection, crossover, mutation), and structural
        constraint decorators on the variation operators. Training itself is
        not run here; ``self.algorithm`` holds the training loop to call
        later.

        At least one of ``n_samples`` / ``generations`` must be ``None``
        (asserted below); when ``generations`` is ``None`` it is derived as
        ``n_samples // population_size``, so ``n_samples`` must then be set.
        ``dataset`` is a project-specific object; it is assumed to expose
        ``X_train``/``X_test``/``y_train``/``y_test``/``y_test_noiseless``,
        ``n_input_var`` and ``function_set`` — TODO confirm against the
        dataset class.
        """

        self.dataset = dataset
        self.fitted = False

        # NOTE(review): assert is stripped under ``python -O``; callers must
        # still honor this contract.
        assert n_samples is None or generations is None, "At least one of 'n_samples' or 'generations' must be None."
        if generations is None:
            generations = int(n_samples / population_size)

        # Set hyperparameters
        self.population_size = population_size
        self.generations = generations
        self.tournament_size = tournament_size
        self.p_mutate = p_mutate
        self.p_crossover = p_crossover
        self.seed = seed
        self.early_stopping = early_stopping
        self.threshold = threshold
        self.verbose = verbose
        self.pareto_front = pareto_front

        # Fitness function used during training
        # Includes closure for fitness function metric and training data
        fitness = partial(self.make_fitness(metric),
                          y=dataset.y_train,
                          var_y=np.var(dataset.y_train))  # Function of y_hat
        self.fitness = partial(self.compute_fitness,
                               optimize=True,
                               fitness=fitness,
                               X=dataset.X_train.T)  # Function of individual

        # Test NMSE, used as final performance metric
        # Includes closure for test data
        nmse_test = partial(self.make_fitness("nmse"),
                            y=dataset.y_test,
                            var_y=np.var(dataset.y_test))  # Function of y_hat
        self.nmse_test = partial(self.compute_fitness,
                                 optimize=False,
                                 fitness=nmse_test,
                                 X=dataset.X_test.T)  # Function of individual

        # Noiseless test NMSE, only used to determine success for final performance
        # Includes closure for noiseless test data
        nmse_test_noiseless = partial(
            self.make_fitness("nmse"),
            y=dataset.y_test_noiseless,
            var_y=np.var(dataset.y_test_noiseless))  # Function of y_hat
        self.nmse_test_noiseless = partial(
            self.compute_fitness,
            optimize=False,
            fitness=nmse_test_noiseless,
            X=dataset.X_test.T)  # Function of individual
        # Success = noiseless test error below threshold. compute_fitness is
        # assumed to return a tuple whose first element is the error — TODO
        # confirm; only index [0] is used here.
        self.success = lambda ind: self.nmse_test_noiseless(ind)[
            0] < self.threshold  # Function of individual

        # Create the primitive set (one input slot per training feature)
        pset = gp.PrimitiveSet("MAIN", dataset.X_train.shape[1])

        # Add input variables: rename DEAP's ARG0..ARGk-1 to x1..xk (1-based)
        rename_kwargs = {
            "ARG{}".format(i): "x{}".format(i + 1)
            for i in range(dataset.n_input_var)
        }
        pset.renameArguments(**rename_kwargs)

        # Add primitives ("const" is handled separately below)
        for op_name in dataset.function_set:
            if op_name == "const":
                continue
            assert op_name in function_map, "Operation {} not recognized.".format(
                op_name)

            # Prepend available protected operators with "protected_"
            # (protected ops guard against domain errors, e.g. div-by-zero —
            # presumably; verify against function_map definitions)
            if protected and not op_name.startswith("protected_"):
                protected_op_name = "protected_{}".format(op_name)
                if protected_op_name in function_map:
                    op_name = protected_op_name

            op = function_map[op_name]
            pset.addPrimitive(op.function, op.arity, name=op.name)

        # # Add constant
        # if "const" in dataset.function_set:
        #     pset.addEphemeralConstant("const", lambda : random.uniform(const_range[0], const_range[1]))

        # Add constant: a placeholder terminal with value 1.0; actual values
        # are presumably tuned later by self.const_opt — TODO confirm
        const = "const" in dataset.function_set
        if const:
            const_params = const_params if const_params is not None else {}
            self.const_opt = make_const_optimizer(const_optimizer,
                                                  **const_params)
            pset.addTerminal(1.0, name="const")

        # Create custom fitness and individual classes.
        # NOTE(review): creator.create registers these names globally; DEAP
        # warns/overwrites if "FitnessMin"/"Individual" already exist (e.g.
        # when this class is instantiated twice in one process).
        if self.pareto_front:
            # Fitness is compared lexicographically, so second dimension
            # (complexity) is only used in selection if first dimension (error)
            # is the same.
            creator.create("FitnessMin", base.Fitness, weights=(-1.0, -1.0))
        else:
            creator.create("FitnessMin", base.Fitness, weights=(-1.0, ))
        creator.create("Individual",
                       gp.PrimitiveTree,
                       fitness=creator.FitnessMin)

        # Define the evolutionary operators
        self.toolbox = base.Toolbox()
        self.toolbox.register("expr",
                              gp.genHalfAndHalf,
                              pset=pset,
                              min_=1,
                              max_=2)
        self.toolbox.register("individual", tools.initIterate,
                              creator.Individual, self.toolbox.expr)
        self.toolbox.register("population", tools.initRepeat, list,
                              self.toolbox.individual)
        self.toolbox.register("compile", gp.compile, pset=pset)
        self.toolbox.register("evaluate", self.fitness)
        self.toolbox.register("select",
                              tools.selTournament,
                              tournsize=tournament_size)
        self.toolbox.register("mate", gp.cxOnePoint)
        self.toolbox.register("expr_mut", gp.genFull, min_=0, max_=2)
        self.toolbox.register('mutate',
                              gp.mutUniform,
                              expr=self.toolbox.expr_mut,
                              pset=pset)

        # Define constraints, each defined by a func : gp.Individual -> bool.
        # We decorate mutation/crossover operators with constrain, which
        # replaces a child with a random parent if func(ind) is True.
        # (gp.staticLimit rejects children with key(child) > max_value, so
        # with max_value=0 a True (==1) check triggers the rejection.)
        constrain = partial(gp.staticLimit,
                            max_value=0)  # Constraint decorator
        funcs = []
        if constrain_min_len:
            funcs.append(
                constraints.make_check_min_len(min_length))  # Minimum length
        if constrain_max_len:
            funcs.append(
                constraints.make_check_max_len(max_length))  # Maximum length
        if constrain_inv:
            funcs.append(
                constraints.check_inv)  # Subsequence inverse unary operators
        if constrain_trig:
            funcs.append(constraints.check_trig)  # Nested trig operators
        if constrain_const and const:
            funcs.append(constraints.check_const)  # All children are constants
        if constrain_num_const and const:
            funcs.append(constraints.make_check_num_const(
                max_const))  # Number of constants
        for func in funcs:
            for variation in ["mate", "mutate"]:
                self.toolbox.decorate(variation, constrain(func))

        # Create the training function
        self.algorithm = algorithms.eaSimple
Example #2
0
    def set_const_optimizer(cls, name, **kwargs):
        """Install the constant optimizer named *name* on the Program class.

        Any extra keyword arguments are forwarded to the optimizer factory.
        """
        Program.const_optimizer = make_const_optimizer(name, **kwargs)
    def __init__(self,
                 dataset,
                 metric="nmse",
                 population_size=1000,
                 generations=1000,
                 n_samples=None,
                 tournament_size=3,
                 p_crossover=0.5,
                 p_mutate=0.1,
                 max_depth=17,
                 max_len=None,
                 max_const=None,
                 const_range=[-1, 1],  # NOTE(review): mutable default; only read here (in commented-out code), a tuple would be safer
                 const_optimizer="scipy",
                 const_params=None,
                 seed=0,
                 early_stopping=False,
                 threshold=1e-12,
                 verbose=True):
        """Configure a DEAP-based genetic-programming symbolic regressor.

        Builds (in order): train/test/noiseless fitness closures over the
        dataset splits, a DEAP primitive set from ``dataset.function_set``,
        the global ``creator`` fitness/individual classes, the evolutionary
        toolbox (init, selection, crossover, mutation), and size/depth/
        constant-count limits on the variation operators. Training itself is
        not run here; ``self.algorithm`` holds the training loop to call
        later.

        At least one of ``n_samples`` / ``generations`` must be ``None``
        (asserted below); when ``generations`` is ``None`` it is derived as
        ``n_samples // population_size``, so ``n_samples`` must then be set.
        ``dataset`` is a project-specific object; it is assumed to expose
        train/test (and noiseless) splits, ``n_input_var`` and
        ``function_set`` — TODO confirm against the dataset class.
        """

        self.dataset = dataset
        self.fitted = False

        # NOTE(review): assert is stripped under ``python -O``; callers must
        # still honor this contract.
        assert n_samples is None or generations is None, "At least one of 'n_samples' or 'generations' must be None."
        if generations is None:
            generations = int(n_samples / population_size)

        # Set hyperparameters
        self.population_size = population_size
        self.generations = generations
        self.tournament_size = tournament_size
        self.p_mutate = p_mutate
        self.p_crossover = p_crossover
        self.max_depth = max_depth
        self.seed = seed
        self.early_stopping = early_stopping
        self.threshold = threshold
        self.verbose = verbose

        # Making train/test fitness functions
        fitness = self.make_fitness(metric)
        fitness_train = partial(fitness,
                                y=dataset.y_train,
                                var_y=np.var(
                                    dataset.y_train))  # Function of y_hat
        fitness_test = partial(fitness,
                               y=dataset.y_test,
                               var_y=np.var(
                                   dataset.y_test))  # Function of y_hat
        # NOTE(review): the noiseless closures normalize by the variance of
        # the *noisy* targets (y_train / y_test), not the noiseless ones —
        # confirm this is intentional.
        fitness_train_noiseless = partial(
            fitness,
            y=dataset.y_train_noiseless,
            var_y=np.var(dataset.y_train))  # Function of y_hat
        fitness_test_noiseless = partial(
            fitness, y=dataset.y_test_noiseless,
            var_y=np.var(dataset.y_test))  # Function of y_hat
        self.eval_train = partial(
            self.evaluate,
            optimize=True,
            fitness=fitness_train,
            X=dataset.X_train.T)  # Function of individual
        self.eval_test = partial(self.evaluate,
                                 optimize=False,
                                 fitness=fitness_test,
                                 X=dataset.X_test.T)  # Function of individual
        self.eval_train_noiseless = partial(
            self.evaluate,
            optimize=False,
            fitness=fitness_train_noiseless,
            X=dataset.X_train.T)  # Function of individual
        self.eval_test_noiseless = partial(
            self.evaluate,
            optimize=False,
            fitness=fitness_test_noiseless,
            X=dataset.X_test.T)  # Function of individual
        # Test NMSE, kept separate from the training metric as the final
        # performance measure
        nmse = partial(self.make_fitness("nmse"),
                       y=dataset.y_test,
                       var_y=np.var(dataset.y_test))  # Function of y_hat
        self.nmse = partial(self.evaluate,
                            optimize=False,
                            fitness=nmse,
                            X=dataset.X_test.T)  # Function of individual

        # Create the primitive set (one input slot per training feature)
        pset = gp.PrimitiveSet("MAIN", dataset.X_train.shape[1])

        # Add input variables: rename DEAP's ARG0..ARGk-1 to x1..xk (1-based)
        rename_kwargs = {
            "ARG{}".format(i): "x{}".format(i + 1)
            for i in range(dataset.n_input_var)
        }
        pset.renameArguments(**rename_kwargs)

        # Add primitives: every operator in the function set that _function_map
        # recognizes ("const" is handled separately below)
        for k, v in _function_map.items():
            if k in dataset.function_set:
                pset.addPrimitive(v.function, v.arity, name=v.name)

        # # Add constant
        # if "const" in dataset.function_set:
        #     pset.addEphemeralConstant("const", lambda : random.uniform(const_range[0], const_range[1]))

        # Add constant: a placeholder terminal with value 1.0; actual values
        # are presumably tuned later by self.const_opt — TODO confirm
        const = "const" in dataset.function_set
        if const:
            const_params = const_params if const_params is not None else {}
            self.const_opt = make_const_optimizer(const_optimizer,
                                                  **const_params)
            pset.addTerminal(1.0, name="const")

        # Create custom fitness and individual classes.
        # NOTE(review): creator.create registers these names globally; DEAP
        # warns/overwrites if "FitnessMin"/"Individual" already exist (e.g.
        # when this class is instantiated twice in one process).
        creator.create("FitnessMin", base.Fitness, weights=(-1.0, ))
        creator.create("Individual",
                       gp.PrimitiveTree,
                       fitness=creator.FitnessMin)

        # Define the evolutionary operators
        self.toolbox = base.Toolbox()
        self.toolbox.register("expr",
                              gp.genHalfAndHalf,
                              pset=pset,
                              min_=1,
                              max_=2)
        self.toolbox.register("individual", tools.initIterate,
                              creator.Individual, self.toolbox.expr)
        self.toolbox.register("population", tools.initRepeat, list,
                              self.toolbox.individual)
        self.toolbox.register("compile", gp.compile, pset=pset)
        self.toolbox.register("evaluate", self.eval_train)
        self.toolbox.register("select",
                              tools.selTournament,
                              tournsize=tournament_size)
        self.toolbox.register("mate", gp.cxOnePoint)
        self.toolbox.register("expr_mut", gp.genFull, min_=0, max_=2)
        self.toolbox.register('mutate',
                              gp.mutUniform,
                              expr=self.toolbox.expr_mut,
                              pset=pset)
        # Structural limits: gp.staticLimit reverts a child to a random parent
        # when key(child) > max_value, applied to both crossover and mutation.
        if max_depth is not None:
            self.toolbox.decorate(
                "mate",
                gp.staticLimit(key=operator.attrgetter("height"),
                               max_value=max_depth))
            self.toolbox.decorate(
                "mutate",
                gp.staticLimit(key=operator.attrgetter("height"),
                               max_value=max_depth))
        if max_len is not None:
            self.toolbox.decorate("mate",
                                  gp.staticLimit(key=len, max_value=max_len))
            self.toolbox.decorate("mutate",
                                  gp.staticLimit(key=len, max_value=max_len))
        if const and max_const is not None:
            # Count the "const" terminals in an individual
            num_const = lambda ind: len(
                [node for node in ind if node.name == "const"])
            self.toolbox.decorate(
                "mate", gp.staticLimit(key=num_const, max_value=max_const))
            self.toolbox.decorate(
                "mutate", gp.staticLimit(key=num_const, max_value=max_const))

        # Create the training function
        self.algorithm = algorithms.eaSimple