Example #1
    def predict(self, X):
        """Predict target values for the samples in ``X``.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape (n_samples, n_features)
            The input samples to evaluate the fitted model on.

        Returns
        -------
        y : ndarray, shape (n_samples,)
            The model's prediction for each input row.
        """
        # Refuse to predict before fit() has run.
        check_is_fitted(self)
        X = check_array(X, accept_sparse=False)
        dataset = op.Dataset(X)
        eval_range = op.Range(0, dataset.Rows)
        return op.Evaluate(self._model, dataset, eval_range)
def model(est, X):
    """Return the fitted estimator's model as an infix-formatted string."""
    # TODO: replace with est._model_str_ when PR merged
    formatted = op.InfixFormatter.Format(est._model, op.Dataset(X), 3)
    return str(formatted)
Example #3
    def fit(self, X, y, show_model=False):
        """Fit a symbolic-regression model with genetic programming.

        Builds an Operon problem from the data, configures the evolutionary
        operators from the estimator's hyperparameters, runs the GP algorithm,
        applies linear scaling to the best model, and stores it in
        ``self._model`` together with run statistics in ``self._stats``.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape (n_samples, n_features)
            The training input samples.
        y : array-like, shape (n_samples,) or (n_samples, n_outputs)
            The target values (class labels in classification, real numbers in
            regression).
        show_model : bool, default=False
            If True, print the fitted model in infix form and the internal
            r2 score of the best individual.

        Returns
        -------
        self : object
            Returns self.
        """
        X, y                  = check_X_y(X, y, accept_sparse=False)
        # Operon expects a single dataset; stack the target as the last column.
        D                     = np.column_stack((X, y))

        ds                    = op.Dataset(D)
        target                = max(ds.Variables, key=lambda x: x.Index) # last column is the target
        inputs                = op.VariableCollection(v for v in ds.Variables if v.Index != target.Index)
        training_range        = op.Range(0, ds.Rows)
        test_range            = op.Range(ds.Rows-1, ds.Rows) # hackish, because it can't be empty
        problem               = op.Problem(ds, inputs, target.Name, training_range, test_range)

        pset                  = op.PrimitiveSet()
        pcfg                  = self.__init_primitive_config(self.allowed_symbols)
        pset.SetConfig(pcfg)

        creator               = self.__init_creator(self.initialization_method, pset, inputs)

        evaluator             = self.__init_evaluator(self.error_metric, problem)
        evaluator.Budget      = self.max_evaluations
        evaluator.LocalOptimizationIterations = self.local_iterations

        female_selector       = self.__init_selector(self.female_selector, 0)
        male_selector         = self.__init_selector(self.male_selector, 0)
        reinserter            = self.__init_reinserter(self.reinserter, 0)
        cx                    = op.SubtreeCrossover(self.crossover_internal_probability, self.max_depth, self.max_length)

        mut                   = op.MultiMutation()
        mut_list = [] # this list is needed as a placeholder to keep alive the mutation operators objects (since the multi-mutation only stores references)
        for k in self.mutation:
            v = self.mutation[k]
            m = self.__init_mutation(k, inputs, pset, creator)
            mut.Add(m, v)
            mut_list.append(m)

        generator             = self.__init_generator(self.offspring_generator, evaluator, cx, mut, female_selector, male_selector)

        min_arity, max_arity  = pset.FunctionArityLimits()
        initializer           = op.UniformInitializer(creator, min_arity+1, self.max_length)

        if self.random_state is None:
            # draw a 64-bit seed so the run is reproducible via self._stats
            self.random_state = random.getrandbits(64)

        config                = op.GeneticAlgorithmConfig(
                                    generations      = self.generations,
                                    max_evaluations  = self.max_evaluations,
                                    local_iterations = self.local_iterations,
                                    population_size  = self.population_size,
                                    pool_size        = self.pool_size,
                                    p_crossover      = self.crossover_probability,
                                    p_mutation       = self.mutation_probability,
                                    seed             = self.random_state
                                    )

        gp                    = op.GeneticProgrammingAlgorithm(problem, config, initializer, generator, reinserter)

        rng                   = op.RomuTrio(np.uint64(config.Seed))

        gp.Run(rng, None, self.n_threads)
        comp                  = op.SingleObjectiveComparison(0)
        best                  = gp.BestModel(comp)

        y_pred                = op.Evaluate(best.Genotype, ds, training_range)
        # NOTE(review): another fit() variant in this file treats the FIRST
        # return value of FitLeastSquares as the multiplicative scale, while
        # here `b` multiplies and `a` adds — confirm the return-value order
        # of op.FitLeastSquares matches this usage.
        a, b                  = op.FitLeastSquares(y_pred, y)

        # add four nodes at the top of the tree for linear scaling
        nodes                 = best.Genotype.Nodes
        nodes.extend([ op.Node.Constant(b), op.Node.Mul(), op.Node.Constant(a), op.Node.Add() ])

        self._model           = op.Tree(nodes).UpdateNodes()

        if show_model:
            print(op.InfixFormatter.Format(self._model, ds, 12))
            print('internal model r2: ', 1 - best[0])

        self._stats = {
            'model_length':        self._model.Length - 4, # do not count scaling nodes?
            'generations':         gp.Generation,
            'fitness_evaluations': evaluator.FitnessEvaluations,
            'local_evaluations':   evaluator.LocalEvaluations,
            'random_state':        self.random_state
                }

        self.is_fitted_ = True
        # `fit` should always return `self`
        return self
Example #4
    def fit(self, X, y):
        """Fit a symbolic-regression model with genetic programming.

        Builds an Operon problem from the data, configures one evaluator per
        objective (wrapping them in a ``MultiEvaluator`` when more than one
        objective is set), runs either a single-objective GP algorithm or
        NSGA-II, applies linear scaling to the best model, and stores the
        result in ``self._model`` along with run statistics in ``self._stats``.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape (n_samples, n_features)
            The training input samples.
        y : array-like, shape (n_samples,) or (n_samples, n_outputs)
            The target values (class labels in classification, real numbers in
            regression).

        Returns
        -------
        self : object
            Returns self.
        """
        X, y                  = check_X_y(X, y, accept_sparse=False)
        # Operon expects a single dataset; stack the target as the last column.
        D                     = np.column_stack((X, y))

        ds                    = op.Dataset(D)
        target                = max(ds.Variables, key=lambda x: x.Index) # last column is the target

        inputs                = op.VariableCollection(v for v in ds.Variables if v.Index != target.Index)
        training_range        = op.Range(0, ds.Rows)
        test_range            = op.Range(ds.Rows-1, ds.Rows) # hackish, because it can't be empty
        problem               = op.Problem(ds, inputs, target.Name, training_range, test_range)

        pset                  = op.PrimitiveSet()
        pcfg                  = self.__init_primitive_config(self.allowed_symbols)
        pset.SetConfig(pcfg)

        creator               = self.__init_creator(self.initialization_method, pset, inputs)

        single_objective      = len(self.objectives) == 1

        evaluators = [] # placeholder for the evaluator(s)

        for obj in self.objectives:
            eval_         = self.__init_evaluator(obj, problem, self._interpreter)
            eval_.Budget  = self.max_evaluations
            eval_.LocalOptimizationIterations = self.local_iterations
            evaluators.append(eval_)

        if single_objective:
            evaluator = evaluators[0]
        else:
            # aggregate the per-objective evaluators behind one interface
            evaluator = op.MultiEvaluator(problem)
            for eval_ in evaluators:
                evaluator.Add(eval_)
            evaluator.LocalOptimizationIterations = self.local_iterations
            evaluator.Budget = self.max_evaluations

        # single-objective runs compare on fitness[0]; multi-objective runs
        # use crowded comparison (NSGA-II style)
        comparison            = op.SingleObjectiveComparison(0) if single_objective else op.CrowdedComparison()

        female_selector       = self.__init_selector(self.female_selector, comparison)
        male_selector         = self.__init_selector(self.male_selector, comparison)
        reinserter            = self.__init_reinserter(self.reinserter, comparison)
        cx                    = op.SubtreeCrossover(self.crossover_internal_probability, self.max_depth, self.max_length)

        mut                   = op.MultiMutation()
        mut_list = [] # this list is needed as a placeholder to keep alive the mutation operators objects (since the multi-mutation only stores references)
        for k in self.mutation:
            v = self.mutation[k]
            m = self.__init_mutation(k, inputs, pset, creator)
            mut.Add(m, v)
            mut_list.append(m)

        generator             = self.__init_generator(self.offspring_generator, evaluator, cx, mut, female_selector, male_selector)

        min_arity, max_arity  = pset.FunctionArityLimits()
        initializer           = op.UniformInitializer(creator, min_arity+1, self.max_length)
        initializer.MinDepth  = 1
        initializer.MaxDepth  = 1000

        if self.random_state is None:
            # draw a 64-bit seed so the run is reproducible via self._stats
            self.random_state = random.getrandbits(64)

        config                = op.GeneticAlgorithmConfig(
                                    generations      = self.generations,
                                    max_evaluations  = self.max_evaluations,
                                    local_iterations = self.local_iterations,
                                    population_size  = self.population_size,
                                    pool_size        = self.pool_size,
                                    p_crossover      = self.crossover_probability,
                                    p_mutation       = self.mutation_probability,
                                    seed             = self.random_state,
                                    time_limit       = self.time_limit
                                    )

        sorter                = None if single_objective else op.RankSorter()
        gp                    = op.GeneticProgrammingAlgorithm(problem, config, initializer, generator, reinserter) if single_objective else op.NSGA2Algorithm(problem, config, initializer, generator, reinserter, sorter)
        rng                   = op.RomuTrio(np.uint64(config.Seed))

        gp.Run(rng, None, self.n_threads)
        comp                  = op.SingleObjectiveComparison(0)
        best                  = gp.BestModel(comp)
        nodes                 = best.Genotype.Nodes
        n_vars                = len([ node for node in nodes if node.IsVariable ])

        # add four nodes at the top of the tree for linear scaling
        y_pred                = op.Evaluate(self._interpreter, best.Genotype, ds, training_range)
        scale, offset         = op.FitLeastSquares(y_pred, y)
        nodes.extend([ op.Node.Constant(scale), op.Node.Mul(), op.Node.Constant(offset), op.Node.Add() ])

        self._model           = op.Tree(nodes).UpdateNodes()

        # update model vars dictionary
        self._model_vars = { node.HashValue : ds.GetVariable(node.HashValue).Name for node in nodes if node.IsVariable }

        self._stats = {
            'model_length':        self._model.Length - 4, # do not count scaling nodes?
            'model_complexity':    self._model.Length - 4 + 2 * n_vars,
            'generations':         gp.Generation,
            'fitness_evaluations': evaluator.FitnessEvaluations,
            'local_evaluations':   evaluator.LocalEvaluations,
            'random_state':        self.random_state
        }

        self.is_fitted_ = True
        # `fit` should always return `self`
        return self