Example #1
    def CalculatePi(self):
        njobs = self.n_jobs
        front_y = self.pareto_front_point()
        front_y = self.y[front_y, :].T

        predict_y_all = self.predict_y_all

        del self.predict_y_all
        gc.collect()

        def tile_func(i, front_y0):
            # flag rows of i that are strictly smaller than at least one
            # Pareto-front point in every objective
            tile = 0
            for front_y_i in front_y0.T:
                big = i - front_y_i
                big_bool = np.max(big, axis=1) < 0
                tile |= big_bool
            return tile

        tile_all = parallelize(n_jobs=njobs,
                               func=tile_func,
                               iterable=predict_y_all,
                               front_y0=front_y)  # keyword must match tile_func's parameter name
        pi = np.sum(1 - np.array(tile_all), axis=1) / self.number
        self.Pi = pi
        return pi
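All of these snippets funnel their work through a `parallelize` helper whose implementation is not shown here. For orientation, the following is a minimal, hypothetical joblib-based sketch that matches the keyword signature used at the call sites (`n_jobs`, `func`, `iterable`, `respective`, `tq`, `batch_size`, plus extra keyword arguments forwarded to `func`); the progress-bar handling and backend choice are assumptions, not the library's actual code.

# Hypothetical sketch of a parallelize() helper matching the call sites in these examples.
# Assumes joblib (and optionally tqdm) is available; this is not the project's real code.
from joblib import Parallel, delayed

def parallelize(n_jobs=1, func=None, iterable=None, respective=False,
                tq=False, batch_size="auto", **kwargs):
    """Apply ``func`` to every element of ``iterable`` in parallel.

    With respective=True each element is unpacked into positional arguments
    (func(*item, **kwargs)); otherwise the element is passed as a single
    argument (func(item, **kwargs)). Extra keyword arguments, such as the
    Pareto front array forwarded in Example #1, are handed to ``func`` unchanged.
    """
    items = list(iterable)
    if tq:
        try:
            from tqdm import tqdm
            items = tqdm(items)  # progress over submitted tasks
        except ImportError:
            pass
    if respective:
        jobs = (delayed(func)(*item, **kwargs) for item in items)
    else:
        jobs = (delayed(func)(item, **kwargs) for item in items)
    return Parallel(n_jobs=n_jobs, batch_size=batch_size)(jobs)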
Example #2
    def parallelize_score(self, inds):
        """

        Parameters
        ----------
        inds:list of SymbolTree

        Returns
        -------
        list of (score,dim,dim_score)
        """

        indss = [i.capsule for i in inds]

        calls = functools.partial(calculate_collect, context=self.context, x=self.data_x, y=self.y,
                                  terminals_and_constants_repr=self.terminals_and_constants_repr,
                                  gro_ter_con=self.gro_ter_con, cv=self.cv, refit=self.refit,
                                  dim_ter_con_list=self.dim_ter_con_list, dim_type=self.dim_type,
                                  fuzzy=self.fuzzy,
                                  scoring=self.scoring, score_pen=self.score_pen,
                                  vector_add=self.vector_add,
                                  add_coef=self.add_coef, inter_add=self.inter_add,
                                  inner_add=self.inner_add, np_maps=self.np_map,
                                  filter_warning=self.filter_warning,
                                  dim_maps=self.dim_map, cal_dim=self.cal_dim)

        score_dim_list = parallelize(func=calls, iterable=indss, n_jobs=self.n_jobs,
                                     respective=False,
                                     tq=self.tq, batch_size=self.batch_size)

        return score_dim_list
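The `functools.partial` call above binds every fixed keyword argument once, so the parallel workers only receive the per-individual `capsule` payloads. A tiny illustration of that pattern, with a made-up `evaluate` function standing in for `calculate_collect`:

import functools

def evaluate(capsule, x=None, y=None, scoring="r2"):
    # placeholder scoring logic standing in for calculate_collect
    return len(capsule), scoring

calls = functools.partial(evaluate, x=[[1.0]], y=[1.0], scoring="r2")

# each call now only needs the varying payload, which is exactly what parallelize iterates over
results = [calls(c) for c in ["abc", "de"]]   # -> [(3, 'r2'), (2, 'r2')]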
Example #3
    def cal_binary_distance_all(self, slices=None, estimator_i=0):
        """ calculate the distance matrix of slices """
        self.estimator_i = estimator_i if isinstance(estimator_i,
                                                     int) else self.estimator_i
        n_jobs = self.n_jobs
        slices = slices if slices else self.slices

        ret = self.check_prop("cal_binary_distance_all",
                              estimator_i=self.estimator_i,
                              slices=slices)

        if ret is None:
            # all ordered pairs of nodes; respective=True unpacks each pair
            # into the two positional arguments of cal_binary_distance
            slice_pairs = list(itertools.product(slices, repeat=2))
            ret = parallelize(n_jobs=n_jobs,
                              func=self.cal_binary_distance,
                              iterable=slice_pairs,
                              respective=True)
            ret = np.reshape(ret, (len(slices), len(slices)), order='F')

            self.add_prop("cal_binary_distance_all",
                          estimator_i=self.estimator_i,
                          slices=slices,
                          values=ret)

        self.slices = slices
        return ret
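The `itertools.product(slices, repeat=2)` call enumerates every ordered pair of nodes, so `parallelize` returns `len(slices)**2` distances that the Fortran-order reshape lays back out as a square (and, for a symmetric distance, symmetric) matrix. A toy reproduction of that bookkeeping, with an absolute difference standing in for `cal_binary_distance`:

import itertools
import numpy as np

slices = [(0,), (1,), (2,)]
pairs = list(itertools.product(slices, repeat=2))            # 9 ordered pairs
flat = [abs(a[0] - b[0]) for a, b in pairs]                  # toy symmetric "distance"
mat = np.reshape(flat, (len(slices), len(slices)), order='F')
# mat[i, j] holds the distance of the pair (slices[j], slices[i]);
# with a symmetric distance this equals the distance of (slices[i], slices[j])
print(mat)   # [[0 1 2], [1 0 1], [2 1 0]]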
Example #4
    def cal_y_distance_all(self, slices=None, estimator_i=0):
        """ calculate binary distance of 2 nodes """

        self.estimator_i = estimator_i if isinstance(estimator_i,
                                                     int) else self.estimator_i
        n_jobs = self.n_jobs
        slices = slices if slices else self.slices

        ret = self.check_prop("cal_y_distance_all",
                              estimator_i=self.estimator_i,
                              slices=slices)

        if ret is None:
            ret = parallelize(n_jobs=n_jobs, func=self.cal_y_distance,
                              iterable=slices)

            self.add_prop("cal_y_distance_all",
                          estimator_i=self.estimator_i,
                          slices=slices,
                          values=ret)

        self.slices = slices
        return np.array(ret)
Example #5
    def _fit(self, x, y):
        def score_pri(slices, x0, y0):
            slices = list(slices)
            if len(slices) < 1:
                score0 = -np.inf
            else:
                slices = self.feature_unfold(slices)
                data_x0 = x0[:, slices]

                self.estimator.fit(data_x0, y0)

                score0 = np.mean(self.estimator.best_score_)  # score_test

                # print(slices, score0)
            return score0

        score = partial(score_pri, x0=x, y0=y)

        self.score_ = []
        x, y = check_X_y(x, y, "csc")
        assert all((self.check_must, self.check_muti)) in [True, False]

        feature_list = list(range(x.shape[1]))
        fold_feature_list = self.feature_fold(feature_list)
        if self.check_must:
            fold_feature_list = [
                i for i in fold_feature_list if i not in self.check_must
            ]

        slice_all = [combinations(fold_feature_list, i) for i in self.n_select]
        slice_all = [
            list(self.feature_must_fold(_)) for i in slice_all for _ in i
        ]

        scores = parallelize(n_jobs=self.n_jobs,
                             func=score,
                             iterable=slice_all)

        feature_combination = [self.feature_unfold(_) for _ in slice_all]
        index = np.argmax(scores)
        select_feature = feature_combination[index]
        su = np.zeros(x.shape[1], dtype=bool)
        su[select_feature] = True
        self.best_score_ = max(scores)
        self.score_ = scores
        self.support_ = su
        self.estimator_ = clone(self.estimator)
        if self.refit:
            self.estimator_.fit(x[:, select_feature], y)
        self.n_feature_ = len(select_feature)
        self.score_ex = list(zip(feature_combination, scores))
        self.scatter = list(zip([len(i) for i in slice_all], scores))
        self.score_ex.sort(key=lambda _: _[1], reverse=True)

        return self
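Example #5 is an exhaustive subset search: `combinations` enumerates every feature subset of the requested sizes, each subset is scored in parallel, and the argmax becomes the boolean support mask. A compact toy version of that loop, with a dummy scorer in place of the estimator's cross-validated `best_score_`:

import numpy as np
from itertools import combinations

features = list(range(5))
n_select = (2, 3)                       # subset sizes to try, as in self.n_select

def toy_score(subset):                  # stand-in for the estimator's CV score
    return sum(subset)

slice_all = [list(c) for k in n_select for c in combinations(features, k)]
scores = [toy_score(s) for s in slice_all]

best = slice_all[int(np.argmax(scores))]
support = np.zeros(len(features), dtype=bool)
support[best] = True                    # boolean mask of the winning subset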
Example #6
    def _fit(self, x, y, searchspace0, regclf0):
        def fit_parllize(random_state):
            # bootstrap resample of the training data, one resample per random_state
            data_train, y_train = sklearn.utils.resample(
                x, y, n_samples=None, replace=True, random_state=random_state)
            regclf0.fit(data_train, y_train)
            predict_data = regclf0.predict(searchspace0)
            return predict_data.ravel()  # flatten to 1D before stacking

        njobs = self.n_jobs
        number = self.number

        predict_dataj = parallelize(n_jobs=njobs,
                                    func=fit_parllize,
                                    iterable=range(number))

        return np.array(predict_dataj)
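Example #6 builds an ensemble of predictions by refitting the same regressor on bootstrap resamples of the training data and predicting over the search space, one row per resample. A self-contained sketch of the same idea with scikit-learn; the data and the `LinearRegression` estimator here are placeholders:

import numpy as np
import sklearn.utils
from sklearn.linear_model import LinearRegression

x = np.random.rand(50, 3)
y = x @ np.array([1.0, 2.0, 3.0]) + 0.1 * np.random.rand(50)
searchspace = np.random.rand(200, 3)
model = LinearRegression()

def fit_one(random_state):
    # resample with replacement, refit, then predict over the whole search space
    x_bt, y_bt = sklearn.utils.resample(x, y, replace=True, random_state=random_state)
    model.fit(x_bt, y_bt)
    return model.predict(searchspace).ravel()

predictions = np.array([fit_one(rs) for rs in range(10)])   # shape (10, 200)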
Example #7
    def cv_score_all(self, slices=None, estimator_i=0):
        """score all node with r2

        Parameters
        ----------
        slices : list, or None, default spath.slices
            change to new slices to calculate
            the lists of the index of feature subsets, each feature subset is a node,each int is the index of X
            Examples 3 nodes
            [[1,4,5],[1,4,6],[1,2,7]]
        estimator_i: int, default spath.estimator_i
            change to the estimator_i to calculate

        Returns
        ----------
            score_mean_std: nd.ndarray 2D
            the mean and std

        """

        self.estimator_i = estimator_i if isinstance(estimator_i,
                                                     int) else self.estimator_i
        n_jobs = self.n_jobs
        slices = slices if slices else self.slices

        ret = self.check_prop("cv_score_all",
                              estimator_i=self.estimator_i,
                              slices=slices)

        if ret is None:
            ret = parallelize(n_jobs=n_jobs, func=self.cv_score, iterable=slices)

            self.add_prop("cv_score_all",
                          estimator_i=self.estimator_i,
                          slices=slices,
                          values=ret)

        self.slices = slices
        return np.array(ret)
Example #8
    def _cv_predict_all(self, slices=None, estimator_i=0):
        """ calculate binary distance of 2 nodes """

        self.estimator_i = estimator_i if isinstance(estimator_i, int) else self.estimator_i
        n_jobs = self.n_jobs
        batch_size = self.batch_size
        slices = slices if slices else self.slices

        ret = self.check_prop("cv_predict_all", estimator_i=self.estimator_i, slices=slices)

        if ret is None:
            ret = parallelize(n_jobs=n_jobs, func=self.predict, iterable=slices, batch_size=batch_size)

            self.add_prop("cv_predict_all", estimator_i=self.estimator_i, slices=slices, values=ret)

        self.slices = slices

        return ret
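Examples #3, #4, #7, and #8 all follow the same memoization pattern: look up a cached result keyed by the method name, the estimator index, and the slices; only if nothing is cached, recompute in parallel and store the result via `add_prop`. The real `check_prop`/`add_prop` implementation is not shown in these snippets, so the following dictionary-backed `PropCache` is a hypothetical sketch of that contract, not the library's code:

# Hypothetical cache with the same call pattern as check_prop / add_prop above.
class PropCache:
    def __init__(self):
        self._store = {}

    @staticmethod
    def _key(name, **keys):
        # slices are lists of lists, so freeze them into a hashable key
        return name, tuple(sorted((k, repr(v)) for k, v in keys.items()))

    def check_prop(self, name, **keys):
        """Return the cached value for this name/keys combination, or None."""
        return self._store.get(self._key(name, **keys))

    def add_prop(self, name, values=None, **keys):
        """Store ``values`` under this name/keys combination."""
        self._store[self._key(name, **keys)] = values

# usage mirroring the methods above
cache = PropCache()
if (ret := cache.check_prop("cv_score_all", estimator_i=0, slices=[[1, 2], [3]])) is None:
    ret = [0.7, 0.9]   # placeholder for the parallel computation
    cache.add_prop("cv_score_all", estimator_i=0, slices=[[1, 2], [3]], values=ret)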
Example #9
    def fit(self, X, y, groups=None):
        """Fit the baf model and automatically tune the number of selected
           feature.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples, n_feature]
            Training vector, where `n_samples` is the number of samples and
            `n_feature` is the total number of feature.

        y : array-like, shape = [n_samples]
            Target values (integers for classification, real numbers for
            regression).

        groups : array-like, shape = [n_samples], optional
            cal_group labels for the samples used while splitting the dataset into
            train/test set.
        """
        X, y = check_X_y(X, y, "csr")
        # Initialization
        cv = check_cv(self.cv, y, classifier=is_classifier(self.estimator))
        scorer = check_scoring(self.estimator, scoring=self.scoring)
        ran = check_random_state(self.random_state)

        baf = BackForward(
            estimator=self.estimator,
            n_type_feature_to_select=self.n_type_feature_to_select,
            verbose=self.verbose,
            primary_feature=self.primary_feature,
            muti_grade=self.muti_grade,
            muti_index=self.muti_index,
            must_index=self.must_index,
            random_state=ran)

        func = partial(_baf_single_fit,
                       baf=baf,
                       estimator=self.estimator,
                       X=X,
                       y=y,
                       scorer=scorer,
                       random_state=ran)

        scores = parallelize(n_jobs=self.n_jobs,
                             func=func,
                             iterable=cv.split(X, y, groups),
                             respective=True)

        support, scores, score_step = zip(*scores)
        best_support = support[np.argmax(scores)]
        best_score = max(scores)
        # Re-execute an elimination with best_k over the whole set

        # Set final attributes
        self.support_step = score_step
        self.support_cv = support
        self.support_ = best_support
        self.score_cv = scores
        self.score_ = best_score
        self.estimator_ = clone(self.estimator)
        self.estimator_.fit(X[:, self.support_], y)
        self.n_feature_ = np.count_nonzero(self.support_)  # features kept in the best support
        return self
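`cv.split(X, y, groups)` yields one `(train_indices, test_indices)` pair per fold, and with `respective=True` the helper presumably unpacks each pair into the positional arguments of the per-fold fitting call. A short look at what that iterable produces with scikit-learn's `check_cv`, using placeholder data:

import numpy as np
from sklearn.model_selection import check_cv

X = np.random.rand(20, 4)
y = np.random.rand(20)

cv = check_cv(5, y, classifier=False)           # KFold(n_splits=5) for regression targets
for train_idx, test_idx in cv.split(X, y, groups=None):
    # each fold hands a (train_idx, test_idx) pair to the per-fold fitting function
    print(train_idx.shape, test_idx.shape)      # (16,) (4,)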