def fit(self, X_train, y_train):
        """
        Train the cascade level by level until the score stops improving.

        Every level holds one fresh estimator per entry of
        ``self.params_list``, each an instance of the class of
        ``self.base_estimator``.  The first level is trained on ``X_train``;
        every later level is trained on the previous level's stacked
        estimator outputs concatenated in front of ``X_train``.  A level is
        kept only when its score from ``self.evaluate`` is strictly greater
        than the best score so far; the first level that fails to improve
        is discarded and training stops.

        Sets ``self.classes``, ``self.n_classes``, ``self.estimators_levels``
        and ``self.max_accuracy``.

        Parameters
        ----------
        X_train : array
            Training features; it is ``np.hstack``-ed with the per-level
            outputs, so it must be column-stackable with them.
        y_train : array
            Training labels.

        NOTE(review): both branches below are treated as per-class score
        matrices (they are averaged and arg-maxed over axis 1).  Confirm
        that ``cvp`` returns such a matrix rather than labels -- with
        scikit-learn's ``cross_val_predict`` that would require
        ``method='predict_proba'``.
        """
        self.classes = np.unique(y_train)
        self.n_classes = len(self.classes)
        self.estimators_levels = []
        klass = self.base_estimator.__class__

        def train_level(features):
            """Fit a new level on `features`; return (stacked outputs, score)."""
            estimators = [klass(**params) for params in self.params_list]
            # Registered before fitting; the caller pops it again if the
            # level turns out not to improve the score.
            self.estimators_levels.append(estimators)
            outputs = []
            for estimator in estimators:
                estimator.fit(features, y_train)
                if self.k_fold > 1:  # use cv
                    output = cvp(estimator,
                                 features,
                                 y_train,
                                 cv=self.k_fold)
                else:  # use oob
                    output = estimator.oob_decision_function_
                    # fill a uniform default wherever the OOB value is NaN
                    # (done in place, on the estimator's own array)
                    output[np.isnan(output)] = 1. / self.n_classes
                outputs.append(output)
            stacked = np.hstack(outputs)
            # Average the estimators' outputs, then pick the top class.
            winners = np.argmax(np.array(outputs).mean(axis=0), axis=1)
            y_pre = self.classes.take(winners, axis=0)
            return stacked, self.evaluate(y_pre, y_train)

        # first level
        attr_to_next_level, self.max_accuracy = train_level(X_train)

        # cascade step
        while True:
            print('level {}, CV accuracy: {}'.format(
                len(self.estimators_levels), self.max_accuracy))
            X_train_step = np.hstack((attr_to_next_level, X_train))
            attr_to_next_level, accuracy = train_level(X_train_step)
            if accuracy > self.max_accuracy:
                self.max_accuracy = accuracy
            else:
                # no improvement: drop the level just trained and stop
                self.estimators_levels.pop()
                break
Example #2
0
 def predict(self, X, y):
     """
     Lazily produce one cross-validated result per internal model.

     Each item is whatever ``cvp`` returns when called with a model from
     ``self.models``, ``X``, ``y`` and 12 folds (``cv=12``); items come
     out in the order of ``self.models``.
     """
     yield from (cvp(estimator, X, y, cv=12) for estimator in self.models)
Example #3
0
 def predict(self, X, y):
     """
     Generate the ``cvp`` output of every model in ``self.models``.

     ``X`` and ``y`` are passed through unchanged to each ``cvp`` call,
     which always uses 12 folds.  Nothing is computed until the returned
     generator is iterated.
     """
     folds = 12
     for current in self.models:
         prediction = cvp(current, X, y, cv=folds)
         yield prediction
 def scan_fit(self, X, y):
     """
     Fit every estimator in ``self.estimators`` on the sliced samples and
     return their outputs as one feature matrix.

     ``self._sample_slicer(X, y)`` supplies the data actually used for
     fitting plus ``scan_round_total``.  Each estimator's output (from
     ``cvp`` when ``self.k_fold > 1``, otherwise its
     ``oob_decision_function_`` with NaNs replaced by ``1 / n_classes``)
     is reshaped to ``(len(X), scan_round_total * n_classes)``; the
     per-estimator matrices are then stacked column-wise.

     Also sets ``self.n_classes`` from the distinct values of ``y``.

     NOTE(review): the reshape presumes the slicer yields
     ``scan_round_total`` consecutive rows per original sample -- confirm
     against ``_sample_slicer``.
     """
     self.n_classes = len(np.unique(y))
     sliced_X, sliced_y, rounds = self._sample_slicer(X, y)
     row_shape = (len(X), rounds * self.n_classes)

     def class_vector(model):
         # Fit first: the OOB branch reads an attribute set by fitting.
         model.fit(sliced_X, sliced_y)
         if self.k_fold > 1:  # cross-validated output
             scores = cvp(model, sliced_X, sliced_y, cv=self.k_fold)
         else:  # out-of-bag output
             scores = model.oob_decision_function_
             # replace NaN entries in place with the uniform default
             scores[np.isnan(scores)] = 1. / self.n_classes
         return scores.reshape(row_shape)

     return np.hstack([class_vector(model) for model in self.estimators])
Example #5
0
    def predict(self, X, y, **kwargs):
        """
        Returns a generator containing the predictions for each of the
        internal models (using cross_val_predict and, by default, CV=12).

        Parameters
        ----------

        X : ndarray or DataFrame of shape n x m
            A matrix of n instances with m features

        y : ndarray or Series of length n
            An array or series of target or class values

        kwargs: dict
            keyword arguments forwarded to every ``cvp`` call. ``cv``
            defaults to 12 when it is not supplied.

        Yields
        ------
        The value returned by ``cvp`` for each model in ``self.models``,
        in order.

        """
        # Keep the historical 12-fold default unless the caller overrides it.
        kwargs.setdefault("cv", 12)
        for model in self.models:
            yield cvp(model, X, y, **kwargs)