def fit(self, X_train, y_train):
    """Grow the cascade level by level until the score stops improving.

    A first level is trained on ``X_train`` alone. Every later level is
    trained on the previous level's stacked per-estimator outputs
    concatenated column-wise with ``X_train``. A new level is kept only
    when its score (as returned by ``self.evaluate``) is strictly greater
    than the best score so far; otherwise it is discarded and training
    stops.

    Sets ``self.classes``, ``self.n_classes``, ``self.estimators_levels``
    and ``self.max_accuracy``.

    Parameters
    ----------
    X_train : array-like
        Training features; must be stackable with ``np.hstack`` against
        the per-level output matrix.
    y_train : array-like
        Training targets.

    Returns
    -------
    self
        The fitted object, so calls can be chained.
    """
    self.classes = np.unique(y_train)
    self.n_classes = len(self.classes)
    self.estimators_levels = []
    klass = self.base_estimator.__class__

    def train_level(features):
        """Fit one new level on `features`; return (stacked outputs, score)."""
        estimators = [klass(**params) for params in self.params_list]
        # The level is registered before fitting; the caller pops it again
        # if it turns out not to improve the score.
        self.estimators_levels.append(estimators)
        predictions = []
        for estimator in estimators:
            estimator.fit(features, y_train)
            if self.k_fold > 1:
                # use cv
                predict_ = cvp(estimator, features, y_train, cv=self.k_fold)
            else:
                # use oob
                predict_ = estimator.oob_decision_function_
            # Fill NaN entries with the uniform value 1 / n_classes. This
            # is done in place, so in the OOB branch the estimator's own
            # array is modified as well.
            predict_[np.isnan(predict_)] = 1. / self.n_classes
            predictions.append(predict_)
        # Average the estimators' outputs, then pick the best class per row.
        y_pre = self.classes.take(
            np.argmax(np.array(predictions).mean(axis=0), axis=1), axis=0)
        return np.hstack(predictions), self.evaluate(y_pre, y_train)

    # first level
    attr_to_next_level, self.max_accuracy = train_level(X_train)

    # cascade step
    while True:
        print('level {}, CV accuracy: {}'.format(
            len(self.estimators_levels), self.max_accuracy))
        X_train_step = np.hstack((attr_to_next_level, X_train))
        attr_to_next_level, accuracy = train_level(X_train_step)
        if accuracy > self.max_accuracy:
            self.max_accuracy = accuracy
        else:
            # No strict improvement: drop the level just trained and stop.
            self.estimators_levels.pop()
            break
    return self
def predict(self, X, y):
    """
    Lazily yield one ``cvp`` result per model in ``self.models``.

    Each model is passed to ``cvp`` together with ``X`` and ``y`` and a
    fixed ``cv=12``; results are produced one at a time as the generator
    is consumed.
    """
    n_folds = 12
    for estimator in self.models:
        fold_predictions = cvp(estimator, X, y, cv=n_folds)
        yield fold_predictions
def scan_fit(self, X, y):
    """Fit every scanning estimator and return their stacked outputs.

    ``self._sample_slicer(X, y)`` supplies the sliced samples, their
    targets and the number of scan rounds. Each estimator in
    ``self.estimators`` is fitted on the sliced data; its output (from
    ``cvp`` when ``self.k_fold > 1``, otherwise its
    ``oob_decision_function_``) has NaN entries replaced by
    ``1 / n_classes`` and is reshaped to
    ``(len(X), scan_round_total * n_classes)``. The per-estimator
    matrices are concatenated column-wise.

    Also sets ``self.n_classes`` to the number of distinct values in ``y``.
    """
    self.n_classes = len(np.unique(y))
    newX, newy, scan_round_total = self._sample_slicer(X, y)

    def class_vector(model):
        # Fit one estimator and turn its output into one row per original sample.
        model.fit(newX, newy)
        if self.k_fold > 1:
            # use cv
            proba = cvp(model, newX, newy, cv=self.k_fold)
        else:
            # use oob
            proba = model.oob_decision_function_
        # Fill NaN entries with the uniform default (done in place).
        proba[np.isnan(proba)] = 1. / self.n_classes
        return proba.reshape((len(X), scan_round_total * self.n_classes))

    return np.hstack([class_vector(model) for model in self.estimators])
def predict(self, X, y, **kwargs):
    """
    Returns a generator containing the predictions for each of the
    internal models (using cross_val_predict and, by default, a CV=12).

    Parameters
    ----------
    X : ndarray or DataFrame of shape n x m
        A matrix of n instances with m features

    y : ndarray or Series of length n
        An array or series of target or class values

    kwargs : dict
        Keyword arguments forwarded unchanged to ``cvp`` for every model.
        ``cv`` is set to 12 unless it is supplied here.

    Yields
    ------
    The result of ``cvp(model, X, y, **kwargs)`` for each model in
    ``self.models``, in order.
    """
    # Keep the historical default while letting callers override it;
    # passing cv both explicitly and via kwargs would raise a TypeError.
    kwargs.setdefault("cv", 12)
    for model in self.models:
        yield cvp(model, X, y, **kwargs)