def fit(self, X, Y, sample_weight=None):
    """Fit an AdaBoost ensemble of depth-limited decision trees.

    The hyperparameters ``n_estimators``, ``learning_rate`` and
    ``max_depth`` stored on ``self`` are cast to their numeric types in
    place before the model is built.

    Parameters
    ----------
    X
        Training data, forwarded unchanged to the underlying estimator.
    Y
        Targets. A 2-d array with more than one column is treated as a
        multilabel problem and the booster is wrapped in
        ``MultilabelClassifier``.
    sample_weight
        Optional per-sample weights, forwarded to ``fit``.

    Returns
    -------
    self
    """
    import sklearn.ensemble
    import sklearn.tree

    # Hyperparameters may arrive as strings; normalise them once.
    self.n_estimators = int(self.n_estimators)
    self.learning_rate = float(self.learning_rate)
    self.max_depth = int(self.max_depth)

    weak_learner = sklearn.tree.DecisionTreeClassifier(
        max_depth=self.max_depth)
    model = sklearn.ensemble.AdaBoostClassifier(
        base_estimator=weak_learner,
        n_estimators=self.n_estimators,
        learning_rate=self.learning_rate,
        algorithm=self.algorithm,
        random_state=self.random_state,
    )

    # Fallback for multilabel classification: wrap the booster.
    target_shape = Y.shape
    if len(target_shape) == 2 and target_shape[1] > 1:
        model = MultilabelClassifier(model, n_jobs=1)

    # Both the plain and the wrapped model are fitted the same way.
    model.fit(X, Y, sample_weight=sample_weight)

    # Only publish the estimator once fitting succeeded.
    self.estimator = model
    return self
def iterative_fit(self, X, y, n_iter=1, refit=False, sample_weight=None):
    """Train the SGD classifier for ``n_iter`` additional epochs.

    On the first call, or when ``refit`` is true, the hyperparameters
    stored on ``self`` are cast to their numeric/boolean types and a fresh
    ``SGDClassifier`` is created. Every call then runs one ``partial_fit``
    with ``n_iter`` epochs and records the progress in
    ``self._iterations``. ``self.fully_fit_`` becomes ``True`` once the
    configured ``self.n_iter`` epochs have been performed.

    Parameters
    ----------
    X
        Training data, forwarded unchanged to the estimator.
    y
        Targets. A 2-d array with more than one column triggers the
        multilabel fallback, which fits all ``self.n_iter`` epochs at once
        through ``MultilabelClassifier``.
    n_iter : int
        Number of epochs to run in this call.
    refit : bool
        Discard the current estimator and start from scratch.
    sample_weight
        Optional per-sample weights, forwarded to the estimator.

    Returns
    -------
    self
    """
    # Public import path; equivalent to the private
    # ``sklearn.linear_model.stochastic_gradient`` module.
    from sklearn.linear_model import SGDClassifier

    if refit:
        self.estimator = None

    if self.estimator is None:
        self._iterations = 0
        # A fresh estimator is by definition not fully fitted yet; without
        # this reset a stale ``True`` from a previous fit survives a refit.
        self.fully_fit_ = False

        self.alpha = float(self.alpha)
        # Compare via str() so that an already converted bool (as found on
        # a refit) is not silently turned into False.
        self.fit_intercept = str(self.fit_intercept) == 'True'
        self.n_iter = int(self.n_iter)
        self.l1_ratio = (
            float(self.l1_ratio) if self.l1_ratio is not None else 0.15)
        self.epsilon = (
            float(self.epsilon) if self.epsilon is not None else 0.1)
        self.eta0 = float(self.eta0)
        self.power_t = (
            float(self.power_t) if self.power_t is not None else 0.25)
        self.average = str(self.average) == 'True'

        self.estimator = SGDClassifier(
            loss=self.loss,
            penalty=self.penalty,
            alpha=self.alpha,
            fit_intercept=self.fit_intercept,
            n_iter=1,
            learning_rate=self.learning_rate,
            l1_ratio=self.l1_ratio,
            epsilon=self.epsilon,
            eta0=self.eta0,
            power_t=self.power_t,
            shuffle=True,
            average=self.average,
            random_state=self.random_state,
        )

    # Fallback for multilabel classification
    if len(y.shape) > 1 and y.shape[1] > 1:
        # No incremental training here: run the full epoch budget at once.
        self.estimator.n_iter = self.n_iter
        self.estimator = MultilabelClassifier(self.estimator, n_jobs=1)
        self.estimator.fit(X, y, sample_weight=sample_weight)
        self.fully_fit_ = True
    else:
        self.estimator.n_iter = n_iter
        self.estimator.partial_fit(X, y, classes=np.unique(y),
                                   sample_weight=sample_weight)
        # Count the epochs just run *before* testing the budget, otherwise
        # completion is only noticed one call too late.
        self._iterations += n_iter
        if self._iterations >= self.n_iter:
            self.fully_fit_ = True

    return self
def iterative_fit(self, X, y, sample_weight=None, n_iter=1, refit=False):
    """Grow the gradient boosting ensemble by up to ``n_iter`` estimators.

    On the first call, or when ``refit`` is true, the hyperparameters
    stored on ``self`` are cast to their numeric types and a warm-start
    ``GradientBoostingClassifier`` with zero estimators is created. Every
    call then raises the estimator count by ``n_iter`` (never beyond
    ``self.n_estimators``) and refits. ``self.fully_fit_`` becomes ``True``
    once the configured number of estimators is reached.

    Parameters
    ----------
    X
        Training data. NumPy arrays are converted to C-contiguous layout.
    y
        Targets. A 2-d array with more than one column triggers the
        multilabel fallback, which fits all ``self.n_estimators`` at once
        through ``MultilabelClassifier``.
    sample_weight
        Optional per-sample weights, forwarded to the estimator.
    n_iter : int
        Number of estimators to add in this call.
    refit : bool
        Discard the current estimator and start from scratch.

    Returns
    -------
    self
    """
    import sklearn.ensemble

    # Special fix for gradient boosting!
    # (presumably the tree code needs C-contiguous input -- TODO confirm)
    if isinstance(X, np.ndarray):
        X = np.ascontiguousarray(X, dtype=X.dtype)

    if refit:
        self.estimator = None

    if self.estimator is None:
        # A fresh estimator is not fully fitted; drop any stale flag left
        # over from a previous fit.
        self.fully_fit_ = False

        self.learning_rate = float(self.learning_rate)
        self.n_estimators = int(self.n_estimators)
        self.subsample = float(self.subsample)
        self.min_samples_split = int(self.min_samples_split)
        self.min_samples_leaf = int(self.min_samples_leaf)
        self.min_weight_fraction_leaf = float(self.min_weight_fraction_leaf)

        # Accept a real None as well as the string "None": after the first
        # call the attribute already holds None, and int(None) would raise
        # a TypeError on refit.
        if self.max_depth is None or self.max_depth == "None":
            self.max_depth = None
        else:
            self.max_depth = int(self.max_depth)

        num_features = X.shape[1]
        max_features = int(
            float(self.max_features) * (np.log(num_features) + 1))
        # Use at most half of the features
        max_features = max(1, min(int(num_features / 2), max_features))

        # Same None handling as for max_depth above.
        if self.max_leaf_nodes is None or self.max_leaf_nodes == "None":
            self.max_leaf_nodes = None
        else:
            self.max_leaf_nodes = int(self.max_leaf_nodes)

        self.verbose = int(self.verbose)

        # Start with zero estimators; warm_start lets each later fit() add
        # only the newly requested ones.
        self.estimator = sklearn.ensemble.GradientBoostingClassifier(
            loss=self.loss,
            learning_rate=self.learning_rate,
            n_estimators=0,
            subsample=self.subsample,
            min_samples_split=self.min_samples_split,
            min_samples_leaf=self.min_samples_leaf,
            min_weight_fraction_leaf=self.min_weight_fraction_leaf,
            max_depth=self.max_depth,
            max_features=max_features,
            max_leaf_nodes=self.max_leaf_nodes,
            init=self.init,
            random_state=self.random_state,
            verbose=self.verbose,
            warm_start=True,
        )

    # Fallback for multilabel classification
    if len(y.shape) > 1 and y.shape[1] > 1:
        # No incremental training here: fit the full ensemble at once.
        self.estimator.n_estimators = self.n_estimators
        self.estimator = MultilabelClassifier(self.estimator, n_jobs=1)
        # Forward the weights, as the single-label branch does.
        self.estimator.fit(X, y, sample_weight=sample_weight)
        self.fully_fit_ = True
    else:
        # Never grow past the configured budget, even when n_iter does not
        # divide it evenly.
        self.estimator.n_estimators = min(
            self.estimator.n_estimators + n_iter, self.n_estimators)
        self.estimator.fit(X, y, sample_weight=sample_weight)
        # Apparently this if is necessary
        if self.estimator.n_estimators >= self.n_estimators:
            self.fully_fit_ = True

    return self