def fit(self, X, y, sample_weight=None):
    """Fit a scikit-learn decision tree classifier on the given data.

    The hyperparameters stored on the instance are coerced to the types
    handed to ``DecisionTreeClassifier`` and written back to the instance.
    The fitted tree is stored in ``self.estimator``.

    Parameters
    ----------
    X : object exposing ``shape``
        Training data; ``X.shape[1]`` is used as the number of features
        when deriving the maximum depth.
    y : array-like
        Targets, forwarded unchanged to the tree's ``fit``.
    sample_weight : array-like, optional
        Forwarded unchanged to the tree's ``fit``.

    Returns
    -------
    self
    """
    from sklearn.tree import DecisionTreeClassifier

    # NOTE(review): max_features is coerced but never forwarded to the
    # tree below - confirm whether that is intentional.
    self.max_features = float(self.max_features)

    # Heuristic to set the tree depth: factor * number of features,
    # rounded to the nearest integer and never smaller than 1.
    if check_none(self.max_depth_factor):
        max_depth = self.max_depth_factor = None
    else:
        num_features = X.shape[1]
        # Keep the factor fractional. Truncating it with int() turned
        # every factor below 1 into 0 (i.e. depth 1) and made the
        # rounding of the product a no-op.
        self.max_depth_factor = float(self.max_depth_factor)
        max_depth = max(
            1, int(np.round(self.max_depth_factor * num_features, 0)))

    self.min_samples_split = int(self.min_samples_split)
    self.min_samples_leaf = int(self.min_samples_leaf)
    if check_none(self.max_leaf_nodes):
        self.max_leaf_nodes = None
    else:
        self.max_leaf_nodes = int(self.max_leaf_nodes)
    self.min_weight_fraction_leaf = float(self.min_weight_fraction_leaf)
    self.min_impurity_decrease = float(self.min_impurity_decrease)

    self.estimator = DecisionTreeClassifier(
        criterion=self.criterion,
        max_depth=max_depth,
        min_samples_split=self.min_samples_split,
        min_samples_leaf=self.min_samples_leaf,
        max_leaf_nodes=self.max_leaf_nodes,
        min_weight_fraction_leaf=self.min_weight_fraction_leaf,
        min_impurity_decrease=self.min_impurity_decrease,
        class_weight=self.class_weight,
        random_state=self.random_state)
    self.estimator.fit(X, y, sample_weight=sample_weight)
    return self
def iterative_fit(self, X, y, sample_weight=None, n_iter=1, refit=False):
    """Fit the gradient boosting regressor incrementally.

    On the first call (or when ``refit`` is true) the stored
    hyperparameters are coerced to their expected types and a warm-start
    ``GradientBoostingRegressor`` with ``n_iter`` stages is created.
    Later calls raise the stage count by ``n_iter``, capped at
    ``self.n_estimators``, and fit again.

    Parameters
    ----------
    X : array-like
        Training data. ``numpy`` arrays are made C-contiguous first.
    y : array-like
        Targets, forwarded to the estimator's ``fit``.
    sample_weight : array-like, optional
        Forwarded to the estimator's ``fit``.
    n_iter : int, default 1
        Number of boosting stages to add in this call.
    refit : bool, default False
        Discard the current estimator and start from scratch.

    Returns
    -------
    self
        ``self.fully_fit_`` is set to ``True`` once the estimator holds
        at least ``self.n_estimators`` stages.
    """
    # Use the public import path; the private
    # ``sklearn.ensemble.gradient_boosting`` module is not a stable API.
    from sklearn.ensemble import GradientBoostingRegressor as GBR

    # Special fix for gradient boosting!
    if isinstance(X, np.ndarray):
        X = np.ascontiguousarray(X, dtype=X.dtype)
    if refit:
        self.estimator = None

    if self.estimator is None:
        # A freshly created ensemble is not complete; drop any flag that
        # survived from a fit preceding ``refit``.
        self.fully_fit_ = False

        self.learning_rate = float(self.learning_rate)
        self.n_estimators = int(self.n_estimators)
        self.subsample = float(self.subsample)
        self.min_samples_split = int(self.min_samples_split)
        self.min_samples_leaf = int(self.min_samples_leaf)
        self.min_weight_fraction_leaf = float(
            self.min_weight_fraction_leaf)
        if check_none(self.max_depth):
            self.max_depth = None
        else:
            self.max_depth = int(self.max_depth)
        self.max_features = float(self.max_features)
        if check_none(self.max_leaf_nodes):
            self.max_leaf_nodes = None
        else:
            self.max_leaf_nodes = int(self.max_leaf_nodes)
        self.min_impurity_decrease = float(self.min_impurity_decrease)
        self.verbose = int(self.verbose)

        self.estimator = GBR(
            loss=self.loss,
            learning_rate=self.learning_rate,
            n_estimators=n_iter,
            subsample=self.subsample,
            min_samples_split=self.min_samples_split,
            min_samples_leaf=self.min_samples_leaf,
            min_weight_fraction_leaf=self.min_weight_fraction_leaf,
            max_depth=self.max_depth,
            criterion=self.criterion,
            max_features=self.max_features,
            max_leaf_nodes=self.max_leaf_nodes,
            # Previously coerced above but silently dropped.
            min_impurity_decrease=self.min_impurity_decrease,
            random_state=self.random_state,
            verbose=self.verbose,
            warm_start=True,
        )
    else:
        # warm_start=True keeps the existing stages and only trains the
        # newly requested ones, never exceeding the configured total.
        self.estimator.n_estimators += n_iter
        self.estimator.n_estimators = min(self.estimator.n_estimators,
                                          self.n_estimators)

    self.estimator.fit(X, y, sample_weight=sample_weight)

    # Apparently this if is necessary
    if self.estimator.n_estimators >= self.n_estimators:
        self.fully_fit_ = True

    return self
def iterative_fit(self, X, y, sample_weight=None, n_iter=1, refit=False):
    """Fit the random forest classifier incrementally.

    The first call (or any call with ``refit`` true) coerces the stored
    hyperparameters and builds a warm-start ``RandomForestClassifier``
    holding ``n_iter`` trees. Subsequent calls add ``n_iter`` trees, up
    to ``self.n_estimators`` in total, and fit again.

    Parameters
    ----------
    X : object exposing ``shape``
        Training data; ``X.shape[1]`` feeds the ``max_features``
        computation.
    y : array-like
        Targets, forwarded to the forest's ``fit``.
    sample_weight : array-like, optional
        Forwarded to the forest's ``fit``.
    n_iter : int, default 1
        Number of trees to add in this call.
    refit : bool, default False
        Discard the current estimator and start from scratch.

    Returns
    -------
    self
    """
    from sklearn.ensemble import RandomForestClassifier

    if refit:
        self.estimator = None

    if self.estimator is not None:
        # Extend the existing forest, capped at the configured size.
        grown = self.estimator.n_estimators + n_iter
        self.estimator.n_estimators = min(grown, self.n_estimators)
    else:
        self.n_estimators = int(self.n_estimators)
        self.max_depth = (
            None if check_none(self.max_depth) else int(self.max_depth)
        )
        self.min_samples_split = int(self.min_samples_split)
        self.min_samples_leaf = int(self.min_samples_leaf)
        self.min_weight_fraction_leaf = float(
            self.min_weight_fraction_leaf)

        # Named strategies are passed through untouched; anything else
        # is used as an exponent applied to the number of features.
        if self.max_features in ("sqrt", "log2", "auto"):
            max_features = self.max_features
        else:
            max_features = int(X.shape[1] ** float(self.max_features))

        self.bootstrap = check_for_bool(self.bootstrap)
        self.max_leaf_nodes = (
            None if check_none(self.max_leaf_nodes)
            else int(self.max_leaf_nodes)
        )
        self.min_impurity_decrease = float(self.min_impurity_decrease)

        # initial fit of only increment trees
        forest_params = dict(
            n_estimators=n_iter,
            criterion=self.criterion,
            max_features=max_features,
            max_depth=self.max_depth,
            min_samples_split=self.min_samples_split,
            min_samples_leaf=self.min_samples_leaf,
            min_weight_fraction_leaf=self.min_weight_fraction_leaf,
            bootstrap=self.bootstrap,
            max_leaf_nodes=self.max_leaf_nodes,
            min_impurity_decrease=self.min_impurity_decrease,
            random_state=self.random_state,
            n_jobs=self.n_jobs,
            class_weight=self.class_weight,
            warm_start=True,
        )
        self.estimator = RandomForestClassifier(**forest_params)

    self.estimator.fit(X, y, sample_weight=sample_weight)
    return self
def fit(self, X, Y):
    """Fit a scikit-learn ``SVC`` using the stored hyperparameters.

    ``self.kernel`` may be a tuple ``(name, params)``; in that case the
    kernel name is unpacked and, for the ``'poly'`` and ``'sigmoid'``
    kernels, ``degree`` / ``coef0`` are taken from ``params``. All
    hyperparameters are coerced to the types ``SVC`` expects and written
    back to the instance. The fitted model is stored in
    ``self.estimator``.

    Parameters
    ----------
    X : array-like
        Training data, forwarded to ``SVC.fit``.
    Y : array-like
        Targets, forwarded to ``SVC.fit``.

    Returns
    -------
    self
    """
    import sklearn.svm

    # Nested kernel
    if isinstance(self.kernel, tuple):
        nested_kernel = self.kernel
        self.kernel = nested_kernel[0]
        if self.kernel == 'poly':
            self.degree = nested_kernel[1]['degree']
            self.coef0 = nested_kernel[1]['coef0']
        elif self.kernel == 'sigmoid':
            self.coef0 = nested_kernel[1]['coef0']

    self.C = float(self.C)
    if self.degree is None:
        self.degree = 3
    else:
        self.degree = int(self.degree)
    if self.gamma is None:
        # scikit-learn rejects gamma == 0.0; 'auto' is its documented
        # spelling for the 1 / n_features default.
        self.gamma = 'auto'
    else:
        self.gamma = float(self.gamma)
    if self.coef0 is None:
        self.coef0 = 0.0
    else:
        self.coef0 = float(self.coef0)
    self.tol = float(self.tol)
    # SVC's max_iter is an integer iteration limit. Going through
    # float() first keeps accepting values such as "100.0".
    self.max_iter = int(float(self.max_iter))
    self.shrinking = check_for_bool(self.shrinking)

    if check_none(self.class_weight):
        self.class_weight = None

    self.estimator = sklearn.svm.SVC(C=self.C,
                                     kernel=self.kernel,
                                     degree=self.degree,
                                     gamma=self.gamma,
                                     coef0=self.coef0,
                                     shrinking=self.shrinking,
                                     tol=self.tol,
                                     class_weight=self.class_weight,
                                     max_iter=self.max_iter,
                                     random_state=self.random_state,
                                     decision_function_shape='ovr')
    self.estimator.fit(X, Y)
    return self
def __init__(self, n_estimators, criterion, min_samples_leaf,
             min_samples_split, max_features, bootstrap, random_state=None):
    """Store the hyperparameters and initialise the bookkeeping state.

    Parameters
    ----------
    n_estimators : int-like or None-like
        Stored as ``None`` when ``check_none`` accepts it, otherwise
        converted with ``int``.
    criterion, min_samples_leaf, min_samples_split, max_features, bootstrap
        Stored on the instance unchanged.
    random_state : optional
        Stored on the instance unchanged.
    """
    if check_none(n_estimators):
        self.n_estimators = None
    else:
        # Convert the argument itself; ``self.n_estimators`` does not
        # exist yet at this point, so reading it raised AttributeError.
        self.n_estimators = int(n_estimators)
    self.criterion = criterion
    self.min_samples_leaf = min_samples_leaf
    self.min_samples_split = min_samples_split
    self.max_features = max_features
    self.bootstrap = bootstrap
    self.n_jobs = -1
    self.random_state = random_state
    self.estimator = None
    # Wall-clock time at construction; time_limit starts out unset.
    self.start_time = time.time()
    self.time_limit = None
def fit(self, X, Y):
    """Fit a scikit-learn ``LinearSVC`` using the stored hyperparameters.

    ``self.penalty`` may be a dict bundling ``'penalty'``, ``'loss'`` and
    ``'dual'``; it is unpacked onto the instance first. When ``Y`` is
    two-dimensional with more than one column the SVC is wrapped in a
    ``OneVsRestClassifier``. The fitted model ends up in
    ``self.estimator``.

    Parameters
    ----------
    X : array-like
        Training data, forwarded to the estimator's ``fit``.
    Y : object exposing ``shape``
        Targets, forwarded to the estimator's ``fit``.

    Returns
    -------
    self
    """
    import sklearn.svm
    import sklearn.multiclass

    # In case of nested penalty
    nested = self.penalty
    if isinstance(nested, dict):
        for key in ('penalty', 'loss', 'dual'):
            setattr(self, key, nested[key])

    self.C = float(self.C)
    self.tol = float(self.tol)
    self.dual = check_for_bool(self.dual)
    self.fit_intercept = check_for_bool(self.fit_intercept)
    self.intercept_scaling = float(self.intercept_scaling)
    if check_none(self.class_weight):
        self.class_weight = None

    svc_params = dict(
        penalty=self.penalty,
        loss=self.loss,
        dual=self.dual,
        tol=self.tol,
        C=self.C,
        class_weight=self.class_weight,
        fit_intercept=self.fit_intercept,
        intercept_scaling=self.intercept_scaling,
        multi_class=self.multi_class,
        random_state=self.random_state,
    )
    base = sklearn.svm.LinearSVC(**svc_params)

    # A 2-d target with several columns gets one classifier per column.
    needs_wrapper = len(Y.shape) == 2 and Y.shape[1] > 1
    self.estimator = (
        sklearn.multiclass.OneVsRestClassifier(base, n_jobs=1)
        if needs_wrapper else base
    )
    self.estimator.fit(X, Y)
    return self
def fit(self, X, Y):
    """Fit a ``LinearDiscriminantAnalysis`` model with the stored settings.

    ``self.shrinkage`` selects the mode: a none-like value uses the
    ``'svd'`` solver without shrinkage, ``"auto"`` and ``"manual"`` use
    the ``'lsqr'`` solver (the latter with ``self.shrinkage_factor``).
    It may also be a tuple whose first item is the mode and whose second
    item is a dict holding ``'shrinkage_factor'``. When ``Y`` is
    two-dimensional with more than one column the model is wrapped in a
    ``OneVsRestClassifier``. The fitted model ends up in
    ``self.estimator``.

    Parameters
    ----------
    X : array-like
        Training data, forwarded to the estimator's ``fit``.
    Y : object exposing ``shape``
        Targets, forwarded to the estimator's ``fit``.

    Returns
    -------
    self

    Raises
    ------
    ValueError
        If the shrinkage mode is none of the supported values.
    """
    import sklearn.multiclass
    from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

    # In case of nested shrinkage
    if isinstance(self.shrinkage, tuple):
        self.shrinkage_factor = self.shrinkage[1]['shrinkage_factor']
        self.shrinkage = self.shrinkage[0]

    mode = self.shrinkage
    if check_none(mode):
        self.shrinkage_, solver = None, 'svd'
    elif mode == "auto":
        self.shrinkage_, solver = 'auto', 'lsqr'
    elif mode == "manual":
        self.shrinkage_, solver = float(self.shrinkage_factor), 'lsqr'
    else:
        raise ValueError(self.shrinkage)

    self.n_components = int(self.n_components)
    self.tol = float(self.tol)

    lda = LinearDiscriminantAnalysis(
        n_components=self.n_components,
        shrinkage=self.shrinkage_,
        tol=self.tol,
        solver=solver,
    )

    # A 2-d target with several columns gets one model per column.
    multi_column = len(Y.shape) == 2 and Y.shape[1] > 1
    self.estimator = (
        sklearn.multiclass.OneVsRestClassifier(lda, n_jobs=1)
        if multi_column else lda
    )
    self.estimator.fit(X, Y)
    return self
def iterative_fit(self, X, y, n_iter=2, refit=False, sample_weight=None):
    """Fit the SGD regressor incrementally.

    The first call (or any call with ``refit`` true) coerces the stored
    hyperparameters, builds a warm-start ``SGDRegressor`` limited to
    ``n_iter`` iterations and fits it. Later calls raise the estimator's
    ``max_iter`` by ``n_iter`` (capped at 512) and continue training
    through the estimator's ``_partial_fit``.

    Parameters
    ----------
    X : array-like
        Training data, forwarded to the estimator.
    y : array-like
        Targets, forwarded to the estimator.
    n_iter : int, default 2
        Iteration budget for this call.
    refit : bool, default False
        Discard the current estimator and start from scratch.
    sample_weight : array-like, optional
        Forwarded to the estimator.

    Returns
    -------
    self
        ``self.fully_fit_`` becomes ``True`` once the estimator's
        ``max_iter`` has reached 512 or the estimator used fewer
        iterations than ``n_iter``.
    """
    # Use the public import path; the private
    # ``sklearn.linear_model.stochastic_gradient`` module is not a stable
    # API.
    from sklearn.linear_model import SGDRegressor

    # Need to fit at least two iterations, otherwise early stopping will not
    # work because we cannot determine whether the algorithm actually
    # converged. The only way of finding this out is if the sgd spends less
    # iterations than max_iter. If max_iter == 1, it has to spend at least
    # one iteration and will always spend at least one iteration, so we
    # cannot know about convergence.
    if refit:
        self.estimator = None

    if self.estimator is None:
        self.fully_fit_ = False

        self.alpha = float(self.alpha)
        if not check_none(self.epsilon_insensitive):
            self.epsilon_insensitive = float(self.epsilon_insensitive)
        self.l1_ratio = float(self.l1_ratio) if self.l1_ratio is not None \
            else 0.15
        self.epsilon_huber = float(self.epsilon_huber) if self.epsilon_huber is not None \
            else 0.1
        self.eta0 = float(self.eta0) if self.eta0 is not None else 0.01
        self.power_t = float(self.power_t) if self.power_t is not None \
            else 0.5
        self.average = check_for_bool(self.average)
        self.fit_intercept = check_for_bool(self.fit_intercept)
        self.tol = float(self.tol)

        # Pick the epsilon that belongs to the selected loss.
        if self.loss == "huber":
            epsilon = self.epsilon_huber
        elif self.loss in [
            "epsilon_insensitive",
            "squared_epsilon_insensitive"
        ]:
            epsilon = self.epsilon_insensitive
        else:
            epsilon = None

        self.estimator = SGDRegressor(loss=self.loss,
                                      penalty=self.penalty,
                                      alpha=self.alpha,
                                      fit_intercept=self.fit_intercept,
                                      max_iter=n_iter,
                                      tol=self.tol,
                                      learning_rate=self.learning_rate,
                                      l1_ratio=self.l1_ratio,
                                      epsilon=epsilon,
                                      eta0=self.eta0,
                                      power_t=self.power_t,
                                      shuffle=True,
                                      average=self.average,
                                      random_state=self.random_state,
                                      warm_start=True)
        self.estimator.fit(X, y, sample_weight=sample_weight)
    else:
        # Track the cumulative iteration budget on the estimator while
        # training only ``n_iter`` further iterations in this call.
        self.estimator.max_iter += n_iter
        self.estimator.max_iter = min(self.estimator.max_iter, 512)
        self.estimator._validate_params()
        self.estimator._partial_fit(
            X, y,
            alpha=self.estimator.alpha,
            C=1.0,
            loss=self.estimator.loss,
            learning_rate=self.estimator.learning_rate,
            max_iter=n_iter,
            sample_weight=sample_weight,
            coef_init=None,
            intercept_init=None)

    # Done when the budget is exhausted or the optimizer stopped before
    # using all iterations granted in this call.
    if self.estimator.max_iter >= 512 or n_iter > self.estimator.n_iter_:
        self.fully_fit_ = True

    return self