def iterative_fit(self, X, y, n_iter=2, refit=False, sample_weight=None):
    from sklearn.linear_model import PassiveAggressiveRegressor

    # Need to fit at least two iterations, otherwise early stopping will not
    # work because we cannot determine whether the algorithm actually
    # converged. The only way of finding this out is if the sgd spends fewer
    # iterations than max_iter. If max_iter == 1, it has to spend at least
    # one iteration and will always spend at least one iteration, so we
    # cannot know about convergence.

    if refit:
        self.estimator = None

    if self.estimator is None:
        self.fully_fit_ = False

        self.average = check_for_bool(self.average)
        self.fit_intercept = check_for_bool(self.fit_intercept)
        self.tol = float(self.tol)
        self.C = float(self.C)

        call_fit = True
        self.estimator = PassiveAggressiveRegressor(
            C=self.C,
            fit_intercept=self.fit_intercept,
            max_iter=n_iter,
            tol=self.tol,
            loss=self.loss,
            shuffle=True,
            random_state=self.random_state,
            warm_start=True,
            average=self.average,
        )
    else:
        call_fit = False

    if call_fit:
        self.estimator.fit(X, y)
    else:
        self.estimator.max_iter += n_iter
        self.estimator.max_iter = min(self.estimator.max_iter, 1000)
        self.estimator._validate_params()
        lr = "pa1" if self.estimator.loss == "epsilon_insensitive" else "pa2"
        self.estimator._partial_fit(
            X, y,
            alpha=1.0,
            C=self.estimator.C,
            loss="epsilon_insensitive",
            learning_rate=lr,
            max_iter=n_iter,
            sample_weight=sample_weight,
            coef_init=None,
            intercept_init=None,
        )

    if self.estimator.max_iter >= 1000 or n_iter > self.estimator.n_iter_:
        self.fully_fit_ = True

    return self
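# A minimal sketch of how a component like the one above is typically
# driven from the outside: grant the estimator a few more passes over the
# data per call until it reports convergence. The function name
# `fit_until_converged` and the `budget` safeguard are illustrative
# assumptions, not part of the original component API.
def fit_until_converged(component, X, y, budget=16):
    # First call builds the estimator and fits for two iterations.
    component.iterative_fit(X, y, n_iter=2, refit=True)
    # Later calls raise max_iter and continue via the warm-started
    # _partial_fit path until fully_fit_ is set.
    while not component.fully_fit_ and budget > 0:
        component.iterative_fit(X, y, n_iter=2)
        budget -= 1
    return component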
def fit(self, X, Y):
    from sklearn.svm import LinearSVR

    # In case of nested loss
    if isinstance(self.loss, dict):
        combination = self.loss
        self.loss = combination['loss']
        self.dual = combination['dual']

    self.C = float(self.C)
    self.tol = float(self.tol)
    self.dual = check_for_bool(self.dual)
    self.fit_intercept = check_for_bool(self.fit_intercept)
    self.intercept_scaling = float(self.intercept_scaling)

    self.estimator = LinearSVR(
        loss=self.loss,
        dual=self.dual,
        tol=self.tol,
        C=self.C,
        fit_intercept=self.fit_intercept,
        intercept_scaling=self.intercept_scaling,
        random_state=self.random_state,
    )
    self.estimator.fit(X, Y)
    return self
def fit(self, X, Y):
    from sklearn.svm import SVR

    self.C = float(self.C)
    if self.degree is None:
        self.degree = 3
    else:
        self.degree = int(self.degree)
    if self.gamma is None:
        self.gamma = 0.0
    else:
        self.gamma = float(self.gamma)
    if self.coef0 is None:
        self.coef0 = 0.0
    else:
        self.coef0 = float(self.coef0)
    self.tol = float(self.tol)
    # max_iter must be an integer (-1 means no limit)
    self.max_iter = int(self.max_iter)
    self.shrinking = check_for_bool(self.shrinking)

    self.estimator = SVR(
        C=self.C,
        kernel=self.kernel,
        degree=self.degree,
        gamma=self.gamma,
        coef0=self.coef0,
        shrinking=self.shrinking,
        tol=self.tol,
        max_iter=self.max_iter,
    )
    self.estimator.fit(X, Y)
    return self
def iterative_fit(self, X, y, sample_weight=None, n_iter=1, refit=False):
    from sklearn.ensemble import RandomForestClassifier

    if refit:
        self.estimator = None

    if self.estimator is None:
        self.n_estimators = int(self.n_estimators)
        if check_none(self.max_depth):
            self.max_depth = None
        else:
            self.max_depth = int(self.max_depth)

        self.min_samples_split = int(self.min_samples_split)
        self.min_samples_leaf = int(self.min_samples_leaf)
        self.min_weight_fraction_leaf = float(self.min_weight_fraction_leaf)

        if self.max_features not in ("sqrt", "log2", "auto"):
            max_features = int(X.shape[1] ** float(self.max_features))
        else:
            max_features = self.max_features

        self.bootstrap = check_for_bool(self.bootstrap)

        if check_none(self.max_leaf_nodes):
            self.max_leaf_nodes = None
        else:
            self.max_leaf_nodes = int(self.max_leaf_nodes)

        self.min_impurity_decrease = float(self.min_impurity_decrease)

        # Initial fit grows only n_iter trees; warm_start lets later
        # calls add more.
        self.estimator = RandomForestClassifier(
            n_estimators=n_iter,
            criterion=self.criterion,
            max_features=max_features,
            max_depth=self.max_depth,
            min_samples_split=self.min_samples_split,
            min_samples_leaf=self.min_samples_leaf,
            min_weight_fraction_leaf=self.min_weight_fraction_leaf,
            bootstrap=self.bootstrap,
            max_leaf_nodes=self.max_leaf_nodes,
            min_impurity_decrease=self.min_impurity_decrease,
            random_state=self.random_state,
            n_jobs=self.n_jobs,
            class_weight=self.class_weight,
            warm_start=True,
        )
    else:
        self.estimator.n_estimators += n_iter
        self.estimator.n_estimators = min(self.estimator.n_estimators,
                                          self.n_estimators)

    self.estimator.fit(X, y, sample_weight=sample_weight)
    return self
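# A small self-contained sketch of the warm_start pattern the method above
# relies on: with warm_start=True, raising n_estimators and calling fit()
# again adds new trees instead of rebuilding the forest. All dataset and
# size values here are illustrative.
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier

X_demo, y_demo = make_classification(n_samples=200, random_state=0)
forest = RandomForestClassifier(n_estimators=10, warm_start=True,
                                random_state=0)
forest.fit(X_demo, y_demo)   # grows the first 10 trees
forest.n_estimators += 10
forest.fit(X_demo, y_demo)   # grows 10 more, keeping the existing ones
assert len(forest.estimators_) == 20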
def fit(self, X, Y):
    import sklearn.svm
    import sklearn.multiclass

    # In case of nested penalty
    if isinstance(self.penalty, dict):
        combination = self.penalty
        self.penalty = combination['penalty']
        self.loss = combination['loss']
        self.dual = combination['dual']

    self.C = float(self.C)
    self.tol = float(self.tol)
    self.dual = check_for_bool(self.dual)
    self.fit_intercept = check_for_bool(self.fit_intercept)
    self.intercept_scaling = float(self.intercept_scaling)

    if check_none(self.class_weight):
        self.class_weight = None

    estimator = sklearn.svm.LinearSVC(
        penalty=self.penalty,
        loss=self.loss,
        dual=self.dual,
        tol=self.tol,
        C=self.C,
        class_weight=self.class_weight,
        fit_intercept=self.fit_intercept,
        intercept_scaling=self.intercept_scaling,
        multi_class=self.multi_class,
        random_state=self.random_state,
    )

    # Wrap in a one-vs-rest classifier for multilabel targets
    if len(Y.shape) == 2 and Y.shape[1] > 1:
        self.estimator = sklearn.multiclass.OneVsRestClassifier(
            estimator, n_jobs=1)
    else:
        self.estimator = estimator

    self.estimator.fit(X, Y)
    return self
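# The nested-penalty branch above expects the jointly constrained
# hyperparameters bundled into one dict. An illustrative (made-up)
# value, matching the keys the code unpacks:
nested_penalty_example = {
    'penalty': 'l2',          # outer choice
    'loss': 'squared_hinge',  # loss only valid for this penalty
    'dual': 'False',          # later coerced by check_for_bool
}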
def fit(self, X, Y):
    from sklearn.svm import LinearSVR

    self.C = float(self.C)
    self.tol = float(self.tol)
    self.dual = check_for_bool(self.dual)
    self.fit_intercept = check_for_bool(self.fit_intercept)
    self.intercept_scaling = float(self.intercept_scaling)

    self.estimator = LinearSVR(
        loss=self.loss,
        dual=self.dual,
        tol=self.tol,
        C=self.C,
        fit_intercept=self.fit_intercept,
        intercept_scaling=self.intercept_scaling,
        random_state=self.random_state,
    )
    self.estimator.fit(X, Y)
    return self
def fit(self, X, y, sample_weight=None):
    from sklearn.ensemble import ExtraTreesClassifier

    self.bootstrap = check_for_bool(self.bootstrap)

    self.estimator = ExtraTreesClassifier(
        n_estimators=self.n_estimators,
        max_leaf_nodes=None,
        criterion=self.criterion,
        max_features=self.max_features,
        min_samples_split=self.min_samples_split,
        min_samples_leaf=self.min_samples_leaf,
        max_depth=None,
        bootstrap=self.bootstrap,
        random_state=self.random_state,
        n_jobs=self.n_jobs,
    )
    self.estimator.fit(X, y, sample_weight=sample_weight)
    return self
def fit(self, X, Y):
    import sklearn.svm

    # Nested kernel: a (kernel name, kernel hyperparameters) tuple
    if isinstance(self.kernel, tuple):
        nested_kernel = self.kernel
        self.kernel = nested_kernel[0]
        if self.kernel == 'poly':
            self.degree = nested_kernel[1]['degree']
            self.coef0 = nested_kernel[1]['coef0']
        elif self.kernel == 'sigmoid':
            self.coef0 = nested_kernel[1]['coef0']

    self.C = float(self.C)
    if self.degree is None:
        self.degree = 3
    else:
        self.degree = int(self.degree)
    if self.gamma is None:
        self.gamma = 0.0
    else:
        self.gamma = float(self.gamma)
    if self.coef0 is None:
        self.coef0 = 0.0
    else:
        self.coef0 = float(self.coef0)
    self.tol = float(self.tol)
    # max_iter must be an integer (-1 means no limit)
    self.max_iter = int(self.max_iter)
    self.shrinking = check_for_bool(self.shrinking)
    if check_none(self.class_weight):
        self.class_weight = None

    self.estimator = sklearn.svm.SVC(
        C=self.C,
        kernel=self.kernel,
        degree=self.degree,
        gamma=self.gamma,
        coef0=self.coef0,
        shrinking=self.shrinking,
        tol=self.tol,
        class_weight=self.class_weight,
        max_iter=self.max_iter,
        random_state=self.random_state,
        decision_function_shape='ovr',
    )
    self.estimator.fit(X, Y)
    return self
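# The nested-kernel branch above expects a (kernel_name, params) tuple.
# Illustrative (made-up) values matching the unpacking logic:
nested_poly_example = ('poly', {'degree': 3, 'coef0': 0.0})
nested_sigmoid_example = ('sigmoid', {'coef0': 0.0})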
def fit(self, X, Y):
    from sklearn.svm import SVR

    # Nested kernel: a (kernel name, kernel hyperparameters) tuple
    if isinstance(self.kernel, tuple):
        nested_kernel = self.kernel
        self.kernel = nested_kernel[0]
        if self.kernel == 'poly':
            self.degree = nested_kernel[1]['degree']
            self.coef0 = nested_kernel[1]['coef0']
        elif self.kernel == 'sigmoid':
            self.coef0 = nested_kernel[1]['coef0']

    self.C = float(self.C)
    if self.degree is None:
        self.degree = 3
    else:
        self.degree = int(self.degree)
    if self.gamma is None:
        self.gamma = 0.0
    else:
        self.gamma = float(self.gamma)
    if self.coef0 is None:
        self.coef0 = 0.0
    else:
        self.coef0 = float(self.coef0)
    self.tol = float(self.tol)
    # max_iter must be an integer (-1 means no limit)
    self.max_iter = int(self.max_iter)
    self.shrinking = check_for_bool(self.shrinking)

    self.estimator = SVR(
        C=self.C,
        kernel=self.kernel,
        degree=self.degree,
        gamma=self.gamma,
        coef0=self.coef0,
        shrinking=self.shrinking,
        tol=self.tol,
        max_iter=self.max_iter,
    )
    self.estimator.fit(X, Y)
    return self
def iterative_fit(self, X, y, n_iter=2, refit=False, sample_weight=None):
    from sklearn.linear_model import SGDClassifier

    # Need to fit at least two iterations, otherwise early stopping will not
    # work because we cannot determine whether the algorithm actually
    # converged. The only way of finding this out is if the sgd spends fewer
    # iterations than max_iter. If max_iter == 1, it has to spend at least
    # one iteration and will always spend at least one iteration, so we
    # cannot know about convergence.

    if refit:
        self.estimator = None

    if self.estimator is None:
        # Unpack nested hyperparameters: (choice, {conditional settings})
        if isinstance(self.loss, tuple):
            nested_loss = self.loss
            self.loss = nested_loss[0]
            if self.loss == 'modified_huber':
                self.epsilon = nested_loss[1]['epsilon']

        if isinstance(self.penalty, tuple):
            nested_penalty = self.penalty
            self.penalty = nested_penalty[0]
            if self.penalty == "elasticnet":
                self.l1_ratio = nested_penalty[1]['l1_ratio']

        if isinstance(self.learning_rate, tuple):
            nested_learning_rate = self.learning_rate
            self.learning_rate = nested_learning_rate[0]
            if self.learning_rate == 'invscaling':
                self.eta0 = nested_learning_rate[1]['eta0']
                self.power_t = nested_learning_rate[1]['power_t']
            elif self.learning_rate == 'constant':
                self.eta0 = nested_learning_rate[1]['eta0']

        self.fully_fit_ = False

        self.alpha = float(self.alpha)
        self.l1_ratio = (float(self.l1_ratio)
                         if self.l1_ratio is not None else 0.15)
        self.epsilon = (float(self.epsilon)
                        if self.epsilon is not None else 0.1)
        self.eta0 = float(self.eta0) if self.eta0 is not None else 0.01
        self.power_t = (float(self.power_t)
                        if self.power_t is not None else 0.5)
        self.average = check_for_bool(self.average)
        self.fit_intercept = check_for_bool(self.fit_intercept)
        self.tol = float(self.tol)

        self.estimator = SGDClassifier(
            loss=self.loss,
            penalty=self.penalty,
            alpha=self.alpha,
            fit_intercept=self.fit_intercept,
            max_iter=n_iter,
            tol=self.tol,
            learning_rate=self.learning_rate,
            l1_ratio=self.l1_ratio,
            epsilon=self.epsilon,
            eta0=self.eta0,
            power_t=self.power_t,
            shuffle=True,
            average=self.average,
            random_state=self.random_state,
            warm_start=True,
        )
        self.estimator.fit(X, y, sample_weight=sample_weight)
    else:
        self.estimator.max_iter += n_iter
        self.estimator.max_iter = min(self.estimator.max_iter, 512)
        self.estimator._validate_params()
        self.estimator._partial_fit(
            X, y,
            alpha=self.estimator.alpha,
            C=1.0,
            loss=self.estimator.loss,
            learning_rate=self.estimator.learning_rate,
            max_iter=n_iter,
            sample_weight=sample_weight,
            classes=None,
            coef_init=None,
            intercept_init=None,
        )

    if self.estimator.max_iter >= 512 or n_iter > self.estimator.n_iter_:
        self.fully_fit_ = True

    return self
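# The nested hyperparameters unpacked above follow the same
# (choice, {conditional settings}) convention as the nested kernel
# earlier; illustrative (made-up) values:
nested_loss_example = ('modified_huber', {'epsilon': 0.1})
nested_penalty_example = ('elasticnet', {'l1_ratio': 0.15})
nested_learning_rate_example = ('invscaling', {'eta0': 0.01, 'power_t': 0.5})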
def iterative_fit(self, X, y, n_iter=2, refit=False, sample_weight=None):
    from sklearn.linear_model import PassiveAggressiveClassifier

    # Need to fit at least two iterations, otherwise early stopping will not
    # work because we cannot determine whether the algorithm actually
    # converged. The only way of finding this out is if the sgd spends fewer
    # iterations than max_iter. If max_iter == 1, it has to spend at least
    # one iteration and will always spend at least one iteration, so we
    # cannot know about convergence.

    if refit:
        self.estimator = None

    if self.estimator is None:
        self.fully_fit_ = False

        self.average = check_for_bool(self.average)
        self.fit_intercept = check_for_bool(self.fit_intercept)
        self.tol = float(self.tol)
        self.C = float(self.C)

        call_fit = True
        self.estimator = PassiveAggressiveClassifier(
            C=self.C,
            fit_intercept=self.fit_intercept,
            max_iter=n_iter,
            tol=self.tol,
            loss=self.loss,
            shuffle=True,
            random_state=self.random_state,
            warm_start=True,
            average=self.average,
        )
        self.classes_ = np.unique(y.astype(int))
    else:
        call_fit = False

    # Fallback for multilabel classification
    if len(y.shape) > 1 and y.shape[1] > 1:
        import sklearn.multiclass
        self.estimator.max_iter = 50
        self.estimator = sklearn.multiclass.OneVsRestClassifier(
            self.estimator, n_jobs=1)
        self.estimator.fit(X, y)
        self.fully_fit_ = True
    else:
        if call_fit:
            self.estimator.fit(X, y)
        else:
            self.estimator.max_iter += n_iter
            self.estimator.max_iter = min(self.estimator.max_iter, 1000)
            self.estimator._validate_params()
            lr = "pa1" if self.estimator.loss == "hinge" else "pa2"
            self.estimator._partial_fit(
                X, y,
                alpha=1.0,
                C=self.estimator.C,
                loss="hinge",
                learning_rate=lr,
                max_iter=n_iter,
                classes=None,
                sample_weight=sample_weight,
                coef_init=None,
                intercept_init=None,
            )
        if (self.estimator.max_iter >= 1000
                or n_iter > self.estimator.n_iter_):
            self.fully_fit_ = True

    return self
def iterative_fit(self, X, y, n_iter=2, refit=False, sample_weight=None):
    from sklearn.linear_model import SGDRegressor

    # Need to fit at least two iterations, otherwise early stopping will not
    # work because we cannot determine whether the algorithm actually
    # converged. The only way of finding this out is if the sgd spends fewer
    # iterations than max_iter. If max_iter == 1, it has to spend at least
    # one iteration and will always spend at least one iteration, so we
    # cannot know about convergence.

    if refit:
        self.estimator = None

    if self.estimator is None:
        self.fully_fit_ = False

        self.alpha = float(self.alpha)
        if not check_none(self.epsilon_insensitive):
            self.epsilon_insensitive = float(self.epsilon_insensitive)
        self.l1_ratio = (float(self.l1_ratio)
                         if self.l1_ratio is not None else 0.15)
        self.epsilon_huber = (float(self.epsilon_huber)
                              if self.epsilon_huber is not None else 0.1)
        self.eta0 = float(self.eta0) if self.eta0 is not None else 0.01
        self.power_t = (float(self.power_t)
                        if self.power_t is not None else 0.5)
        self.average = check_for_bool(self.average)
        self.fit_intercept = check_for_bool(self.fit_intercept)
        self.tol = float(self.tol)

        # Pick the epsilon that matches the configured loss
        if self.loss == "huber":
            epsilon = self.epsilon_huber
        elif self.loss in ["epsilon_insensitive",
                           "squared_epsilon_insensitive"]:
            epsilon = self.epsilon_insensitive
        else:
            epsilon = None

        self.estimator = SGDRegressor(
            loss=self.loss,
            penalty=self.penalty,
            alpha=self.alpha,
            fit_intercept=self.fit_intercept,
            max_iter=n_iter,
            tol=self.tol,
            learning_rate=self.learning_rate,
            l1_ratio=self.l1_ratio,
            epsilon=epsilon,
            eta0=self.eta0,
            power_t=self.power_t,
            shuffle=True,
            average=self.average,
            random_state=self.random_state,
            warm_start=True,
        )
        self.estimator.fit(X, y, sample_weight=sample_weight)
    else:
        self.estimator.max_iter += n_iter
        self.estimator.max_iter = min(self.estimator.max_iter, 512)
        self.estimator._validate_params()
        self.estimator._partial_fit(
            X, y,
            alpha=self.estimator.alpha,
            C=1.0,
            loss=self.estimator.loss,
            learning_rate=self.estimator.learning_rate,
            max_iter=n_iter,
            sample_weight=sample_weight,
            coef_init=None,
            intercept_init=None,
        )

    if self.estimator.max_iter >= 512 or n_iter > self.estimator.n_iter_:
        self.fully_fit_ = True

    return self