Example #1
    def iterative_fit(self, X, y, n_iter=2, refit=False, sample_weight=None):
        from sklearn.linear_model import PassiveAggressiveRegressor

        # Need to fit at least two iterations, otherwise early stopping will
        # not work because we cannot determine whether the algorithm actually
        # converged. The only way to find out is if SGD spends fewer
        # iterations than max_iter. If max_iter == 1, it will always spend
        # exactly one iteration, so we cannot know about convergence.

        if refit:
            self.estimator = None

        if self.estimator is None:
            self.fully_fit_ = False

            self.average = check_for_bool(self.average)
            self.fit_intercept = check_for_bool(self.fit_intercept)
            self.tol = float(self.tol)
            self.C = float(self.C)

            call_fit = True
            self.estimator = PassiveAggressiveRegressor(
                C=self.C,
                fit_intercept=self.fit_intercept,
                max_iter=n_iter,
                tol=self.tol,
                loss=self.loss,
                shuffle=True,
                random_state=self.random_state,
                warm_start=True,
                average=self.average,
            )
        else:
            call_fit = False

        if call_fit:
            self.estimator.fit(X, y)
        else:
            self.estimator.max_iter += n_iter
            self.estimator.max_iter = min(self.estimator.max_iter, 1000)
            self.estimator._validate_params()
            lr = "pa1" if self.estimator.loss == "epsilon_insensitive" else "pa2"
            self.estimator._partial_fit(
                X, y,
                alpha=1.0,
                C=self.estimator.C,
                loss="epsilon_insensitive",
                learning_rate=lr,
                max_iter=n_iter,
                sample_weight=sample_weight,
                coef_init=None,
                intercept_init=None
            )
            if self.estimator.max_iter >= 1000 or n_iter > self.estimator.n_iter_:
                self.fully_fit_ = True

        return self
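
The convergence check above relies on scikit-learn reporting n_iter_ < max_iter when the solver stops early. A minimal standalone sketch of that check on synthetic data (the data and hyperparameters here are illustrative):

import numpy as np
from sklearn.linear_model import PassiveAggressiveRegressor

rng = np.random.RandomState(0)
X = rng.randn(200, 5)
y = X @ rng.randn(5)

est = PassiveAggressiveRegressor(max_iter=100, tol=1e-3, random_state=0)
est.fit(X, y)
# Stopping before the budget is exhausted means the solver converged.
print(est.n_iter_, est.n_iter_ < est.max_iter)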
Example #2
    def fit(self, X, Y):
        from sklearn.svm import LinearSVR

        # In case of nested loss
        if isinstance(self.loss, dict):
            combination = self.loss
            self.loss = combination['loss']
            self.dual = combination['dual']

        self.C = float(self.C)
        self.tol = float(self.tol)

        self.dual = check_for_bool(self.dual)

        self.fit_intercept = check_for_bool(self.fit_intercept)

        self.intercept_scaling = float(self.intercept_scaling)

        self.estimator = LinearSVR(loss=self.loss,
                                   dual=self.dual,
                                   tol=self.tol,
                                   C=self.C,
                                   fit_intercept=self.fit_intercept,
                                   intercept_scaling=self.intercept_scaling,
                                   random_state=self.random_state)
        self.estimator.fit(X, Y)
        return self
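
A minimal usage sketch of the wrapped estimator on synthetic data, assuming scikit-learn's public API (epsilon left at its default):

import numpy as np
from sklearn.svm import LinearSVR

rng = np.random.RandomState(0)
X = rng.randn(100, 3)
y = X @ np.array([1.0, -2.0, 0.5])  # noiseless linear target

reg = LinearSVR(C=1.0, tol=1e-4, fit_intercept=True, random_state=0)
reg.fit(X, y)
print(reg.coef_)  # should roughly recover [1.0, -2.0, 0.5]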
Example #3
    def fit(self, X, Y):
        from sklearn.svm import SVR

        self.C = float(self.C)
        if self.degree is None:
            self.degree = 3
        else:
            self.degree = int(self.degree)
        if self.gamma is None:
            # 0.0 meant 1 / n_features in old scikit-learn;
            # 'auto' is the modern equivalent
            self.gamma = 'auto'
        else:
            self.gamma = float(self.gamma)
        if self.coef0 is None:
            self.coef0 = 0.0
        else:
            self.coef0 = float(self.coef0)
        self.tol = float(self.tol)
        self.max_iter = int(self.max_iter)  # scikit-learn expects an integer

        self.shrinking = check_for_bool(self.shrinking)

        self.estimator = SVR(C=self.C,
                             kernel=self.kernel,
                             degree=self.degree,
                             gamma=self.gamma,
                             coef0=self.coef0,
                             shrinking=self.shrinking,
                             tol=self.tol,
                             max_iter=self.max_iter)
        self.estimator.fit(X, Y)
        return self
Example #4
    def iterative_fit(self, X, y, sample_weight=None, n_iter=1, refit=False):
        from sklearn.ensemble import RandomForestClassifier

        if refit:
            self.estimator = None

        if self.estimator is None:
            self.n_estimators = int(self.n_estimators)
            if check_none(self.max_depth):
                self.max_depth = None
            else:
                self.max_depth = int(self.max_depth)

            self.min_samples_split = int(self.min_samples_split)
            self.min_samples_leaf = int(self.min_samples_leaf)
            self.min_weight_fraction_leaf = float(
                self.min_weight_fraction_leaf)

            if self.max_features not in ("sqrt", "log2", "auto"):
                max_features = int(X.shape[1]**float(self.max_features))
            else:
                max_features = self.max_features

            self.bootstrap = check_for_bool(self.bootstrap)

            if check_none(self.max_leaf_nodes):
                self.max_leaf_nodes = None
            else:
                self.max_leaf_nodes = int(self.max_leaf_nodes)

            self.min_impurity_decrease = float(self.min_impurity_decrease)

            # Initial fit: start with only n_iter trees; warm_start grows the rest
            self.estimator = RandomForestClassifier(
                n_estimators=n_iter,
                criterion=self.criterion,
                max_features=max_features,
                max_depth=self.max_depth,
                min_samples_split=self.min_samples_split,
                min_samples_leaf=self.min_samples_leaf,
                min_weight_fraction_leaf=self.min_weight_fraction_leaf,
                bootstrap=self.bootstrap,
                max_leaf_nodes=self.max_leaf_nodes,
                min_impurity_decrease=self.min_impurity_decrease,
                random_state=self.random_state,
                n_jobs=self.n_jobs,
                class_weight=self.class_weight,
                warm_start=True)
        else:
            self.estimator.n_estimators += n_iter
            self.estimator.n_estimators = min(self.estimator.n_estimators,
                                              self.n_estimators)

        self.estimator.fit(X, y, sample_weight=sample_weight)
        return self
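
The incremental pattern above depends on scikit-learn's warm_start semantics: with warm_start=True, raising n_estimators and calling fit() again adds new trees instead of refitting from scratch. A minimal sketch:

from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier

X, y = make_classification(n_samples=300, random_state=0)
rf = RandomForestClassifier(n_estimators=4, warm_start=True, random_state=0)
rf.fit(X, y)           # fits the first 4 trees
rf.n_estimators += 4   # grow the budget
rf.fit(X, y)           # fits only the 4 new trees
print(len(rf.estimators_))  # 8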
Example #5
    def fit(self, X, Y):
        import sklearn.svm
        import sklearn.multiclass

        # In case of nested penalty
        if isinstance(self.penalty, dict):
            combination = self.penalty
            self.penalty = combination['penalty']
            self.loss = combination['loss']
            self.dual = combination['dual']

        self.C = float(self.C)
        self.tol = float(self.tol)

        self.dual = check_for_bool(self.dual)

        self.fit_intercept = check_for_bool(self.fit_intercept)

        self.intercept_scaling = float(self.intercept_scaling)

        if check_none(self.class_weight):
            self.class_weight = None

        estimator = sklearn.svm.LinearSVC(
            penalty=self.penalty,
            loss=self.loss,
            dual=self.dual,
            tol=self.tol,
            C=self.C,
            class_weight=self.class_weight,
            fit_intercept=self.fit_intercept,
            intercept_scaling=self.intercept_scaling,
            multi_class=self.multi_class,
            random_state=self.random_state)

        if len(Y.shape) == 2 and Y.shape[1] > 1:
            self.estimator = sklearn.multiclass.OneVsRestClassifier(estimator,
                                                                    n_jobs=1)
        else:
            self.estimator = estimator

        self.estimator.fit(X, Y)
        return self
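
The final branch wraps the linear SVM in a one-vs-rest meta-estimator whenever Y is a 2-D indicator matrix. A minimal sketch of that multilabel path on synthetic data:

import numpy as np
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import LinearSVC

rng = np.random.RandomState(0)
X = rng.randn(100, 4)
Y = rng.randint(0, 2, size=(100, 3))  # multilabel indicator matrix

clf = OneVsRestClassifier(LinearSVC(random_state=0), n_jobs=1)
clf.fit(X, Y)
print(clf.predict(X[:2]).shape)  # (2, 3): one column per label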
Example #6
    def fit(self, X, Y):
        from sklearn.svm import LinearSVR

        self.C = float(self.C)
        self.tol = float(self.tol)

        self.dual = check_for_bool(self.dual)

        self.fit_intercept = check_for_bool(self.fit_intercept)

        self.intercept_scaling = float(self.intercept_scaling)

        self.estimator = LinearSVR(loss=self.loss,
                                   dual=self.dual,
                                   tol=self.tol,
                                   C=self.C,
                                   fit_intercept=self.fit_intercept,
                                   intercept_scaling=self.intercept_scaling,
                                   random_state=self.random_state)
        self.estimator.fit(X, Y)
        return self
Example #7
    def fit(self, X, y, sample_weight=None):
        from sklearn.ensemble import ExtraTreesClassifier

        self.bootstrap = check_for_bool(self.bootstrap)
        self.estimator = ExtraTreesClassifier(n_estimators=self.n_estimators,
                                              max_leaf_nodes=None,
                                              criterion=self.criterion,
                                              max_features=self.max_features,
                                              min_samples_split=self.min_samples_split,
                                              min_samples_leaf=self.min_samples_leaf,
                                              max_depth=None,
                                              bootstrap=self.bootstrap,
                                              random_state=self.random_state,
                                              n_jobs=self.n_jobs)
        self.estimator.fit(X, y, sample_weight=sample_weight)
        return self
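
sample_weight is forwarded directly to the underlying fit. A minimal sketch with arbitrary per-sample weights (all values illustrative):

import numpy as np
from sklearn.ensemble import ExtraTreesClassifier

rng = np.random.RandomState(0)
X = rng.randn(120, 4)
y = (X[:, 0] + X[:, 1] > 0).astype(int)
w = rng.uniform(0.5, 2.0, size=120)  # arbitrary per-sample weights

clf = ExtraTreesClassifier(n_estimators=50, random_state=0, n_jobs=1)
clf.fit(X, y, sample_weight=w)
print(clf.score(X, y))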
Example #8
    def fit(self, X, Y):
        import sklearn.svm
        # Nested kernel
        if isinstance(self.kernel, tuple):
            nested_kernel = self.kernel
            self.kernel = nested_kernel[0]
            if self.kernel == 'poly':
                self.degree = nested_kernel[1]['degree']
                self.coef0 = nested_kernel[1]['coef0']
            elif self.kernel == 'sigmoid':
                self.coef0 = nested_kernel[1]['coef0']

        self.C = float(self.C)
        if self.degree is None:
            self.degree = 3
        else:
            self.degree = int(self.degree)
        if self.gamma is None:
            # 0.0 meant 1 / n_features in old scikit-learn;
            # 'auto' is the modern equivalent
            self.gamma = 'auto'
        else:
            self.gamma = float(self.gamma)
        if self.coef0 is None:
            self.coef0 = 0.0
        else:
            self.coef0 = float(self.coef0)
        self.tol = float(self.tol)
        self.max_iter = int(self.max_iter)  # scikit-learn expects an integer

        self.shrinking = check_for_bool(self.shrinking)

        if check_none(self.class_weight):
            self.class_weight = None

        self.estimator = sklearn.svm.SVC(C=self.C,
                                         kernel=self.kernel,
                                         degree=self.degree,
                                         gamma=self.gamma,
                                         coef0=self.coef0,
                                         shrinking=self.shrinking,
                                         tol=self.tol,
                                         class_weight=self.class_weight,
                                         max_iter=self.max_iter,
                                         random_state=self.random_state,
                                         decision_function_shape='ovr')
        self.estimator.fit(X, Y)
        return self
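
The tuple-encoded kernel comes from a hierarchical hyperparameter space: the kernel name first, then a dict holding only the parameters that exist for that kernel. A hedged sketch of unpacking such a value (the tuple itself is illustrative):

import numpy as np
from sklearn.svm import SVC

nested_kernel = ('poly', {'degree': 2, 'coef0': 0.5})  # illustrative value
kernel = nested_kernel[0]
params = nested_kernel[1] if kernel in ('poly', 'sigmoid') else {}

clf = SVC(kernel=kernel,
          degree=params.get('degree', 3),
          coef0=params.get('coef0', 0.0),
          gamma='scale')
X = np.array([[0., 0.], [1., 1.], [2., 2.], [3., 3.]])
y = np.array([0, 0, 1, 1])
clf.fit(X, y)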
Example #9
    def fit(self, X, Y):
        from sklearn.svm import SVR

        # Nested kernel
        if isinstance(self.kernel, tuple):
            nested_kernel = self.kernel
            self.kernel = nested_kernel[0]
            if self.kernel == 'poly':
                self.degree = nested_kernel[1]['degree']
                self.coef0 = nested_kernel[1]['coef0']
            elif self.kernel == 'sigmoid':
                self.coef0 = nested_kernel[1]['coef0']

        self.C = float(self.C)
        if self.degree is None:
            self.degree = 3
        else:
            self.degree = int(self.degree)
        if self.gamma is None:
            # 0.0 meant 1 / n_features in old scikit-learn;
            # 'auto' is the modern equivalent
            self.gamma = 'auto'
        else:
            self.gamma = float(self.gamma)
        if self.coef0 is None:
            self.coef0 = 0.0
        else:
            self.coef0 = float(self.coef0)
        self.tol = float(self.tol)
        self.max_iter = int(self.max_iter)  # scikit-learn expects an integer

        self.shrinking = check_for_bool(self.shrinking)

        self.estimator = SVR(C=self.C,
                             kernel=self.kernel,
                             degree=self.degree,
                             gamma=self.gamma,
                             coef0=self.coef0,
                             shrinking=self.shrinking,
                             tol=self.tol,
                             max_iter=self.max_iter)
        self.estimator.fit(X, Y)
        return self
Example #10
    def iterative_fit(self, X, y, n_iter=2, refit=False, sample_weight=None):
        from sklearn.linear_model import SGDClassifier

        # Need to fit at least two iterations, otherwise early stopping will
        # not work because we cannot determine whether the algorithm actually
        # converged. The only way to find out is if SGD spends fewer
        # iterations than max_iter. If max_iter == 1, it will always spend
        # exactly one iteration, so we cannot know about convergence.

        if refit:
            self.estimator = None

        if self.estimator is None:
            if isinstance(self.loss, tuple):
                nested_loss = self.loss
                self.loss = nested_loss[0]
                if self.loss == 'modified_huber':
                    self.epsilon = nested_loss[1]['epsilon']

            if isinstance(self.penalty, tuple):
                nested_penalty = self.penalty
                self.penalty = nested_penalty[0]
                if self.penalty == "elasticnet":
                    self.l1_ratio = nested_penalty[1]['l1_ratio']

            if isinstance(self.learning_rate, tuple):
                nested_learning_rate = self.learning_rate
                self.learning_rate = nested_learning_rate[0]
                if self.learning_rate == 'invscaling':
                    self.eta0 = nested_learning_rate[1]['eta0']
                    self.power_t = nested_learning_rate[1]['power_t']
                elif self.learning_rate == 'constant':
                    self.eta0 = nested_learning_rate[1]['eta0']
            self.fully_fit_ = False

            self.alpha = float(self.alpha)
            self.l1_ratio = float(self.l1_ratio) if self.l1_ratio is not None \
                else 0.15
            self.epsilon = float(self.epsilon) if self.epsilon is not None \
                else 0.1
            self.eta0 = float(self.eta0) if self.eta0 is not None else 0.01
            self.power_t = float(self.power_t) if self.power_t is not None \
                else 0.5
            self.average = check_for_bool(self.average)
            self.fit_intercept = check_for_bool(self.fit_intercept)
            self.tol = float(self.tol)

            self.estimator = SGDClassifier(loss=self.loss,
                                           penalty=self.penalty,
                                           alpha=self.alpha,
                                           fit_intercept=self.fit_intercept,
                                           max_iter=n_iter,
                                           tol=self.tol,
                                           learning_rate=self.learning_rate,
                                           l1_ratio=self.l1_ratio,
                                           epsilon=self.epsilon,
                                           eta0=self.eta0,
                                           power_t=self.power_t,
                                           shuffle=True,
                                           average=self.average,
                                           random_state=self.random_state,
                                           warm_start=True)
            self.estimator.fit(X, y, sample_weight=sample_weight)
        else:
            self.estimator.max_iter += n_iter
            self.estimator.max_iter = min(self.estimator.max_iter, 512)
            self.estimator._validate_params()
            self.estimator._partial_fit(
                X, y,
                alpha=self.estimator.alpha,
                C=1.0,
                loss=self.estimator.loss,
                learning_rate=self.estimator.learning_rate,
                max_iter=n_iter,
                sample_weight=sample_weight,
                classes=None,
                coef_init=None,
                intercept_init=None
            )

        if self.estimator.max_iter >= 512 or n_iter > self.estimator.n_iter_:
            self.fully_fit_ = True

        return self
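
The else branch drives training through scikit-learn's private _partial_fit so it can control the per-call iteration budget; that private API may change between releases. The public equivalent is partial_fit, which runs one pass over the data per call and needs the class labels up front. A minimal sketch:

import numpy as np
from sklearn.linear_model import SGDClassifier

rng = np.random.RandomState(0)
X = rng.randn(200, 5)
y = (X[:, 0] > 0).astype(int)

sgd = SGDClassifier(random_state=0)
for epoch in range(5):  # one pass over the data per call
    sgd.partial_fit(X, y, classes=np.array([0, 1]))
print(sgd.score(X, y))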
Example #11
    def iterative_fit(self, X, y, n_iter=2, refit=False, sample_weight=None):
        import numpy as np
        from sklearn.linear_model import PassiveAggressiveClassifier

        # Need to fit at least two iterations, otherwise early stopping will
        # not work because we cannot determine whether the algorithm actually
        # converged. The only way to find out is if SGD spends fewer
        # iterations than max_iter. If max_iter == 1, it will always spend
        # exactly one iteration, so we cannot know about convergence.

        if refit:
            self.estimator = None

        if self.estimator is None:
            self.fully_fit_ = False

            self.average = check_for_bool(self.average)
            self.fit_intercept = check_for_bool(self.fit_intercept)
            self.tol = float(self.tol)
            self.C = float(self.C)

            call_fit = True
            self.estimator = PassiveAggressiveClassifier(
                C=self.C,
                fit_intercept=self.fit_intercept,
                max_iter=n_iter,
                tol=self.tol,
                loss=self.loss,
                shuffle=True,
                random_state=self.random_state,
                warm_start=True,
                average=self.average,
            )
            self.classes_ = np.unique(y.astype(int))
        else:
            call_fit = False

        # Fallback for multilabel classification
        if len(y.shape) > 1 and y.shape[1] > 1:
            import sklearn.multiclass
            self.estimator.max_iter = 50
            self.estimator = sklearn.multiclass.OneVsRestClassifier(
                self.estimator, n_jobs=1)
            self.estimator.fit(X, y)
            self.fully_fit_ = True
        else:
            if call_fit:
                self.estimator.fit(X, y)
            else:
                self.estimator.max_iter += n_iter
                self.estimator.max_iter = min(self.estimator.max_iter, 1000)
                self.estimator._validate_params()
                lr = "pa1" if self.estimator.loss == "hinge" else "pa2"
                self.estimator._partial_fit(X,
                                            y,
                                            alpha=1.0,
                                            C=self.estimator.C,
                                            loss="hinge",
                                            learning_rate=lr,
                                            max_iter=n_iter,
                                            classes=None,
                                            sample_weight=sample_weight,
                                            coef_init=None,
                                            intercept_init=None)
                if (self.estimator.max_iter >= 1000
                        or n_iter > self.estimator.n_iter_):
                    self.fully_fit_ = True

        return self
Example #12
    def iterative_fit(self, X, y, n_iter=2, refit=False, sample_weight=None):
        from sklearn.linear_model import SGDRegressor

        # Need to fit at least two iterations, otherwise early stopping will
        # not work because we cannot determine whether the algorithm actually
        # converged. The only way to find out is if SGD spends fewer
        # iterations than max_iter. If max_iter == 1, it will always spend
        # exactly one iteration, so we cannot know about convergence.

        if refit:
            self.estimator = None

        if self.estimator is None:
            self.fully_fit_ = False

            self.alpha = float(self.alpha)
            if not check_none(self.epsilon_insensitive):
                self.epsilon_insensitive = float(self.epsilon_insensitive)
            self.l1_ratio = float(self.l1_ratio) if self.l1_ratio is not None \
                else 0.15
            self.epsilon_huber = float(self.epsilon_huber) if self.epsilon_huber is not None \
                else 0.1
            self.eta0 = float(self.eta0) if self.eta0 is not None else 0.01
            self.power_t = float(self.power_t) if self.power_t is not None \
                else 0.5
            self.average = check_for_bool(self.average)
            self.fit_intercept = check_for_bool(self.fit_intercept)
            self.tol = float(self.tol)
            if self.loss == "huber":
                epsilon = self.epsilon_huber
            elif self.loss in [
                    "epsilon_insensitive", "squared_epsilon_insensitive"
            ]:
                epsilon = self.epsilon_insensitive
            else:
                epsilon = None
            self.estimator = SGDRegressor(loss=self.loss,
                                          penalty=self.penalty,
                                          alpha=self.alpha,
                                          fit_intercept=self.fit_intercept,
                                          max_iter=n_iter,
                                          tol=self.tol,
                                          learning_rate=self.learning_rate,
                                          l1_ratio=self.l1_ratio,
                                          epsilon=epsilon,
                                          eta0=self.eta0,
                                          power_t=self.power_t,
                                          shuffle=True,
                                          average=self.average,
                                          random_state=self.random_state,
                                          warm_start=True)
            self.estimator.fit(X, y, sample_weight=sample_weight)
        else:
            self.estimator.max_iter += n_iter
            self.estimator.max_iter = min(self.estimator.max_iter, 512)
            self.estimator._validate_params()
            self.estimator._partial_fit(
                X,
                y,
                alpha=self.estimator.alpha,
                C=1.0,
                loss=self.estimator.loss,
                learning_rate=self.estimator.learning_rate,
                max_iter=n_iter,
                sample_weight=sample_weight,
                coef_init=None,
                intercept_init=None)

        if self.estimator.max_iter >= 512 or n_iter > self.estimator.n_iter_:
            self.fully_fit_ = True

        return self
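
SGDRegressor exposes a single epsilon parameter whose meaning depends on the loss, which is why the wrapper keeps epsilon_huber and epsilon_insensitive separate and picks one above. A minimal sketch of the two cases (data and values illustrative):

import numpy as np
from sklearn.linear_model import SGDRegressor

rng = np.random.RandomState(0)
X = rng.randn(150, 3)
y = X @ np.array([2.0, -1.0, 0.5]) + 0.1 * rng.randn(150)

# For loss='huber', epsilon is the threshold beyond which errors are
# treated linearly rather than quadratically.
huber = SGDRegressor(loss="huber", epsilon=0.1, random_state=0).fit(X, y)
# For loss='epsilon_insensitive', epsilon is the width of the tube
# within which errors are ignored.
tube = SGDRegressor(loss="epsilon_insensitive", epsilon=0.1,
                    random_state=0).fit(X, y)
print(huber.coef_, tube.coef_)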