예제 #1
0
class PassiveAggressiveClassifierImpl():
    """Adapter that delegates every call to an ``SKLModel`` instance.

    All constructor arguments are recorded in ``self._hyperparams`` and that
    mapping is forwarded unchanged, as keyword arguments, to ``SKLModel``.
    """

    def __init__(self, C=1.0, fit_intercept=True, max_iter=None, tol=None, early_stopping=False, validation_fraction=0.1, n_iter_no_change=5, shuffle=True, verbose=0, loss='hinge', n_jobs=None, random_state=None, warm_start=False, class_weight='balanced', average=False, n_iter=None):
        # Keep the full hyperparameter set around so it can be inspected
        # later; the wrapped model is built from exactly this mapping.
        self._hyperparams = dict(
            C=C,
            fit_intercept=fit_intercept,
            max_iter=max_iter,
            tol=tol,
            early_stopping=early_stopping,
            validation_fraction=validation_fraction,
            n_iter_no_change=n_iter_no_change,
            shuffle=shuffle,
            verbose=verbose,
            loss=loss,
            n_jobs=n_jobs,
            random_state=random_state,
            warm_start=warm_start,
            class_weight=class_weight,
            average=average,
            n_iter=n_iter,
        )
        self._wrapped_model = SKLModel(**self._hyperparams)

    def fit(self, X, y=None):
        """Fit the wrapped model and return ``self``.

        ``y`` is only passed on when it is not ``None``; otherwise the
        wrapped model's ``fit`` is called with ``X`` alone.
        """
        fit_args = (X,) if y is None else (X, y)
        self._wrapped_model.fit(*fit_args)
        return self

    def predict(self, X):
        """Return the wrapped model's ``predict(X)`` result."""
        wrapped = self._wrapped_model
        return wrapped.predict(X)

    def decision_function(self, X):
        """Return the wrapped model's ``decision_function(X)`` result."""
        wrapped = self._wrapped_model
        return wrapped.decision_function(X)
예제 #2
0
def PassiveAggressive_classify(params, dataset, seed, classify):
    """Train and score a PassiveAggressiveClassifier for one trial.

    Args:
        params: Mapping with a ``"C"`` entry. The value is passed through
            ``np.exp`` before being used as the classifier's ``C``.
        dataset: Forwarded to ``gen_train_test_data``.
        seed: Forwarded to ``gen_train_test_data``. The NumPy global seed and
            the model's ``random_state`` are fixed at 108 regardless.
        classify: ``"test"`` fits on the training split and scores accuracy
            on the test split; ``"cv"`` takes the mean of ``cross_val_score``
            on the training split (using ``cv_train`` folds).

    Returns:
        dict with ``'auc'`` (the score computed above; the key name is kept
        for callers even though the value is not an AUC), ``'loss'``
        (``1.0 - auc``) and ``'status'`` (``STATUS_OK``).

    Raises:
        ValueError: If ``classify`` is neither ``"test"`` nor ``"cv"``.
    """
    # Validate the mode before doing any work. Previously an unknown value
    # left ``auc`` unassigned and only failed with UnboundLocalError after
    # the data had been generated and the model built.
    if classify not in ("test", "cv"):
        raise ValueError(
            "classify must be 'test' or 'cv', got {!r}".format(classify))

    model_name = "PassiveAggressive"
    print(model_name, params, dataset, seed)
    np.random.seed(108)
    start_time = timeit.default_timer()
    train_X, train_y, test_X, test_y = gen_train_test_data(dataset, seed)
    # params["C"] is exponentiated before use -- presumably the optimiser
    # samples it in log space (TODO confirm against the search-space
    # definition).
    model = PassiveAggressiveClassifier(C=np.exp(params["C"]),
                                        max_iter=1000,
                                        tol=1e-3,
                                        random_state=108)
    if classify == "test":
        model.fit(train_X, train_y)
        pred_y = model.predict(test_X)
        # maximize accuracy
        auc = accuracy_score(test_y, pred_y)
    else:  # classify == "cv"
        scores = cross_val_score(model, train_X, train_y, cv=cv_train)
        auc = np.mean(scores)
    # minimize loss
    loss = 1.0 - auc
    end_time = timeit.default_timer()
    print("{}_runtime: {}(s)".format(model_name, round(end_time - start_time,
                                                       2)))

    # dictionary with information for evaluation
    return {'auc': auc, 'loss': loss, 'status': STATUS_OK}
예제 #3
0
class PassiveAggressive:
    """Passive-aggressive classifier that is trained in growing increments.

    The scikit-learn estimator is created lazily by ``iterative_fit`` and
    kept in ``self.estimator``. ``fit`` keeps calling ``iterative_fit`` with
    a growing iteration budget until ``configuration_fully_fitted`` is true.
    """

    def __init__(self,
                 C,
                 fit_intercept,
                 tol,
                 loss,
                 average,
                 random_state=None):
        # Hyperparameters are stored exactly as given; they are coerced to
        # float/bool only when iterative_fit builds the estimator.
        self.C = C
        self.fit_intercept = fit_intercept
        self.average = average
        self.tol = tol
        self.loss = loss
        self.random_state = random_state
        # No estimator exists until the first iterative_fit call.
        self.estimator = None

    def fit(self, X, y, sample_weight=None):
        """Train from scratch until the configuration is fully fitted.

        Starts with a refit using 2 iterations, then repeatedly calls
        ``iterative_fit`` with budgets 2, 4, 8, ... and returns ``self``.
        """
        self.iterative_fit(X, y, n_iter=2, refit=True,
                           sample_weight=sample_weight)
        exponent = 2
        while not self.configuration_fully_fitted():
            budget = int(2**exponent / 2)
            self.iterative_fit(X, y, n_iter=budget,
                               sample_weight=sample_weight)
            exponent += 1
        return self

    def iterative_fit(self, X, y, n_iter=2, refit=False, sample_weight=None):
        """Build the estimator if needed, or train it for ``n_iter`` more.

        With ``refit=True`` (or when no estimator exists yet) a new
        estimator is created with ``max_iter=n_iter`` and fitted. On later
        calls the existing estimator's ``max_iter`` is raised by ``n_iter``
        (capped at 1000) and training continues through the estimator's
        private ``_partial_fit``. Targets with more than one column are
        handled by a one-vs-rest wrapper fitted in a single call.
        ``sample_weight`` is only used on the continuation path.
        """
        from sklearn.linear_model.passive_aggressive import \
            PassiveAggressiveClassifier

        # At least two iterations must be requested: convergence can only be
        # recognised when the solver stops before exhausting max_iter, and
        # with max_iter == 1 it always uses exactly one iteration.

        if refit:
            self.estimator = None

        freshly_built = self.estimator is None
        if freshly_built:
            self.fully_fit_ = False

            # Normalise the stored hyperparameters before handing them over.
            self.average = check_for_bool(self.average)
            self.fit_intercept = check_for_bool(self.fit_intercept)
            self.tol = float(self.tol)
            self.C = float(self.C)

            estimator_kwargs = dict(
                C=self.C,
                fit_intercept=self.fit_intercept,
                max_iter=n_iter,
                tol=self.tol,
                loss=self.loss,
                shuffle=True,
                random_state=self.random_state,
                warm_start=True,
                average=self.average,
            )
            self.estimator = PassiveAggressiveClassifier(**estimator_kwargs)
            self.classes_ = np.unique(y.astype(int))

        if len(y.shape) > 1 and y.shape[1] > 1:
            # Fallback for multilabel classification: wrap the estimator,
            # fit once with a fixed budget and mark training as complete.
            import sklearn.multiclass
            self.estimator.max_iter = 50
            self.estimator = sklearn.multiclass.OneVsRestClassifier(
                self.estimator, n_jobs=1)
            self.estimator.fit(X, y)
            self.fully_fit_ = True
            return self

        if freshly_built:
            self.estimator.fit(X, y)
            return self

        # Continue training the existing estimator.
        clf = self.estimator
        clf.max_iter = min(clf.max_iter + n_iter, 1000)
        clf._validate_params()
        if clf.loss == "hinge":
            lr = "pa1"
        else:
            lr = "pa2"
        clf._partial_fit(
            X,
            y,
            alpha=1.0,
            C=clf.C,
            loss="hinge",
            learning_rate=lr,
            max_iter=n_iter,
            classes=None,
            sample_weight=sample_weight,
            coef_init=None,
            intercept_init=None,
        )
        # Done when the cap is reached or the solver used fewer iterations
        # than it was allowed.
        if clf._max_iter >= 1000 or n_iter > clf.n_iter_:
            self.fully_fit_ = True

        return self

    def configuration_fully_fitted(self):
        """Return the ``fully_fit_`` flag, or False if nothing is fitted."""
        if self.estimator is None or not hasattr(self, 'fully_fit_'):
            return False
        return self.fully_fit_

    def predict(self, X):
        """Return the estimator's predictions for ``X``.

        Raises:
            NotImplementedError: If no estimator has been built yet.
        """
        if self.estimator is None:
            raise NotImplementedError()
        return self.estimator.predict(X)

    def predict_proba(self, X):
        """Return ``softmax`` applied to the estimator's decision function.

        Raises:
            NotImplementedError: If no estimator has been built yet.
        """
        if self.estimator is None:
            raise NotImplementedError()

        return softmax(self.estimator.decision_function(X))
예제 #4
0
class PassiveAggressive(AutoSklearnClassificationAlgorithm):
    """Passive-aggressive classifier component trained in growing increments.

    The scikit-learn estimator is created lazily by ``iterative_fit`` and
    kept in ``self.estimator``; ``fit`` doubles the iteration budget until
    ``configuration_fully_fitted`` reports completion.
    """

    def __init__(self,
                 C,
                 fit_intercept,
                 tol,
                 loss,
                 average,
                 random_state=None):
        self.C = float(C)
        # Accept real booleans as well as the string 'True'. The search
        # space below emits bool values for ``average``, and a plain
        # ``== 'True'`` comparison maps the boolean True to False, which
        # made it impossible to ever enable averaging.
        self.fit_intercept = fit_intercept in (True, 'True')
        self.tol = float(tol)
        self.loss = loss
        self.average = average in (True, 'True')
        self.random_state = random_state
        self.estimator = None

    def fit(self, X, y):
        """Train from scratch with budgets 2, 4, 8, ... and return ``self``."""
        n_iter = 2
        self.iterative_fit(X, y, n_iter=n_iter, refit=True)
        while not self.configuration_fully_fitted():
            n_iter *= 2
            self.iterative_fit(X, y, n_iter=n_iter)

        return self

    def iterative_fit(self, X, y, n_iter=2, refit=False):
        """Build the estimator if needed, or train it for ``n_iter`` more.

        With ``refit=True`` (or when no estimator exists yet) a new
        estimator is created with ``max_iter=n_iter`` and fitted. On later
        calls ``max_iter`` is raised by ``n_iter`` (capped at 1000) and
        training continues through the estimator's private ``_partial_fit``.
        Targets with more than one column are handled by a one-vs-rest
        wrapper fitted in a single call. Returns ``self``.
        """
        from sklearn.linear_model.passive_aggressive import \
            PassiveAggressiveClassifier

        # Need to fit at least two iterations, otherwise early stopping will not
        # work because we cannot determine whether the algorithm actually
        # converged. The only way of finding this out is if the sgd spends less
        # iterations than max_iter. If max_iter == 1, it has to spend at least
        # one iteration and will always spend at least one iteration, so we
        # cannot know about convergence.

        if refit:
            self.estimator = None

        if self.estimator is None:
            # A new estimator starts unfitted. Without this reset a stale
            # True from an earlier run made a second fit() stop right after
            # the 2-iteration warm-up.
            self.fully_fit_ = False

            call_fit = True
            self.estimator = PassiveAggressiveClassifier(
                C=self.C,
                fit_intercept=self.fit_intercept,
                max_iter=n_iter,
                tol=self.tol,
                loss=self.loss,
                shuffle=True,
                random_state=self.random_state,
                warm_start=True,
                average=self.average,
            )
            self.classes_ = np.unique(y.astype(int))
        else:
            call_fit = False

        # Fallback for multilabel classification
        if len(y.shape) > 1 and y.shape[1] > 1:
            import sklearn.multiclass
            self.estimator.max_iter = 50
            self.estimator = sklearn.multiclass.OneVsRestClassifier(
                self.estimator, n_jobs=1)
            self.estimator.fit(X, y)
            self.fully_fit_ = True
        else:
            if call_fit:
                self.estimator.fit(X, y)
            else:
                self.estimator.max_iter += n_iter
                self.estimator.max_iter = min(self.estimator.max_iter, 1000)
                self.estimator._validate_params()
                lr = "pa1" if self.estimator.loss == "hinge" else "pa2"
                self.estimator._partial_fit(X,
                                            y,
                                            alpha=1.0,
                                            C=self.estimator.C,
                                            loss="hinge",
                                            learning_rate=lr,
                                            max_iter=n_iter,
                                            classes=None,
                                            sample_weight=None,
                                            coef_init=None,
                                            intercept_init=None)
                # Done when the cap is reached or the solver used fewer
                # iterations than it was allowed.
                if (self.estimator._max_iter >= 1000
                        or n_iter > self.estimator.n_iter_):
                    self.fully_fit_ = True

        return self

    def configuration_fully_fitted(self):
        """Return the ``fully_fit_`` flag, or False if nothing is fitted."""
        if self.estimator is None:
            return False
        elif not hasattr(self, 'fully_fit_'):
            return False
        else:
            return self.fully_fit_

    def predict(self, X):
        """Return the estimator's predictions for ``X``.

        Raises:
            NotImplementedError: If no estimator has been built yet.
        """
        if self.estimator is None:
            raise NotImplementedError()
        return self.estimator.predict(X)

    def predict_proba(self, X):
        """Return ``softmax`` applied to the estimator's decision function.

        Raises:
            NotImplementedError: If no estimator has been built yet.
        """
        if self.estimator is None:
            raise NotImplementedError()

        df = self.estimator.decision_function(X)
        return softmax(df)

    @staticmethod
    def get_properties(dataset_properties=None):
        """Return the static capability description of this component."""
        return {
            'shortname': 'PassiveAggressive Classifier',
            'name': 'Passive Aggressive Classifier',
            'handles_regression': False,
            'handles_classification': True,
            'handles_multiclass': True,
            'handles_multilabel': True,
            'is_deterministic': True,
            'input': (DENSE, SPARSE, UNSIGNED_DATA),
            'output': (PREDICTIONS, )
        }

    @staticmethod
    def get_hyperparameter_search_space(dataset_properties=None):
        """Return the ConfigurationSpace over loss, fit_intercept, tol, C, average."""
        C = UniformFloatHyperparameter("C", 1e-5, 10, 1.0, log=True)
        fit_intercept = UnParametrizedHyperparameter("fit_intercept", "True")
        loss = CategoricalHyperparameter("loss", ["hinge", "squared_hinge"],
                                         default_value="hinge")

        tol = UniformFloatHyperparameter("tol",
                                         1e-5,
                                         1e-1,
                                         default_value=1e-4,
                                         log=True)
        # The choices here are real booleans; __init__ accepts both booleans
        # and the strings 'True'/'False'.
        average = CategoricalHyperparameter('average', [False, True])

        cs = ConfigurationSpace()
        cs.add_hyperparameters([loss, fit_intercept, tol, C, average])
        return cs
예제 #5
0
class PassiveAggressive(ParamSklearnClassificationAlgorithm):
    """Passive-aggressive classifier component with an ``n_iter`` budget.

    The estimator is created on the first ``iterative_fit`` call; each call
    raises the estimator's ``n_iter`` and fits again until it reaches the
    configured ``self.n_iter``.
    """

    def __init__(self, C, fit_intercept, n_iter, loss, random_state=None):
        # Configuration values may arrive as strings, so coerce them here.
        self.C = float(C)
        self.fit_intercept = fit_intercept == 'True'
        self.n_iter = int(n_iter)
        self.loss = loss
        self.random_state = random_state
        self.estimator = None

    def fit(self, X, y):
        """Call ``iterative_fit`` one step at a time until fully fitted."""
        finished = self.configuration_fully_fitted()
        while not finished:
            self.iterative_fit(X, y, n_iter=1)
            finished = self.configuration_fully_fitted()

        return self

    def iterative_fit(self, X, y, n_iter=1, refit=False):
        """Raise the estimator's ``n_iter`` by ``n_iter`` and fit it again.

        A new estimator (starting at ``n_iter=1``, ``warm_start=True``) is
        built when ``refit`` is true or none exists yet. Returns ``self``.
        """
        if refit:
            self.estimator = None

        if self.estimator is None:
            estimator_kwargs = dict(
                C=self.C,
                fit_intercept=self.fit_intercept,
                n_iter=1,
                loss=self.loss,
                shuffle=True,
                random_state=self.random_state,
                warm_start=True,
            )
            self.estimator = PassiveAggressiveClassifier(**estimator_kwargs)
            self.classes_ = np.unique(y.astype(int))

        clf = self.estimator
        clf.n_iter = clf.n_iter + n_iter
        clf.fit(X, y)

        return self

    def configuration_fully_fitted(self):
        """Return True once the estimator's ``n_iter`` reaches ``self.n_iter``."""
        clf = self.estimator
        if clf is None:
            return False
        return clf.n_iter >= self.n_iter

    def predict(self, X):
        """Return the estimator's predictions for ``X``.

        Raises:
            NotImplementedError: If no estimator has been built yet.
        """
        clf = self.estimator
        if clf is None:
            raise NotImplementedError()
        return clf.predict(X)

    def predict_proba(self, X):
        """Return ``softmax`` applied to the estimator's decision function.

        Raises:
            NotImplementedError: If no estimator has been built yet.
        """
        clf = self.estimator
        if clf is None:
            raise NotImplementedError()

        return softmax(clf.decision_function(X))

    @staticmethod
    def get_properties(dataset_properties=None):
        """Return the static capability description of this component."""
        properties = {
            'shortname': 'PassiveAggressive Classifier',
            'name': 'Passive Aggressive Stochastic Gradient Descent '
                    'Classifier',
            'handles_missing_values': False,
            'handles_nominal_values': False,
            'handles_numerical_features': True,
            'prefers_data_scaled': True,
            'prefers_data_normalized': True,
            'handles_regression': False,
            'handles_classification': True,
            'handles_multiclass': True,
            'handles_multilabel': False,
            'is_deterministic': True,
            'handles_sparse': True,
            'input': (DENSE, SPARSE, UNSIGNED_DATA),
            'output': (PREDICTIONS,),
            # TODO find out what is best used here!
            'preferred_dtype': None,
        }
        return properties

    @staticmethod
    def get_hyperparameter_search_space(dataset_properties=None):
        """Return the ConfigurationSpace over loss, fit_intercept, n_iter, C."""
        loss = CategoricalHyperparameter("loss",
                                         ["hinge", "squared_hinge"],
                                         default="hinge")
        fit_intercept = UnParametrizedHyperparameter("fit_intercept", "True")
        n_iter = UniformIntegerHyperparameter("n_iter", 5, 1000, default=20)
        C = UniformFloatHyperparameter("C", 1e-5, 10, 1, log=True)

        cs = ConfigurationSpace()
        # Registration order matches the order of creation above.
        for hyperparameter in (loss, fit_intercept, n_iter, C):
            cs.add_hyperparameter(hyperparameter)
        return cs
예제 #6
0
class PassiveAggressive(
        IterativeComponentWithSampleWeight,
        BaseClassificationModel,
):
    """Passive-aggressive classifier component trained in growing increments.

    The scikit-learn estimator is created lazily by ``iterative_fit`` and
    kept in ``self.estimator``. The search space can be produced either as
    a ConfigurationSpace (``'smac'``) or as a hyperopt dict (``'tpe'``).
    """

    def __init__(self,
                 C,
                 fit_intercept,
                 tol,
                 loss,
                 average,
                 random_state=None):
        # Hyperparameters are stored exactly as given; they are coerced to
        # float/bool only when iterative_fit builds the estimator.
        self.C = C
        self.fit_intercept = fit_intercept
        self.average = average
        self.tol = tol
        self.loss = loss
        self.random_state = random_state
        self.estimator = None
        self.time_limit = None
        self.start_time = time.time()

    def iterative_fit(self, X, y, n_iter=2, refit=False, sample_weight=None):
        """Build the estimator if needed, or train it for ``n_iter`` more.

        With ``refit=True`` (or when no estimator exists yet) a new
        estimator is created with ``max_iter=n_iter`` and fitted. On later
        calls ``max_iter`` is raised by ``n_iter`` (capped at 1000) and
        training continues through the estimator's private ``_partial_fit``.
        Targets with more than one column are handled by a one-vs-rest
        wrapper fitted in a single call. ``sample_weight`` is only used on
        the continuation path. Returns ``self``.
        """
        # Use the public import location: the private module path
        # ``sklearn.linear_model.passive_aggressive`` was deprecated and later
        # removed from scikit-learn, while this path works in every release.
        from sklearn.linear_model import PassiveAggressiveClassifier

        # Need to fit at least two iterations, otherwise early stopping will not
        # work because we cannot determine whether the algorithm actually
        # converged. The only way of finding this out is if the sgd spends less
        # iterations than max_iter. If max_iter == 1, it has to spend at least
        # one iteration and will always spend at least one iteration, so we
        # cannot know about convergence.

        if refit:
            self.estimator = None

        if self.estimator is None:
            self.fully_fit_ = False

            self.average = check_for_bool(self.average)
            self.fit_intercept = check_for_bool(self.fit_intercept)
            self.tol = float(self.tol)
            self.C = float(self.C)

            call_fit = True
            self.estimator = PassiveAggressiveClassifier(
                C=self.C,
                fit_intercept=self.fit_intercept,
                max_iter=n_iter,
                tol=self.tol,
                loss=self.loss,
                shuffle=True,
                random_state=self.random_state,
                warm_start=True,
                average=self.average,
            )
            self.classes_ = np.unique(y.astype(int))
        else:
            call_fit = False

        # Fallback for multilabel classification
        if len(y.shape) > 1 and y.shape[1] > 1:
            import sklearn.multiclass
            self.estimator.max_iter = 50
            self.estimator = sklearn.multiclass.OneVsRestClassifier(
                self.estimator, n_jobs=1)
            self.estimator.fit(X, y)
            self.fully_fit_ = True
        else:
            if call_fit:
                self.estimator.fit(X, y)
            else:
                self.estimator.max_iter += n_iter
                self.estimator.max_iter = min(self.estimator.max_iter, 1000)
                self.estimator._validate_params()
                lr = "pa1" if self.estimator.loss == "hinge" else "pa2"
                self.estimator._partial_fit(X,
                                            y,
                                            alpha=1.0,
                                            C=self.estimator.C,
                                            loss="hinge",
                                            learning_rate=lr,
                                            max_iter=n_iter,
                                            classes=None,
                                            sample_weight=sample_weight,
                                            coef_init=None,
                                            intercept_init=None)
                # Done when the cap is reached or the solver used fewer
                # iterations than it was allowed.
                if (self.estimator.max_iter >= 1000
                        or n_iter > self.estimator.n_iter_):
                    self.fully_fit_ = True

        return self

    def configuration_fully_fitted(self):
        """Return the ``fully_fit_`` flag, or False if nothing is fitted."""
        if self.estimator is None:
            return False
        elif not hasattr(self, 'fully_fit_'):
            return False
        else:
            return self.fully_fit_

    def predict(self, X):
        """Return the estimator's predictions for ``X``.

        Raises:
            NotImplementedError: If no estimator has been built yet.
        """
        if self.estimator is None:
            raise NotImplementedError()
        return self.estimator.predict(X)

    def predict_proba(self, X):
        """Return ``softmax`` applied to the estimator's decision function.

        Raises:
            NotImplementedError: If no estimator has been built yet.
        """
        if self.estimator is None:
            raise NotImplementedError()

        df = self.estimator.decision_function(X)
        return softmax(df)

    @staticmethod
    def get_properties(dataset_properties=None):
        """Return the static capability description of this component."""
        return {
            'shortname': 'PassiveAggressive Classifier',
            'name': 'Passive Aggressive Classifier',
            'handles_regression': False,
            'handles_classification': True,
            'handles_multiclass': True,
            'handles_multilabel': True,
            'is_deterministic': True,
            'input': (DENSE, SPARSE, UNSIGNED_DATA),
            'output': (PREDICTIONS, )
        }

    @staticmethod
    def get_hyperparameter_search_space(dataset_properties=None,
                                        optimizer='smac'):
        """Return the hyperparameter search space for the given optimizer.

        Args:
            dataset_properties: Unused here.
            optimizer: ``'smac'`` returns a ConfigurationSpace; ``'tpe'``
                returns a dict of hyperopt expressions.

        Raises:
            ValueError: If ``optimizer`` is neither ``'smac'`` nor ``'tpe'``
                (previously this silently returned ``None``).
        """
        if optimizer == 'smac':
            C = UniformFloatHyperparameter("C", 1e-5, 10, 1.0, log=True)
            fit_intercept = UnParametrizedHyperparameter(
                "fit_intercept", "True")
            loss = CategoricalHyperparameter("loss",
                                             ["hinge", "squared_hinge"],
                                             default_value="hinge")

            tol = UniformFloatHyperparameter("tol",
                                             1e-5,
                                             1e-1,
                                             default_value=1e-4,
                                             log=True)
            # Note: Average could also be an Integer if > 1
            average = CategoricalHyperparameter('average', ['False', 'True'],
                                                default_value='False')

            cs = ConfigurationSpace()
            cs.add_hyperparameters([loss, fit_intercept, tol, C, average])
            return cs
        elif optimizer == 'tpe':
            # NOTE(review): the labels mix the 'pa_' and 'pr_' prefixes. They
            # are kept as-is because renaming would change the identifiers
            # recorded in existing trials -- confirm whether this is intended.
            #
            # The defaults matching the 'smac' space would be:
            #   C=1, fit_intercept="True", loss="hinge", tol=1e-4,
            #   average="False"
            # (this was an unused ``init_trial`` dict before).
            space = {
                'C': hp.loguniform("pa_C", np.log(1e-5), np.log(10)),
                'fit_intercept': hp.choice('pa_fit_intercept', ["True"]),
                'loss': hp.choice('pr_loss', ["hinge", "squared_hinge"]),
                'tol': hp.loguniform('pr_tol', np.log(1e-5), np.log(1e-1)),
                'average': hp.choice('pr_average', ["False", "True"])
            }

            return space
        else:
            raise ValueError(
                "Unsupported optimizer {!r}; expected 'smac' or 'tpe'".format(
                    optimizer))
예제 #7
0
class PassiveAggressive(AutoSklearnClassificationAlgorithm):
    """Passive-aggressive classifier component trained via ``partial_fit``.

    ``iterative_fit`` counts the requested iterations in ``self._iterations``
    and marks the model as fully fitted once the configured ``n_iter`` has
    been reached.
    """

    def __init__(self, C, fit_intercept, n_iter, loss, random_state=None):
        super(PassiveAggressive, self).__init__()
        # Configuration values may arrive as strings, so coerce them here.
        self.C = float(C)
        self.fit_intercept = fit_intercept == 'True'
        self.n_iter = int(n_iter)
        self.loss = loss
        self.random_state = random_state
        self.estimator = None

    def fit(self, X, y):
        """Call ``iterative_fit`` one step at a time until fully fitted."""
        while not self.configuration_fully_fitted():
            self.iterative_fit(X, y, n_iter=1)

        return self

    def iterative_fit(self, X, y, n_iter=1, refit=False):
        """Train the estimator for ``n_iter`` more counted iterations.

        A new estimator is built when ``refit`` is true or none exists yet;
        this also resets the iteration counter and the completion flag.
        Targets with more than one column are handled by a one-vs-rest
        wrapper fitted in a single call with the full ``self.n_iter`` budget.
        Returns ``self``.
        """
        from sklearn.linear_model.passive_aggressive import \
            PassiveAggressiveClassifier

        if refit:
            self.estimator = None

        if self.estimator is None:
            self._iterations = 0
            # A new estimator starts unfitted. Without this reset a stale
            # True from an earlier run made refit=True report completion
            # after a single step.
            self.fully_fit_ = False

            self.estimator = PassiveAggressiveClassifier(
                C=self.C,
                fit_intercept=self.fit_intercept,
                n_iter=1,
                loss=self.loss,
                shuffle=True,
                random_state=self.random_state,
                warm_start=True)
            self.classes_ = np.unique(y.astype(int))

        # Fallback for multilabel classification
        if len(y.shape) > 1 and y.shape[1] > 1:
            import sklearn.multiclass
            self.estimator.n_iter = self.n_iter
            self.estimator = sklearn.multiclass.OneVsRestClassifier(
                self.estimator, n_jobs=1)
            self.estimator.fit(X, y)
            self.fully_fit_ = True
        else:
            self.estimator.n_iter = n_iter
            self.estimator.partial_fit(X, y, classes=np.unique(y))
            # Count this step *before* testing for completion. Checking
            # first trained one iteration more than the configured n_iter.
            self._iterations += n_iter
            if self._iterations >= self.n_iter:
                self.fully_fit_ = True

        return self

    def configuration_fully_fitted(self):
        """Return the ``fully_fit_`` flag, or False if nothing is fitted."""
        if self.estimator is None:
            return False
        elif not hasattr(self, 'fully_fit_'):
            return False
        else:
            return self.fully_fit_

    def predict(self, X):
        """Return the estimator's predictions for ``X``.

        Raises:
            NotImplementedError: If no estimator has been built yet.
        """
        if self.estimator is None:
            raise NotImplementedError()
        return self.estimator.predict(X)

    def predict_proba(self, X):
        """Return ``softmax`` applied to the estimator's decision function.

        Raises:
            NotImplementedError: If no estimator has been built yet.
        """
        if self.estimator is None:
            raise NotImplementedError()

        df = self.estimator.decision_function(X)
        return softmax(df)

    @staticmethod
    def get_properties(dataset_properties=None):
        """Return the static capability description of this component."""
        return {
            'shortname': 'PassiveAggressive Classifier',
            'name': 'Passive Aggressive Classifier',
            'handles_regression': False,
            'handles_classification': True,
            'handles_multiclass': True,
            'handles_multilabel': True,
            'is_deterministic': True,
            'input': (DENSE, SPARSE, UNSIGNED_DATA),
            'output': (PREDICTIONS, )
        }

    @staticmethod
    def get_hyperparameter_search_space(dataset_properties=None):
        """Return the ConfigurationSpace over loss, fit_intercept, n_iter, C."""
        loss = CategoricalHyperparameter("loss", ["hinge", "squared_hinge"],
                                         default="hinge")
        fit_intercept = UnParametrizedHyperparameter("fit_intercept", "True")
        n_iter = UniformIntegerHyperparameter("n_iter",
                                              5,
                                              1000,
                                              default=20,
                                              log=True)
        C = UniformFloatHyperparameter("C", 1e-5, 10, 1, log=True)
        cs = ConfigurationSpace()
        cs.add_hyperparameter(loss)
        cs.add_hyperparameter(fit_intercept)
        cs.add_hyperparameter(n_iter)
        cs.add_hyperparameter(C)
        return cs
예제 #8
0
class PassiveAggressive(
    IterativeComponentWithSampleWeight,
    AutoSklearnClassificationAlgorithm,
):
    """Passive-aggressive classifier component trained in growing increments.

    The scikit-learn estimator is created lazily by ``iterative_fit`` and
    kept in ``self.estimator``; ``configuration_fully_fitted`` reports when
    no further training is needed.
    """

    def __init__(self, C, fit_intercept, tol, loss, average, random_state=None):
        # Hyperparameters are stored exactly as given; they are coerced to
        # float/bool only when iterative_fit builds the estimator.
        self.C = C
        self.fit_intercept = fit_intercept
        self.average = average
        self.tol = tol
        self.loss = loss
        self.random_state = random_state
        self.estimator = None

    def iterative_fit(self, X, y, n_iter=2, refit=False, sample_weight=None):
        """Build the estimator if needed, or train it for ``n_iter`` more.

        With ``refit=True`` (or when no estimator exists yet) a new
        estimator is created with ``max_iter=n_iter`` and fitted. On later
        calls ``max_iter`` is raised by ``n_iter`` (capped at 1000) and
        training continues through the estimator's private ``_partial_fit``.
        Targets with more than one column are handled by a one-vs-rest
        wrapper fitted in a single call. ``sample_weight`` is only used on
        the continuation path. Returns ``self``.
        """
        from sklearn.linear_model.passive_aggressive import \
            PassiveAggressiveClassifier

        # A budget of at least two iterations is required: convergence is
        # only detectable when the solver stops before reaching max_iter,
        # and with max_iter == 1 it always spends exactly one iteration.

        if refit:
            self.estimator = None

        needs_initial_fit = self.estimator is None
        if needs_initial_fit:
            self.fully_fit_ = False

            # Normalise the stored hyperparameters before handing them over.
            self.average = check_for_bool(self.average)
            self.fit_intercept = check_for_bool(self.fit_intercept)
            self.tol = float(self.tol)
            self.C = float(self.C)

            self.estimator = PassiveAggressiveClassifier(
                C=self.C,
                fit_intercept=self.fit_intercept,
                max_iter=n_iter,
                tol=self.tol,
                loss=self.loss,
                shuffle=True,
                random_state=self.random_state,
                warm_start=True,
                average=self.average,
            )
            self.classes_ = np.unique(y.astype(int))

        multilabel = len(y.shape) > 1 and y.shape[1] > 1
        if multilabel:
            # Fallback for multilabel classification: wrap the estimator,
            # fit once with a fixed budget and mark training as complete.
            import sklearn.multiclass
            self.estimator.max_iter = 50
            self.estimator = sklearn.multiclass.OneVsRestClassifier(
                self.estimator, n_jobs=1)
            self.estimator.fit(X, y)
            self.fully_fit_ = True
        elif needs_initial_fit:
            self.estimator.fit(X, y)
        else:
            # Continue training the existing estimator.
            est = self.estimator
            grown = est.max_iter + n_iter
            est.max_iter = grown if grown < 1000 else 1000
            est._validate_params()
            lr = "pa1" if est.loss == "hinge" else "pa2"
            partial_fit_kwargs = dict(
                alpha=1.0,
                C=est.C,
                loss="hinge",
                learning_rate=lr,
                max_iter=n_iter,
                classes=None,
                sample_weight=sample_weight,
                coef_init=None,
                intercept_init=None,
            )
            est._partial_fit(X, y, **partial_fit_kwargs)
            # Done when the cap is reached or the solver used fewer
            # iterations than it was allowed.
            if est._max_iter >= 1000 or n_iter > est.n_iter_:
                self.fully_fit_ = True

        return self

    def configuration_fully_fitted(self):
        """Return the ``fully_fit_`` flag, or False if nothing is fitted."""
        if self.estimator is None or not hasattr(self, 'fully_fit_'):
            return False
        return self.fully_fit_

    def predict(self, X):
        """Return the estimator's predictions for ``X``.

        Raises:
            NotImplementedError: If no estimator has been built yet.
        """
        est = self.estimator
        if est is None:
            raise NotImplementedError()
        return est.predict(X)

    def predict_proba(self, X):
        """Return ``softmax`` applied to the estimator's decision function.

        Raises:
            NotImplementedError: If no estimator has been built yet.
        """
        est = self.estimator
        if est is None:
            raise NotImplementedError()

        return softmax(est.decision_function(X))

    @staticmethod
    def get_properties(dataset_properties=None):
        """Return the static capability description of this component."""
        properties = {
            'shortname': 'PassiveAggressive Classifier',
            'name': 'Passive Aggressive Classifier',
            'handles_regression': False,
            'handles_classification': True,
            'handles_multiclass': True,
            'handles_multilabel': True,
            'is_deterministic': True,
            'input': (DENSE, SPARSE, UNSIGNED_DATA),
            'output': (PREDICTIONS,),
        }
        return properties

    @staticmethod
    def get_hyperparameter_search_space(dataset_properties=None):
        """Return the ConfigurationSpace over loss, fit_intercept, tol, C, average."""
        C = UniformFloatHyperparameter("C", 1e-5, 10, 1.0, log=True)
        fit_intercept = UnParametrizedHyperparameter("fit_intercept", "True")
        loss = CategoricalHyperparameter(
            "loss",
            ["hinge", "squared_hinge"],
            default_value="hinge",
        )
        tol = UniformFloatHyperparameter(
            "tol", 1e-5, 1e-1, default_value=1e-4, log=True)
        # Note: Average could also be an Integer if > 1
        average = CategoricalHyperparameter(
            'average', ['False', 'True'], default_value='False')

        hyperparameters = [loss, fit_intercept, tol, C, average]
        cs = ConfigurationSpace()
        cs.add_hyperparameters(hyperparameters)
        return cs
예제 #9
0
class PassiveAggressive(AutoSklearnClassificationAlgorithm):
    """Passive-aggressive classifier trained through repeated warm-started fits.

    Every ``iterative_fit`` call raises the wrapped estimator's ``n_iter``
    and calls its ``fit`` again (the estimator is created with
    ``warm_start=True``). Training counts as finished once the estimator's
    ``n_iter`` is no longer below the ``n_iter`` given to the constructor.
    """

    def __init__(self, C, fit_intercept, n_iter, loss, random_state=None):
        """Store the configuration; the estimator is created on first fit.

        ``C`` is converted with ``float`` and ``n_iter`` with ``int``.
        ``fit_intercept`` becomes ``True`` only when it equals the string
        ``'True'``.
        """
        penalty = float(C)
        use_intercept = fit_intercept == 'True'
        total_iterations = int(n_iter)

        self.C = penalty
        self.fit_intercept = use_intercept
        self.n_iter = total_iterations
        self.loss = loss
        self.random_state = random_state
        self.estimator = None

    def fit(self, X, y):
        """Call ``iterative_fit`` one step at a time until fully fitted."""
        while True:
            if self.configuration_fully_fitted():
                break
            self.iterative_fit(X, y, n_iter=1)
        return self

    def iterative_fit(self, X, y, n_iter=1, refit=False):
        """Raise the estimator's iteration count by ``n_iter`` and refit.

        When ``refit`` is true, or no estimator exists yet, a new estimator
        is created with ``n_iter=1`` and ``self.classes_`` is set from the
        unique integer-cast values of ``y``. Returns ``self``.
        """
        from sklearn.linear_model.passive_aggressive import \
            PassiveAggressiveClassifier

        if refit:
            self.estimator = None

        if self.estimator is None:
            settings = dict(
                C=self.C,
                fit_intercept=self.fit_intercept,
                n_iter=1,
                loss=self.loss,
                shuffle=True,
                random_state=self.random_state,
                warm_start=True,
            )
            self.estimator = PassiveAggressiveClassifier(**settings)
            self.classes_ = np.unique(y.astype(int))

        model = self.estimator
        # The estimator's own n_iter doubles as the progress counter that
        # configuration_fully_fitted() inspects.
        model.n_iter += n_iter
        model.fit(X, y)

        return self

    def configuration_fully_fitted(self):
        """Return True once the estimator's ``n_iter`` reaches ``self.n_iter``.

        Returns ``False`` while no estimator has been created.
        """
        model = self.estimator
        if model is None:
            return False
        return model.n_iter >= self.n_iter

    def predict(self, X):
        """Return the wrapped estimator's predictions for ``X``.

        Raises ``NotImplementedError`` if no estimator has been created.
        """
        model = self.estimator
        if model is None:
            raise NotImplementedError()
        return model.predict(X)

    def predict_proba(self, X):
        """Return ``softmax`` applied to ``decision_function(X)``.

        Raises ``NotImplementedError`` if no estimator has been created.
        """
        model = self.estimator
        if model is None:
            raise NotImplementedError()
        return softmax(model.decision_function(X))

    @staticmethod
    def get_properties(dataset_properties=None):
        """Return a dictionary of descriptive flags for this classifier.

        ``dataset_properties`` is accepted but not consulted.
        """
        return dict(
            shortname='PassiveAggressive Classifier',
            name=('Passive Aggressive Stochastic Gradient Descent '
                  'Classifier'),
            handles_missing_values=False,
            handles_nominal_values=False,
            handles_numerical_features=True,
            prefers_data_scaled=True,
            prefers_data_normalized=True,
            handles_regression=False,
            handles_classification=True,
            handles_multiclass=True,
            handles_multilabel=False,
            is_deterministic=True,
            handles_sparse=True,
            input=(DENSE, SPARSE, UNSIGNED_DATA),
            output=(PREDICTIONS, ),
            # TODO find out what is best used here!
            preferred_dtype=None,
        )

    @staticmethod
    def get_hyperparameter_search_space(dataset_properties=None):
        """Build the configuration space of tunable hyperparameters.

        The space holds ``loss``, ``fit_intercept``, ``n_iter`` and ``C``
        (added in that order). ``dataset_properties`` is accepted but not
        consulted.
        """
        loss_kind = CategoricalHyperparameter(
            "loss", ["hinge", "squared_hinge"], default="hinge")
        # fit_intercept is fixed to the string "True" rather than searched.
        intercept = UnParametrizedHyperparameter("fit_intercept", "True")
        iterations = UniformIntegerHyperparameter(
            "n_iter", 5, 1000, default=20, log=True)
        penalty = UniformFloatHyperparameter("C", 1e-5, 10, 1, log=True)

        space = ConfigurationSpace()
        for hyperparameter in (loss_kind, intercept, iterations, penalty):
            space.add_hyperparameter(hyperparameter)
        return space
class PassiveAggressive(AutoSklearnClassificationAlgorithm):
    """Passive-aggressive classifier trained incrementally via ``partial_fit``.

    ``iterative_fit`` performs one ``partial_fit`` call per invocation and
    tracks progress in ``self._iterations``; ``self.fully_fit_`` is set once
    that counter reaches the configured ``n_iter``. Targets with more than
    one column are handled by a one-shot ``OneVsRestClassifier`` fit instead.
    """

    def __init__(self, C, fit_intercept, n_iter, loss, random_state=None):
        """Store the configuration; the estimator is created on first fit.

        Args:
            C: converted with ``float`` and passed to the estimator as ``C``.
            fit_intercept: treated as true only when equal to the string
                ``'True'``.
            n_iter: converted with ``int``; the iteration budget after which
                the model counts as fully fitted.
            loss: passed to the estimator unchanged.
            random_state: passed to the estimator unchanged.
        """
        super(PassiveAggressive, self).__init__()
        self.C = float(C)
        self.fit_intercept = fit_intercept == 'True'
        self.n_iter = int(n_iter)
        self.loss = loss
        self.random_state = random_state
        self.estimator = None

    def fit(self, X, y):
        """Call ``iterative_fit`` one step at a time until fully fitted.

        Returns:
            ``self``.
        """
        while not self.configuration_fully_fitted():
            self.iterative_fit(X, y, n_iter=1)

        return self

    def iterative_fit(self, X, y, n_iter=1, refit=False):
        """Advance training by ``n_iter`` counted iterations.

        Args:
            X: training inputs, handed to the estimator unchanged.
            y: training targets; must provide ``shape`` and ``astype``
                (e.g. a NumPy array). More than one column triggers the
                one-vs-rest fallback.
            n_iter: amount added to the progress counter for this call.
            refit: when true, discard the current estimator and start a new
                training run.

        Returns:
            ``self``.
        """
        # Public import path; the private ``passive_aggressive`` submodule is
        # not part of scikit-learn's supported API.
        from sklearn.linear_model import PassiveAggressiveClassifier

        if refit:
            self.estimator = None

        if self.estimator is None:
            self._iterations = 0
            # A new estimator starts a new training run, so a completion flag
            # left over from an earlier run (e.g. before refit=True) must not
            # make configuration_fully_fitted() report success prematurely.
            self.fully_fit_ = False

            self.estimator = PassiveAggressiveClassifier(
                C=self.C, fit_intercept=self.fit_intercept, n_iter=1,
                loss=self.loss, shuffle=True, random_state=self.random_state,
                warm_start=True)
            self.classes_ = np.unique(y.astype(int))

        # Fallback for multilabel classification
        if len(y.shape) > 1 and y.shape[1] > 1:
            from sklearn.multiclass import OneVsRestClassifier

            # The wrapped estimator gets the whole iteration budget because
            # the one-vs-rest model is fitted in a single call.
            self.estimator.n_iter = self.n_iter
            self.estimator = OneVsRestClassifier(self.estimator, n_jobs=1)
            self.estimator.fit(X, y)
            self.fully_fit_ = True
        else:
            self.estimator.n_iter = n_iter
            self.estimator.partial_fit(X, y, classes=np.unique(y))
            # Count this call before checking the budget; checking first
            # would require one call more than the configured n_iter.
            self._iterations += n_iter
            if self._iterations >= self.n_iter:
                self.fully_fit_ = True

        return self

    def configuration_fully_fitted(self):
        """Report whether training has finished.

        Returns:
            ``False`` while no estimator exists or ``fully_fit_`` has not
            been set; otherwise the stored ``fully_fit_`` value.
        """
        if self.estimator is None:
            return False
        elif not hasattr(self, 'fully_fit_'):
            return False
        else:
            return self.fully_fit_

    def predict(self, X):
        """Return the wrapped estimator's predictions for ``X``.

        Raises:
            NotImplementedError: if no estimator has been created yet.
        """
        if self.estimator is None:
            raise NotImplementedError()
        return self.estimator.predict(X)

    def predict_proba(self, X):
        """Return ``softmax`` applied to the estimator's decision values.

        Raises:
            NotImplementedError: if no estimator has been created yet.
        """
        if self.estimator is None:
            raise NotImplementedError()

        df = self.estimator.decision_function(X)
        return softmax(df)

    @staticmethod
    def get_properties(dataset_properties=None):
        """Return a dictionary of descriptive flags for this classifier.

        ``dataset_properties`` is accepted but not consulted.
        """
        return {'shortname': 'PassiveAggressive Classifier',
                'name': 'Passive Aggressive Classifier',
                'handles_regression': False,
                'handles_classification': True,
                'handles_multiclass': True,
                'handles_multilabel': True,
                'is_deterministic': True,
                'input': (DENSE, SPARSE, UNSIGNED_DATA),
                'output': (PREDICTIONS,)}

    @staticmethod
    def get_hyperparameter_search_space(dataset_properties=None):
        """Build the configuration space of tunable hyperparameters.

        The space holds ``loss``, ``fit_intercept``, ``n_iter`` and ``C``
        (added in that order). ``dataset_properties`` is accepted but not
        consulted.
        """
        loss = CategoricalHyperparameter("loss",
                                         ["hinge", "squared_hinge"],
                                         default="hinge")
        # fit_intercept is fixed to the string "True" rather than searched.
        fit_intercept = UnParametrizedHyperparameter("fit_intercept", "True")
        n_iter = UniformIntegerHyperparameter("n_iter", 5, 1000, default=20,
                                              log=True)
        C = UniformFloatHyperparameter("C", 1e-5, 10, 1, log=True)
        cs = ConfigurationSpace()
        cs.add_hyperparameter(loss)
        cs.add_hyperparameter(fit_intercept)
        cs.add_hyperparameter(n_iter)
        cs.add_hyperparameter(C)
        return cs