Example 1
    def fit(self, X, Y, sample_weight=None):
        # Coerce hyperparameters to their numeric types; they may arrive
        # as strings when set from a sampled configuration.
        self.n_estimators = int(self.n_estimators)
        self.learning_rate = float(self.learning_rate)
        self.max_depth = int(self.max_depth)
        # Boost shallow decision trees of the configured depth.
        base_estimator = sklearn.tree.DecisionTreeClassifier(
            max_depth=self.max_depth)

        estimator = sklearn.ensemble.AdaBoostClassifier(
            base_estimator=base_estimator,
            n_estimators=self.n_estimators,
            learning_rate=self.learning_rate,
            algorithm=self.algorithm,
            random_state=self.random_state)

        if len(Y.shape) == 2 and Y.shape[1] > 1:
            # Multilabel target (2-D indicator matrix): wrap the booster so
            # that one AdaBoost model is fit per label column.
            self.estimator = MultilabelClassifier(estimator, n_jobs=1)
            self.estimator.fit(X, Y, sample_weight=sample_weight)
        else:
            # Binary/multiclass target: fit the booster directly.
            self.estimator = estimator
            self.estimator.fit(X, Y, sample_weight=sample_weight)

        return self
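The branch on Y.shape is what gives this component its multilabel support: a 2-D label-indicator matrix with more than one column is routed through a one-classifier-per-label wrapper, while an ordinary 1-D target goes straight to AdaBoost. Below is a minimal, self-contained sketch of the same dispatch; it uses scikit-learn's public OneVsRestClassifier as a stand-in for ParamSklearn's internal MultilabelClassifier, and the toy data is illustrative, not part of the original code:

import numpy as np
import sklearn.ensemble
import sklearn.tree
from sklearn.multiclass import OneVsRestClassifier  # stand-in wrapper

X = np.array([[0.0, 1.0], [1.0, 0.0], [1.0, 1.0], [0.0, 0.0]])
y_multiclass = np.array([0, 1, 1, 0])        # 1-D target vector
Y_multilabel = np.array([[1, 0], [0, 1],     # 2-D label-indicator matrix,
                         [1, 1], [0, 0]])    # one column per label

for Y in (y_multiclass, Y_multilabel):
    ada = sklearn.ensemble.AdaBoostClassifier(
        sklearn.tree.DecisionTreeClassifier(max_depth=1),
        n_estimators=50, learning_rate=0.1, random_state=1)
    if len(Y.shape) == 2 and Y.shape[1] > 1:
        clf = OneVsRestClassifier(ada, n_jobs=1)  # one AdaBoost per column
    else:
        clf = ada
    clf.fit(X, Y)
    print(clf.predict(X).shape)  # (4,) then (4, 2)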
Example 2
import numpy as np
import sklearn.ensemble
import sklearn.tree

# Module paths for the remaining imports are assumed from the ParamSklearn /
# HPOlibConfigSpace project layout and may differ between versions.
from HPOlibConfigSpace.configuration_space import ConfigurationSpace
from HPOlibConfigSpace.hyperparameters import CategoricalHyperparameter, \
    UniformFloatHyperparameter, UniformIntegerHyperparameter
from ParamSklearn.components.classification_base import \
    ParamSklearnClassificationAlgorithm
from ParamSklearn.constants import DENSE, SPARSE, UNSIGNED_DATA, PREDICTIONS
from ParamSklearn.implementations.MultilabelClassifier import \
    MultilabelClassifier


class AdaboostClassifier(ParamSklearnClassificationAlgorithm):

    def __init__(self, n_estimators, learning_rate, algorithm, max_depth,
                 random_state=None):
        self.n_estimators = int(n_estimators)
        self.learning_rate = float(learning_rate)
        self.algorithm = algorithm
        self.random_state = random_state
        self.max_depth = max_depth
        self.estimator = None

    def fit(self, X, Y, sample_weight=None):
        self.n_estimators = int(self.n_estimators)
        self.learning_rate = float(self.learning_rate)
        self.max_depth = int(self.max_depth)
        base_estimator = sklearn.tree.DecisionTreeClassifier(max_depth=self.max_depth)

        estimator = sklearn.ensemble.AdaBoostClassifier(
            base_estimator=base_estimator,
            n_estimators=self.n_estimators,
            learning_rate=self.learning_rate,
            algorithm=self.algorithm,
            random_state=self.random_state
        )

        if len(Y.shape) == 2 and Y.shape[1] > 1:
            self.estimator = MultilabelClassifier(estimator, n_jobs=1)
            self.estimator.fit(X, Y, sample_weight=sample_weight)
        else:
            self.estimator = estimator
            self.estimator.fit(X, Y, sample_weight=sample_weight)

        return self

    def predict(self, X):
        if self.estimator is None:
            raise NotImplementedError
        return self.estimator.predict(X)

    def predict_proba(self, X):
        if self.estimator is None:
            raise NotImplementedError()
        return self.estimator.predict_proba(X)

    @staticmethod
    def get_properties(dataset_properties=None):
        return {'shortname': 'AB',
                'name': 'AdaBoost Classifier',
                'handles_missing_values': False,
                'handles_nominal_values': False,
                'handles_numerical_features': True,
                'prefers_data_scaled': False,
                'prefers_data_normalized': False,
                'handles_regression': False,
                'handles_classification': True,
                'handles_multiclass': True,
                'handles_multilabel': True,
                'is_deterministic': True,
                'handles_sparse': False,
                'input': (DENSE, SPARSE, UNSIGNED_DATA),
                'output': (PREDICTIONS,),
                # TODO: find out which dtype/layout works best here
                # (Fortran- or C-contiguous?).
                'preferred_dtype': np.float32}

    @staticmethod
    def get_hyperparameter_search_space(dataset_properties=None):
        cs = ConfigurationSpace()

        # base_estimator = Constant(name="base_estimator", value="None")
        n_estimators = cs.add_hyperparameter(UniformIntegerHyperparameter(
            name="n_estimators", lower=50, upper=500, default=50, log=False))
        learning_rate = cs.add_hyperparameter(UniformFloatHyperparameter(
            name="learning_rate", lower=0.0001, upper=2, default=0.1, log=True))
        algorithm = cs.add_hyperparameter(CategoricalHyperparameter(
            name="algorithm", choices=["SAMME.R", "SAMME"], default="SAMME.R"))
        max_depth = cs.add_hyperparameter(UniformIntegerHyperparameter(
            name="max_depth", lower=1, upper=10, default=1, log=False))
        return cs
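For completeness, a minimal usage sketch: instantiate the component with the defaults declared in the search space above and fit it on toy data. The data and values are illustrative assumptions, and the sampling lines at the end assume a ConfigSpace-style sample_configuration()/get_dictionary() API on the returned ConfigurationSpace:

import numpy as np

X = np.random.RandomState(1).rand(20, 4)
y = np.random.RandomState(1).randint(0, 2, size=20)

# Values mirror the defaults declared in get_hyperparameter_search_space().
clf = AdaboostClassifier(n_estimators=50, learning_rate=0.1,
                         algorithm="SAMME.R", max_depth=1, random_state=1)
clf.fit(X, y)
proba = clf.predict_proba(X)  # class-membership probabilities, one row per sample

# Driving it from the search space instead (assumes the ConfigSpace-style
# sampling API is available on the returned ConfigurationSpace):
cs = AdaboostClassifier.get_hyperparameter_search_space()
config = cs.sample_configuration()
clf = AdaboostClassifier(**config.get_dictionary())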