# Test methods excerpted from an LGBMClassifier test suite; the enclosing
# TestCase class (which supplies self.X_train, self.y_train, self.cat_indexes,
# etc.) and the project-internal imports are not shown in this extract.
import numpy as np
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split

def test_use_categorical_feature(self):
     # test categorical feature handling
     lgbm = LGBMClassifier(n_estimators=2000, verbose=100)
     lgbm.fit(self.X_train,
              self.y_train,
              self.X_test,
              self.y_test,
              categorical_feature=self.cat_indexes.tolist())
     print(lgbm.score(self.X_test, self.y_test))
 def test_sample_weight(self):
     lgbm = LGBMClassifier(n_estimators=2000, verbose=100)
     sample_weight = calc_balanced_sample_weight(self.y_train)
     lgbm.fit(self.X_train,
              self.y_train,
              self.X_test,
              self.y_test,
              sample_weight=sample_weight)
     print(lgbm.score(self.X_test, self.y_test))
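 # calc_balanced_sample_weight above is a project helper whose definition is
 # not shown in this extract. A minimal sketch of what such a helper typically
 # computes: inverse class-frequency weights, matching sklearn's
 # class_weight="balanced" heuristic (the name and formula are assumptions).
 def _balanced_sample_weight_sketch(y):
     classes, counts = np.unique(y, return_counts=True)
     # weight for class c: n_samples / (n_classes * count_c)
     class_weight = {c: len(y) / (len(classes) * n)
                     for c, n in zip(classes, counts)}
     return np.asarray([class_weight[label] for label in y])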
Example #3
 def get_model(self):
     lgbm_params = dict(boosting_type="gbdt",
                        learning_rate=0.1,
                        max_depth=31,
                        num_leaves=31,
                        feature_fraction=0.8,
                        bagging_fraction=0.8,
                        bagging_freq=1,
                        random_state=self.random_state,
                        lambda_l1=0.1,
                        lambda_l2=0.2,
                        subsample_for_bin=40000,
                        min_child_weight=0.01,
                        verbose=-1,
                        n_jobs=self.n_jobs,
                        n_estimators=10)
     rf_params = dict(n_estimators=10,
                      random_state=self.random_state,
                      n_jobs=self.n_jobs,
                      min_samples_leaf=10,
                      min_samples_split=10)
     # note: the "normalize" flag was deprecated in scikit-learn 1.0 and
     # removed in 1.2; on newer versions, scale the features explicitly instead
     ridge_params = dict(normalize=True, random_state=self.random_state)
     lasso_params = dict(normalize=True, random_state=self.random_state)
     lr_params = dict(penalty="l1", solver="saga", C=0.1)
     if self.model_params:
         for params in (lgbm_params, rf_params, ridge_params, lasso_params,
                        lr_params):
             params.update(self.model_params)
     base_model = self.base_model
     is_classification = self.is_classification
     if base_model == "lgbm":
         if is_classification:
             return LGBMClassifier(**lgbm_params)
         else:
             return LGBMRegressor(**lgbm_params)
     elif base_model == "rf":
         if is_classification:
             return RandomForestClassifier(**rf_params)
         else:
             return RandomForestRegressor(**rf_params)
     elif base_model == "et":
         if is_classification:
             return ExtraTreesClassifier(**rf_params)
         else:
             return ExtraTreesRegressor(**rf_params)
     elif base_model == "ridge":
         if is_classification:
             return RidgeClassifier(**ridge_params)
         else:
             return Ridge(**ridge_params)
     elif base_model == "lasso":
         if is_classification:
             return LogisticRegression(**lr_params)
         else:
             return Lasso(**lasso_params)
     else:
         raise ValueError(f"Unknown base_model {base_model}")
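 # Usage sketch for get_model (hypothetical, not from the original code): it
 # dispatches on the base_model and is_classification attributes and merges
 # any model_params overrides into every preset. This assumes get_model
 # belongs to the AdaptiveFeatureSelector used in the __main__ block below
 # and that these attributes can be set directly on the instance.
 def _get_model_usage_sketch():
     selector = AdaptiveFeatureSelector(percentage=50)
     selector.base_model = "rf"           # one of: lgbm, rf, et, ridge, lasso
     selector.is_classification = True
     selector.model_params = {"n_estimators": 50}  # merged into every preset
     return selector.get_model()          # expected: a RandomForestClassifier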
 def test_warm_start(self):
     lgbm = LGBMClassifier(verbose=16)
     # results observed in earlier runs (accuracy and iteration count,
     # presumably): 0.8764 / 1618, 0.8749 / 1557
     for n_estimators in [128, 512, 2048, 4096]:
         lgbm.n_estimators = n_estimators
         lgbm.fit(self.X_train, self.y_train, self.X_test, self.y_test)
         acc = lgbm.score(self.X_test, self.y_test)
         print(f"n_estimator = {n_estimators}, accuracy = {acc:.4f}")
 def test_multiclass(self):
     X, y = load_digits(return_X_y=True)
     X_train, X_test, y_train, y_test = train_test_split(X,
                                                         y,
                                                         test_size=0.33,
                                                         random_state=42)
     lgbm = LGBMClassifier(n_estimators=5000, verbose=100)
     lgbm.fit(X_train, y_train, X_test, y_test)
     print(lgbm.score(X_test, y_test))
     y_score = lgbm.predict_proba(X_test)
     assert y_score.shape[1] == 10
     # class probabilities in each row should sum to 1 (within float tolerance)
     assert np.all(np.abs(y_score.sum(axis=1) - 1) < 1e-5)
 def test_no_validate_set(self):
     lgbm = LGBMClassifier(n_estimators=100, verbose=10)
     lgbm.fit(self.X_train, self.y_train)
     print(lgbm.score(self.X_test, self.y_test))
Example #7
# (fragment: this snippet begins mid-method, in the tail of a model-fitting
# routine that logs timing and extracts feature importances)
        self.logger.info(
            f"{model.__class__.__name__}{msg} max_iters = {model.n_estimators}, "
            f"budget = {budget:.2f}, cost_time = {cost_time:.2f} .")

        # min-max normalize the raw importances into [0, 1]
        feature_importance = feat_imp_callback(model)
        scaler = MinMaxScaler()
        feature_importance_ = scaler.fit_transform(
            feature_importance[:, None]).flatten()

        return model, cost_time, feature_importance_
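
# Toy demonstration of the MinMaxScaler rescaling applied to the feature
# importances above (illustrative only): raw importances are mapped linearly
# into [0, 1], so min -> 0 and max -> 1, making percentage-style thresholds
# comparable across estimator types.
#
#     >>> import numpy as np
#     >>> from sklearn.preprocessing import MinMaxScaler
#     >>> imp = np.array([3.0, 0.0, 12.0])
#     >>> MinMaxScaler().fit_transform(imp[:, None]).flatten()
#     array([0.25, 0.  , 1.  ])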


if __name__ == '__main__':
    from sklearn.datasets import load_digits
    from autoflow.utils.logging_ import setup_logger
    from sklearn.pipeline import Pipeline
    from sklearn.model_selection import train_test_split

    setup_logger()
    X, y = load_digits(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.33,
                                                        random_state=42)
    pipe = Pipeline([("selector", AdaptiveFeatureSelector(percentage=50)),
                     ("lgbm", LGBMClassifier(n_estimators=500))])
    pipe.fit(X_train, y_train)
    score = pipe.score(X_test, y_test)
    print(score)