def test_use_categorical_feature(self):
    # Train while passing explicit categorical-feature column indexes,
    # then report accuracy on the held-out split.
    classifier = LGBMClassifier(n_estimators=2000, verbose=100)
    classifier.fit(
        self.X_train, self.y_train, self.X_test, self.y_test,
        categorical_feature=self.cat_indexes.tolist(),
    )
    print(classifier.score(self.X_test, self.y_test))
def test_sample_weight(self):
    # Fit with class-balanced per-sample weights and report test accuracy.
    model = LGBMClassifier(n_estimators=2000, verbose=100)
    weights = calc_balanced_sample_weight(self.y_train)
    model.fit(
        self.X_train, self.y_train, self.X_test, self.y_test,
        sample_weight=weights,
    )
    print(model.score(self.X_test, self.y_test))
def get_model(self):
    """Instantiate the configured base estimator.

    Selects a classifier or regressor according to ``self.base_model`` and
    ``self.is_classification``; any user-supplied ``self.model_params``
    override the defaults of every candidate parameter set.
    """
    lgbm_params = dict(
        boosting_type="gbdt", learning_rate=0.1, max_depth=31, num_leaves=31,
        feature_fraction=0.8, bagging_fraction=0.8, bagging_freq=1,
        random_state=self.random_state, lambda_l1=0.1, lambda_l2=0.2,
        subsample_for_bin=40000, min_child_weight=0.01, verbose=-1,
        n_jobs=self.n_jobs, n_estimators=10)
    rf_params = dict(
        n_estimators=10, random_state=self.random_state, n_jobs=self.n_jobs,
        min_samples_leaf=10, min_samples_split=10)
    ridge_params = dict(normalize=True, random_state=self.random_state)
    lasso_params = dict(normalize=True, random_state=self.random_state)
    lr_params = dict(penalty="l1", solver="saga", C=0.1)
    # Overrides are applied uniformly to all parameter sets, matching the
    # original behavior (even sets for models that won't be instantiated).
    if self.model_params:
        for params in (lgbm_params, rf_params, ridge_params,
                       lasso_params, lr_params):
            params.update(self.model_params)
    # base_model -> (classifier_cls, clf_params, regressor_cls, reg_params)
    dispatch = {
        "lgbm": (LGBMClassifier, lgbm_params, LGBMRegressor, lgbm_params),
        "rf": (RandomForestClassifier, rf_params, RandomForestRegressor, rf_params),
        "et": (ExtraTreesClassifier, rf_params, ExtraTreesRegressor, rf_params),
        "ridge": (RidgeClassifier, ridge_params, Ridge, ridge_params),
        # The classification counterpart of "lasso" is an L1-penalized
        # logistic regression (as in the original implementation).
        "lasso": (LogisticRegression, lr_params, Lasso, lasso_params),
    }
    base_model = self.base_model
    if base_model not in dispatch:
        raise ValueError(f"Unknown base_model {base_model}")
    clf_cls, clf_params, reg_cls, reg_params = dispatch[base_model]
    if self.is_classification:
        return clf_cls(**clf_params)
    return reg_cls(**reg_params)
def test_warm_start(self):
    # Grow the same estimator in stages by raising n_estimators between
    # successive fit() calls, printing accuracy after each stage.
    model = LGBMClassifier(verbose=16)
    # Historical observations: 0.8764 @ 1618 / 0.8749 @ 1557
    for n_estimators in [128, 512, 2048, 4096]:
        model.n_estimators = n_estimators
        model.fit(self.X_train, self.y_train, self.X_test, self.y_test)
        acc = model.score(self.X_test, self.y_test)
        print(f"n_estimator = {n_estimators}, accuracy = {acc:.4f}")
def test_multiclass(self):
    """Check multiclass predict_proba: 10 columns, rows summing to 1.

    Fits on the digits dataset (10 classes) and validates the shape and
    normalization of the predicted probability matrix.
    """
    X, y = load_digits(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.33, random_state=42)
    lgbm = LGBMClassifier(n_estimators=5000, verbose=100)
    lgbm.fit(X_train, y_train, X_test, y_test)
    print(lgbm.score(X_test, y_test))
    y_score = lgbm.predict_proba(X_test)
    assert y_score.shape[1] == 10
    # BUG FIX: the tolerance was written as 1e5, which made this assertion
    # vacuously true for any probability matrix. Each row of class
    # probabilities must sum to 1 within a tight tolerance (1e-5).
    assert np.all(np.abs(y_score.sum(axis=1) - 1) < 1e-5)
def test_no_validate_set(self):
    # Fit without passing an explicit validation set, then score on the
    # held-out split.
    model = LGBMClassifier(n_estimators=100, verbose=10)
    model.fit(self.X_train, self.y_train)
    print(model.score(self.X_test, self.y_test))
# NOTE(review): this is the tail of a fit/train helper whose `def` lies
# outside this chunk — it logs the fitted model's settings and timing,
# min-max scales the model's feature importances to [0, 1], and returns
# (model, cost_time, scaled_importances). Reconstructed formatting only;
# confirm the enclosing method's signature in the full file.
self.logger.info(
    f"{model.__class__.__name__}{msg} max_iters = {model.n_estimators}, "
    f"budget = {budget:.2f}, cost_time = {cost_time:.2f} .")
feature_importance = feat_imp_callback(model)
scaler = MinMaxScaler()
# Column vector in (shape (n, 1)), flat 1-D array out.
feature_importance_ = scaler.fit_transform(
    feature_importance[:, None]).flatten()
return model, cost_time, feature_importance_


# Demo entry point: feature selection + LightGBM pipeline on the digits
# dataset, printing the test-set accuracy.
if __name__ == '__main__':
    from sklearn.datasets import load_digits
    from autoflow.utils.logging_ import setup_logger
    from sklearn.pipeline import Pipeline
    from sklearn.model_selection import train_test_split

    setup_logger()
    X, y = load_digits(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
    pipe = Pipeline([("selector", AdaptiveFeatureSelector(percentage=50)),
                     ("lgbm", LGBMClassifier(n_estimators=500))])
    pipe.fit(X_train, y_train)
    score = pipe.score(X_test, y_test)
    print(score)