Beispiel #1
0
	def test_fit_transform(self):
		"""Check that the column transformer and dataframe mapper factories agree.

		For both settings of ``missing_value_aware``, the two LightGBM factories
		are built from the same dtypes, fitted on the same data, and must yield
		equal transformed values and the categorical feature indices ``[0, 1, 3]``.
		"""
		X = _data()
		expected_categorical_feature = [0, 1, 3]
		for missing_value_aware in (False, True):
			ct, ct_categorical_feature = make_lightgbm_column_transformer(X.dtypes, missing_value_aware = missing_value_aware)
			dfm, dfm_categorical_feature = make_lightgbm_dataframe_mapper(X.dtypes, missing_value_aware = missing_value_aware)
			# Fit the column transformer first, then the dataframe mapper.
			ct_values = ct.fit_transform(X).tolist()
			dfm_values = dfm.fit_transform(X).tolist()
			self.assertEqual(ct_values, dfm_values)
			self.assertEqual(expected_categorical_feature, ct_categorical_feature)
			self.assertEqual(expected_categorical_feature, dfm_categorical_feature)
# Feature columns grouped by kind; "Adjusted" is read separately as the target.
cat_columns = ["Education", "Employment", "Marital", "Occupation"]
cont_columns = ["Age", "Hours", "Income"]

# Categorical columns come first, continuous columns last.
df_X = df[cat_columns + cont_columns]
df_y = df["Adjusted"]

dtypes = df_X.dtypes

# One single-column step per feature, named after the column it wraps:
# categorical columns get a CategoricalDomain, continuous ones a ContinuousDomain.
mapper = ColumnTransformer(
    [(cat_column, CategoricalDomain(), [cat_column]) for cat_column in cat_columns] +
    [(cont_column, ContinuousDomain(), [cont_column]) for cont_column in cont_columns]
)

# Re-key the dtypes by column position instead of column name. The positions
# are derived from the actual number of columns so that editing cat_columns or
# cont_columns cannot leave a stale, wrong-length index behind.
# NOTE(review): presumably the transformer factories called with `dtypes`
# address columns by position - confirm against their implementation.
dtypes = Series(dtypes.values, index=list(range(len(dtypes))))

# LightGBM: the factory returns both the feature mapper and the list of
# categorical features, which is handed to the classifier below.
(lightgbm_mapper,
 lightgbm_categorical_feature) = make_lightgbm_column_transformer(dtypes, missing_value_aware=True)
lightgbm_pipeline = Pipeline([
    ("mapper", lightgbm_mapper),
    (
        "classifier",
        LGBMClassifier(
            n_estimators=31,
            max_depth=3,
            random_state=13,
            categorical_feature=lightgbm_categorical_feature,
        ),
    ),
])

# XGBoost: this factory returns only the feature mapper.
xgboost_mapper = make_xgboost_column_transformer(dtypes, missing_value_aware=True)
xgboost_pipeline = Pipeline([("mapper", xgboost_mapper),
                             ("classifier",
                              XGBClassifier(n_estimators=31,
                                            learning_rate=0.1,