def test_fit_transform(self):
    """Compare the fit_transform output of two freshly built transformers.

    For each setting of ``missing_value_aware`` (``False`` first, then
    ``True``), two transformers are built from the dtypes of the sample
    frame, each is fitted and applied to that frame, and the results are
    compared as nested lists.
    """
    X = _data()
    # NOTE(review): both sides come from make_xgboost_column_transformer, so
    # this only checks that two independent builds produce the same output.
    # Confirm whether one side was meant to use a different builder.
    for aware in (False, True):
        first = make_xgboost_column_transformer(X.dtypes, missing_value_aware = aware)
        second = make_xgboost_column_transformer(X.dtypes, missing_value_aware = aware)
        first_rows = first.fit_transform(X).tolist()
        second_rows = second.fit_transform(X).tolist()
        self.assertEqual(first_rows, second_rows)
for cont_column in cont_columns]) dtypes = Series(dtypes.values, index=[0, 1, 2, 3, 4, 5, 6]) lightgbm_mapper, lightgbm_categorical_feature = make_lightgbm_column_transformer( dtypes, missing_value_aware=True) lightgbm_pipeline = Pipeline([ ("mapper", lightgbm_mapper), ("classifier", LGBMClassifier(n_estimators=31, max_depth=3, random_state=13, categorical_feature=lightgbm_categorical_feature)) ]) xgboost_mapper = make_xgboost_column_transformer(dtypes, missing_value_aware=True) xgboost_pipeline = Pipeline([("mapper", xgboost_mapper), ("classifier", XGBClassifier(n_estimators=31, learning_rate=0.1, max_depth=3, random_state=13))]) sklearn_mapper = ColumnTransformer( [(str(cat_index), PMMLLabelBinarizer(sparse_output=False), [cat_index]) for cat_index in range(0, len(cat_columns))] + [(str(cont_index), "passthrough", [cont_index]) for cont_index in range(len(cat_columns), len(cat_columns + cont_columns)) ], remainder="drop")