def test_is_categorical(self):
    """Check ``_is_categorical`` on one Series as it is cast through several dtypes."""
    series = Series(["True", "False", "True"], name="x", dtype=str)
    self.assertTrue(_is_categorical(series.dtype))
    # The casts are applied cumulatively, in this order, to the same Series;
    # each entry pairs the cast target with the expected verdict.
    cast_expectations = (
        (bool, True),
        (float, False),
        ("category", True),
        (int, False),
    )
    for target_dtype, expect_categorical in cast_expectations:
        series = series.astype(target_dtype)
        verdict = _is_categorical(series.dtype)
        if expect_categorical:
            self.assertTrue(verdict)
        else:
            self.assertFalse(verdict)
def make_xgboost_dataframe_mapper(dtypes, missing_value_aware=True):
    """Construct a DataFrameMapper for feeding complex data into an XGBModel.

    Parameters
    ----------
    dtypes: mapping-like of column to dtype, or iterable of tuples (column, dtype)
        An object exposing a callable ``items()`` (for example the ``dtypes``
        attribute of a pandas DataFrame) is iterated through that method.
        Any other iterable must yield ``(column, dtype)`` pairs directly.

    missing_value_aware: boolean
        If true, use missing value aware transformers.

    Returns
    -------
    DataFrameMapper
    """
    # Accept both mapping-like objects and plain iterables of pairs, so that
    # the documented "iterable of tuples" input does not fail on ``.items()``.
    items = getattr(dtypes, "items", None)
    pairs = items() if callable(items) else dtypes

    features = []
    for column, dtype in pairs:
        if _is_categorical(dtype):
            if missing_value_aware:
                binarizer = PMMLLabelBinarizer(sparse_output=True)
            else:
                binarizer = LabelBinarizer(sparse_output=True)
            features.append(([column], binarizer))
        else:
            # Non-categorical columns are paired with None instead of a
            # transformer (presumably DataFrameMapper passes them through).
            features.append(([column], None))
    return DataFrameMapper(features)
def make_xgboost_column_transformer(dtypes, missing_value_aware=True):
    """Construct a ColumnTransformer for feeding complex data into an XGBModel.

    Parameters
    ----------
    dtypes: mapping-like of column to dtype, or iterable of tuples (column, dtype)
        An object exposing a callable ``items()`` (for example the ``dtypes``
        attribute of a pandas DataFrame) is iterated through that method.
        Any other iterable must yield ``(column, dtype)`` pairs directly.

    missing_value_aware: boolean
        If true, use missing value aware transformers.

    Returns
    -------
    ColumnTransformer
    """
    # Accept both mapping-like objects and plain iterables of pairs, so that
    # the documented "iterable of tuples" input does not fail on ``.items()``.
    items = getattr(dtypes, "items", None)
    pairs = items() if callable(items) else dtypes

    transformers = []
    for column, dtype in pairs:
        if _is_categorical(dtype):
            if missing_value_aware:
                encoder = PMMLLabelBinarizer(sparse_output=True)
            else:
                # Two-step encoding: ordinal codes first, then one-hot.
                encoder = Pipeline([
                    ("ordinal_encoder", OrdinalEncoder()),
                    ("one_hot_encoder", OneHotEncoder()),
                ])
            transformers.append((column, encoder, [column]))
        else:
            transformers.append((column, "passthrough", [column]))
    # Each transformer is named after, and applied to, its single column;
    # columns not listed in ``dtypes`` are dropped.
    return ColumnTransformer(transformers, remainder="drop")
def make_lightgbm_dataframe_mapper(dtypes, missing_value_aware=True):
    """Construct a DataFrameMapper for feeding complex data into a LGBMModel.

    Parameters
    ----------
    dtypes: mapping-like of column to dtype, or iterable of tuples (column, dtype)
        An object exposing a callable ``items()`` (for example the ``dtypes``
        attribute of a pandas DataFrame) is iterated through that method.
        Any other iterable must yield ``(column, dtype)`` pairs directly.

    missing_value_aware: boolean
        If true, use missing value aware transformers.

    Returns
    -------
    Tuple (DataFrameMapper, list of categorical column indices)
        The indices are the zero-based positions of the categorical columns
        in the iteration order of ``dtypes``.
    """
    # Accept both mapping-like objects and plain iterables of pairs, so that
    # the documented "iterable of tuples" input does not fail on ``.items()``.
    items = getattr(dtypes, "items", None)
    pairs = items() if callable(items) else dtypes

    features = []
    categorical_features = []
    for index, (column, dtype) in enumerate(pairs):
        if _is_categorical(dtype):
            if missing_value_aware:
                encoder = PMMLLabelEncoder(missing_values=-1)
            else:
                encoder = LabelEncoder()
            features.append(([column], encoder))
            categorical_features.append(index)
        else:
            # Non-categorical columns are paired with None instead of a
            # transformer (presumably DataFrameMapper passes them through).
            features.append(([column], None))
    return (DataFrameMapper(features), categorical_features)
def make_lightgbm_column_transformer(dtypes, missing_value_aware=True):
    """Construct a ColumnTransformer for feeding complex data into a LGBMModel.

    Parameters
    ----------
    dtypes: mapping-like of column to dtype, or iterable of tuples (column, dtype)
        An object exposing a callable ``items()`` (for example the ``dtypes``
        attribute of a pandas DataFrame) is iterated through that method.
        Any other iterable must yield ``(column, dtype)`` pairs directly.

    missing_value_aware: boolean
        If true, use missing value aware transformers.

    Returns
    -------
    Tuple (ColumnTransformer, list of categorical column indices)
        The indices are the zero-based positions of the categorical columns
        in the iteration order of ``dtypes``.
    """
    # Accept both mapping-like objects and plain iterables of pairs, so that
    # the documented "iterable of tuples" input does not fail on ``.items()``.
    items = getattr(dtypes, "items", None)
    pairs = items() if callable(items) else dtypes

    transformers = []
    categorical_features = []
    for index, (column, dtype) in enumerate(pairs):
        if _is_categorical(dtype):
            if missing_value_aware:
                encoder = PMMLLabelEncoder(missing_values=-1)
            else:
                encoder = OrdinalEncoder()
            transformers.append((column, encoder, [column]))
            categorical_features.append(index)
        else:
            transformers.append((column, "passthrough", [column]))
    # Columns not listed in ``dtypes`` are dropped by the transformer.
    return (ColumnTransformer(transformers, remainder="drop"), categorical_features)