Exemple #1
0
 def test_is_categorical(self):
     """Check _is_categorical against str, bool, float, category and int dtypes.

     The same series is cast step by step, so every cast starts from the
     result of the previous one.
     """
     series = Series(["True", "False", "True"], name="x", dtype=str)
     self.assertTrue(_is_categorical(series.dtype))
     # (target dtype, whether the resulting dtype must count as categorical)
     expectations = (
         (bool, True),
         (float, False),
         ("category", True),
         (int, False),
     )
     for target, categorical in expectations:
         series = series.astype(target)
         check = self.assertTrue if categorical else self.assertFalse
         check(_is_categorical(series.dtype))
Exemple #2
0
def make_xgboost_dataframe_mapper(dtypes, missing_value_aware=True):
    """Construct a DataFrameMapper for feeding complex data into an XGBModel.

    Columns whose dtype is categorical (as decided by ``_is_categorical``)
    are paired with a label binarizer producing sparse output; every other
    column is registered with ``None`` as its transformer.

    Parameters
    ----------

    dtypes: mapping of column to dtype, or iterable of tuples (column, dtype)
        An object exposing ``items()`` (e.g. a dict or a pandas Series) is
        iterated through ``items()``; any other iterable must yield
        (column, dtype) pairs directly.

    missing_value_aware: boolean
        If true, use missing value aware transformers.

    Returns
    -------
    DataFrameMapper

    """
    # The documented input is an iterable of pairs, so items() is optional.
    column_dtypes = dtypes.items() if hasattr(dtypes, "items") else dtypes
    features = []
    for column, dtype in column_dtypes:
        if not _is_categorical(dtype):
            features.append(([column], None))
            continue
        if missing_value_aware:
            binarizer = PMMLLabelBinarizer(sparse_output=True)
        else:
            binarizer = LabelBinarizer(sparse_output=True)
        features.append(([column], binarizer))
    return DataFrameMapper(features)
Exemple #3
0
def make_xgboost_column_transformer(dtypes, missing_value_aware=True):
    """Construct a ColumnTransformer for feeding complex data into an XGBModel.

    Columns whose dtype is categorical (as decided by ``_is_categorical``)
    are encoded; every other column is passed through. Columns not listed
    in ``dtypes`` are dropped (``remainder="drop"``).

    Parameters
    ----------

    dtypes: mapping of column to dtype, or iterable of tuples (column, dtype)
        An object exposing ``items()`` (e.g. a dict or a pandas Series) is
        iterated through ``items()``; any other iterable must yield
        (column, dtype) pairs directly.

    missing_value_aware: boolean
        If true, use missing value aware transformers.

    Returns
    -------
    ColumnTransformer

    """
    # The documented input is an iterable of pairs, so items() is optional.
    column_dtypes = dtypes.items() if hasattr(dtypes, "items") else dtypes
    transformers = []
    for column, dtype in column_dtypes:
        if not _is_categorical(dtype):
            transformers.append((column, "passthrough", [column]))
            continue
        if missing_value_aware:
            encoder = PMMLLabelBinarizer(sparse_output=True)
        else:
            # Ordinal-encode first, then one-hot encode the ordinal codes.
            encoder = Pipeline([
                ("ordinal_encoder", OrdinalEncoder()),
                ("one_hot_encoder", OneHotEncoder()),
            ])
        transformers.append((column, encoder, [column]))
    return ColumnTransformer(transformers, remainder="drop")
Exemple #4
0
def make_lightgbm_dataframe_mapper(dtypes, missing_value_aware=True):
    """Construct a DataFrameMapper for feeding complex data into a LGBMModel.

    Columns whose dtype is categorical (as decided by ``_is_categorical``)
    are paired with a label encoder and their position is recorded; every
    other column is registered with ``None`` as its transformer.

    Parameters
    ----------

    dtypes: mapping of column to dtype, or iterable of tuples (column, dtype)
        An object exposing ``items()`` (e.g. a dict or a pandas Series) is
        iterated through ``items()``; any other iterable must yield
        (column, dtype) pairs directly.

    missing_value_aware: boolean
        If true, use missing value aware transformers.

    Returns
    -------
    Tuple (DataFrameMapper, list of categorical columns indices)
        The indices are positions in the iteration order of ``dtypes``.

    """
    # The documented input is an iterable of pairs, so items() is optional.
    column_dtypes = dtypes.items() if hasattr(dtypes, "items") else dtypes
    features = []
    categorical_features = []
    for index, (column, dtype) in enumerate(column_dtypes):
        if not _is_categorical(dtype):
            features.append(([column], None))
            continue
        if missing_value_aware:
            encoder = PMMLLabelEncoder(missing_values=-1)
        else:
            encoder = LabelEncoder()
        features.append(([column], encoder))
        categorical_features.append(index)
    return (DataFrameMapper(features), categorical_features)
Exemple #5
0
def make_lightgbm_column_transformer(dtypes, missing_value_aware=True):
    """Construct a ColumnTransformer for feeding complex data into a LGBMModel.

    Columns whose dtype is categorical (as decided by ``_is_categorical``)
    are encoded and their position is recorded; every other column is
    passed through. Columns not listed in ``dtypes`` are dropped
    (``remainder="drop"``).

    Parameters
    ----------

    dtypes: mapping of column to dtype, or iterable of tuples (column, dtype)
        An object exposing ``items()`` (e.g. a dict or a pandas Series) is
        iterated through ``items()``; any other iterable must yield
        (column, dtype) pairs directly.

    missing_value_aware: boolean
        If true, use missing value aware transformers.

    Returns
    -------
    Tuple (ColumnTransformer, list of categorical column indices)
        The indices are positions in the iteration order of ``dtypes``.

    """
    # The documented input is an iterable of pairs, so items() is optional.
    column_dtypes = dtypes.items() if hasattr(dtypes, "items") else dtypes
    transformers = []
    categorical_features = []
    for index, (column, dtype) in enumerate(column_dtypes):
        if not _is_categorical(dtype):
            transformers.append((column, "passthrough", [column]))
            continue
        if missing_value_aware:
            encoder = PMMLLabelEncoder(missing_values=-1)
        else:
            encoder = OrdinalEncoder()
        transformers.append((column, encoder, [column]))
        categorical_features.append(index)
    return (ColumnTransformer(transformers, remainder="drop"),
            categorical_features)