def test_ordinal_encoder_features_names_out_pandas():
    """Feature names out must equal the DataFrame column names, in order."""
    pandas = pytest.importorskip("pandas")

    # Deliberately not in alphabetical order, so any reordering would show up.
    columns = ["b", "c", "a"]
    frame = pandas.DataFrame([[1, 2, 3]], columns=columns)

    encoder = OrdinalEncoder()
    encoder.fit(frame)

    assert_array_equal(columns, encoder.get_feature_names_out())
# `sparse=False` is used in the `OneHotEncoder` for didactic purposes, namely # easier visualization of the data. # # Sparse matrices are efficient data structures when most of your matrix # elements are zero. They won't be covered in detail in this course. If you # want more details about them, you can look at # [this](https://scipy-lectures.org/advanced/scipy_sparse/introduction.html#why-sparse-matrices). # ``` # %% [markdown] # We see that encoding a single feature will give a NumPy array full of zeros # and ones. We can get a better understanding using the associated feature # names resulting from the transformation. # %% feature_names = encoder.get_feature_names_out(input_features=["education"]) education_encoded = pd.DataFrame(education_encoded, columns=feature_names) education_encoded # %% [markdown] # As we can see, each category (unique value) became a column; the encoding # returned, for each sample, a 1 to specify which category it belongs to. # # Let's apply this encoding on the full dataset. # %% print(f"The dataset is composed of {data_categorical.shape[1]} features") data_categorical.head() # %% data_encoded = encoder.fit_transform(data_categorical)