def scaleDataColumns(df, columns=None):
    """
    Scale columnwise, only certain columns.

    The selected columns are standardized with ``StandardScaler``; every other
    column is forwarded unchanged (``remainder="passthrough"``). The returned
    column names are whatever ``ColumnTransformer.get_feature_names_out()``
    reports for the fitted transformer.

    :param df: dataframe, no scaled data
    :param columns: names of the columns to scale; when ``None`` the fixed
        list ``ts_listen``, ``media_id``, ``release_date``, ``media_duration``,
        ``user_id``, ``artist_id`` is used
    :return: dataframe, with scaled data, carrying the same row index as ``df``
    """
    if columns is None:
        columns = [
            "ts_listen",
            "media_id",
            "release_date",
            "media_duration",
            "user_id",
            "artist_id",
        ]

    enc = ColumnTransformer(
        remainder="passthrough",
        transformers=[("std", StandardScaler(), list(columns))],
    )
    scaled_values = enc.fit_transform(df)

    # Re-attach the caller's index: building the frame from the bare array
    # would otherwise replace it with a fresh RangeIndex and break any later
    # alignment/join against the original data.
    return pd.DataFrame(
        scaled_values,
        columns=enc.get_feature_names_out(),
        index=df.index,
    )
# :meth:`compose.ColumnTransformer.get_feature_names_out` is available to
# combine feature names of its transformers:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
import pandas as pd

X = pd.DataFrame({"pet": ["dog", "cat", "fish"], "age": [3, 7, 1]})
preprocessor = ColumnTransformer(
    [
        ("numerical", StandardScaler(), ["age"]),
        ("categorical", OneHotEncoder(), ["pet"]),
    ],
    verbose_feature_names_out=False,
).fit(X)

preprocessor.get_feature_names_out()

# %%
# When this ``preprocessor`` is used with a pipeline, the feature names used
# by the classifier are obtained by slicing and calling
# :term:`get_feature_names_out`:
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline

y = [1, 0, 1]
pipe = make_pipeline(preprocessor, LogisticRegression())
pipe.fit(X, y)
pipe[:-1].get_feature_names_out()

##############################################################################