def test_rename_column_names_to_numeric():
    """Check ``_rename_column_names_to_numeric`` on three kinds of input.

    * a numpy array must come back equal to ``pd.DataFrame`` of that array;
    * a DataFrame with string column names must come back with columns 0 and 1;
    * a Woodwork DataTable with categorical columns must come back with
      columns 0 and 1 and with its Categorical logical types still attached.
    """
    # Case 1: plain numpy array.
    array_input = np.array([[1, 2], [3, 4]])
    pd.testing.assert_frame_equal(
        _rename_column_names_to_numeric(array_input),
        pd.DataFrame(array_input),
    )

    # Case 2: pandas DataFrame whose column names are non-numeric strings.
    frame_input = pd.DataFrame({"<>": [1, 2], ">>": [2, 4]})
    renamed_frame = _rename_column_names_to_numeric(frame_input)
    expected_frame = pd.DataFrame({0: [1, 2], 1: [2, 4]})
    pd.testing.assert_frame_equal(renamed_frame, expected_frame)

    # Case 3: Woodwork DataTable with both columns typed as categorical.
    raw_frame = pd.DataFrame({"<>": [1, 2], ">>": [2, 4]})
    requested_types = {"<>": "categorical", ">>": "categorical"}
    table_input = ww.DataTable(raw_frame, logical_types=requested_types)
    X_renamed = _rename_column_names_to_numeric(table_input)

    X_expected = pd.DataFrame({
        0: pd.Series([1, 2], dtype="category"),
        1: pd.Series([2, 4], dtype="category"),
    })
    pd.testing.assert_frame_equal(X_renamed.to_dataframe(), X_expected)

    expected_logical_types = {
        0: ww.logical_types.Categorical,
        1: ww.logical_types.Categorical,
    }
    assert X_renamed.logical_types == expected_logical_types
def _encode_categories(self, X, fit=False):
    """Encode each categorical feature using ordinal encoding.

    The input is converted with ``_convert_to_woodwork_structure``, its columns
    are renamed with ``_rename_column_names_to_numeric``, and the columns
    selected by ``select('category')`` are replaced by the output of an
    ``OrdinalEncoder`` and cast to the pandas ``category`` dtype.

    Arguments:
        X: Input features, in any form accepted by
            ``_convert_to_woodwork_structure``.
        fit (bool): When True, a new ``OrdinalEncoder`` is created, stored on
            ``self._ordinal_encoder`` and fitted on the categorical columns.
            When False, the previously stored encoder is reused, so this
            method must already have been called with ``fit=True``.

    Returns:
        The data produced by ``_convert_woodwork_types_wrapper`` (presumably a
        pandas DataFrame; it is indexed and assigned to as one here), with the
        categorical columns ordinal-encoded. If there are no categorical
        columns the converted data is returned without further changes.
    """
    X_encoded = _convert_to_woodwork_structure(X)
    X_encoded = _rename_column_names_to_numeric(X_encoded)
    # Column selection must happen on the Woodwork structure, before it is
    # turned back into a pandas object below.
    cat_cols = list(X_encoded.select('category').columns)
    X_encoded = _convert_woodwork_types_wrapper(X_encoded.to_dataframe())
    if not cat_cols:
        return X_encoded

    if fit:
        self._ordinal_encoder = OrdinalEncoder()
        encoder_output = self._ordinal_encoder.fit_transform(X_encoded[cat_cols])
    else:
        encoder_output = self._ordinal_encoder.transform(X_encoded[cat_cols])

    # Assigning a DataFrame aligns rows by index label. Without an explicit
    # index the new frame would get a fresh RangeIndex, so inputs with any
    # other index would receive NaN instead of the encoded values.
    X_encoded[cat_cols] = pd.DataFrame(encoder_output, index=X_encoded.index)
    X_encoded[cat_cols] = X_encoded[cat_cols].astype('category')
    return X_encoded
def predict_proba(self, X):
    """Return the parent class's ``predict_proba`` output for renamed features.

    Arguments:
        X: Input features. They are passed through
            ``_rename_column_names_to_numeric`` with ``flatten_tuples=False``
            before being handed to the parent implementation.

    Returns:
        Whatever the parent class's ``predict_proba`` returns.
    """
    numeric_named_X = _rename_column_names_to_numeric(X, flatten_tuples=False)
    return super().predict_proba(numeric_named_X)
def fit(self, X, y=None):
    """Fit the wrapped component object on features with renamed columns.

    Arguments:
        X: Input features; first passed, together with ``y``, through the
            parent class's ``_manage_woodwork``.
        y: Optional target values. Defaults to None.

    Returns:
        self
    """
    X, y = super()._manage_woodwork(X, y)
    # Record the column names as they are before the rename below.
    self.input_feature_names = list(X.columns)
    numeric_named_X = _rename_column_names_to_numeric(X, flatten_tuples=False)
    self._component_obj.fit(numeric_named_X, y)
    return self
def predict(self, X):
    """Return the parent class's ``predict`` output for renamed features.

    Arguments:
        X: Input features. They are passed through
            ``_rename_column_names_to_numeric`` before being handed to the
            parent implementation.

    Returns:
        Whatever the parent class's ``predict`` returns.
    """
    numeric_named_X = _rename_column_names_to_numeric(X)
    return super().predict(numeric_named_X)
def fit(self, X, y=None):
    """Fit via the parent class after renaming the feature columns.

    Arguments:
        X: Input features. They are passed through
            ``_rename_column_names_to_numeric`` before fitting.
        y: Optional target values, forwarded unchanged. Defaults to None.

    Returns:
        Whatever the parent class's ``fit`` returns.
    """
    numeric_named_X = _rename_column_names_to_numeric(X)
    return super().fit(numeric_named_X, y)
def predict_proba(self, X):
    """Return the parent class's ``predict_proba`` output for renamed features.

    Arguments:
        X: Input features. They are passed through
            ``_rename_column_names_to_numeric`` before being handed to the
            parent implementation.

    Returns:
        Whatever the parent class's ``predict_proba`` returns.
    """
    numeric_named_X = _rename_column_names_to_numeric(X)
    return super().predict_proba(numeric_named_X)