def test_validate_np_array_with_provided_cols():
    # Feed check_dataframe a raw np.ndarray together with an explicit
    # subset of column indices.
    data = np.random.rand(5, 5)
    frame, selected = check_dataframe(data, cols=[0, 1, 3])

    # The array should come back as a DataFrame, and the requested
    # columns as a list carrying the same values.
    assert isinstance(frame, pd.DataFrame)
    assert isinstance(selected, list)
    assert selected == [0, 1, 3]

    # All five columns of the array are kept, labelled 0 through 4.
    assert frame.columns.tolist() == list(range(5))
def transform(self, X):
    """Normalize every row of ``X`` against the fitted schema.

    Parameters
    ----------
    X : pd.DataFrame, shape=(n_samples, n_features)
        The Pandas frame to transform. The input itself is not used
        for the output; a new frame is assembled from the normalized
        records and returned.

    Returns
    -------
    X : pd.DataFrame or np.ndarray, shape=(n_samples, n_features)
        The normalized data: a frame when ``self.as_df`` is truthy,
        otherwise the frame's underlying ``.values``.
    """
    # Requires the "validator_" attribute (presumably set by ``fit``).
    check_is_fitted(self, "validator_")
    frame, _ = check_dataframe(X, cols=self.cols)

    # Turn each row into a dict and run it through the validator's
    # normalization, collecting the results in row order.
    validator = self.validator_
    normalized_rows = []
    for row in frame.to_dict(orient='records'):
        normalized_rows.append(validator.normalized(row))

    # Re-assemble the normalized records into a new frame.
    result = pd.DataFrame.from_records(normalized_rows)
    if self.as_df:
        return result
    return result.values
def test_check_dataframe_some_cols():
    # Ask for only the first three of the fixture's columns.
    subset = cols[:3]
    frame, returned_cols = check_dataframe(X, cols=subset)

    # The frame's contents are unchanged by the check.
    assert X.equals(frame)

    # Only the requested subset comes back, so it must differ from
    # the full column list; it is still returned as a list.
    assert returned_cols != cols
    assert isinstance(returned_cols, list)
def test_check_dataframe_assert_all_finite():
    # Run the check with the finiteness assertion switched on; the
    # fixture frame is expected to pass it without raising.
    frame, returned_cols = check_dataframe(X, assert_all_finite=True)

    # Same contents as the input, but not the same object.
    assert X.equals(frame)
    assert frame is not X

    # No columns were specified, so the full column list is returned.
    assert cols == returned_cols, (cols, returned_cols)
def test_check_dataframe_infinite():
    # Mask out every entry below 0.3, leaving missing values behind
    # (assumes the fixture has at least one such entry).
    holey = X.mask(X < 0.3)

    # With default arguments the missing values are tolerated and
    # the data passes through unchanged.
    frame, _ = check_dataframe(holey)
    assert frame.equals(holey)

    # Once assert_all_finite is switched on, the same frame must be
    # rejected with a ValueError.
    assert_raises(ValueError, check_dataframe,
                  holey, assert_all_finite=True)
def test_check_dataframe_scalar_col():
    # Pass a single column name as a bare string rather than a list.
    frame, returned_cols = check_dataframe(X, cols='col_0')

    # The frame's contents are unchanged by the check.
    assert X.equals(frame)

    # The scalar is wrapped into a one-element list, which therefore
    # differs from the full column list.
    assert returned_cols != cols
    assert isinstance(returned_cols, list)
    assert len(returned_cols) == 1
    assert returned_cols[0] == 'col_0'
def test_check_dataframe_no_cols():
    # Explicitly pass cols=None, i.e. no column selection at all.
    frame, returned_cols = check_dataframe(X, cols=None)

    # The frame's contents are unchanged by the check.
    assert X.equals(frame)

    # The returned columns are a distinct list object that lists
    # every column of the frame.
    assert returned_cols is not cols
    assert cols == returned_cols, (cols, returned_cols)
    assert isinstance(returned_cols, list)
    assert frame.columns.tolist() == cols
def test_check_dataframe_with_diff():
    # With column_diff=True a third value is returned alongside the
    # frame and the column list.
    frame, returned_cols, leftover = check_dataframe(
        X, cols=cols, column_diff=True)

    # Both returned objects must be distinct from the inputs.
    assert frame is not X
    assert returned_cols is not cols

    # They must nevertheless compare equal to the inputs.
    assert X.equals(frame)
    assert cols == returned_cols, (cols, returned_cols)

    # Every column was requested, so the diff must be empty.
    assert not leftover
def test_check_dataframe_all_cols():
    # Request every column of the fixture frame explicitly.
    frame, returned_cols = check_dataframe(X, cols=cols)

    # Both returned objects must be distinct from the inputs.
    assert frame is not X
    assert returned_cols is not cols

    # The frame and the column list must still compare equal to the
    # inputs, with the columns returned as a list.
    assert X.equals(frame)
    assert cols == returned_cols, (cols, returned_cols)
    assert isinstance(returned_cols, list)
    assert frame.columns.tolist() == cols
def test_check_dataframe_array():
    # Hand over the array fixture with no column selection.
    frame, returned_cols = check_dataframe(array, cols=None)

    # The array is converted to a DataFrame, and the columns are
    # reported as the positions 0 through 4.
    assert isinstance(frame, pd.DataFrame)
    assert returned_cols == [0, 1, 2, 3, 4]