Ejemplo n.º 1
0
def test_validate_np_array_with_provided_cols():
    """Feed ``check_dataframe`` a raw ndarray together with explicit cols.

    The assertions pin three things: the first return value is a
    ``pd.DataFrame``, the second is a list equal to the requested columns,
    and the frame's column labels are the positional integers 0..4.
    """
    raw = np.random.rand(5, 5)
    frame, selected = check_dataframe(raw, cols=[0, 1, 3])

    # type checks on both returned objects
    assert isinstance(frame, pd.DataFrame)
    assert isinstance(selected, list)

    # the requested columns come back unchanged ...
    assert selected == [0, 1, 3]

    # ... while the frame itself still carries every column of the array
    assert frame.columns.tolist() == [0, 1, 2, 3, 4]
Ejemplo n.º 2
0
    def transform(self, X):
        """Normalize every row of ``X`` with the fitted validator.

        Each row is turned into a record (a dict keyed by column name),
        passed through ``self.validator_.normalized``, and the resulting
        records are assembled into a new frame.

        Parameters
        ----------
        X : pd.DataFrame, shape=(n_samples, n_features)
            The Pandas frame to transform. It is first passed through
            ``check_dataframe`` along with ``self.cols``.
            NOTE(review): presumably ``check_dataframe`` hands back a copy,
            so the caller's frame is left untouched -- confirm against its
            implementation.

        Returns
        -------
        X : pd.DataFrame or np.ndarray
            A frame built from the normalized records when ``self.as_df``
            is truthy; otherwise the ``.values`` of that frame.
            NOTE(review): the frame is rebuilt from a plain list of
            records, so the input's row index does not appear to be
            carried over -- confirm this is intended.
        """
        # fail early when the estimator has not been fitted yet
        check_is_fitted(self, "validator_")
        X, _ = check_dataframe(X, cols=self.cols)

        # normalize row by row, then rebuild a frame from the records
        validator = self.validator_
        normalized_records = []
        for record in X.to_dict(orient='records'):
            normalized_records.append(validator.normalized(record))
        result = pd.DataFrame.from_records(normalized_records)

        if self.as_df:
            return result
        return result.values
Ejemplo n.º 3
0
def test_check_dataframe_some_cols():
    """Request only the first three entries of ``cols``.

    The returned frame must still equal ``X``, and the returned column
    list must be a list that differs from the full ``cols``.
    """
    subset = cols[:3]
    frame, returned_cols = check_dataframe(X, cols=subset)

    # the data itself is unchanged by the check
    assert X.equals(frame)

    # only a subset was requested, so the result must not match all of cols
    assert returned_cols != cols
    assert isinstance(returned_cols, list)
Ejemplo n.º 4
0
def test_check_dataframe_assert_all_finite():
    """Run ``check_dataframe`` on ``X`` with ``assert_all_finite=True``.

    The call is expected to succeed and to return an equal-but-distinct
    frame together with a column list equal to ``cols``.
    """
    result = check_dataframe(X, assert_all_finite=True)
    frame, returned_cols = result

    # same content, different object
    assert X.equals(frame)
    assert frame is not X

    # no cols were passed, and the returned list must equal cols
    assert cols == returned_cols, (cols, returned_cols)
Ejemplo n.º 5
0
def test_check_dataframe_infinite():
    """Exercise ``check_dataframe`` on a frame containing masked values.

    ``X.mask(X < 0.3)`` blanks out every entry below 0.3. Without
    ``assert_all_finite`` the check must pass the frame through; with
    ``assert_all_finite=True`` it must raise ``ValueError``.
    """
    masked = X.mask(X < 0.3)

    # without assert_all_finite the masked frame passes through unchanged
    checked, _ = check_dataframe(masked)
    assert checked.equals(masked)

    # with assert_all_finite=True the same frame must be rejected
    assert_raises(
        ValueError,
        check_dataframe,
        masked,
        assert_all_finite=True,
    )
Ejemplo n.º 6
0
def test_check_dataframe_scalar_col():
    """Pass a single column name as a bare string instead of a list.

    The returned column container must be a one-element list holding
    that name, and the returned frame must still equal ``X``.
    """
    frame, returned_cols = check_dataframe(X, cols='col_0')

    # the data is unchanged by the check
    assert X.equals(frame)

    # a single column was requested, so the result differs from all cols
    assert returned_cols != cols

    # the scalar comes back wrapped in a list of length one
    assert isinstance(returned_cols, list)
    assert len(returned_cols) == 1
    assert returned_cols[0] == 'col_0'
Ejemplo n.º 7
0
def test_check_dataframe_no_cols():
    """Call ``check_dataframe`` with ``cols=None``.

    The returned column list must be a separate list object that equals
    ``cols``, and the returned frame must equal ``X`` with matching
    column labels.
    """
    result = check_dataframe(X, cols=None)
    frame, returned_cols = result

    # the data is unchanged by the check
    assert X.equals(frame)

    # the column list is a distinct object with the same contents as cols
    assert returned_cols is not cols
    assert cols == returned_cols, (cols, returned_cols)
    assert isinstance(returned_cols, list)

    # and the frame's own labels agree with cols
    assert frame.columns.tolist() == cols
Ejemplo n.º 8
0
def test_check_dataframe_with_diff():
    """Request the extra ``column_diff`` return value with all cols given.

    With ``column_diff=True`` the call yields three values; since every
    column in ``cols`` was requested, the third one must be falsy.
    """
    outcome = check_dataframe(X, cols=cols, column_diff=True)
    frame, returned_cols, leftover = outcome

    # both returned objects must be distinct from the inputs
    assert frame is not X
    assert returned_cols is not cols

    # ... yet equal in content
    assert X.equals(frame)
    assert cols == returned_cols, (cols, returned_cols)

    # the third return value must be falsy when all columns were requested
    assert not leftover
Ejemplo n.º 9
0
def test_check_dataframe_all_cols():
    """Call ``check_dataframe`` with every column explicitly requested.

    Both return values must be new objects that compare equal to the
    inputs ``X`` and ``cols``.
    """
    result = check_dataframe(X, cols=cols)
    frame, returned_cols = result

    # both returned objects must be distinct from the inputs
    assert frame is not X
    assert returned_cols is not cols

    # the frame holds the same data as X
    assert X.equals(frame)

    # the column list equals cols, is a list, and matches the frame labels
    assert cols == returned_cols, (cols, returned_cols)
    assert isinstance(returned_cols, list)
    assert frame.columns.tolist() == cols
Ejemplo n.º 10
0
def test_check_dataframe_array():
    """Pass ``array`` with ``cols=None`` and inspect both return values.

    The first return value must be a ``pd.DataFrame`` and the second
    must equal the integers 0 through 4.
    """
    result = check_dataframe(array, cols=None)
    frame, returned_cols = result

    assert isinstance(frame, pd.DataFrame)
    assert returned_cols == [0, 1, 2, 3, 4]