Exemple #1
0
class NewFeatureTransformer(BaseEstimator, TransformerMixin):
    """Cap ``annual_inc`` and add three derived loan features.

    The input frame must provide the columns ``annual_inc``,
    ``installment``, ``int_rate`` and ``term``.

    Parameters
    ----------
    quantile_range : optional
        Forwarded to ``ColumnCapper(quantile_range=...)``. Any falsy value
        (including the default ``None``) makes the capper use its own
        default range.

    Attributes
    ----------
    quantile_range
        The constructor argument, stored unchanged.
    cc
        The ``ColumnCapper`` fitted on ``annual_inc``. It is built once in
        ``__init__``; changing ``quantile_range`` afterwards does not
        rebuild it.
    """

    def __init__(self, quantile_range=None):
        super().__init__()
        # BaseEstimator.get_params() looks up every __init__ argument with
        # getattr(self, name). Without this attribute, get_params(), clone()
        # and repr() fail with AttributeError.
        self.quantile_range = quantile_range
        if not quantile_range:
            self.cc = ColumnCapper()
        else:
            self.cc = ColumnCapper(quantile_range=quantile_range)

    def fit(self, X, y=None):
        """Fit the capper on the ``annual_inc`` column of ``X``.

        ``y`` is accepted for pipeline compatibility and ignored.
        Returns ``self``.
        """
        # The capper is fed a 2-D array, hence the single-column reshape.
        self.cc.fit(X["annual_inc"].values.reshape(-1, 1))

        return self

    def transform(self, X):
        """Return a copy of ``X`` with capped income and derived columns.

        Columns written, in this order:

        * ``annual_inc`` - replaced by the capper's output.
        * ``total_loan_amount`` -
          ``installment * (1 + int_rate / 100) ** (term / 12)``.
        * ``installment_to_income`` -
          ``installment * 12 / annual_inc * 100``.
        * ``total_loan_to_income`` -
          ``total_loan_amount / (annual_inc * term / 12) * 100``.

        The ratios are computed from the capped ``annual_inc`` because
        ``DataFrame.assign`` evaluates its keyword arguments in order and
        later callables see the columns produced by earlier ones.
        """

        def capped_annual_income(df):
            return self.cc.transform(df["annual_inc"].values.reshape(-1, 1))

        def total_loan_amount(df):
            # NOTE(review): ``term / 12`` presumably converts months to
            # years - confirm against the dataset description.
            return df["installment"] * (
                (1 + (df["int_rate"] / 100)) ** (df["term"] / 12))

        def installment_to_income(df):
            return ((df["installment"] * 12) / (df["annual_inc"])) * 100

        def total_loan_to_income(df):
            return (df["total_loan_amount"] /
                    (df["annual_inc"] * (df["term"] / 12))) * 100

        # Keyword order matters: each derived column depends on the ones
        # assigned before it.
        X = X.assign(
            annual_inc=capped_annual_income,
            total_loan_amount=total_loan_amount,
            installment_to_income=installment_to_income,
            total_loan_to_income=total_loan_to_income,
        )

        return X
def test_X_types_and_transformed_shapes(valid_df):
    """Check input validation and that outputs keep the input shape."""

    def expect_value_error(X, X_transform=None):
        # Fit on X, then transform X_transform (X itself when omitted);
        # a ValueError must be raised somewhere in those two steps.
        to_transform = X if X_transform is None else X_transform
        with pytest.raises(ValueError):
            fitted = ColumnCapper().fit(X)
            fitted.transform(to_transform)

    # The number of columns at transform time must match the fitted one.
    expect_value_error(valid_df, valid_df[["a", "b"]])

    # Column 'a' is entirely NaN in the first frame and entirely inf in
    # the second; column 'b' is always well-formed.
    broken_frames = [
        pd.DataFrame({"a": [bad_value] * 3, "b": [11, 12, 13]})
        for bad_value in (np.nan, np.inf)
    ]

    for frame in broken_frames:
        # The frame contains an invalid column ('a').
        expect_value_error(frame)

        # 1d input is rejected; it must be reshaped before fit/transform.
        column_b = frame["b"]
        expect_value_error(column_b)

        # Both 2d reshapes of the valid column are accepted.
        ColumnCapper().fit_transform(column_b.values.reshape(-1, 1))
        ColumnCapper().fit_transform(column_b.values.reshape(1, -1))

    capper = ColumnCapper()
    for X in (valid_df, valid_df.values):
        transformed = capper.fit_transform(X)
        assert transformed.shape == X.shape
def test_interpolation():
    """Known interpolation names construct; anything else raises ValueError."""
    accepted = ("linear", "lower", "higher", "midpoint", "nearest")
    rejected = ("test", 42, None, [], {}, set(), 0.42)

    # Construction alone must succeed for every supported name.
    for name in accepted:
        ColumnCapper(interpolation=name)

    # Unknown strings and non-string values are refused at construction.
    for bad_value in rejected:
        with pytest.raises(ValueError):
            ColumnCapper(interpolation=bad_value)
def test_interpolation():
    """Verify which ``interpolation`` arguments the constructor accepts."""
    supported = ('linear', 'lower', 'higher', 'midpoint', 'nearest')
    unsupported = ('test', 42, None, [], {}, set(), .42)

    # Every supported option is expected to build without an exception.
    for option in supported:
        ColumnCapper(interpolation=option)

    # Every unsupported option is expected to raise ValueError.
    for option in unsupported:
        with pytest.raises(ValueError):
            ColumnCapper(interpolation=option)
def test_nan_inf(valid_df):
    """Check inf/NaN counts in the output for both ``discard_infs`` modes."""
    # (discard_infs flag, NaN count expected in the transformed output):
    # first the capping mode, then the discarding mode.
    scenarios = ((False, 1), (True, 2))

    for discard_infs, expected_nans in scenarios:
        capper = ColumnCapper(discard_infs=discard_infs)

        # No +inf may remain in the output in either mode.
        inf_count = (capper.fit_transform(valid_df) == np.inf).sum().sum()
        assert inf_count == 0

        nan_count = np.isnan(capper.fit_transform(valid_df)).sum()
        assert nan_count == expected_nans
 def expect_value_error(X, X_transform=None):
     """Assert that fitting on X and transforming raises ValueError.

     ``X_transform`` is the data passed to ``transform``; when omitted,
     ``X`` is used for both steps.
     """
     to_transform = X if X_transform is None else X_transform
     with pytest.raises(ValueError):
         fitted = ColumnCapper().fit(X)
         fitted.transform(to_transform)
 def expect_value_error(quantile_range):
     """Assert that ``ColumnCapper(quantile_range)`` raises ValueError."""
     raises_value_error = pytest.raises(ValueError)
     with raises_value_error:
         ColumnCapper(quantile_range)
 def expect_type_error(quantile_range):
     """Assert that ``ColumnCapper(quantile_range)`` raises TypeError."""
     raises_type_error = pytest.raises(TypeError)
     with raises_type_error:
         ColumnCapper(quantile_range)
def test_estimator_checks(test_fn):
    """Run the supplied check ``test_fn`` on a default ``ColumnCapper``.

    The check is called with the class name and a fresh instance.
    """
    estimator_name = ColumnCapper.__name__
    estimator = ColumnCapper()
    test_fn(estimator_name, estimator)
def test_dtype_classification(random_xy_dataset_clf):
    """The transformed classification data must have a float dtype."""
    features, target = random_xy_dataset_clf
    fitted = ColumnCapper().fit(features, target)
    transformed = fitted.transform(features)
    assert transformed.dtype in FLOAT_DTYPES
def test_dtype_regression(random_xy_dataset_regr):
    """The transformed regression data must have a float dtype."""
    features, target = random_xy_dataset_regr
    fitted = ColumnCapper().fit(features, target)
    transformed = fitted.transform(features)
    assert transformed.dtype in FLOAT_DTYPES
Exemple #12
0
 def __init__(self, quantile_range=None):
     """Create the internal ``ColumnCapper`` stored as ``self.cc``.

     A truthy ``quantile_range`` is forwarded to the capper; any falsy
     value (including the default ``None``) leaves the capper on its own
     defaults.
     """
     super().__init__()
     capper_kwargs = {"quantile_range": quantile_range} if quantile_range else {}
     self.cc = ColumnCapper(**capper_kwargs)