Beispiel #1
0
def test_freqfeature_handles_nans_correctly(categorical_na):
    """Check FreqFeature.fit_transform output on the ``categorical_na`` fixture.

    The output must be a DataFrame with the same row count and column
    names as the input, contain no missing values, and hold
    ``1 / (n_rows - 1)`` in its first cell.
    """
    # NOTE(review): the ``n_rows - 1`` denominator presumably reflects one
    # missing value in the fixture being excluded from the count - confirm
    # against the fixture definition.
    encoded = FreqFeature().fit_transform(categorical_na)
    n_rows = len(categorical_na)

    assert isinstance(encoded, pd.DataFrame)
    assert len(encoded) == n_rows
    assert set(encoded.columns) == set(categorical_na.columns)

    missing_cells = encoded.isna().sum().sum()
    assert missing_cells == 0

    expected_first_cell = 1 / (n_rows - 1)
    assert encoded.iloc[0, 0] == expected_first_cell
Beispiel #2
0
def test_freq_features_returns_0_when_unseen_value_is_given(categorical):
    """Fit on ``categorical``, transform a new frame, expect 0 in the last row.

    The transformed output must be a DataFrame with one row per input row,
    and the first column of its last row must equal 0.
    """
    # NOTE(review): "c25" is presumably absent from the ``categorical``
    # fixture, making it the unseen value - confirm against the fixture.
    unseen_frame = pd.DataFrame({"category_a": ["a1", "a2", "c25"]})

    encoder = FreqFeature()
    encoder.fit(categorical)
    transformed = encoder.transform(unseen_frame)

    assert isinstance(transformed, pd.DataFrame)
    assert len(transformed) == len(unseen_frame)
    last_row_value = transformed.iloc[-1, 0]
    assert last_row_value == 0
Beispiel #3
0
def test_freqfeature_returns_correctly(categorical):
    """Check the basic contract of FreqFeature.fit_transform on ``categorical``.

    The output must be a DataFrame with the same row count and column
    names as the input, every column must have a numeric dtype, the first
    cell must equal ``1 / n_rows``, and every column must sum to 1.
    """
    # The expected values are floats produced by division and summation, so
    # exact ``==`` could fail on a one-ulp rounding difference; compare within
    # a small absolute tolerance instead.
    tolerance = 1e-9

    freq_feature = FreqFeature()
    result = freq_feature.fit_transform(categorical)

    assert isinstance(result, pd.DataFrame)
    assert len(categorical) == len(result)
    assert set(categorical.columns) == set(result.columns)
    for col in result.columns:
        assert pd.api.types.is_numeric_dtype(result[col])

    assert abs(result.iloc[0, 0] - 1 / len(categorical)) <= tolerance

    # result.sum() yields one total per column; each must be 1 within tolerance.
    column_totals = result.sum()
    assert all(abs(total - 1) <= tolerance for total in column_totals)
 def test_freq_feature_can_be_used_in_cross_validation_string_data(
         self, categorical: pd.DataFrame):
     """Run 2-fold cross-validation on a pipeline built around FreqFeature.

     The pipeline comes from ``create_pipeline`` (defined outside this
     view); the test passes when every returned fold score is >= 0.
     """
     pipeline = create_pipeline(FreqFeature())
     labels = np.array([1, 0, 1, 0])
     fold_scores = cross_val_score(pipeline, categorical, labels, cv=2)
     assert np.all(fold_scores >= 0)
 def test_freq_feature_can_be_used_in_grid_search(
         self, categorical: pd.DataFrame):
     """Fit a 2-fold GridSearchCV over a pipeline built around FreqFeature.

     The grid varies ``clf__strategy``, i.e. the ``strategy`` parameter of
     whatever step ``create_pipeline`` names ``clf`` (not visible here).
     The test passes when the fitted search exposes ``best_estimator_``.
     """
     strategy_grid = {"clf__strategy": ["stratified", "most_frequent"]}
     search = GridSearchCV(create_pipeline(FreqFeature()),
                           param_grid=strategy_grid,
                           cv=2)
     targets = [1, 0, 1, 0]
     search.fit(categorical, targets)
     assert hasattr(search, "best_estimator_")
 def test_works_without_args(self):
     """FreqFeature must be constructible with no arguments and be truthy."""
     transformer = FreqFeature()
     assert transformer