예제 #1
0
def validate_feature_api(
    feature: Feature,
    X_df: pd.DataFrame,
    y_df: Union[pd.DataFrame, pd.Series],
    subsample: bool,
    log_advice: bool = False,
) -> bool:
    """Run ``FeatureApiCheck`` against ``feature`` and log the outcome.

    Args:
        feature: The feature handed to ``check_from_class``.
        X_df: Data passed to the check alongside ``y_df``.
        y_df: Data passed to the check alongside ``X_df``.
        subsample: When truthy, ``X_df`` and ``y_df`` are first replaced by
            the result of ``subsample_data_for_validation``.
        log_advice: When truthy and the feature is not valid, each failure
            is logged together with its matching advice item; otherwise the
            collection of failures is logged in a single message.

    Returns:
        The validity flag produced by ``check_from_class``.
    """
    logger.debug(f'Validating feature {feature!r}')

    if subsample:
        X_df, y_df = subsample_data_for_validation(X_df, y_df)

    outcome = check_from_class(FeatureApiCheck, feature, X_df, y_df)
    is_valid, failed_checks, hints = outcome

    # Happy path: nothing more to report.
    if is_valid:
        logger.info('Feature is valid')
        return is_valid

    # Terse report: only the failures themselves.
    if not log_advice:
        logger.info(f'Feature is NOT valid; failures were {failed_checks}')
        return is_valid

    # Verbose report: one line per (failure, advice) pair.
    logger.info(
        'Feature is NOT valid; here is some advice for resolving the '
        'feature API issues.')
    for failure, advice_item in zip(failed_checks, hints):
        logger.info(f'{failure}: {advice_item}')

    return is_valid
예제 #2
0
 def test_producing_missing_values_fails(self):
     # Precondition on the data used below: has_nans must hold for self.X.
     assert has_nans(self.X)

     # Feature on input 'size' using IdentityTransformer.
     passthrough = IdentityTransformer()
     candidate = Feature(input='size', transformer=passthrough)

     outcome = check_from_class(FeatureApiCheck, candidate, self.X, self.y)
     is_valid, failed_checks = outcome

     # The feature must be rejected, with NoMissingValuesCheck among the
     # reported failures.
     self.assertFalse(is_valid)
     self.assertIn(NoMissingValuesCheck.__name__, failed_checks)
예제 #3
0
 def test_bad_feature_transform_errors(self):
     # The transformer is set up to throw errors, so validation is expected
     # to fail with CanTransformCheck among the reported failures.
     triggers = (lambda x: True, )
     errors = (RuntimeError, )
     fragile = FragileTransformer(triggers, errors)
     candidate = Feature(input='size', transformer=fragile)

     outcome = check_from_class(FeatureApiCheck, candidate, self.X, self.y)
     is_valid, failed_checks = outcome

     self.assertFalse(is_valid)
     self.assertIn(CanTransformCheck.__name__, failed_checks)
예제 #4
0
def test_producing_missing_values_fails(sample_data):
    """Expect ``NoMissingValuesCheck`` among the failures.

    The test first asserts ``has_nans(sample_data.X)``, then validates a
    ``Feature`` on input ``'size'`` that uses ``IdentityTransformer``.
    """
    assert has_nans(sample_data.X)

    passthrough = IdentityTransformer()
    candidate = Feature(input='size', transformer=passthrough)

    is_valid, failed_checks, _advice = check_from_class(
        FeatureApiCheck, candidate, sample_data.X, sample_data.y)

    assert not is_valid
    assert NoMissingValuesCheck.__name__ in failed_checks
예제 #5
0
def test_bad_feature_input(sample_data):
    """Expect ``HasCorrectInputTypeCheck`` to fail for a bad ``input``.

    The feature is built with the integer ``3`` as its ``input`` and a
    ``SimpleImputer`` as its transformer.
    """
    imputer = SimpleImputer()
    candidate = Feature(input=3, transformer=imputer)  # bad input

    is_valid, failed_checks, _advice = check_from_class(
        FeatureApiCheck, candidate, sample_data.X, sample_data.y)

    assert not is_valid
    assert HasCorrectInputTypeCheck.__name__ in failed_checks
예제 #6
0
def test_good_feature(sample_data):
    """Expect a ``'size'`` feature with ``SimpleImputer`` to pass cleanly.

    The check must report the feature as valid and return no failures.
    """
    imputer = SimpleImputer()
    candidate = Feature(input='size', transformer=imputer)

    is_valid, failed_checks, _advice = check_from_class(
        FeatureApiCheck, candidate, sample_data.X, sample_data.y)

    assert is_valid
    assert len(failed_checks) == 0
예제 #7
0
 def test_bad_feature_input(self):
     # Bad input: the feature is given the integer 3 as its input, so the
     # check is expected to fail with HasCorrectInputTypeCheck reported.
     imputer = SimpleImputer()
     candidate = Feature(input=3, transformer=imputer)

     outcome = check_from_class(FeatureApiCheck, candidate, self.X, self.y)
     is_valid, failed_checks = outcome

     self.assertFalse(is_valid)
     self.assertIn(HasCorrectInputTypeCheck.__name__, failed_checks)
예제 #8
0
    def test_good_feature(self):
        # A 'size' feature using SimpleImputer must be reported as valid,
        # with no failures returned.
        imputer = SimpleImputer()
        candidate = Feature(input='size', transformer=imputer)

        outcome = check_from_class(FeatureApiCheck, candidate, self.X, self.y)
        is_valid, failed_checks = outcome

        self.assertTrue(is_valid)
        self.assertEqual(len(failed_checks), 0)
예제 #9
0
def test_bad_feature_transform_errors(sample_data):
    """Expect ``CanTransformCheck`` to fail when the transformer throws errors.

    The feature wraps a ``FragileTransformer`` built from an always-true
    lambda and ``RuntimeError``.
    """
    triggers = (lambda x: True, )
    errors = (RuntimeError, )
    fragile = FragileTransformer(triggers, errors)
    candidate = Feature(input='size', transformer=fragile)

    is_valid, failed_checks, _advice = check_from_class(
        FeatureApiCheck, candidate, sample_data.X, sample_data.y)

    assert not is_valid
    assert CanTransformCheck.__name__ in failed_checks
예제 #10
0
def validate_feature_api(feature, X, y, subsample=False):
    """Run ``FeatureApiCheck`` against ``feature`` and log the outcome.

    Args:
        feature: The feature handed to ``check_from_class``.
        X: Data passed to the check alongside ``y``.
        y: Data passed to the check alongside ``X``.
        subsample: When truthy, ``X`` and ``y`` are first replaced by the
            result of ``subsample_data_for_validation``.

    Returns:
        The validity flag produced by ``check_from_class``.
    """
    logger.debug('Validating feature {feature!r}'.format(feature=feature))

    if subsample:
        X, y = subsample_data_for_validation(X, y)

    is_valid, failed_checks = check_from_class(FeatureApiCheck, feature, X, y)

    # Pick the message first so there is a single info-level logging call.
    if is_valid:
        message = 'Feature is valid'
    else:
        message = 'Feature is NOT valid; failures were {failures}'.format(
            failures=failed_checks)
    logger.info(message)

    return is_valid
예제 #11
0
def test_bad_feature_deepcopy_fails(sample_data):
    """Expect ``CanDeepcopyCheck`` to fail when ``__deepcopy__`` raises.

    The transformer subclasses ``IdentityTransformer`` and overrides
    ``__deepcopy__`` to raise ``RuntimeError``.
    """

    class _CopyFailsTransformer(IdentityTransformer):
        def __deepcopy__(self, memo):
            raise RuntimeError

    uncopyable = _CopyFailsTransformer()
    candidate = Feature(input='size', transformer=uncopyable)

    is_valid, failed_checks, _advice = check_from_class(
        FeatureApiCheck, candidate, sample_data.X, sample_data.y)

    assert not is_valid
    assert CanDeepcopyCheck.__name__ in failed_checks
예제 #12
0
 def test_bad_feature_deepcopy_fails(self):
     # The transformer's __deepcopy__ raises RuntimeError, so the check is
     # expected to fail with CanDeepcopyCheck among the reported failures.
     class _CopyFailsTransformer(IdentityTransformer):
         def __deepcopy__(self, memo):
             raise RuntimeError

     uncopyable = _CopyFailsTransformer()
     candidate = Feature(input='size', transformer=uncopyable)

     outcome = check_from_class(FeatureApiCheck, candidate, self.X, self.y)
     is_valid, failed_checks = outcome

     self.assertFalse(is_valid)
     self.assertIn(CanDeepcopyCheck.__name__, failed_checks)
예제 #13
0
def test_bad_feature_wrong_transform_length(sample_data):
    """Expect ``HasCorrectOutputDimensionsCheck`` to fail on a longer output.

    The transformer returns an array whose first dimension is one larger
    than that of the ``X`` it receives.
    """

    class _WrongLengthTransformer(BaseTransformer):
        def transform(self, X, **transform_kwargs):
            # Same shape as X, except the first dimension grows by one.
            grown_shape = [X.shape[0] + 1, *X.shape[1:]]
            element_count = np.prod(grown_shape)
            return np.arange(element_count).reshape(grown_shape)

    # doesn't return correct length
    wrong_length = _WrongLengthTransformer()
    candidate = Feature(input='size', transformer=wrong_length)

    is_valid, failed_checks, _advice = check_from_class(
        FeatureApiCheck, candidate, sample_data.X, sample_data.y)

    assert not is_valid
    assert HasCorrectOutputDimensionsCheck.__name__ in failed_checks
예제 #14
0
    def test_bad_feature_wrong_transform_length(self):
        # The transformer returns an array whose first dimension is one
        # larger than that of its input, so the check is expected to fail
        # with HasCorrectOutputDimensionsCheck among the reported failures.
        class _WrongLengthTransformer(BaseTransformer):
            def transform(self, X, **transform_kwargs):
                # Same shape as X, except the first dimension grows by one.
                grown_shape = [X.shape[0] + 1, *X.shape[1:]]
                element_count = np.prod(grown_shape)
                return np.arange(element_count).reshape(grown_shape)

        # doesn't return correct length
        wrong_length = _WrongLengthTransformer()
        candidate = Feature(input='size', transformer=wrong_length)

        outcome = check_from_class(FeatureApiCheck, candidate, self.X, self.y)
        is_valid, failed_checks = outcome

        self.assertFalse(is_valid)
        self.assertIn(HasCorrectOutputDimensionsCheck.__name__, failed_checks)