def __init__(self, project: Project):
    """Prepare the change collector and the validation data for ``project``.

    The data is loaded through ``project.api.load_data()``, the targets are
    fit-transformed with ``project.api.encoder``, and the resulting pair is
    passed through ``subsample_data_for_validation`` before being stored.

    Args:
        project: project whose ``api`` supplies the data and the target
            encoder.
    """
    self.change_collector = ChangeCollector(project)

    raw_features, raw_targets = project.api.load_data()
    # The targets are encoded before subsampling, so the stored ``y`` is the
    # encoder's output rather than the raw loaded targets.
    encoded_targets = project.api.encoder.fit_transform(raw_targets)

    sampled = subsample_data_for_validation(raw_features, encoded_targets)
    self.X_df, self.y = sampled
def validate_feature_acceptance(accepter_class, feature, features, X_df, y_df,
                                X_df_val, y_val, subsample):
    """Judge ``feature`` with an accepter built from ``accepter_class``.

    Args:
        accepter_class: callable invoked as
            ``accepter_class(X_df, y_df, X_df_val, y_val, features, feature)``;
            the object it returns must provide ``judge()``.
        feature: the candidate feature handed to the accepter.
        features: the other features handed to the accepter.
        X_df, y_df: first data pair forwarded to the accepter.
        X_df_val, y_val: second data pair forwarded to the accepter.
        subsample: when truthy, all four data arguments are first replaced by
            the result of ``subsample_data_for_validation``.

    Returns:
        Whatever the accepter's ``judge()`` returns.
    """
    if subsample:
        sampled = subsample_data_for_validation(X_df, y_df, X_df_val, y_val)
        X_df, y_df, X_df_val, y_val = sampled

    judge = accepter_class(
        X_df, y_df, X_df_val, y_val, features, feature).judge
    return judge()
def validate_feature_api(
    feature: Feature,
    X_df: pd.DataFrame,
    y_df: Union[pd.DataFrame, pd.Series],
    subsample: bool,
    log_advice: bool = False,
) -> bool:
    """Run the ``FeatureApiCheck`` checks on ``feature`` and log the outcome.

    Args:
        feature: the feature to check.
        X_df: feature data passed to the checks.
        y_df: target data passed to the checks.
        subsample: when true, ``X_df`` and ``y_df`` are first replaced by the
            result of ``subsample_data_for_validation``.
        log_advice: when true and the feature is not valid, log one line per
            (failure, advice) pair instead of the bare list of failures.

    Returns:
        The validity flag reported by ``check_from_class``.
    """
    logger.debug(f'Validating feature {feature!r}')

    if subsample:
        X_df, y_df = subsample_data_for_validation(X_df, y_df)

    is_valid, failures, advice_items = check_from_class(
        FeatureApiCheck, feature, X_df, y_df)

    if is_valid:
        logger.info('Feature is valid')
        return is_valid

    if not log_advice:
        logger.info(f'Feature is NOT valid; failures were {failures}')
        return is_valid

    logger.info(
        'Feature is NOT valid; here is some advice for resolving the '
        'feature API issues.')
    # Failures and advice are paired positionally; zip stops at the shorter.
    for failure, advice_item in zip(failures, advice_items):
        logger.info(f'{failure}: {advice_item}')
    return is_valid
def _load_validation_data(self, X_df: pd.DataFrame, y_df: pd.DataFrame, subsample: bool): if X_df is None or y_df is None: _X_df, _y_df = self.api.load_data() if X_df is None: X_df = _X_df if y_df is None: y_df = _y_df if subsample: X_df, y_df = subsample_data_for_validation(X_df, y_df) return X_df, y_df
def validate_feature_api(feature, X, y, subsample=False):
    """Run the ``FeatureApiCheck`` checks on ``feature`` and log the outcome.

    Args:
        feature: the feature to check.
        X: feature data passed to the checks.
        y: target data passed to the checks.
        subsample: when truthy, ``X`` and ``y`` are first replaced by the
            result of ``subsample_data_for_validation``.

    Returns:
        The validity flag reported by ``check_from_class``.
    """
    logger.debug('Validating feature {feature!r}'.format(feature=feature))

    if subsample:
        X, y = subsample_data_for_validation(X, y)

    is_valid, failures = check_from_class(FeatureApiCheck, feature, X, y)

    if not is_valid:
        message = 'Feature is NOT valid; failures were {failures}'.format(
            failures=failures)
        logger.info(message)
        return is_valid

    logger.info('Feature is valid')
    return is_valid
def validate_feature_acceptance(feature, X, y, subsample=False, path=None,
                                package=None):
    """Build a project on ``X``/``y`` and let its accepter judge ``feature``.

    Args:
        feature: the candidate feature to judge.
        X: feature data passed to ``project.build``.
        y: target data passed to ``project.build``.
        subsample: when truthy, ``X`` and ``y`` are first replaced by the
            result of ``subsample_data_for_validation``.
        path: used with ``Project.from_path`` when ``package`` is ``None``.
        package: used with ``Project(package)`` when given; takes precedence
            over ``path``.

    Returns:
        Whatever the accepter's ``judge()`` returns.
    """
    # Resolve the project: package first, then path, then the current
    # working directory.
    if package is None and path is None:
        project = Project.from_cwd()
    elif package is None:
        project = Project.from_path(path)
    else:
        project = Project(package)

    if subsample:
        X, y = subsample_data_for_validation(X, y)

    # Build the project on the (possibly subsampled) data.
    build_result = project.build(X, y)

    # Look up the accepter class for this project.
    accepter_cls = _load_class(project, 'validation.feature_accepter')
    accepter = accepter_cls(
        build_result.X_df, build_result.y, build_result.features, feature)
    return accepter.judge()
def __init__(self, project):
    """Prepare the change collector and the validation data for ``project``.

    The data returned by ``project.load_data()`` is passed through
    ``subsample_data_for_validation`` and stored as ``self.X`` and ``self.y``.

    Args:
        project: object providing ``load_data()``; also handed to
            ``ChangeCollector``.
    """
    self.change_collector = ChangeCollector(project)

    loaded_X, loaded_y = project.load_data()
    sampled_X, sampled_y = subsample_data_for_validation(loaded_X, loaded_y)
    self.X = sampled_X
    self.y = sampled_y