def test_input(generate_wrong_data_one):
    """
    Check that select_features rejects the data supplied by the fixture.

    The fixture value is unpacked into ``(df, y)`` and passed to
    ``select_features.select_features`` with ``n_features=1``. The call is
    expected to raise ``AssertionError``; the test fails if it returns
    normally.

    Parameters
    ----------
    generate_wrong_data_one
        Fixture value that unpacks into a ``(df, y)`` pair. Presumably the
        pair has an invalid input type -- the fixture is defined elsewhere,
        confirm there.

    Raises
    ------
    AssertionError
        If ``select_features`` accepts the input without complaint.

    Examples
    --------
    >>> test_input(generate_wrong_data_one)
    """
    df, y = generate_wrong_data_one
    try:
        select_features.select_features(df, y, n_features=1)
    except AssertionError:
        # Expected outcome: the input was rejected.
        pass
    else:
        # Without this branch the test would pass even when no validation
        # error is raised, which would make it meaningless.
        raise AssertionError(
            "select_features did not raise AssertionError for the "
            "wrong input data"
        )
def test_columns():
    """
    Check that select_features rejects a malformed frame/target pair.

    A small DataFrame is built from the list ``[1, 'x']`` and paired with a
    three-element target array, then passed to
    ``select_features.select_features`` with ``n_features=1``. The call is
    expected to raise ``AssertionError``; the test fails if it returns
    normally.

    Raises
    ------
    AssertionError
        If ``select_features`` accepts the input without complaint.

    Examples
    --------
    >>> test_columns()
    """
    df = pd.DataFrame([1, 'x'])
    y = np.array([1, 2, 3])
    try:
        select_features.select_features(df, y, n_features=1)
    except AssertionError:
        # Expected outcome: the input was rejected.
        pass
    else:
        # Without this branch the test would pass even when no validation
        # error is raised, which would make it meaningless.
        raise AssertionError(
            "select_features did not raise AssertionError for the "
            "malformed input data"
        )
def run_pylaundry():
    """
    Chain the four pylaundry steps and return the selected features.

    The steps run in this order: ``categorize``, ``fill_missing``,
    ``transform_columns``, ``select_features``. The data is read from the
    names ``X_train``, ``X_test`` and ``y_train`` in the enclosing scope.

    Arguments
    --------
    NA

    Returns
    ------
    The value returned by ``select_features`` for the transformed training
    data with ``n_features=2`` (the final features selected).
    """
    # Step 1 - categorize the training columns.
    column_types = categorize(df=X_train)

    # Step 2 - fill missing values (mean for numeric, mode for categorical).
    imputed = fill_missing(
        X_train,
        X_test,
        column_types,
        num_imp="mean",
        cat_imp="mode",
    )

    # Step 3 - transform the imputed train/test columns.
    imputed_train = imputed['X_train']
    imputed_test = imputed['X_test']
    transformed = transform_columns(imputed_train, imputed_test, column_types)

    # Step 4 - feature selection on the transformed training data.
    return select_features(transformed['X_train'], y_train, n_features=2)
def test_dataframe(generate_wrong_data):
    """
    Check that select_features rejects the frame supplied by the fixture.

    The ``y`` column of the fixture is used as the target and the columns
    ``x1``, ``x2`` and ``x3`` as the features; both are passed to
    ``select_features.select_features`` with ``n_features=1``. The call is
    expected to raise ``AssertionError``; the test fails if it returns
    normally.

    Parameters
    ----------
    generate_wrong_data
        Fixture value indexable by ``'y'`` and by the list
        ``['x1', 'x2', 'x3']``. Presumably a DataFrame with a wrongly typed
        column -- the fixture is defined elsewhere, confirm there.

    Raises
    ------
    AssertionError
        If ``select_features`` accepts the input without complaint.

    Examples
    --------
    >>> test_dataframe(generate_wrong_data)
    """
    df = generate_wrong_data
    y = df['y'].values
    df = df[['x1', 'x2', 'x3']]
    try:
        select_features.select_features(df, y, n_features=1)
    except AssertionError:
        # Expected outcome: the input was rejected.
        pass
    else:
        # Without this branch the test would pass even when no validation
        # error is raised, which would make it meaningless.
        raise AssertionError(
            "select_features did not raise AssertionError for the "
            "wrong column data"
        )
def test_regression_one(generate_data_regression_one):
    """
    Regression-mode check when two features are requested.

    Uses the fixture's ``y`` column as the target and ``x1``, ``x2``,
    ``x3`` as candidate features, and asserts that asking for two features
    returns ``["x1", "x2"]``.

    Examples
    --------
    >>> test_regression_one(generate_data_regression_one)
    """
    data = generate_data_regression_one
    target = data['y'].values
    candidates = data[['x1', 'x2', 'x3']]

    chosen = select_features.select_features(candidates, target, n_features=2)

    assert chosen == ["x1", "x2"]
def test_regression(generate_data_regression):
    """
    Regression-mode check when a single feature is requested.

    Uses the fixture's ``y`` column as the target and ``x1``, ``x2``,
    ``x3`` as candidate features, and asserts that asking for one feature
    returns ``["x1"]``.

    Examples
    --------
    >>> test_regression(generate_data_regression)
    """
    data = generate_data_regression
    target = data['y'].values
    candidates = data[['x1', 'x2', 'x3']]

    chosen = select_features.select_features(candidates, target, n_features=1)

    assert chosen == ["x1"]
def test_classification_multi(generate_data_classification_multi):
    """
    Classification-mode check when two features are requested.

    Uses the fixture's ``y`` column as the target and ``x1``, ``x2``,
    ``x3`` as candidate features, and asserts that asking for two features
    with ``mode="classification"`` returns ``["x1", "x2"]``.

    Examples
    --------
    >>> test_classification_multi(generate_data_classification_multi)
    """
    data = generate_data_classification_multi
    target = data['y'].values
    candidates = data[['x1', 'x2', 'x3']]
    expected = ["x1", "x2"]

    chosen = select_features.select_features(
        candidates,
        target,
        mode="classification",
        n_features=2,
    )

    assert chosen == expected
def test_classification(generate_data_classification):
    """
    Classification-mode check when a single feature is requested.

    Uses the fixture's ``y`` column as the target and ``x1``, ``x2``,
    ``x3`` as candidate features, and asserts that asking for one feature
    with ``mode="classification"`` returns ``["x1"]``.

    Examples
    --------
    >>> test_classification(generate_data_classification)
    """
    data = generate_data_classification
    target = data['y'].values
    candidates = data[['x1', 'x2', 'x3']]
    expected = ["x1"]

    chosen = select_features.select_features(
        candidates,
        target,
        mode="classification",
        n_features=1,
    )

    assert chosen == expected