def test_selective_pca():
    """Fit ``SelectivePCA`` on one column and check the rest pass through.

    The transformer is fit on the first name in ``names`` only. The remaining
    columns must come out of ``transform`` numerically unchanged, a ``'PC1'``
    column must be present, and the result must have four columns. The test
    also checks ``get_decomposition`` before and after fitting, the stored
    ``cols`` attribute, and that a weighted fit over all columns produces a
    different result.
    """
    # work on a copy of the shared ``X`` (defined outside this test)
    original = X.copy()

    # fit on just the first column
    cols = [names[0]]  # 'a'

    # the "other" names, and their values before the transform.
    # ``.values`` is used instead of ``DataFrame.as_matrix()``, which was
    # deprecated and later removed from pandas.
    comp_column_names = names[1:]
    compare_cols = original[comp_column_names].values

    # now fit PCA on the first column only
    transformer = SelectivePCA(cols=cols, n_components=0.85).fit(original)
    transformed = transformer.transform(original)

    # the untouched columns should be equal to what went in
    untouched_cols = transformed[comp_column_names].values
    assert_array_almost_equal(compare_cols, untouched_cols)

    # make sure the component is present in the columns
    assert 'PC1' in transformed.columns
    assert transformed.shape[1] == 4
    assert isinstance(transformer.get_decomposition(), PCA)

    # an unfitted transformer has no decomposition to hand back
    assert SelectivePCA().get_decomposition() is None

    # test that cols was provided
    assert isinstance(transformer.cols, list)
    assert transformer.cols[0] == cols[0]

    # what if we want to weight it? The weighted result must differ from
    # the single-column one, so ``assert_array_equal`` is expected to fail.
    pca_weighted = SelectivePCA(
        do_weight=True, n_components=0.99, as_df=True
    ).fit_transform(original)
    assert_raises(AssertionError, assert_array_equal,
                  pca_weighted, transformed)
def test_qr():
    """Exercise ``QRDecomposition`` on ``X``: aux vector, ranks, coefficients.

    Checks ``qraux`` against known values, that both rank accessors report
    4, that ``get_coef(X)`` is (almost) the 4x4 identity, and that passing a
    row-sliced ``X`` to ``get_coef`` raises ``ValueError``.
    """
    decomp = QRDecomposition(X)

    # the decomposition's auxiliary vector
    expected_aux = np.array([1.07056264, 1.0559255, 1.03857984, 1.04672249])
    assert_array_almost_equal(decomp.qraux, expected_aux)

    # rank of the decomposition and of the R matrix
    assert decomp.get_rank() == 4
    assert decomp.get_R_rank() == 4

    # coefficients of X against its own decomposition
    actual_coef = decomp.get_coef(X)
    expected_coef = np.array([
        [1.00000000e+00, 1.96618714e-16, -0.00000000e+00, -2.00339858e-16],
        [3.00642915e-16, 1.00000000e+00, -0.00000000e+00, 1.75787325e-16],
        [-4.04768123e-16, 4.83060041e-17, 1.00000000e+00, 4.23545747e-16],
        [-1.19866575e-16, -1.74365433e-17, 1.10216442e-17, 1.00000000e+00]
    ])
    assert_array_almost_equal(actual_coef, expected_coef)

    # fewer rows than the decomposition was built on -> dimension error
    truncated = X[:140, :]
    assert_raises(ValueError, decomp.get_coef, truncated)
def test_alternative_exception():
    """Check ``assert_raises`` when the callee raises a different exception.

    A helper that raises ``TypeError`` is wrapped in a second helper that
    asks ``assert_raises`` for ``ValueError`` instead. The test expects the
    ``TypeError`` to surface from that wrapper.
    """
    def raise_type_error():
        raise TypeError("This is a type error!")

    def expect_wrong_exception():
        # asks for ValueError although the callee raises TypeError
        assert_raises(ValueError, raise_type_error)

    # the straightforward case: the expected type matches
    assert_raises(TypeError, raise_type_error)

    # the mismatched case: the wrapper itself must fail with TypeError
    assert_raises(TypeError, expect_wrong_exception)
def test_check_dataframe_infinite():
    """``check_dataframe`` only rejects masked values when asked to.

    ``X`` is masked wherever it is below 0.3. With default arguments the
    frame comes back equal to the input; with ``assert_all_finite=True`` a
    ``ValueError`` is expected.
    """
    masked = X.mask(X < 0.3)

    # default call: no error, and the first returned item equals the input
    checked, _ignored = check_dataframe(masked)
    assert checked.equals(masked)

    # strict call: the masked entries must now trigger a ValueError
    assert_raises(ValueError, check_dataframe, masked,
                  assert_all_finite=True)
def test_nzv_bad_freq_cut():
    """``NearZeroVarianceFilter.fit`` rejects invalid ``freq_cut`` values.

    Both the float ``1.`` and the string ``'1.'`` are expected to make
    ``fit`` raise ``ValueError``.
    """
    frame = pd.DataFrame.from_records(
        data=np.array([[1, 2, 3],
                       [4, 5, 3],
                       [6, 7, 5]]),
        columns=['a', 'b', 'c'])

    # first a bad float value, then a value that is not a float/int at all
    for bad_freq_cut in (1., '1.'):
        selector = NearZeroVarianceFilter(freq_cut=bad_freq_cut)
        assert_raises(ValueError, selector.fit, frame)
def test_validate_test_cols():
    """``validate_test_set_columns`` needs every fit column in the test set.

    Identical columns and a superset both pass; dropping a fit column from
    the test set is expected to raise ``ValueError``.
    """
    fit_cols = ['a', 'b', 'c']
    test_cols = list(fit_cols)

    # same columns on both sides: passes
    validate_test_set_columns(fit_cols, test_cols)

    # an extra test column is fine; all fit columns are still there
    test_cols += ['d']
    validate_test_set_columns(fit_cols, test_cols)

    # drop the leading 'a' so one fit column is missing from the test set
    test_cols = test_cols[1:]
    assert_raises(ValueError, validate_test_set_columns, fit_cols, test_cols)
def test_linear_combos():
    """``LinearCombinationFilter`` on ``Z``: drop 'C', keep 'A' and 'B'.

    Also covers the case where no column is dropped (``cols=['A', 'B']``)
    and the single-column case, which is expected to raise ``ValueError``.
    """
    fitted = LinearCombinationFilter().fit(Z)
    dropped = fitted.drop_
    assert dropped == ['C'], dropped

    reduced = fitted.transform(Z)
    assert_array_equal(reduced.columns.values, ['A', 'B'])
    assert (reduced.B == 1).all()

    # restricted to A and B nothing is dropped, and Z is returned as-is
    fitted = LinearCombinationFilter(cols=['A', 'B']).fit(Z)
    assert not fitted.drop_
    assert Z.equals(fitted.transform(Z))

    # too few features to fit on
    single_col_filter = LinearCombinationFilter(cols=['A'])
    assert_raises(ValueError, single_col_filter.fit, Z)
def test_interaction_corners():
    """Corner cases for ``InteractionTermTransformer``.

    Covers: a non-callable ``interaction_function`` (``TypeError`` on fit),
    a two-column fit whose output must have exactly the expected column
    names and values, and a transform on a frame missing a fit column
    (``ValueError``).
    """
    # assert fails with a non-function arg
    assert_raises(
        TypeError,
        InteractionTermTransformer(interaction_function='a').fit,
        X_pd)

    # test with just two cols
    trans = InteractionTermTransformer(cols=['a', 'b'])
    X_trans = trans.fit_transform(X_pd)

    # assert col names equal. Compare the whole lists: a zip()-based
    # element-wise check stops at the shorter sequence and would pass
    # even if columns were missing or extra.
    expected_names = ['a', 'b', 'c', 'd', 'a_b_I']
    assert X_trans.columns.tolist() == expected_names

    # ``.values`` is used instead of ``DataFrame.as_matrix()``, which was
    # deprecated and later removed from pandas.
    assert_array_equal(
        X_trans.values,
        np.array([[0, 1, 0, 1, 0],
                  [0, 0, 1, 1, 0],
                  [0, 0, 0, 1, 0],
                  [1, 1, 1, 0, 1]]))

    # test diff columns on test set to force value error
    X_test = X_pd.drop(['a'], axis=1)
    assert_raises(ValueError, trans.transform, X_test)
def test_selective_imputer_bad_strategies():
    """``SelectiveImputer.fit`` rejects malformed ``strategy`` arguments.

    Each case builds an imputer from the given keyword arguments and
    expects ``fit(X)`` to raise the paired exception type.
    """
    cases = [
        # a bad strategy string
        (ValueError, dict(strategy="bad strategy")),
        # a dim mismatch between cols and strategy
        (ValueError, dict(cols=['a'], strategy=['mean', 'mean'])),
        # a strategy of the wrong type
        (TypeError, dict(strategy=1)),
        # a dict strategy that does not match cols dim-wise
        (ValueError, dict(cols=['a'],
                          strategy={'a': 'mean', 'b': 'median'})),
        # a dict strategy naming a bad column
        (ValueError, dict(strategy={'a': 'mean', 'D': 'median'})),
    ]

    for expected_error, imputer_kwargs in cases:
        imputer = SelectiveImputer(**imputer_kwargs)
        assert_raises(expected_error, imputer.fit, X)
def test_failing_assert_raises():
    """``assert_raises`` must itself fail when nothing is raised.

    The inner helper expects ``ValueError`` from a no-op lambda; the test
    expects that helper to fail with ``AssertionError``.
    """
    def expect_error_from_no_op():
        # the lambda raises nothing, so this expectation cannot be met
        assert_raises(ValueError, func=(lambda: None))

    assert_raises(AssertionError, expect_error_from_no_op)
def test_assert_raises():
    """``assert_raises`` accepts a ``ValueError`` from ``func_that_raises``.

    ``func_that_raises`` is defined outside this block.
    """
    expected_error = ValueError
    assert_raises(expected_error, func_that_raises)
def test_mcf_non_finite():
    """``MultiCorrFilter.fit`` raises ``ValueError`` on ``sparse``.

    ``sparse`` is defined outside this block; going by the test name it
    presumably contains non-finite values.
    """
    assert_raises(ValueError, MultiCorrFilter(threshold=0.75).fit, sparse)
def test_check_dataframe_bad_X():
    """``check_dataframe`` raises ``TypeError`` when handed a plain string."""
    not_a_frame = 'string'
    assert_raises(TypeError, check_dataframe, not_a_frame)
def test_check_dataframe_array_cols():
    """``check_dataframe`` raises ``ValueError`` for these ``cols`` on ``array``.

    ``array`` is defined outside this block; presumably the five requested
    column indices do not all exist in it.
    """
    requested_cols = [1, 2, 3, 4, 5]
    assert_raises(ValueError, check_dataframe, array, cols=requested_cols)
def func_that_asserts_incorrectly():
    """Ask ``assert_raises`` for ``ValueError`` from the type-error helper.

    ``func_that_raises_type_error`` is defined outside this block; going by
    its name it raises ``TypeError``, so the expectation here is
    deliberately the wrong one.
    """
    wrong_expectation = ValueError
    assert_raises(wrong_expectation, func_that_raises_type_error)
def test_get_callable_key_error():
    """``_get_callable`` raises ``ValueError`` for an unknown strategy name.

    The requested key is a string that is absent from the mapping of valid
    strategies.
    """
    requested = "some strategy"
    known_strategies = {"some other strategy": (lambda: None)}
    assert_raises(ValueError, _get_callable, requested, known_strategies)
def test_bagged_classifier_continuous():
    """``BaggedClassifierImputer.fit`` raises ``ValueError`` on ``X``.

    ``X`` is defined outside this block; per the original comment it holds
    continuous data, which the classifier-based imputer is expected to
    reject.
    """
    assert_raises(ValueError, BaggedClassifierImputer().fit, X)
def test_bagged_regressor_single_predictor_corner():
    """``BaggedRegressorImputer.fit`` raises with a lone predictor ``'a'``.

    Per the original comment, the failure is because the only predictor is
    itself among the columns to impute.
    """
    assert_raises(ValueError,
                  BaggedRegressorImputer(predictors=['a']).fit,
                  X)
def test_validate_multiple_rows():
    """``validate_multiple_rows`` rejects one row and accepts two."""
    two_rows = np.random.rand(2, 2)
    one_row = two_rows[:1, :]

    # a single-row slice must raise
    assert_raises(ValueError, validate_multiple_rows, "cls", one_row)

    # the full two-row array passes without error
    validate_multiple_rows("cls", two_rows)
def func_that_fails_assertion():
    """Ask ``assert_raises`` for ``ValueError`` from a callable that raises nothing."""
    no_op = (lambda: None)
    assert_raises(ValueError, func=no_op)
def test_get_callable_type_error():
    """``_get_callable`` raises ``TypeError`` for a non-string, non-callable key."""
    requested = 123  # neither a string nor a callable
    known_strategies = {"some strategy": (lambda: None)}
    assert_raises(TypeError, _get_callable, requested, known_strategies)
def test_check_dataframe_bad_cols():
    """``check_dataframe`` raises ``ValueError`` for ``cols=['bad', 'cols']``.

    ``X`` is defined outside this block; presumably neither name is one of
    its columns.
    """
    requested_cols = ['bad', 'cols']
    assert_raises(ValueError, check_dataframe, X, cols=requested_cols)
def test_raise_build_error():
    """``raise_build_error`` turns a caught exception into ``ImportError``.

    A dummy ``ValueError`` is raised and caught so that a genuinely raised
    exception object is handed to ``raise_build_error``.
    """
    try:
        raise ValueError("this is a dummy err msg")
    except ValueError as dummy_error:
        assert_raises(ImportError, raise_build_error, dummy_error)
def test_none_present():
    """``_get_present_values`` raises ``ValueError`` when every value is null.

    The input is a five-element all-NaN series together with its null mask.
    """
    all_missing = pd.Series(np.ones(5) * np.nan)
    null_mask = all_missing.isnull()
    assert_raises(ValueError, _get_present_values, all_missing, null_mask)