def test_integration_nonperfect_linear():
    """Chain data_gen, train_test_split, cross_validation and summary_cv.

    Data is generated with ``Non_perfect=True``; the summary of the
    cross-validation scores on the training split must have mean and
    median below 1 and a strictly positive standard deviation.
    """
    features, target = data_gen(nrows=100, Non_perfect=True)
    # Only the training part of the split is cross-validated.
    train_features, _, train_target, _ = train_test_split(features, target)
    cv_scores = cross_validation(
        lm(), X=train_features, y=train_target, shuffle=False
    )
    stats = summary_cv(cv_scores)
    assert (
        stats['mean'] < 1
        and stats['median'] < 1
        and stats['sd'] > 0
    ), 'Non-perfect linear relation test does not give correct summary'
def test_X_y_perfect_linear_random_state_None():
    """Shuffled CV with ``random_state=None`` must still score 1 on average.

    Data is generated with ``Non_perfect=False``. The mean score is
    compared with a floating-point tolerance, because an exact ``== 1``
    check can fail on rounding error alone.
    """
    X, y = data_gen(nrows=100, Non_perfect=False)
    function_scores = cross_validation(
        lm(), X=X, y=y, shuffle=True, random_state=None
    )
    assert np.isclose(
        np.mean(function_scores), 1
    ), "shuffle=True, randome state should have no effect"
def test_shuffle_as_boolean():
    """Non-boolean ``shuffle`` values must be rejected with a TypeError.

    ``pytest.raises`` takes the exception class; the expected message is
    checked through ``match``. Passing an exception instance makes pytest
    itself raise TypeError before the body runs.
    """
    # NOTE(review): X and y are not defined in this function; presumably
    # module-level fixtures -- confirm they exist.
    # An estimator instance is passed, consistent with the other tests.
    for bad_shuffle in ('1', 1, 1.0):
        with pytest.raises(TypeError, match='`shuffle` must be True or False'):
            cross_validation(lm(), X=X, y=y, shuffle=bad_shuffle)
def test_X_y_Nrows():
    """cross_validation must raise TypeError for 2-row data with ``k=2``."""
    features, target = data_gen(nrows=2)
    with pytest.raises(TypeError):
        cross_validation(lm(), X=features, y=target, k=2)
def test_y_one_column():
    """Using the X object as ``y`` must make cross_validation raise TypeError."""
    # The generated y is discarded; X is deliberately passed as the target.
    # Presumably X has more than one column -- verify against data_gen.
    features, _ = data_gen()
    with pytest.raises(TypeError):
        cross_validation(lm(), X=features, y=features)
def test_X_y_perfect_linear():
    """Unshuffled CV on ``Non_perfect=False`` data must score 1 on average.

    The mean score is compared with a floating-point tolerance, because
    an exact ``== 1`` check can fail on rounding error alone.
    """
    X, y = data_gen(nrows=100, Non_perfect=False)
    function_scores = cross_validation(lm(), X=X, y=y, shuffle=False)
    assert np.isclose(
        np.mean(function_scores), 1
    ), "Testing perfect linear case should have perfect socres (score=1)"
def test_k_range_k_not_one():
    """cross_validation must raise TypeError when ``k=1``."""
    features, target = data_gen(nrows=5)
    with pytest.raises(TypeError):
        cross_validation(lm(), X=features, y=target, k=1)
def test_random_state_range():
    """cross_validation must raise TypeError for a negative ``random_state``."""
    features, target = data_gen()
    with pytest.raises(TypeError):
        cross_validation(lm(), X=features, y=target, random_state=-10)
def test_y_one_column():
    """A ``y`` with more than one feature must be rejected with a TypeError.

    ``pytest.raises`` takes the exception class; the expected message is
    checked through ``match``. Passing an exception instance makes pytest
    itself raise TypeError before the body runs.
    """
    # NOTE(review): X and y_2 are not defined in this function; presumably
    # module-level fixtures -- confirm they exist.
    # An estimator instance is passed, consistent with the other tests.
    with pytest.raises(TypeError, match='`y` is more than one feature'):
        cross_validation(lm(), X=X, y=y_2)
def test_X_as_dataframe():
    """cross_validation must raise TypeError when ``X`` is a plain string."""
    # Only the generated y is needed; X is replaced by an invalid value.
    _, target = data_gen()
    with pytest.raises(TypeError):
        cross_validation(lm(), X="X", y=target)
def test_shuffle_as_boolean_not_numeric():
    """cross_validation must raise TypeError when ``shuffle`` is the float 1.0."""
    features, target = data_gen()
    with pytest.raises(TypeError):
        cross_validation(lm(), X=features, y=target, shuffle=1.0)
def test_random_state_as_number():
    """cross_validation must raise TypeError when ``random_state`` is a string."""
    features, target = data_gen()
    with pytest.raises(TypeError):
        cross_validation(lm(), X=features, y=target, random_state='10')
def test_compare_sklearn_mod_not_0():
    """cross_validation scores must agree with sklearn's ``cross_val_score``.

    Uses 100 rows and ``cv=3`` on the sklearn side, so the rows do not
    split evenly across folds. Presumably ``cross_validation`` also
    defaults to 3 folds -- confirm against its signature.
    """
    X, y = data_gen(nrows=100)
    function_scores = cross_validation(lm(), X=X, y=y, shuffle=False)
    sklearn_scores = cross_val_score(lm(), X=X, y=y, cv=3)
    # Compare absolute differences: without abs(), a score far *below*
    # sklearn's gives a large negative difference that passes the check.
    score_gap = abs(function_scores - sklearn_scores)
    assert max(score_gap) < 0.000001, "results doesn't match sklearn"
def test_k_as_number():
    """cross_validation must raise TypeError when ``k`` is a string."""
    features, target = data_gen()
    with pytest.raises(TypeError):
        cross_validation(lm(), X=features, y=target, k='3')
def test_X_y_not_perfect_linear():
    """Mean CV score on ``Non_perfect=True`` data must lie strictly in (0, 1).

    The failure message now describes this check; the previous text was
    copied from the sklearn comparison test and did not.
    """
    X, y = data_gen(nrows=100, Non_perfect=True)
    function_scores = cross_validation(lm(), X=X, y=y, shuffle=False)
    mean_score = np.mean(function_scores)
    assert 0 < mean_score < 1, (
        "mean score for a non-perfect linear relation should be "
        "strictly between 0 and 1"
    )
def test_X_y_match():
    """cross_validation must raise TypeError when ``y`` is truncated to 90 items."""
    features, target = data_gen()
    # Presumably data_gen() returns more than 90 rows by default, so the
    # slice makes y shorter than X -- verify against data_gen.
    shortened_target = target[:90]
    with pytest.raises(TypeError):
        cross_validation(lm(), X=features, y=shortened_target)
def test_X_y_perfect_linear():
    """Unshuffled CV on ``Non_perfect=False`` data must score 1 on average.

    The mean score is compared with a floating-point tolerance, because
    an exact ``== 1`` check can fail on rounding error alone.
    """
    X, y = data_gen(nrows=100, Non_perfect=False)
    function_scores = cross_validation(lm(), X=X, y=y, shuffle=False)
    assert np.isclose(
        np.mean(function_scores), 1
    ), "Testing perfect linear case should have perfect socres (score=1)"
def test_model_linear_regression():
    """cross_validation must raise TypeError when the model is a plain string."""
    features, target = data_gen()
    with pytest.raises(TypeError):
        cross_validation("LINEAR MODEL", X=features, y=target, random_state=10)
def test_k_range_k_not_larger_than_nrows():
    """cross_validation must raise TypeError for ``k=40`` on 5-row data."""
    features, target = data_gen(nrows=5)
    with pytest.raises(TypeError):
        cross_validation(lm(), X=features, y=target, k=40)
def test_X_as_dataframe():
    """A non-dataframe ``X`` must be rejected with a TypeError.

    ``pytest.raises`` takes the exception class; the expected message is
    checked through ``match``. Passing an exception instance makes pytest
    itself raise TypeError before the body runs.
    """
    # NOTE(review): X_matrix and y are not defined in this function;
    # presumably module-level fixtures -- confirm they exist.
    # An estimator instance is passed, consistent with the other tests.
    with pytest.raises(TypeError, match='`X` must be a dataframe'):
        cross_validation(lm(), X=X_matrix, y=y)
def test_X_y_Nrows():
    """Two-row inputs must be rejected with a TypeError.

    ``pytest.raises`` takes the exception class; the expected message is
    checked through ``match``. Passing an exception instance makes pytest
    itself raise TypeError before the body runs.
    """
    # NOTE(review): X and y are not defined in this function; presumably
    # module-level DataFrames (``.iloc`` is used on both) -- confirm.
    # An estimator instance is passed, consistent with the other tests.
    with pytest.raises(
        TypeError, match='sample size is less than 3, too small for CV'
    ):
        cross_validation(lm(), X=X.iloc[0:2, :], y=y.iloc[0:2, :])
def test_y_as_dataframe():
    """A non-dataframe ``y`` must be rejected with a TypeError.

    ``pytest.raises`` takes the exception class; the expected message is
    checked through ``match``. Passing an exception instance makes pytest
    itself raise TypeError before the body runs.
    """
    # NOTE(review): X and y_list are not defined in this function;
    # presumably module-level fixtures -- confirm they exist.
    # An estimator instance is passed, consistent with the other tests.
    with pytest.raises(TypeError, match='`y` must be a dataframe'):
        cross_validation(lm(), X=X, y=y_list)
def test_k_as_number():
    """A string ``k`` must be rejected with a TypeError.

    ``pytest.raises`` takes the exception class; the expected message is
    checked through ``match``. Passing an exception instance makes pytest
    itself raise TypeError before the body runs.
    """
    # NOTE(review): X and y are not defined in this function; presumably
    # module-level fixtures -- confirm they exist.
    # An estimator instance is passed, consistent with the other tests.
    with pytest.raises(TypeError, match='`k` must be an integer'):
        cross_validation(lm(), X=X, y=y, k='3')
def test_X_y_not_perfect_linear():
    """Mean CV score on ``Non_perfect=True`` data must lie strictly in (0, 1).

    The failure message now describes this check; the previous text was
    copied from the sklearn comparison test and did not.
    """
    X, y = data_gen(nrows=100, Non_perfect=True)
    function_scores = cross_validation(lm(), X=X, y=y, shuffle=False)
    mean_score = np.mean(function_scores)
    assert 0 < mean_score < 1, (
        "mean score for a non-perfect linear relation should be "
        "strictly between 0 and 1"
    )
def test_y_as_dataframe():
    """cross_validation must raise TypeError when ``y`` is a plain string."""
    # Only the generated X is needed; y is replaced by an invalid value.
    features, _ = data_gen()
    with pytest.raises(TypeError):
        cross_validation(lm(), X=features, y="y")
def test_X_y_perfect_linear_random_state_None():
    """Shuffled CV with ``random_state=None`` must still score 1 on average.

    Data is generated with ``Non_perfect=False``. The mean score is
    compared with a floating-point tolerance, because an exact ``== 1``
    check can fail on rounding error alone.
    """
    X, y = data_gen(nrows=100, Non_perfect=False)
    function_scores = cross_validation(
        lm(), X=X, y=y, shuffle=True, random_state=None
    )
    assert np.isclose(
        np.mean(function_scores), 1
    ), "shuffle=True, randome state should have no effect"
def test_random_state_as_number():
    """A string ``random_state`` must be rejected with a TypeError.

    ``pytest.raises`` takes the exception class; the expected message is
    checked through ``match``. Passing an exception instance makes pytest
    itself raise TypeError before the body runs.
    """
    # NOTE(review): X and y are not defined in this function; presumably
    # module-level fixtures -- confirm they exist.
    # An estimator instance is passed, consistent with the other tests.
    with pytest.raises(
        TypeError, match='`random_state` must be a number or None'
    ):
        cross_validation(lm(), X=X, y=y, random_state='10')
def test_k_range():
    """Out-of-range ``k`` values (1 and 40) must be rejected with a TypeError.

    ``pytest.raises`` takes the exception class; the expected message is
    checked through ``match``. Passing an exception instance makes pytest
    itself raise TypeError before the body runs.
    """
    # NOTE(review): X and y are not defined in this function; presumably
    # module-level fixtures with fewer than 40 rows -- confirm.
    # An estimator instance is passed, consistent with the other tests.
    with pytest.raises(TypeError, match='`k` must be an integer 2 or greater'):
        cross_validation(lm(), X=X, y=y, k=1)
    with pytest.raises(
        TypeError, match='`k` must be greater than # obs in X and y'
    ):
        cross_validation(lm(), X=X, y=y, k=40)
def test_random_state_range():
    """A negative ``random_state`` must be rejected with a TypeError.

    ``pytest.raises`` takes the exception class; the expected message is
    checked through ``match``. Passing an exception instance makes pytest
    itself raise TypeError before the body runs.
    """
    # NOTE(review): X and y are not defined in this function; presumably
    # module-level fixtures -- confirm they exist.
    # An estimator instance is passed, consistent with the other tests.
    with pytest.raises(TypeError, match='`random_state` must be nonnegative'):
        cross_validation(lm(), X=X, y=y, random_state=-10)
def test_X_y_match():
    """Mismatched ``X`` and ``y`` sizes must be rejected with a TypeError.

    ``pytest.raises`` takes the exception class; the expected message is
    checked through ``match``. Passing an exception instance makes pytest
    itself raise TypeError before the body runs.
    """
    # NOTE(review): X_longer and y are not defined in this function;
    # presumably module-level fixtures of different lengths -- confirm.
    # An estimator instance is passed, consistent with the other tests.
    with pytest.raises(TypeError, match="dim of `X` doesn't equal dim of `y`"):
        cross_validation(lm(), X=X_longer, y=y)