Beispiel #1
0
def test_all_metrics():
    """Each supported metric gives its known fitness; an unknown one fails."""

    rng = check_random_state(415)

    # A tiny hand-written program built from named binary functions only
    program = ['mul2', 'div2', 8, 1, 'sub2', 9, .5]
    gp = _Program(function_set=['add2', 'sub2', 'mul2', 'div2'],
                  arities={2: ['add2', 'sub2', 'mul2', 'div2']},
                  init_depth=(2, 6),
                  init_method='half and half',
                  n_features=10,
                  const_range=(-1.0, 1.0),
                  metric='mean absolute error',
                  p_point_replace=0.05,
                  parsimony_coefficient=0.1,
                  random_state=rng,
                  program=program)

    # Data is drawn after the program is built, X first and then y
    X = np.reshape(rng.uniform(size=50), (5, 10))
    y = rng.uniform(size=5)
    weights = np.ones(5)

    def fitness_under(metric_name):
        # Switch the metric, refresh the raw fitness, return the final fitness
        gp.metric = metric_name
        gp.raw_fitness_ = gp.raw_fitness(X, y, weights)
        return gp.fitness()

    metric_names = ['mean absolute error', 'mse', 'rmse', 'rmsle',
                    'pearson', 'spearman']
    observed = [fitness_under(metric_name) for metric_name in metric_names]
    assert_array_almost_equal(
        observed,
        [1.48719809776, 1.82389179833, 1.76013763179, 0.98663772258,
         -0.2928200724, -0.5])

    # A metric name that does not exist must be rejected
    gp.metric = 'the larch'
    assert_raises(ValueError, gp.raw_fitness, X, y, weights)
Beispiel #2
0
def check_regressors_train(name, Regressor):
    # Common training check for regressors: mismatched X/y lengths are
    # rejected, fitting works on arrays and on plain lists, and the training
    # score clears a minimal threshold for most estimators.
    X, y = _boston_subset()
    # X is already scaled, so only the target is standardised here
    y = multioutput_estimator_convert_y_2d(
        name, StandardScaler().fit_transform(y))
    rnd = np.random.RandomState(0)

    # catch deprecation warnings
    with warnings.catch_warnings(record=True):
        regressor = Regressor()
    set_fast_parameters(regressor)
    if not hasattr(regressor, 'alphas'):
        if hasattr(regressor, 'alpha'):
            # linear regressors need to set alpha, but not generalized CV ones
            regressor.alpha = 0.01
    if name == 'PassiveAggressiveRegressor':
        regressor.C = 0.01

    # fit must raise on a target with one sample too few
    assert_raises(ValueError, regressor.fit, X, y[:-1])

    # cross-decomposition estimators get a second, noisy target column
    if name in CROSS_DECOMPOSITION:
        y_ = np.vstack([y, 2 * y + rnd.randint(2, size=len(y))]).T
    else:
        y_ = y

    set_random_state(regressor)
    regressor.fit(X, y_)
    regressor.fit(X.tolist(), y_.tolist())
    regressor.predict(X)

    # TODO: find out why PLS and CCA fail. RANSAC is random
    # and furthermore assumes the presence of outliers, hence
    # skipped
    skip_score = ('PLSCanonical', 'CCA', 'RANSACRegressor')
    if name in skip_score:
        return
    print(regressor)
    assert_greater(regressor.score(X, y_), 0.5)
Beispiel #3
0
def check_transformers_unfitted(name, Transformer):
    # A transformer that was never fitted must raise NotFittedError when
    # transform is called.
    features, _ = _boston_subset()

    # record any warnings raised while constructing the transformer
    with warnings.catch_warnings(record=True):
        unfitted = Transformer()

    assert_raises(NotFittedError, unfitted.transform, features)
Beispiel #4
0
def test_transformer_iterable():
    """Check len(), iteration and indexing on the transformer"""

    rng = check_random_state(415)
    X = np.reshape(rng.uniform(size=50), (5, 10))
    y = rng.uniform(size=5)
    est = SymbolicTransformer(generations=2, random_state=0)

    def program_lengths():
        # Length of every program the estimator currently yields
        return [gp.length_ for gp in est]

    # Before fitting there is nothing to iterate over
    n_before = len(est)
    lengths_before = program_lengths()
    assert_true(n_before == 0)
    assert_true(lengths_before == [])

    # After fitting the estimator yields ten programs of known lengths
    est.fit(X, y)
    n_after = len(est)
    lengths_after = program_lengths()
    assert_true(n_after == 10)
    assert_true(lengths_after == [15, 19, 19, 12, 9, 10, 7, 14, 6, 21])

    # Indexing one past the last program must raise IndexError
    assert_raises(IndexError, est.__getitem__, 10)
Beispiel #5
0
def test_transformer_iterable():
    """Check len(), iteration and indexing on the transformer"""

    rng = check_random_state(415)
    X = np.reshape(rng.uniform(size=50), (5, 10))
    y = rng.uniform(size=5)
    est = SymbolicTransformer(
        population_size=500,
        generations=2,
        function_set=['add', 'sub', 'mul', 'div', 'sqrt', 'log', 'abs',
                      'neg', 'inv', 'max', 'min'],
        random_state=0)

    def snapshot():
        # Number of programs and the length of each one, as seen right now
        return len(est), [gp.length_ for gp in est]

    # Unfitted: empty
    count, lengths = snapshot()
    assert_true(count == 0)
    assert_true(lengths == [])

    # Fitted: ten programs with known lengths
    est.fit(X, y)
    count, lengths = snapshot()
    assert_true(count == 10)
    assert_true(lengths == [15, 19, 19, 12, 9, 10, 7, 14, 6, 21])

    # Indexing one past the last program must raise IndexError
    assert_raises(IndexError, est.__getitem__, 10)
Beispiel #6
0
def test_validate_program():
    """A well-formed program is accepted; malformed ones raise ValueError"""

    unary = ['sqrt1', 'log1', 'abs1']
    binary = ['add2', 'sub2', 'mul2', 'div2', 'max2', 'min2']
    function_set = ['add2', 'sub2', 'mul2', 'div2',
                    'sqrt1', 'log1', 'abs1', 'max2', 'min2']
    rng = check_random_state(415)

    def build(program):
        # Construct a _Program positionally with the shared settings
        return _Program(function_set,
                        {1: unary, 2: binary},   # arities
                        (2, 6),                  # init_depth
                        'half and half',         # init_method
                        10,                      # n_features
                        (-1.0, 1.0),             # const_range
                        'mean absolute error',   # metric
                        0.05,                    # p_point_replace
                        0.1,                     # parsimony_coefficient
                        rng,
                        program)

    test_gp = ['sub2', 'abs1', 'sqrt1', 'log1', 'log1', 'sqrt1', 7, 'abs1',
               'abs1', 'abs1', 'log1', 'sqrt1', 2]

    # The complete program is valid
    build(test_gp)

    # Dropping the last node, or appending an extra one, is not
    assert_raises(ValueError, build, test_gp[:-1])
    assert_raises(ValueError, build, test_gp + [1])
Beispiel #7
0
def test_make_rng():
    # Exercise check_random_state with each kind of accepted seed
    global_rng = np.random.mtrand._rand

    # None and the np.random module both map to numpy's global generator
    for seed in (None, np.random):
        assert_true(check_random_state(seed) is global_rng)

    # An integer seed gives the same stream as RandomState(seed)
    reference = np.random.RandomState(42)
    assert_true(check_random_state(42).randint(100) == reference.randint(100))

    # An existing RandomState instance is passed through unchanged
    reference = np.random.RandomState(42)
    assert_true(check_random_state(reference) is reference)

    # A different integer seed gives a different first draw
    reference = np.random.RandomState(42)
    assert_true(check_random_state(43).randint(100) != reference.randint(100))

    # Anything else is rejected
    assert_raises(ValueError, check_random_state, "some invalid seed")
Beispiel #8
0
def test_make_rng():
    # Exercise check_random_state with each kind of accepted seed
    shared = np.random.mtrand._rand

    # None and the np.random module resolve to numpy's global generator
    assert_true(check_random_state(None) is shared)
    assert_true(check_random_state(np.random) is shared)

    # Seeding with 42 reproduces RandomState(42)
    expected_draw = np.random.RandomState(42).randint(100)
    assert_true(check_random_state(42).randint(100) == expected_draw)

    # A RandomState instance is returned as-is
    instance = np.random.RandomState(42)
    assert_true(check_random_state(instance) is instance)

    # Seeding with 43 does not reproduce the first draw of RandomState(42)
    other_draw = check_random_state(43).randint(100)
    assert_true(other_draw != np.random.RandomState(42).randint(100))

    # Invalid seeds raise ValueError
    assert_raises(ValueError, check_random_state, "some invalid seed")
Beispiel #9
0
def check_estimators_empty_data_messages(name, Estimator):
    # Fitting on data with zero samples or zero features must raise
    # ValueError; for zero features the message is checked as well.
    estimator = Estimator()
    set_fast_parameters(estimator)
    set_random_state(estimator, 1)

    # The precise message can change depending on whether X or y is
    # validated first, so only the exception type is tested here.
    no_samples = np.empty(0).reshape(0, 3)
    assert_raises(ValueError, estimator.fit, no_samples, [])

    # This y should be accepted by both classifiers and regressors
    # and ignored by unsupervised models.
    no_features = np.empty(0).reshape(3, 0)
    target = multioutput_estimator_convert_y_2d(name, np.array([1, 0, 1]))
    assert_raise_message(
        ValueError,
        "0 feature(s) (shape=(3, 0)) while a minimum of 1 is required.",
        estimator.fit, no_features, target)
Beispiel #10
0
def check_estimators_partial_fit_n_features(name, Alg):
    # partial_fit must raise ValueError when a later call receives a
    # different number of features. Estimators without partial_fit are
    # skipped.
    if not hasattr(Alg, 'partial_fit'):
        return

    X, y = make_blobs(n_samples=50, random_state=1)
    X -= X.min()
    with warnings.catch_warnings(record=True):
        alg = Alg()
    set_fast_parameters(alg)

    # Classifiers get the full label set passed in on the first call
    extra = {'classes': np.unique(y)} if isinstance(alg, ClassifierMixin) \
        else {}
    alg.partial_fit(X, y, **extra)

    # One column fewer than on the first call
    assert_raises(ValueError, alg.partial_fit, X[:, :-1], y)
Beispiel #11
0
def test_input_shape():
    """A feature count that differs from the training data is rejected"""

    rng = check_random_state(415)
    X = np.reshape(rng.uniform(size=50), (5, 10))
    y = rng.uniform(size=5)
    # Same number of rows, one column fewer
    X_narrow = np.reshape(rng.uniform(size=45), (5, 9))

    # The regressor is checked through predict, the transformer through
    # transform
    cases = ((SymbolicRegressor, 'predict'),
             (SymbolicTransformer, 'transform'))
    for Estimator, method_name in cases:
        est = Estimator(generations=2, random_state=0)
        est.fit(X, y)
        assert_raises(ValueError, getattr(est, method_name), X_narrow)
Beispiel #12
0
def test_input_shape():
    """A feature count that differs from the training data is rejected"""

    rng = check_random_state(415)
    train_X = np.reshape(rng.uniform(size=50), (5, 10))
    train_y = rng.uniform(size=5)
    # Nine features instead of the ten seen during fit
    wrong_X = np.reshape(rng.uniform(size=45), (5, 9))

    def fitted(Estimator):
        # Build and fit an estimator with the settings shared by both checks
        model = Estimator(generations=2, random_state=0)
        model.fit(train_X, train_y)
        return model

    # Regressor: predict must refuse the narrower matrix
    assert_raises(ValueError, fitted(SymbolicRegressor).predict, wrong_X)

    # Transformer: transform must refuse it too
    assert_raises(ValueError, fitted(SymbolicTransformer).transform, wrong_X)
Beispiel #13
0
def check_classifiers_pickle(name, Classifier):
    # A fitted classifier must give the same predictions after a pickle
    # round trip.
    X, y = shuffle(*make_blobs(random_state=0), random_state=7)
    X -= X.min()

    # catch deprecation warnings
    with warnings.catch_warnings(record=True):
        classifier = Classifier()
    set_fast_parameters(classifier)

    # fit must raise when y has one sample fewer than X
    assert_raises(ValueError, classifier.fit, X, y[:-1])

    classifier.fit(X, y)
    expected = classifier.predict(X)

    # Serialise, restore, and predict again with the restored copy
    restored = pickle.loads(pickle.dumps(classifier))
    assert_array_almost_equal(restored.predict(X), expected)
Beispiel #14
0
def test_column_or_1d():
    # column_or_1d ravels binary, multiclass and continuous targets and
    # raises ValueError for every other target type listed here.
    ravelled_types = ("binary", "multiclass", "continuous")
    cases = [
        ("binary", ["spam", "egg", "spam"]),
        ("binary", [0, 1, 0, 1]),
        ("continuous", np.arange(10) / 20.),
        ("multiclass", [1, 2, 3]),
        ("multiclass", [0, 1, 2, 2, 0]),
        ("multiclass", [[1], [2], [3]]),
        ("multilabel-indicator", [[0, 1, 0], [0, 0, 1]]),
        ("multiclass-multioutput", [[1, 2, 3]]),
        ("multiclass-multioutput", [[1, 1], [2, 2], [3, 1]]),
        ("multiclass-multioutput", [[5, 1], [4, 2], [3, 1]]),
        ("multiclass-multioutput", [[1, 2, 3]]),
        ("continuous-multioutput", np.arange(30).reshape((-1, 3))),
    ]

    for kind, target in cases:
        if kind not in ravelled_types:
            assert_raises(ValueError, column_or_1d, target)
            continue
        assert_array_equal(column_or_1d(target), np.ravel(target))
Beispiel #15
0
def test_column_or_1d():
    # Targets that column_or_1d must flatten, in the original check order
    # relative to the rejected ones below.
    accepted = [
        ["spam", "egg", "spam"],                 # binary
        [0, 1, 0, 1],                            # binary
        np.arange(10) / 20.,                     # continuous
        [1, 2, 3],                               # multiclass
        [0, 1, 2, 2, 0],                         # multiclass
        [[1], [2], [3]],                         # multiclass
    ]
    # Targets that column_or_1d must refuse with ValueError
    rejected = [
        [[0, 1, 0], [0, 0, 1]],                  # multilabel-indicator
        [[1, 2, 3]],                             # multiclass-multioutput
        [[1, 1], [2, 2], [3, 1]],                # multiclass-multioutput
        [[5, 1], [4, 2], [3, 1]],                # multiclass-multioutput
        [[1, 2, 3]],                             # multiclass-multioutput
        np.arange(30).reshape((-1, 3)),          # continuous-multioutput
    ]

    for target in accepted:
        assert_array_equal(column_or_1d(target), np.ravel(target))
    for target in rejected:
        assert_raises(ValueError, column_or_1d, target)
Beispiel #16
0
def test_validate_functions():
    """Check that valid functions are accepted & invalid ones raise error

    Both estimators are fitted on the boston data with several
    ``function_set`` values. Sets made of function objects, or of known
    names mixed with function objects, must fit. Sets containing an unknown
    name, a non-function entry, or nothing at all must raise ``ValueError``
    from ``fit``.
    """

    # The random X/y previously built here were never used; every fit below
    # runs on the boston data.
    valid_sets = ((add2, sub2, mul2, div2),
                  ('add', 'sub', 'mul', div2))
    invalid_sets = (('ni', 'sub', 'mul', div2),   # unknown function name
                    (7, 'sub', 'mul', div2),      # entry that is no function
                    ())                           # empty set

    for Symbolic in (SymbolicRegressor, SymbolicTransformer):
        # These should be fine
        for function_set in valid_sets:
            est = Symbolic(generations=2,
                           random_state=0,
                           function_set=function_set)
            est.fit(boston.data, boston.target)

        # These should fail
        for function_set in invalid_sets:
            est = Symbolic(generations=2,
                           random_state=0,
                           function_set=function_set)
            assert_raises(ValueError, est.fit, boston.data, boston.target)
Beispiel #17
0
def test_indices():
    """Indices cannot be read before generation and are stable afterwards"""

    binary_functions = [add2, sub2, mul2, div2]
    rng = check_random_state(415)
    gp = _Program(function_set=list(binary_functions),
                  arities={2: list(binary_functions)},
                  init_depth=(2, 6),
                  init_method='half and half',
                  n_features=10,
                  const_range=(-1.0, 1.0),
                  metric='mean absolute error',
                  p_point_replace=0.05,
                  parsimony_coefficient=0.1,
                  random_state=rng,
                  program=[mul2, div2, 8, 1, sub2, 9, .5])

    # Before any indices were generated, every accessor raises ValueError
    assert_raises(ValueError, gp.get_all_indices)
    assert_raises(ValueError, gp._indices)
    # getattr triggers the indices_ property inside assert_raises
    assert_raises(ValueError, getattr, gp, 'indices_')

    # Generate the indices once with explicit arguments ...
    generated = gp.get_all_indices(10, 7, rng)[0]

    # ... after which all three accessors return the same values
    assert_array_equal(generated, gp.get_all_indices()[0])
    assert_array_equal(generated, gp._indices())
    assert_array_equal(generated, gp.indices_)
Beispiel #18
0
def test_all_metrics():
    """Each supported metric gives its known fitness; an unknown one fails."""

    rng = check_random_state(415)

    # Test for a small program made of function objects
    gp = _Program(function_set=[add2, sub2, mul2, div2],
                  arities={2: [add2, sub2, mul2, div2]},
                  init_depth=(2, 6),
                  init_method='half and half',
                  n_features=10,
                  const_range=(-1.0, 1.0),
                  metric='mean absolute error',
                  p_point_replace=0.05,
                  parsimony_coefficient=0.1,
                  random_state=rng,
                  program=[mul2, div2, 8, 1, sub2, 9, .5])

    # Data is drawn after the program is built, X first and then y
    X = np.reshape(rng.uniform(size=50), (5, 10))
    y = rng.uniform(size=5)
    unit_weights = np.ones(5)

    # Metric name paired with the fitness it is expected to produce
    expectations = (
        ('mean absolute error', 1.48719809776),
        ('mse', 1.82389179833),
        ('rmse', 1.76013763179),
        ('rmsle', 0.98663772258),
        ('pearson', -0.2928200724),
        ('spearman', -0.5),
    )
    computed = []
    for metric_name, _ in expectations:
        gp.metric = metric_name
        gp.raw_fitness_ = gp.raw_fitness(X, y, unit_weights)
        computed.append(gp.fitness())
    assert_array_almost_equal(computed, [value for _, value in expectations])

    # And check a fake one
    gp.metric = 'the larch'
    assert_raises(ValueError, gp.raw_fitness, X, y, unit_weights)
Beispiel #19
0
def check_estimators_unfitted(name, Estimator):
    """Check if NotFittedError is raised when calling predict and related
    functions

    Common check for regressors as well as classifiers. Each of ``predict``,
    ``decision_function``, ``predict_proba`` and ``predict_log_proba`` that
    the estimator exposes is called on an unfitted instance and must raise
    ``NotFittedError``. Methods the estimator does not have are skipped.
    """
    X, y = _boston_subset()

    # record (and thereby silence) warnings raised by the constructor
    with warnings.catch_warnings(record=True):
        est = Estimator()

    # Every call is guarded by hasattr. The former unconditional predict call
    # duplicated the guarded one and raised AttributeError for estimators
    # without a predict method.
    for method_name in ('predict', 'decision_function',
                        'predict_proba', 'predict_log_proba'):
        if hasattr(est, method_name):
            assert_raises(NotFittedError, getattr(est, method_name), X)
Beispiel #20
0
def test_validate_program():
    """Check that valid programs are accepted & invalid ones raise error

    A hand-written flattened program is passed to ``_Program`` together with
    a fixed set of positional settings. The complete program must be
    accepted. The same program with its last node removed, or with an extra
    node appended, must raise ``ValueError``.
    """

    function_set = [add2, sub2, mul2, div2, sqrt1, log1, abs1, max2, min2]
    # Maps arity to the functions of that arity. This must be a plain dict:
    # a trailing comma after the closing brace used to wrap it in a 1-tuple.
    arities = {
        1: [sqrt1, log1, abs1],
        2: [add2, sub2, mul2, div2, max2, min2]
    }
    init_depth = (2, 6)
    init_method = 'half and half'
    n_features = 10
    const_range = (-1.0, 1.0)
    metric = 'mean absolute error'
    p_point_replace = 0.05
    parsimony_coefficient = 0.1

    random_state = check_random_state(415)
    test_gp = [
        sub2, abs1, sqrt1, log1, log1, sqrt1, 7, abs1, abs1, abs1, log1, sqrt1,
        2
    ]

    # This one should be fine
    _ = _Program(function_set, arities, init_depth, init_method, n_features,
                 const_range, metric, p_point_replace, parsimony_coefficient,
                 random_state, test_gp)

    # Now try a couple that shouldn't be: one node short ...
    assert_raises(ValueError, _Program, function_set, arities, init_depth,
                  init_method, n_features, const_range, metric,
                  p_point_replace, parsimony_coefficient, random_state,
                  test_gp[:-1])
    # ... and one node too many
    assert_raises(ValueError, _Program, function_set, arities, init_depth,
                  init_method, n_features, const_range, metric,
                  p_point_replace, parsimony_coefficient, random_state,
                  test_gp + [1])
Beispiel #21
0
def test_validate_fitness():
    """make_fitness accepts a valid measure and rejects malformed ones"""

    # A proper function with a boolean flag is accepted
    _ = make_fitness(function=_mean_square_error, greater_is_better=True)

    # greater_is_better must be a bool, not a string or an int
    for not_a_bool in ('Sure', 1):
        assert_raises(ValueError, make_fitness, _mean_square_error,
                      not_a_bool)

    # A fitness function taking only two arguments is rejected
    def bad_fun1(x1, x2):
        return 1.0

    # A fitness function returning a string is rejected
    def bad_fun2(x1, x2, w):
        return 'ni'

    assert_raises(ValueError, make_fitness, bad_fun1, True)
    assert_raises(ValueError, make_fitness, bad_fun2, True)
Beispiel #22
0
def test_program_input_validation():
    """Check that guarded input validation raises errors

    Every estimator is fitted on the boston data. Invalid constructor
    arguments must make ``fit`` raise ``ValueError``; unusual but acceptable
    values must fit without error.
    """

    def assert_fit_fails(est):
        # fit on the boston data must raise ValueError for this estimator
        assert_raises(ValueError, est.fit, boston.data, boston.target)

    for Symbolic in (SymbolicRegressor, SymbolicTransformer):
        # Check too much proba
        assert_fit_fails(Symbolic(p_point_mutation=.5))

        # Check invalid init_method
        assert_fit_fails(Symbolic(init_method='ni'))

        # Check invalid const_ranges
        for const_range in (2, [2, 2], (2, 2, 2), 'ni'):
            assert_fit_fails(Symbolic(const_range=const_range))
        # And check acceptable, but strange, representations of const_range
        for const_range in ((2, 2), (4, 2)):
            est = Symbolic(const_range=const_range)
            est.fit(boston.data, boston.target)

        # Check invalid init_depth (the scalar case is checked once; it was
        # duplicated before)
        for init_depth in (2, [2, 2], (2, 2, 2), 'ni', (4, 2)):
            assert_fit_fails(Symbolic(init_depth=init_depth))
        # And check acceptable, but strange, representations of init_depth
        est = Symbolic(init_depth=(2, 2))
        est.fit(boston.data, boston.target)

    # Check hall_of_fame and n_components for transformer
    for bad_kwargs in ({'hall_of_fame': 1000}, {'n_components': 1000},
                       {'hall_of_fame': 0}, {'n_components': 0}):
        assert_fit_fails(SymbolicTransformer(**bad_kwargs))

    # Check regressor metrics
    for m in ['mean absolute error', 'mse', 'rmse', 'rmsle']:
        est = SymbolicRegressor(generations=2, metric=m)
        est.fit(boston.data, boston.target)
    # And check the transformer metrics as well as a fake one
    for m in ['pearson', 'spearman', 'the larch']:
        assert_fit_fails(SymbolicRegressor(generations=2, metric=m))
    # Check transformer metrics
    for m in ['pearson', 'spearman']:
        est = SymbolicTransformer(generations=2, metric=m)
        est.fit(boston.data, boston.target)
    # And check the regressor metrics as well as a fake one
    for m in ['mean absolute error', 'mse', 'rmse', 'rmsle', 'the larch']:
        assert_fit_fails(SymbolicTransformer(generations=2, metric=m))
Beispiel #23
0
def test_resample_value_errors():
    # Each (positional arrays, keyword options) pair must make resample
    # raise ValueError.
    invalid_calls = (
        (([0], [0, 1]), {}),
        (([0, 1], [0, 1]), {'n_samples': 3}),
        (([0, 1], [0, 1]), {'meaning_of_life': 42}),
    )
    for arrays, options in invalid_calls:
        assert_raises(ValueError, resample, *arrays, **options)
Beispiel #24
0
def check_classifiers_train(name, Classifier):
    """Common training check for classifiers.

    Runs on a three-class blobs problem and on the binary problem obtained
    by dropping class 2. For each problem it checks that:

    * ``fit`` raises ``ValueError`` when X and y lengths differ,
    * fitting works on arrays and on plain lists and sets ``classes_``,
    * ``predict`` returns one label per sample and, except for
      BernoulliNB / MultinomialNB, training accuracy exceeds 0.83,
    * ``predict``, ``decision_function`` and ``predict_proba`` raise
      ``ValueError`` on transposed input,
    * ``decision_function`` and ``predict_proba``, when present, have the
      expected shape and agree with ``predict``.
    """
    X_m, y_m = make_blobs(random_state=0)
    X_m, y_m = shuffle(X_m, y_m, random_state=7)
    X_m = StandardScaler().fit_transform(X_m)
    # generate binary problem from multi-class one
    y_b = y_m[y_m != 2]
    X_b = X_m[y_m != 2]
    for (X, y) in [(X_m, y_m), (X_b, y_b)]:
        classes = np.unique(y)
        n_classes = len(classes)
        n_samples, n_features = X.shape
        # catch deprecation warnings
        with warnings.catch_warnings(record=True):
            classifier = Classifier()
        if name in ['BernoulliNB', 'MultinomialNB']:
            # shift the data so its minimum is zero for these estimators
            X -= X.min()
        set_fast_parameters(classifier)
        set_random_state(classifier)
        # raises error on malformed input for fit
        assert_raises(ValueError, classifier.fit, X, y[:-1])

        # fit
        classifier.fit(X, y)
        # with lists
        classifier.fit(X.tolist(), y.tolist())
        assert_true(hasattr(classifier, "classes_"))
        y_pred = classifier.predict(X)
        assert_equal(y_pred.shape, (n_samples,))
        # training set performance
        if name not in ['BernoulliNB', 'MultinomialNB']:
            assert_greater(accuracy_score(y, y_pred), 0.83)

        # raises error on malformed input for predict
        assert_raises(ValueError, classifier.predict, X.T)
        if hasattr(classifier, "decision_function"):
            try:
                # decision_function agrees with predict
                decision = classifier.decision_function(X)
                # Compare by value: `is` on int literals only works through
                # CPython's small-integer cache.
                if n_classes == 2:
                    assert_equal(decision.shape, (n_samples,))
                    # builtin int replaces the removed np.int alias
                    dec_pred = (decision.ravel() > 0).astype(int)
                    assert_array_equal(dec_pred, y_pred)
                if n_classes == 3:
                    assert_equal(decision.shape, (n_samples, n_classes))
                    assert_array_equal(np.argmax(decision, axis=1), y_pred)

                # raises error on malformed input for decision_function
                assert_raises(ValueError,
                              classifier.decision_function, X.T)
            except NotImplementedError:
                pass
        if hasattr(classifier, "predict_proba"):
            # predict_proba agrees with predict
            y_prob = classifier.predict_proba(X)
            assert_equal(y_prob.shape, (n_samples, n_classes))
            assert_array_equal(np.argmax(y_prob, axis=1), y_pred)
            # check that probas for all classes sum to one
            assert_array_almost_equal(np.sum(y_prob, axis=1),
                                      np.ones(n_samples))
            # raises error on malformed input for predict_proba
            assert_raises(ValueError, classifier.predict_proba, X.T)
Beispiel #25
0
def test_resample_value_errors():
    # resample must raise ValueError for each of these invalid calls.
    pair = [0, 1]

    # first array has one element, second has two
    assert_raises(ValueError, resample, [0], pair)
    # n_samples=3 requested from two-element arrays
    assert_raises(ValueError, resample, pair, [0, 1], n_samples=3)
    # meaning_of_life is not an option the test expects resample to accept
    assert_raises(ValueError, resample, pair, [0, 1], meaning_of_life=42)
Beispiel #26
0
def _check_transformer(name, Transformer, X, y):
    # Fit a transformer and check that fit_transform, transform and a second
    # fit_transform agree (to 2 decimals), that outputs keep the sample
    # count, and that transform rejects transposed input.
    unstable_on_32bit = ('CCA', 'LocallyLinearEmbedding', 'KernelPCA')
    if name in unstable_on_32bit and _is_32bit():
        # Those transformers yield non-deterministic output when executed on
        # a 32bit Python. The same transformers are stable on 64bit Python.
        # FIXME: try to isolate a minimalistic reproduction case only depending
        # on numpy & scipy and/or maybe generate a test dataset that does not
        # cause such unstable behaviors.
        raise SkipTest(name + ' is non deterministic on 32bit Python')

    n_samples, _ = np.asarray(X).shape
    # catch deprecation warnings
    with warnings.catch_warnings(record=True):
        transformer = Transformer()
    set_random_state(transformer)
    set_fast_parameters(transformer)

    # Cross-decomposition estimators are given a two-column target whose
    # second column is doubled on every other row.
    is_cross_decomposition = name in CROSS_DECOMPOSITION
    if is_cross_decomposition:
        y_ = np.c_[y, y]
        y_[::2, 1] *= 2
    else:
        y_ = y

    transformer.fit(X, y_)
    X_pred = transformer.fit_transform(X, y=y_)

    # Every output (or the single output) must keep the number of samples
    first_outputs = X_pred if isinstance(X_pred, tuple) else (X_pred,)
    for output in first_outputs:
        assert_equal(output.shape[0], n_samples)

    if not hasattr(transformer, 'transform'):
        return

    if is_cross_decomposition:
        X_pred2 = transformer.transform(X, y_)
    else:
        X_pred2 = transformer.transform(X)
    X_pred3 = transformer.fit_transform(X, y=y_)

    transform_msg = (
        "fit_transform and transform outcomes not consistent in %s"
        % Transformer)
    refit_msg = (
        "consecutive fit_transform outcomes not consistent in %s"
        % Transformer)

    # Tuple outputs are compared element by element, anything else whole
    if isinstance(X_pred, tuple) and isinstance(X_pred2, tuple):
        triples = zip(X_pred, X_pred2, X_pred3)
    else:
        triples = [(X_pred, X_pred2, X_pred3)]
    for first, transformed, refitted in triples:
        assert_array_almost_equal(first, transformed, 2, transform_msg)
        assert_array_almost_equal(first, refitted, 2, refit_msg)

    # raises error on malformed input for transform
    if hasattr(X, 'T'):
        # If it's not an array, it does not have a 'T' property
        assert_raises(ValueError, transformer.transform, X.T)
Beispiel #27
0
def test_compute_class_weight_not_present():
    # compute_class_weight must raise when a declared class never occurs
    # in y: four classes are declared, only labels 0, 1 and 2 appear.
    declared = np.arange(4)
    observed = np.asarray([0, 0, 0, 1, 1, 2])
    assert_raises(ValueError, compute_class_weight, "auto", declared,
                  observed)
Beispiel #28
0
def test_compute_sample_weight_errors():
    # compute_sample_weight must raise ValueError for each invalid call.
    single_output = np.asarray([1, 1, 1, 2, 2, 2])
    multi_output = np.asarray(
        [[1, 0], [1, 0], [1, 0], [2, 1], [2, 1], [2, 1]])
    weights_dict = {1: 2, 2: 1}

    # Invalid preset string, without and with a third positional argument
    for target in (single_output, multi_output):
        for trailing in ((), (range(4),)):
            assert_raises(ValueError, compute_sample_weight, "ni", target,
                          *trailing)

    # Not "auto" for subsample
    assert_raises(ValueError, compute_sample_weight, weights_dict,
                  single_output, range(4))

    # Not a list or preset for multi-output
    assert_raises(ValueError, compute_sample_weight, weights_dict,
                  multi_output)

    # Incorrect length list for multi-output
    assert_raises(ValueError, compute_sample_weight, [weights_dict],
                  multi_output)
Beispiel #29
0
def test_validate_function():
    """make_function accepts a valid function and rejects malformed ones"""

    def assert_rejected(function, name, arity):
        # make_function must raise ValueError for this combination
        assert_raises(ValueError, make_function, function, name, arity)

    # A well-formed definition is accepted
    _ = make_function(function=_protected_sqrt, name='sqrt', arity=1)

    # Arity must be an integer
    assert_rejected(_protected_sqrt, 'sqrt', '1')
    assert_rejected(_protected_sqrt, 'sqrt', 1.0)
    # Arity must match the function's argument count
    assert_rejected(_protected_sqrt, 'sqrt', 2)
    assert_rejected(maximum, 'max', 1)

    # Name must be a string
    assert_rejected(_protected_sqrt, 2, 1)

    # Return value must not be a string
    def bad_fun1(x1, x2):
        return 'ni'

    assert_rejected(bad_fun1, 'ni', 2)

    # Return shape check
    def bad_fun2(x1):
        return np.ones((2, 1))

    assert_rejected(bad_fun2, 'ni', 1)

    # Closure check for negative inputs
    def _unprotected_sqrt(x1):
        with np.errstate(divide='ignore', invalid='ignore'):
            return np.sqrt(x1)

    assert_rejected(_unprotected_sqrt, 'sqrt', 1)

    # Closure check for zero inputs
    def _unprotected_div(x1, x2):
        with np.errstate(divide='ignore', invalid='ignore'):
            return np.divide(x1, x2)

    assert_rejected(_unprotected_div, 'div', 2)
Beispiel #30
0
def test_program_input_validation():
    """Check that guarded input validation raises errors

    Every estimator is fitted on the boston data. Invalid constructor
    arguments must make ``fit`` raise ``ValueError``; unusual but acceptable
    values must fit without error.
    """

    def expect_fit_error(est):
        # fit on the boston data must raise ValueError for this estimator
        assert_raises(ValueError, est.fit, boston.data, boston.target)

    for Symbolic in (SymbolicRegressor, SymbolicTransformer):
        # Check too much proba
        expect_fit_error(Symbolic(p_point_mutation=.5))

        # Check invalid init_method
        expect_fit_error(Symbolic(init_method='ni'))

        # Check invalid const_ranges
        for const_range in (2, [2, 2], (2, 2, 2), 'ni'):
            expect_fit_error(Symbolic(const_range=const_range))
        # And check acceptable, but strange, representations of const_range
        for const_range in ((2, 2), (4, 2)):
            est = Symbolic(const_range=const_range)
            est.fit(boston.data, boston.target)

        # Check invalid init_depth (the scalar case is checked once; it was
        # duplicated before)
        for init_depth in (2, [2, 2], (2, 2, 2), 'ni', (4, 2)):
            expect_fit_error(Symbolic(init_depth=init_depth))
        # And check acceptable, but strange, representations of init_depth
        est = Symbolic(init_depth=(2, 2))
        est.fit(boston.data, boston.target)

    # Check hall_of_fame and n_components for transformer
    for bad_kwargs in ({'hall_of_fame': 2000}, {'n_components': 2000},
                       {'hall_of_fame': 0}, {'n_components': 0}):
        expect_fit_error(SymbolicTransformer(**bad_kwargs))

    # Check regressor metrics
    for m in ['mean absolute error', 'mse', 'rmse', 'rmsle']:
        est = SymbolicRegressor(generations=2, metric=m)
        est.fit(boston.data, boston.target)
    # And check the transformer metrics as well as a fake one
    for m in ['pearson', 'spearman', 'the larch']:
        expect_fit_error(SymbolicRegressor(generations=2, metric=m))
    # Check transformer metrics
    for m in ['pearson', 'spearman']:
        est = SymbolicTransformer(generations=2, metric=m)
        est.fit(boston.data, boston.target)
    # And check the regressor metrics as well as a fake one
    for m in ['mean absolute error', 'mse', 'rmse', 'rmsle', 'the larch']:
        expect_fit_error(SymbolicTransformer(generations=2, metric=m))