Example #1
0
def test_sample_weight():
    """Check sample_weight param works"""

    # Check constant sample_weight has no effect
    sample_weight = np.ones(boston.target.shape[0])
    est1 = SymbolicRegressor(generations=2, random_state=0)
    est1.fit(boston.data, boston.target)
    est2 = SymbolicRegressor(generations=2, random_state=0)
    est2.fit(boston.data, boston.target, sample_weight=sample_weight)
    # And again with a scaled sample_weight
    est3 = SymbolicRegressor(generations=2, random_state=0)
    est3.fit(boston.data, boston.target, sample_weight=sample_weight * 1.1)

    assert_almost_equal(est1._program.fitness_, est2._program.fitness_)
    assert_almost_equal(est1._program.fitness_, est3._program.fitness_)

    # And again for the transformer
    sample_weight = np.ones(boston.target.shape[0])
    est1 = SymbolicTransformer(generations=2, random_state=0)
    est1 = est1.fit_transform(boston.data, boston.target)
    est2 = SymbolicTransformer(generations=2, random_state=0)
    est2 = est2.fit_transform(boston.data, boston.target,
                              sample_weight=sample_weight)
    # And again with a scaled sample_weight
    est3 = SymbolicTransformer(generations=2, random_state=0)
    est3 = est3.fit_transform(boston.data, boston.target,
                              sample_weight=sample_weight * 1.1)

    assert_array_almost_equal(est1, est2)
    assert_array_almost_equal(est1, est3)
Example #2
0
def check_transformer_pickle(name, Transformer):
    X, y = make_blobs(n_samples=30, centers=[[0, 0, 0], [1, 1, 1]],
                      random_state=0, n_features=2, cluster_std=0.1)
    n_samples, n_features = X.shape
    X = StandardScaler().fit_transform(X)
    X -= X.min()
    # catch deprecation warnings
    with warnings.catch_warnings(record=True):
        transformer = Transformer()
    if not hasattr(transformer, 'transform'):
        return
    set_random_state(transformer)
    set_fast_parameters(transformer)

    # fit
    if name in CROSS_DECOMPOSITION:
        random_state = np.random.RandomState(seed=12345)
        y_ = np.vstack([y, 2 * y + random_state.randint(2, size=len(y))])
        y_ = y_.T
    else:
        y_ = y

    transformer.fit(X, y_)
    X_pred = transformer.fit(X, y_).transform(X)
    pickled_transformer = pickle.dumps(transformer)
    unpickled_transformer = pickle.loads(pickled_transformer)
    pickled_X_pred = unpickled_transformer.transform(X)

    assert_array_almost_equal(pickled_X_pred, X_pred)
Example #3
0
def check_regressors_int(name, Regressor):
    X, _ = _boston_subset()
    X = X[:50]
    rnd = np.random.RandomState(0)
    y = rnd.randint(3, size=X.shape[0])
    y = multioutput_estimator_convert_y_2d(name, y)
    rnd = np.random.RandomState(0)
    # catch deprecation warnings
    with warnings.catch_warnings(record=True):
        # separate estimators to control random seeds
        regressor_1 = Regressor()
        regressor_2 = Regressor()
    set_fast_parameters(regressor_1)
    set_fast_parameters(regressor_2)
    set_random_state(regressor_1)
    set_random_state(regressor_2)

    if name in CROSS_DECOMPOSITION:
        y_ = np.vstack([y, 2 * y + rnd.randint(2, size=len(y))])
        y_ = y_.T
    else:
        y_ = y

    # fit
    regressor_1.fit(X, y_)
    pred1 = regressor_1.predict(X)
    regressor_2.fit(X, y_.astype(np.float))
    pred2 = regressor_2.predict(X)
    assert_array_almost_equal(pred1, pred2, 2, name)
Example #4
0
def test_all_metrics():
    """Check all supported metrics work"""

    params = {'function_set': ['add2', 'sub2', 'mul2', 'div2'],
              'arities': {2: ['add2', 'sub2', 'mul2', 'div2']},
              'init_depth': (2, 6),
              'init_method': 'half and half',
              'n_features': 10,
              'const_range': (-1.0, 1.0),
              'metric': 'mean absolute error',
              'p_point_replace': 0.05,
              'parsimony_coefficient': 0.1}
    random_state = check_random_state(415)

    # Test for a small program
    test_gp = ['mul2', 'div2', 8, 1, 'sub2', 9, .5]
    gp = _Program(random_state=random_state, program=test_gp, **params)
    X = np.reshape(random_state.uniform(size=50), (5, 10))
    y = random_state.uniform(size=5)
    sample_weight = np.ones(5)
    expected = [1.48719809776, 1.82389179833, 1.76013763179, 0.98663772258,
                -0.2928200724, -0.5]
    result = []
    for m in ['mean absolute error', 'mse', 'rmse', 'rmsle',
              'pearson', 'spearman']:
        gp.metric = m
        gp.raw_fitness_ = gp.raw_fitness(X, y, sample_weight)
        result.append(gp.fitness())
    assert_array_almost_equal(result, expected)
    # And check a fake one
    gp.metric = 'the larch'
    assert_raises(ValueError, gp.raw_fitness, X, y, sample_weight)
Example #5
0
def check_regressors_pickle(name, Regressor):
    X, y = _boston_subset()
    y = StandardScaler().fit_transform(y)   # X is already scaled
    y = multioutput_estimator_convert_y_2d(name, y)
    rnd = np.random.RandomState(0)
    # catch deprecation warnings
    with warnings.catch_warnings(record=True):
        regressor = Regressor()
    set_fast_parameters(regressor)
    if not hasattr(regressor, 'alphas') and hasattr(regressor, 'alpha'):
        # linear regressors need to set alpha, but not generalized CV ones
        regressor.alpha = 0.01

    if name in CROSS_DECOMPOSITION:
        y_ = np.vstack([y, 2 * y + rnd.randint(2, size=len(y))])
        y_ = y_.T
    else:
        y_ = y
    regressor.fit(X, y_)
    y_pred = regressor.predict(X)
    # store old predictions
    pickled_regressor = pickle.dumps(regressor)
    unpickled_regressor = pickle.loads(pickled_regressor)
    pickled_y_pred = unpickled_regressor.predict(X)
    assert_array_almost_equal(pickled_y_pred, y_pred)
Example #6
0
def check_class_weight_auto_linear_classifier(name, Classifier):
    """Test class weights with non-contiguous class labels."""
    X = np.array([[-1.0, -1.0], [-1.0, 0], [-.8, -1.0],
                  [1.0, 1.0], [1.0, 0.0]])
    y = [1, 1, 1, -1, -1]

    with warnings.catch_warnings(record=True):
        classifier = Classifier()
    if hasattr(classifier, "n_iter"):
        # This is a very small dataset, default n_iter are likely to prevent
        # convergence
        classifier.set_params(n_iter=1000)
    set_random_state(classifier)

    # Let the model compute the class frequencies
    classifier.set_params(class_weight='auto')
    coef_auto = classifier.fit(X, y).coef_.copy()

    # Count each label occurrence to reweight manually
    mean_weight = (1. / 3 + 1. / 2) / 2
    class_weight = {
        1: 1. / 3 / mean_weight,
        -1: 1. / 2 / mean_weight,
    }
    classifier.set_params(class_weight=class_weight)
    coef_manual = classifier.fit(X, y).coef_.copy()

    assert_array_almost_equal(coef_auto, coef_manual)
Example #7
0
def test_sample_weight():
    """Check sample_weight param works"""

    # Check constant sample_weight has no effect
    sample_weight = np.ones(boston.target.shape[0])
    est1 = SymbolicRegressor(generations=2, random_state=0)
    est1.fit(boston.data, boston.target)
    est2 = SymbolicRegressor(generations=2, random_state=0)
    est2.fit(boston.data, boston.target, sample_weight=sample_weight)
    # And again with a scaled sample_weight
    est3 = SymbolicRegressor(generations=2, random_state=0)
    est3.fit(boston.data, boston.target, sample_weight=sample_weight * 1.1)

    assert_almost_equal(est1._program.fitness_, est2._program.fitness_)
    assert_almost_equal(est1._program.fitness_, est3._program.fitness_)

    # And again for the transformer
    sample_weight = np.ones(boston.target.shape[0])
    est1 = SymbolicTransformer(generations=2, random_state=0)
    est1 = est1.fit_transform(boston.data, boston.target)
    est2 = SymbolicTransformer(generations=2, random_state=0)
    est2 = est2.fit_transform(boston.data,
                              boston.target,
                              sample_weight=sample_weight)

    assert_array_almost_equal(est1, est2)
Example #8
0
def check_pipeline_consistency(name, Estimator):
    if name in ('CCA', 'LocallyLinearEmbedding', 'KernelPCA') and _is_32bit():
        # Those transformers yield non-deterministic output when executed on
        # a 32bit Python. The same transformers are stable on 64bit Python.
        # FIXME: try to isolate a minimalistic reproduction case only depending
        # scipy and/or maybe generate a test dataset that does not
        # cause such unstable behaviors.
        msg = name + ' is non deterministic on 32bit Python'
        raise SkipTest(msg)

    # check that make_pipeline(est) gives same score as est
    X, y = make_blobs(n_samples=30, centers=[[0, 0, 0], [1, 1, 1]],
                      random_state=0, n_features=2, cluster_std=0.1)
    X -= X.min()
    y = multioutput_estimator_convert_y_2d(name, y)
    estimator = Estimator()
    set_fast_parameters(estimator)
    set_random_state(estimator)
    pipeline = make_pipeline(estimator)
    estimator.fit(X, y)
    pipeline.fit(X, y)
    funcs = ["score", "fit_transform"]
    for func_name in funcs:
        func = getattr(estimator, func_name, None)
        if func is not None:
            func_pipeline = getattr(pipeline, func_name)
            result = func(X, y)
            result_pipe = func_pipeline(X, y)
            assert_array_almost_equal(result, result_pipe)
Example #9
0
def test_execute():
    """Check executing the program works"""

    params = {
        'function_set': [add2, sub2, mul2, div2],
        'arities': {
            2: [add2, sub2, mul2, div2]
        },
        'init_depth': (2, 6),
        'init_method': 'half and half',
        'n_features': 10,
        'const_range': (-1.0, 1.0),
        'metric': 'mean absolute error',
        'p_point_replace': 0.05,
        'parsimony_coefficient': 0.1
    }
    random_state = check_random_state(415)

    # Test for a small program
    test_gp = [mul2, div2, 8, 1, sub2, 9, .5]
    X = np.reshape(random_state.uniform(size=50), (5, 10))
    gp = _Program(random_state=random_state, program=test_gp, **params)
    result = gp.execute(X)
    expected = [-0.19656208, 0.78197782, -1.70123845, -0.60175969, -0.01082618]
    assert_array_almost_equal(result, expected)
Example #10
0
def test_compute_class_weight_auto_unordered():
    # Test compute_class_weight when classes are unordered
    classes = np.array([1, 0, 3])
    y = np.asarray([1, 0, 0, 3, 3, 3])
    cw = compute_class_weight("auto", classes, y)
    assert_almost_equal(cw.sum(), classes.shape)
    assert_equal(len(cw), len(classes))
    assert_array_almost_equal(cw, np.array([1.636, 0.818, 0.545]), decimal=3)
Example #11
0
def test_parallel_train():
    """Check predictions are the same for different n_jobs"""

    # Check the regressor
    ests = [
        SymbolicRegressor(population_size=100, generations=4, n_jobs=n_jobs,
                          random_state=0).fit(boston.data[:100, :],
                                              boston.target[:100])
        for n_jobs in [1, 2, 3, 8, 16]
    ]

    preds = [e.predict(boston.data[500:, :]) for e in ests]
    for pred1, pred2 in zip(preds, preds[1:]):
        assert_array_almost_equal(pred1, pred2)
    lengths = np.array([[gp.length_ for gp in e._programs[-1]] for e in ests])
    for len1, len2 in zip(lengths, lengths[1:]):
        assert_array_almost_equal(len1, len2)

    # Check the transformer
    ests = [
        SymbolicTransformer(population_size=100, hall_of_fame=50,
                            generations=4, n_jobs=n_jobs,
                            random_state=0).fit(boston.data[:100, :],
                                                boston.target[:100])
        for n_jobs in [1, 2, 3, 8, 16]
    ]

    preds = [e.transform(boston.data[500:, :]) for e in ests]
    for pred1, pred2 in zip(preds, preds[1:]):
        assert_array_almost_equal(pred1, pred2)
    lengths = np.array([[gp.length_ for gp in e._programs[-1]] for e in ests])
    for len1, len2 in zip(lengths, lengths[1:]):
        assert_array_almost_equal(len1, len2)
Example #12
0
def test_compute_class_weight_auto_negative():
    # Test compute_class_weight when labels are negative
    # Test with balanced class labels.
    classes = np.array([-2, -1, 0])
    y = np.asarray([-1, -1, 0, 0, -2, -2])
    cw = compute_class_weight("auto", classes, y)
    assert_almost_equal(cw.sum(), classes.shape)
    assert_equal(len(cw), len(classes))
    assert_array_almost_equal(cw, np.array([1., 1., 1.]))

    # Test with unbalanced class labels.
    y = np.asarray([-1, 0, 0, -2, -2, -2])
    cw = compute_class_weight("auto", classes, y)
    assert_almost_equal(cw.sum(), classes.shape)
    assert_equal(len(cw), len(classes))
    assert_array_almost_equal(cw, np.array([0.545, 1.636, 0.818]), decimal=3)
Example #13
0
def check_classifiers_pickle(name, Classifier):
    X, y = make_blobs(random_state=0)
    X, y = shuffle(X, y, random_state=7)
    X -= X.min()

    # catch deprecation warnings
    with warnings.catch_warnings(record=True):
        classifier = Classifier()
    set_fast_parameters(classifier)
    # raises error on malformed input for fit
    assert_raises(ValueError, classifier.fit, X, y[:-1])

    # fit
    classifier.fit(X, y)
    y_pred = classifier.predict(X)
    pickled_classifier = pickle.dumps(classifier)
    unpickled_classifier = pickle.loads(pickled_classifier)
    pickled_y_pred = unpickled_classifier.predict(X)

    assert_array_almost_equal(pickled_y_pred, y_pred)
Example #14
0
def test_execute():
    """Check executing the program works"""

    params = {'function_set': ['add2', 'sub2', 'mul2', 'div2'],
              'arities': {2: ['add2', 'sub2', 'mul2', 'div2']},
              'init_depth': (2, 6),
              'init_method': 'half and half',
              'n_features': 10,
              'const_range': (-1.0, 1.0),
              'metric': 'mean absolute error',
              'p_point_replace': 0.05,
              'parsimony_coefficient': 0.1}
    random_state = check_random_state(415)

    # Test for a small program
    test_gp = ['mul2', 'div2', 8, 1, 'sub2', 9, .5]
    X = np.reshape(random_state.uniform(size=50), (5, 10))
    gp = _Program(random_state=random_state, program=test_gp, **params)
    result = gp.execute(X)
    expected = [-0.19656208, 0.78197782, -1.70123845, -0.60175969, -0.01082618]
    assert_array_almost_equal(result, expected)
Example #15
0
def test_all_metrics():
    """Check all supported metrics work"""

    params = {
        'function_set': [add2, sub2, mul2, div2],
        'arities': {
            2: [add2, sub2, mul2, div2]
        },
        'init_depth': (2, 6),
        'init_method': 'half and half',
        'n_features': 10,
        'const_range': (-1.0, 1.0),
        'metric': 'mean absolute error',
        'p_point_replace': 0.05,
        'parsimony_coefficient': 0.1
    }
    random_state = check_random_state(415)

    # Test for a small program
    test_gp = [mul2, div2, 8, 1, sub2, 9, .5]
    gp = _Program(random_state=random_state, program=test_gp, **params)
    X = np.reshape(random_state.uniform(size=50), (5, 10))
    y = random_state.uniform(size=5)
    sample_weight = np.ones(5)
    expected = [
        1.48719809776, 1.82389179833, 1.76013763179, 0.98663772258,
        -0.2928200724, -0.5
    ]
    result = []
    for m in [
            'mean absolute error', 'mse', 'rmse', 'rmsle', 'pearson',
            'spearman'
    ]:
        gp.metric = m
        gp.raw_fitness_ = gp.raw_fitness(X, y, sample_weight)
        result.append(gp.fitness())
    assert_array_almost_equal(result, expected)
    # And check a fake one
    gp.metric = 'the larch'
    assert_raises(ValueError, gp.raw_fitness, X, y, sample_weight)
Example #16
0
def check_estimators_data_not_an_array(name, Estimator, X, y):

    if name in CROSS_DECOMPOSITION:
        raise SkipTest
    # catch deprecation warnings
    with warnings.catch_warnings(record=True):
        # separate estimators to control random seeds
        estimator_1 = Estimator()
        estimator_2 = Estimator()
    set_fast_parameters(estimator_1)
    set_fast_parameters(estimator_2)
    set_random_state(estimator_1)
    set_random_state(estimator_2)

    y_ = NotAnArray(np.asarray(y))
    X_ = NotAnArray(np.asarray(X))

    # fit
    estimator_1.fit(X_, y_)
    pred1 = estimator_1.predict(X_)
    estimator_2.fit(X, y)
    pred2 = estimator_2.predict(X)
    assert_array_almost_equal(pred1, pred2, 2, name)
Example #17
0
def test_pickle():
    """Check pickability"""

    # Check the regressor
    est = SymbolicRegressor(generations=2, random_state=0)
    est.fit(boston.data[:100, :], boston.target[:100])
    score = est.score(boston.data[500:, :], boston.target[500:])
    pickle_object = pickle.dumps(est)

    est2 = pickle.loads(pickle_object)
    assert_equal(type(est2), est.__class__)
    score2 = est2.score(boston.data[500:, :], boston.target[500:])
    assert_equal(score, score2)

    # Check the transformer
    est = SymbolicTransformer(generations=2, random_state=0)
    est.fit(boston.data[:100, :], boston.target[:100])
    X_new = est.transform(boston.data[500:, :])
    pickle_object = pickle.dumps(est)

    est2 = pickle.loads(pickle_object)
    assert_equal(type(est2), est.__class__)
    X_new2 = est2.transform(boston.data[500:, :])
    assert_array_almost_equal(X_new, X_new2)
Example #18
0
def test_pickle():
    """Check pickability"""

    # Check the regressor
    est = SymbolicRegressor(generations=2, random_state=0)
    est.fit(boston.data[:100, :], boston.target[:100])
    score = est.score(boston.data[500:, :], boston.target[500:])
    pickle_object = pickle.dumps(est)

    est2 = pickle.loads(pickle_object)
    assert_equal(type(est2), est.__class__)
    score2 = est2.score(boston.data[500:, :], boston.target[500:])
    assert_equal(score, score2)

    # Check the transformer
    est = SymbolicTransformer(generations=2, random_state=0)
    est.fit(boston.data[:100, :], boston.target[:100])
    X_new = est.transform(boston.data[500:, :])
    pickle_object = pickle.dumps(est)

    est2 = pickle.loads(pickle_object)
    assert_equal(type(est2), est.__class__)
    X_new2 = est2.transform(boston.data[500:, :])
    assert_array_almost_equal(X_new, X_new2)
Example #19
0
def test_parallel_train():
    """Check predictions are the same for different n_jobs"""

    # Check the regressor
    ests = [
        SymbolicRegressor(population_size=100,
                          generations=4,
                          n_jobs=n_jobs,
                          random_state=0).fit(boston.data[:100, :],
                                              boston.target[:100])
        for n_jobs in [1, 2, 3, 8, 16]
    ]

    preds = [e.predict(boston.data[500:, :]) for e in ests]
    for pred1, pred2 in zip(preds, preds[1:]):
        assert_array_almost_equal(pred1, pred2)
    lengths = np.array([[gp.length_ for gp in e._programs[-1]] for e in ests])
    for len1, len2 in zip(lengths, lengths[1:]):
        assert_array_almost_equal(len1, len2)

    # Check the transformer
    ests = [
        SymbolicTransformer(population_size=100,
                            hall_of_fame=50,
                            generations=4,
                            n_jobs=n_jobs,
                            random_state=0).fit(boston.data[:100, :],
                                                boston.target[:100])
        for n_jobs in [1, 2, 3, 8, 16]
    ]

    preds = [e.transform(boston.data[500:, :]) for e in ests]
    for pred1, pred2 in zip(preds, preds[1:]):
        assert_array_almost_equal(pred1, pred2)
    lengths = np.array([[gp.length_ for gp in e._programs[-1]] for e in ests])
    for len1, len2 in zip(lengths, lengths[1:]):
        assert_array_almost_equal(len1, len2)
Example #20
0
def check_classifiers_train(name, Classifier):
    X_m, y_m = make_blobs(random_state=0)
    X_m, y_m = shuffle(X_m, y_m, random_state=7)
    X_m = StandardScaler().fit_transform(X_m)
    # generate binary problem from multi-class one
    y_b = y_m[y_m != 2]
    X_b = X_m[y_m != 2]
    for (X, y) in [(X_m, y_m), (X_b, y_b)]:
        # catch deprecation warnings
        classes = np.unique(y)
        n_classes = len(classes)
        n_samples, n_features = X.shape
        with warnings.catch_warnings(record=True):
            classifier = Classifier()
        if name in ['BernoulliNB', 'MultinomialNB']:
            X -= X.min()
        set_fast_parameters(classifier)
        set_random_state(classifier)
        # raises error on malformed input for fit
        assert_raises(ValueError, classifier.fit, X, y[:-1])

        # fit
        classifier.fit(X, y)
        # with lists
        classifier.fit(X.tolist(), y.tolist())
        assert_true(hasattr(classifier, "classes_"))
        y_pred = classifier.predict(X)
        assert_equal(y_pred.shape, (n_samples,))
        # training set performance
        if name not in ['BernoulliNB', 'MultinomialNB']:
            assert_greater(accuracy_score(y, y_pred), 0.83)

        # raises error on malformed input for predict
        assert_raises(ValueError, classifier.predict, X.T)
        if hasattr(classifier, "decision_function"):
            try:
                # decision_function agrees with predict
                decision = classifier.decision_function(X)
                if n_classes is 2:
                    assert_equal(decision.shape, (n_samples,))
                    dec_pred = (decision.ravel() > 0).astype(np.int)
                    assert_array_equal(dec_pred, y_pred)
                if n_classes is 3:
                    assert_equal(decision.shape, (n_samples, n_classes))
                    assert_array_equal(np.argmax(decision, axis=1), y_pred)

                # raises error on malformed input
                assert_raises(ValueError,
                              classifier.decision_function, X.T)
                # raises error on malformed input for decision_function
                assert_raises(ValueError,
                              classifier.decision_function, X.T)
            except NotImplementedError:
                pass
        if hasattr(classifier, "predict_proba"):
            # predict_proba agrees with predict
            y_prob = classifier.predict_proba(X)
            assert_equal(y_prob.shape, (n_samples, n_classes))
            assert_array_equal(np.argmax(y_prob, axis=1), y_pred)
            # check that probas for all classes sum to one
            assert_array_almost_equal(np.sum(y_prob, axis=1),
                                      np.ones(n_samples))
            # raises error on malformed input
            assert_raises(ValueError, classifier.predict_proba, X.T)
            # raises error on malformed input for predict_proba
            assert_raises(ValueError, classifier.predict_proba, X.T)
Example #21
0
def _check_transformer(name, Transformer, X, y):
    if name in ('CCA', 'LocallyLinearEmbedding', 'KernelPCA') and _is_32bit():
        # Those transformers yield non-deterministic output when executed on
        # a 32bit Python. The same transformers are stable on 64bit Python.
        # FIXME: try to isolate a minimalistic reproduction case only depending
        # on numpy & scipy and/or maybe generate a test dataset that does not
        # cause such unstable behaviors.
        msg = name + ' is non deterministic on 32bit Python'
        raise SkipTest(msg)
    n_samples, n_features = np.asarray(X).shape
    # catch deprecation warnings
    with warnings.catch_warnings(record=True):
        transformer = Transformer()
    set_random_state(transformer)
    set_fast_parameters(transformer)

    # fit

    if name in CROSS_DECOMPOSITION:
        y_ = np.c_[y, y]
        y_[::2, 1] *= 2
    else:
        y_ = y

    transformer.fit(X, y_)
    X_pred = transformer.fit_transform(X, y=y_)
    if isinstance(X_pred, tuple):
        for x_pred in X_pred:
            assert_equal(x_pred.shape[0], n_samples)
    else:
        assert_equal(X_pred.shape[0], n_samples)

    if hasattr(transformer, 'transform'):
        if name in CROSS_DECOMPOSITION:
            X_pred2 = transformer.transform(X, y_)
            X_pred3 = transformer.fit_transform(X, y=y_)
        else:
            X_pred2 = transformer.transform(X)
            X_pred3 = transformer.fit_transform(X, y=y_)
        if isinstance(X_pred, tuple) and isinstance(X_pred2, tuple):
            for x_pred, x_pred2, x_pred3 in zip(X_pred, X_pred2, X_pred3):
                assert_array_almost_equal(
                    x_pred, x_pred2, 2,
                    "fit_transform and transform outcomes not consistent in %s"
                    % Transformer)
                assert_array_almost_equal(
                    x_pred, x_pred3, 2,
                    "consecutive fit_transform outcomes not consistent in %s"
                    % Transformer)
        else:
            assert_array_almost_equal(
                X_pred, X_pred2, 2,
                "fit_transform and transform outcomes not consistent in %s"
                % Transformer)
            assert_array_almost_equal(
                X_pred, X_pred3, 2,
                "consecutive fit_transform outcomes not consistent in %s"
                % Transformer)

        # raises error on malformed input for transform
        if hasattr(X, 'T'):
            # If it's not an array, it does not have a 'T' property
            assert_raises(ValueError, transformer.transform, X.T)
Example #22
0
def test_compute_sample_weight_with_subsample():
    # Test compute_sample_weight with subsamples specified.
    # Test with balanced classes and all samples present
    y = np.asarray([1, 1, 1, 2, 2, 2])
    sample_weight = compute_sample_weight("auto", y, range(6))
    assert_array_almost_equal(sample_weight, [1., 1., 1., 1., 1., 1.])

    # Test with column vector of balanced classes and all samples present
    y = np.asarray([[1], [1], [1], [2], [2], [2]])
    sample_weight = compute_sample_weight("auto", y, range(6))
    assert_array_almost_equal(sample_weight, [1., 1., 1., 1., 1., 1.])

    # Test with a subsample
    y = np.asarray([1, 1, 1, 2, 2, 2])
    sample_weight = compute_sample_weight("auto", y, range(4))
    assert_array_almost_equal(sample_weight, [.5, .5, .5, 1.5, 1.5, 1.5])

    # Test with a bootstrap subsample
    y = np.asarray([1, 1, 1, 2, 2, 2])
    sample_weight = compute_sample_weight("auto", y, [0, 1, 1, 2, 2, 3])
    expected = np.asarray([1/3., 1/3., 1/3., 5/3., 5/3., 5/3.])
    assert_array_almost_equal(sample_weight, expected)

    # Test with a bootstrap subsample for multi-output
    y = np.asarray([[1, 0], [1, 0], [1, 0], [2, 1], [2, 1], [2, 1]])
    sample_weight = compute_sample_weight("auto", y, [0, 1, 1, 2, 2, 3])
    assert_array_almost_equal(sample_weight, expected ** 2)

    # Test with a missing class
    y = np.asarray([1, 1, 1, 2, 2, 2, 3])
    sample_weight = compute_sample_weight("auto", y, range(6))
    assert_array_almost_equal(sample_weight, [1., 1., 1., 1., 1., 1., 0.])

    # Test with a missing class for multi-output
    y = np.asarray([[1, 0], [1, 0], [1, 0], [2, 1], [2, 1], [2, 1], [2, 2]])
    sample_weight = compute_sample_weight("auto", y, range(6))
    assert_array_almost_equal(sample_weight, [1., 1., 1., 1., 1., 1., 0.])
Example #23
0
def test_compute_sample_weight():
    # Test (and demo) compute_sample_weight.
    # Test with balanced classes
    y = np.asarray([1, 1, 1, 2, 2, 2])
    sample_weight = compute_sample_weight("auto", y)
    assert_array_almost_equal(sample_weight, [1., 1., 1., 1., 1., 1.])

    # Test with user-defined weights
    sample_weight = compute_sample_weight({1: 2, 2: 1}, y)
    assert_array_almost_equal(sample_weight, [2., 2., 2., 1., 1., 1.])

    # Test with column vector of balanced classes
    y = np.asarray([[1], [1], [1], [2], [2], [2]])
    sample_weight = compute_sample_weight("auto", y)
    assert_array_almost_equal(sample_weight, [1., 1., 1., 1., 1., 1.])

    # Test with unbalanced classes
    y = np.asarray([1, 1, 1, 2, 2, 2, 3])
    sample_weight = compute_sample_weight("auto", y)
    expected = np.asarray([.6, .6, .6, .6, .6, .6, 1.8])
    assert_array_almost_equal(sample_weight, expected)

    # Test with `None` weights
    sample_weight = compute_sample_weight(None, y)
    assert_array_almost_equal(sample_weight, [1., 1., 1., 1., 1., 1., 1.])

    # Test with multi-output of balanced classes
    y = np.asarray([[1, 0], [1, 0], [1, 0], [2, 1], [2, 1], [2, 1]])
    sample_weight = compute_sample_weight("auto", y)
    assert_array_almost_equal(sample_weight, [1., 1., 1., 1., 1., 1.])

    # Test with multi-output with user-defined weights
    y = np.asarray([[1, 0], [1, 0], [1, 0], [2, 1], [2, 1], [2, 1]])
    sample_weight = compute_sample_weight([{1: 2, 2: 1}, {0: 1, 1: 2}], y)
    assert_array_almost_equal(sample_weight, [2., 2., 2., 2., 2., 2.])

    # Test with multi-output of unbalanced classes
    y = np.asarray([[1, 0], [1, 0], [1, 0], [2, 1], [2, 1], [2, 1], [3, -1]])
    sample_weight = compute_sample_weight("auto", y)
    assert_array_almost_equal(sample_weight, expected ** 2)