Example #1
def test_symbolic_classifier():
    """Check that SymbolicClassifier example works"""

    rng = check_random_state(0)
    cancer = load_breast_cancer()
    perm = rng.permutation(cancer.target.size)
    cancer.data = cancer.data[perm]
    cancer.target = cancer.target[perm]

    est = SymbolicClassifier(parsimony_coefficient=.01,
                             feature_names=cancer.feature_names,
                             random_state=1)
    est.fit(cancer.data[:400], cancer.target[:400])

    y_true = cancer.target[400:]
    y_score = est.predict_proba(cancer.data[400:])[:, 1]
    assert_almost_equal(roc_auc_score(y_true, y_score), 0.96937869822485212)

    dot_data = est._program.export_graphviz()
    expected = ('digraph program {\nnode [style=filled]\n0 [label="sub", '
                'fillcolor="#136ed4"] ;\n1 [label="div", fillcolor="#136ed4"] '
                ';\n2 [label="worst fractal dimension", fillcolor="#60a6f6"] '
                ';\n3 [label="mean concave points", fillcolor="#60a6f6"] '
                ';\n1 -> 3 ;\n1 -> 2 ;\n4 [label="mul", fillcolor="#136ed4"] '
                ';\n5 [label="mean concave points", fillcolor="#60a6f6"] ;\n6 '
                '[label="area error", fillcolor="#60a6f6"] ;\n4 -> 6 ;\n4 -> '
                '5 ;\n0 -> 4 ;\n0 -> 1 ;\n}')
    assert_equal(dot_data, expected)
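
All of the snippets on this page come from gplearn's test suite and omit the module-level imports and fixtures they rely on. A minimal header along the following lines should make them runnable (a sketch, assuming gplearn 0.4+ and a scikit-learn release old enough to still ship load_boston, which was removed in scikit-learn 1.2; the assert helpers are taken from numpy.testing rather than the old sklearn.utils.testing module):

import numpy as np
from numpy.testing import assert_almost_equal, assert_equal
from sklearn.datasets import (load_boston, load_breast_cancer,
                              make_circles, make_classification, make_moons)
from sklearn.metrics import mean_absolute_error, roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.utils import check_random_state

from gplearn.fitness import make_fitness
from gplearn.functions import make_function
from gplearn.genetic import (SymbolicClassifier, SymbolicRegressor,
                             SymbolicTransformer)

# Module-level fixture assumed by Examples #4 and #5:
boston = load_boston()
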
Example #2
def test_custom_regressor_metrics():
    """Check whether greater_is_better works for SymbolicRegressor."""

    x_data = check_random_state(0).uniform(-1, 1, 100).reshape(50, 2)
    y_true = x_data[:, 0]**2 + x_data[:, 1]**2

    est_gp = SymbolicRegressor(metric='mean absolute error',
                               stopping_criteria=0.000001,
                               random_state=415,
                               parsimony_coefficient=0.001,
                               init_method='full',
                               init_depth=(2, 4))
    est_gp.fit(x_data, y_true)
    formula = str(est_gp)
    assert_equal('add(mul(X0, X0), mul(X1, X1))', formula)

    def neg_mean_absolute_error(y, y_pred, sample_weight):
        return -1 * mean_absolute_error(y, y_pred,
                                        sample_weight=sample_weight)

    customized_fitness = make_fitness(neg_mean_absolute_error,
                                      greater_is_better=True)

    c_est_gp = SymbolicRegressor(metric=customized_fitness,
                                 stopping_criteria=-0.000001,
                                 random_state=415,
                                 parsimony_coefficient=0.001,
                                 verbose=0,
                                 init_method='full',
                                 init_depth=(2, 4))
    c_est_gp.fit(x_data, y_true)
    c_formula = str(c_est_gp)
    assert_equal('add(mul(X0, X0), mul(X1, X1))', c_formula)
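
Note the sign convention at work here: neg_mean_absolute_error returns the negated MAE and is registered with greater_is_better=True, so fitness is maximized and the stopping criterion flips sign accordingly (-0.000001 for the custom run versus 0.000001 for the built-in, minimized 'mean absolute error'). Both runs recover the same expression.
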
Example #3
def test_classifier_comparison():
    """Test the classifier comparison example works"""

    X, y = make_classification(n_features=2,
                               n_redundant=0,
                               n_informative=2,
                               random_state=1,
                               n_clusters_per_class=1)
    rng = np.random.RandomState(2)
    X += 2 * rng.uniform(size=X.shape)
    linearly_separable = (X, y)
    datasets = [
        make_moons(noise=0.3, random_state=0),
        make_circles(noise=0.2, factor=0.5, random_state=1),
        linearly_separable
    ]
    scores = []
    for ds in datasets:
        X, y = ds
        X = StandardScaler().fit_transform(X)
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=.4, random_state=42)
        clf = SymbolicClassifier(random_state=0)
        clf.fit(X_train, y_train)
        score = clf.score(X_test, y_test)
        scores.append(('%.2f' % score).lstrip('0'))

    assert_equal(scores, ['.95', '.93', '.95'])
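
Here clf.score is the stock scikit-learn classifier score, i.e. mean accuracy on the held-out 40% split, and the ('%.2f' % score).lstrip('0') formatting reproduces the '.95'-style corner labels of scikit-learn's classifier-comparison plot that this test mirrors.
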
Example #4
def test_custom_transformer_metrics():
    """Check whether greater_is_better works for SymbolicTransformer."""

    est_gp = SymbolicTransformer(generations=2,
                                 population_size=100,
                                 hall_of_fame=10,
                                 n_components=1,
                                 metric='pearson',
                                 random_state=415)
    est_gp.fit(boston.data, boston.target)
    for program in est_gp:
        formula = str(program)
    expected_formula = ('sub(div(mul(X4, X12), div(X9, X9)), '
                        'sub(div(X11, X12), add(X12, X0)))')
    assert_equal(expected_formula, formula)

    def _neg_weighted_pearson(y, y_pred, w):
        """Calculate the weighted Pearson correlation coefficient."""
        with np.errstate(divide='ignore', invalid='ignore'):
            y_pred_demean = y_pred - np.average(y_pred, weights=w)
            y_demean = y - np.average(y, weights=w)
            corr = (
                (np.sum(w * y_pred_demean * y_demean) / np.sum(w)) / np.sqrt(
                    (np.sum(w * y_pred_demean**2) * np.sum(w * y_demean**2)) /
                    (np.sum(w)**2)))
        if np.isfinite(corr):
            return -1 * np.abs(corr)
        return 0.

    neg_weighted_pearson = make_fitness(function=_neg_weighted_pearson,
                                        greater_is_better=False)

    c_est_gp = SymbolicTransformer(generations=2,
                                   population_size=100,
                                   hall_of_fame=10,
                                   n_components=1,
                                   stopping_criteria=-1,
                                   metric=neg_weighted_pearson,
                                   random_state=415)
    c_est_gp.fit(boston.data, boston.target)
    for program in c_est_gp:
        c_formula = str(program)
    assert_equal(expected_formula, c_formula)
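
For reference, _neg_weighted_pearson computes the weighted Pearson correlation r_w = cov_w(y, y_pred) / (sigma_w(y) * sigma_w(y_pred)) and returns -|r_w|; with greater_is_better=False that fitness is minimized, which maximizes the absolute correlation and thus matches the built-in 'pearson' metric, as the identical evolved formula confirms.
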
Example #5
def test_function_in_program():
    """Check that using a custom function in a program works"""
    def logic(x1, x2, x3, x4):
        return np.where(x1 > x2, x3, x4)

    logical = make_function(function=logic, name='logical', arity=4)
    function_set = ['add', 'sub', 'mul', 'div', logical]
    est = SymbolicTransformer(generations=2,
                              population_size=2000,
                              hall_of_fame=100,
                              n_components=10,
                              function_set=function_set,
                              parsimony_coefficient=0.0005,
                              max_samples=0.9,
                              random_state=0)
    est.fit(boston.data[:300, :], boston.target[:300])

    formula = str(est._programs[0][906])
    expected_formula = 'sub(logical(X6, add(X11, 0.898), X10, X2), X5)'
    assert_equal(expected_formula, formula)
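
The est._programs[0][906] indexing reaches into gplearn's internals: _programs holds one list per generation, so this picks out individual 906 of the 2000-member initial population (generation 0).
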
Example #6
def test_custom_functions():
    """Test the custom programs example works"""

    rng = check_random_state(0)
    boston = load_boston()
    perm = rng.permutation(boston.target.size)
    boston.data = boston.data[perm]
    boston.target = boston.target[perm]

    def logic(x1, x2, x3, x4):
        return np.where(x1 > x2, x3, x4)

    logical = make_function(function=logic, name='logical', arity=4)

    function_set = ['add', 'sub', 'mul', 'div', logical]
    gp = SymbolicTransformer(generations=2,
                             population_size=2000,
                             hall_of_fame=100,
                             n_components=10,
                             function_set=function_set,
                             parsimony_coefficient=0.0005,
                             max_samples=0.9,
                             random_state=0)

    gp.fit(boston.data[:300, :], boston.target[:300])

    assert_equal(str(gp._programs[0][906]),
                 'sub(logical(X6, add(X11, 0.898), X10, X2), X5)')

    dot_data = gp._programs[0][906].export_graphviz()
    expected = ('digraph program {\nnode [style=filled]\n0 [label="sub", '
                'fillcolor="#136ed4"] ;\n1 [label="logical", '
                'fillcolor="#136ed4"] ;\n2 [label="X6", fillcolor="#60a6f6"] '
                ';\n3 [label="add", fillcolor="#136ed4"] ;\n4 [label="X11", '
                'fillcolor="#60a6f6"] ;\n5 [label="0.898", '
                'fillcolor="#60a6f6"] ;\n3 -> 5 ;\n3 -> 4 ;\n6 [label="X10", '
                'fillcolor="#60a6f6"] ;\n7 [label="X2", fillcolor="#60a6f6"] '
                ';\n1 -> 7 ;\n1 -> 6 ;\n1 -> 3 ;\n1 -> 2 ;\n8 [label="X5", '
                'fillcolor="#60a6f6"] ;\n0 -> 8 ;\n0 -> 1 ;\n}')
    assert_equal(dot_data, expected)
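
The DOT string returned by export_graphviz can be rendered straight from Python. A minimal sketch, assuming the graphviz package (plus the Graphviz binaries) is installed:

import graphviz

graph = graphviz.Source(dot_data)  # dot_data as returned by export_graphviz()
graph.render('program', format='png', cleanup=True)  # writes program.png
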
Example #7
def test_custom_classifier_metrics():
    """Check whether greater_is_better works for SymbolicClassifier."""

    x_data = check_random_state(0).uniform(-1, 1, 100).reshape(50, 2)
    y_true = x_data[:, 0]**2 + x_data[:, 1]**2
    y_true = (y_true < y_true.mean()).astype(int)

    est_gp = SymbolicClassifier(metric='log loss',
                                stopping_criteria=0.000001,
                                random_state=415,
                                parsimony_coefficient=0.01,
                                init_method='full',
                                init_depth=(2, 4))
    est_gp.fit(x_data, y_true)
    formula = str(est_gp)
    expected_formula = 'sub(0.364, mul(add(X0, X0), add(X0, X0)))'
    assert_equal(expected_formula, formula)

    def negative_log_loss(y, y_pred, w):
        """Calculate the log loss."""
        eps = 1e-15
        y_pred = np.clip(y_pred, eps, 1 - eps)
        score = y * np.log(y_pred) + (1 - y) * np.log(1 - y_pred)
        return np.average(score, weights=w)

    customized_fitness = make_fitness(negative_log_loss,
                                      greater_is_better=True)

    c_est_gp = SymbolicClassifier(metric=customized_fitness,
                                  stopping_criteria=0.000001,
                                  random_state=415,
                                  parsimony_coefficient=0.01,
                                  init_method='full',
                                  init_depth=(2, 4))
    c_est_gp.fit(x_data, y_true)
    c_formula = str(c_est_gp)
    assert_equal(expected_formula, c_formula)
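
SymbolicClassifier squashes each program's raw output through its transformer (the sigmoid, by default) before scoring, so the y_pred handed to the metric is a probability in (0, 1); that is why negative_log_loss can clip it and take logarithms directly, making the averaged log-likelihood a drop-in, maximized counterpart of the built-in 'log loss'.
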
Example #8
def test_symbolic_regressor():
    """Check that SymbolicRegressor example works"""

    rng = check_random_state(0)
    X_train = rng.uniform(-1, 1, 100).reshape(50, 2)
    y_train = X_train[:, 0]**2 - X_train[:, 1]**2 + X_train[:, 1] - 1
    X_test = rng.uniform(-1, 1, 100).reshape(50, 2)
    y_test = X_test[:, 0]**2 - X_test[:, 1]**2 + X_test[:, 1] - 1

    est_gp = SymbolicRegressor(population_size=5000,
                               generations=20,
                               stopping_criteria=0.01,
                               p_crossover=0.7,
                               p_subtree_mutation=0.1,
                               p_hoist_mutation=0.05,
                               p_point_mutation=0.1,
                               max_samples=0.9,
                               parsimony_coefficient=0.01,
                               random_state=0)
    est_gp.fit(X_train, y_train)

    assert_equal(len(est_gp._programs), 7)
    expected = 'sub(add(-0.999, X1), mul(sub(X1, X0), add(X0, X1)))'
    assert_equal(str(est_gp), expected)
    assert_almost_equal(est_gp.score(X_test, y_test), 0.99999, decimal=5)
    dot_data = est_gp._program.export_graphviz()
    expected = ('digraph program {\nnode [style=filled]\n0 [label="sub", '
                'fillcolor="#136ed4"] ;\n1 [label="add", fillcolor="#136ed4"] '
                ';\n2 [label="-0.999", fillcolor="#60a6f6"] ;\n3 [label="X1", '
                'fillcolor="#60a6f6"] ;\n1 -> 3 ;\n1 -> 2 ;\n4 [label="mul", '
                'fillcolor="#136ed4"] ;\n5 [label="sub", fillcolor="#136ed4"] '
                ';\n6 [label="X1", fillcolor="#60a6f6"] ;\n7 [label="X0", '
                'fillcolor="#60a6f6"] ;\n5 -> 7 ;\n5 -> 6 ;\n8 [label="add", '
                'fillcolor="#136ed4"] ;\n9 [label="X0", fillcolor="#60a6f6"] '
                ';\n10 [label="X1", fillcolor="#60a6f6"] ;\n8 -> 10 ;\n8 -> 9 '
                ';\n4 -> 8 ;\n4 -> 5 ;\n0 -> 4 ;\n0 -> 1 ;\n}')
    assert_equal(dot_data, expected)
    assert_equal(
        est_gp._program.parents, {
            'method': 'Crossover',
            'parent_idx': 1555,
            'parent_nodes': range(1, 4),
            'donor_idx': 78,
            'donor_nodes': []
        })
    idx = est_gp._program.parents['donor_idx']
    fade_nodes = est_gp._program.parents['donor_nodes']
    assert_equal(str(est_gp._programs[-2][idx]), 'add(-0.999, X1)')
    assert_almost_equal(est_gp._programs[-2][idx].fitness_, 0.351803319075)
    dot_data = est_gp._programs[-2][idx].export_graphviz(fade_nodes=fade_nodes)
    expected = ('digraph program {\nnode [style=filled]\n0 [label="add", '
                'fillcolor="#136ed4"] ;\n1 [label="-0.999", '
                'fillcolor="#60a6f6"] ;\n2 [label="X1", fillcolor="#60a6f6"] '
                ';\n0 -> 2 ;\n0 -> 1 ;\n}')
    assert_equal(dot_data, expected)
    idx = est_gp._program.parents['parent_idx']
    fade_nodes = est_gp._program.parents['parent_nodes']
    assert_equal(str(est_gp._programs[-2][idx]),
                 'sub(sub(X1, 0.939), mul(sub(X1, X0), add(X0, X1)))')
    assert_almost_equal(est_gp._programs[-2][idx].fitness_, 0.17080204042)
    dot_data = est_gp._programs[-2][idx].export_graphviz(fade_nodes=fade_nodes)
    expected = ('digraph program {\nnode [style=filled]\n0 [label="sub", '
                'fillcolor="#136ed4"] ;\n1 [label="sub", fillcolor="#cecece"] '
                ';\n2 [label="X1", fillcolor="#cecece"] ;\n3 [label="0.939", '
                'fillcolor="#cecece"] ;\n1 -> 3 ;\n1 -> 2 ;\n4 [label="mul", '
                'fillcolor="#136ed4"] ;\n5 [label="sub", fillcolor="#136ed4"] '
                ';\n6 [label="X1", fillcolor="#60a6f6"] ;\n7 [label="X0", '
                'fillcolor="#60a6f6"] ;\n5 -> 7 ;\n5 -> 6 ;\n8 [label="add", '
                'fillcolor="#136ed4"] ;\n9 [label="X0", fillcolor="#60a6f6"] '
                ';\n10 [label="X1", fillcolor="#60a6f6"] ;\n8 -> 10 ;\n8 -> 9 '
                ';\n4 -> 8 ;\n4 -> 5 ;\n0 -> 4 ;\n0 -> 1 ;\n}')
    assert_equal(dot_data, expected)
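
The run halts after 7 of the 20 allowed generations because a program's fitness beats stopping_criteria=0.01, which is what len(est_gp._programs) == 7 asserts; the parents dict then lets the test trace the winning crossover back to its parent and donor in the previous generation via _programs[-2]. Once fitted, the model predicts like any scikit-learn regressor, e.g.:

y_pred = est_gp.predict(X_test)  # evaluate the evolved expression on new data
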