def test_symbolic_classifier():
    """Check that SymbolicClassifier example works"""
    # Shuffle the breast-cancer data with a fixed seed so the run is
    # fully deterministic, then train on the first 400 samples.
    rng = check_random_state(0)
    cancer = load_breast_cancer()
    shuffled = rng.permutation(cancer.target.size)
    cancer.data = cancer.data[shuffled]
    cancer.target = cancer.target[shuffled]
    clf = SymbolicClassifier(parsimony_coefficient=.01,
                             feature_names=cancer.feature_names,
                             random_state=1)
    clf.fit(cancer.data[:400], cancer.target[:400])
    # Hold-out evaluation: the evolved program must hit the documented AUC.
    y_true = cancer.target[400:]
    y_score = clf.predict_proba(cancer.data[400:])[:, 1]
    assert_almost_equal(roc_auc_score(y_true, y_score), 0.96937869822485212)
    # The exported Graphviz source must match the example byte-for-byte.
    dot_data = clf._program.export_graphviz()
    expected = ('digraph program {\nnode [style=filled]\n0 [label="sub", '
                'fillcolor="#136ed4"] ;\n1 [label="div", fillcolor="#136ed4"] '
                ';\n2 [label="worst fractal dimension", fillcolor="#60a6f6"] '
                ';\n3 [label="mean concave points", fillcolor="#60a6f6"] '
                ';\n1 -> 3 ;\n1 -> 2 ;\n4 [label="mul", fillcolor="#136ed4"] '
                ';\n5 [label="mean concave points", fillcolor="#60a6f6"] ;\n6 '
                '[label="area error", fillcolor="#60a6f6"] ;\n4 -> 6 ;\n4 -> '
                '5 ;\n0 -> 4 ;\n0 -> 1 ;\n}')
    assert_equal(dot_data, expected)
def test_custom_regressor_metrics():
    """Check whether greater_is_better works for SymbolicRegressor.

    A built-in 'mean absolute error' metric and an equivalent custom
    negated-MAE metric (greater_is_better=True) must evolve the same
    program on the same seed.
    """
    x_data = check_random_state(0).uniform(-1, 1, 100).reshape(50, 2)
    y_true = x_data[:, 0]**2 + x_data[:, 1]**2
    est_gp = SymbolicRegressor(metric='mean absolute error',
                               stopping_criteria=0.000001, random_state=415,
                               parsimony_coefficient=0.001,
                               init_method='full', init_depth=(2, 4))
    est_gp.fit(x_data, y_true)
    formula = est_gp.__str__()
    assert_equal('add(mul(X0, X0), mul(X1, X1))', formula, True)

    def neg_mean_absolute_error(y, y_pred, sample_weight):
        # Pass sample_weight by keyword: scikit-learn metric functions
        # reject extra positional arguments in recent releases.
        return -1 * mean_absolute_error(y, y_pred,
                                        sample_weight=sample_weight)

    customized_fitness = make_fitness(neg_mean_absolute_error,
                                      greater_is_better=True)
    # Maximizing the negated error is equivalent to minimizing the error,
    # so the stopping criterion flips sign.
    c_est_gp = SymbolicRegressor(metric=customized_fitness,
                                 stopping_criteria=-0.000001,
                                 random_state=415,
                                 parsimony_coefficient=0.001, verbose=0,
                                 init_method='full', init_depth=(2, 4))
    c_est_gp.fit(x_data, y_true)
    c_formula = c_est_gp.__str__()
    assert_equal('add(mul(X0, X0), mul(X1, X1))', c_formula, True)
def test_classifier_comparison():
    """Test the classifier comparison example works"""
    # Build the three toy datasets used by the published comparison plot:
    # moons, circles, and a jittered linearly-separable problem.
    X, y = make_classification(n_features=2, n_redundant=0, n_informative=2,
                               random_state=1, n_clusters_per_class=1)
    rng = np.random.RandomState(2)
    X += 2 * rng.uniform(size=X.shape)
    linearly_separable = (X, y)
    datasets = [make_moons(noise=0.3, random_state=0),
                make_circles(noise=0.2, factor=0.5, random_state=1),
                linearly_separable]
    scores = []
    for features, labels in datasets:
        features = StandardScaler().fit_transform(features)
        X_train, X_test, y_train, y_test = train_test_split(
            features, labels, test_size=.4, random_state=42)
        clf = SymbolicClassifier(random_state=0)
        clf.fit(X_train, y_train)
        # Format like the example: two decimals with the leading zero
        # stripped (e.g. '.95').
        scores.append(('%.2f' % clf.score(X_test, y_test)).lstrip('0'))
    assert_equal(scores, ['.95', '.93', '.95'])
def test_custom_transformer_metrics():
    """Check whether greater_is_better works for SymbolicTransformer."""
    # Baseline: built-in 'pearson' metric on the same seed.
    est_gp = SymbolicTransformer(generations=2, population_size=100,
                                 hall_of_fame=10, n_components=1,
                                 metric='pearson', random_state=415)
    est_gp.fit(boston.data, boston.target)
    expected_formula = ('sub(div(mul(X4, X12), div(X9, X9)), '
                        'sub(div(X11, X12), add(X12, X0)))')
    for program in est_gp:
        assert_equal(expected_formula, program.__str__(), True)

    def _neg_weighted_pearson(y, y_pred, weights):
        """Calculate the weighted Pearson correlation coefficient."""
        # Suppress divide/invalid warnings from degenerate (constant)
        # programs; those produce a non-finite corr handled below.
        with np.errstate(divide='ignore', invalid='ignore'):
            pred_centered = y_pred - np.average(y_pred, weights=weights)
            true_centered = y - np.average(y, weights=weights)
            corr = ((np.sum(weights * pred_centered * true_centered) /
                     np.sum(weights)) /
                    np.sqrt((np.sum(weights * pred_centered**2) *
                             np.sum(weights * true_centered**2)) /
                            (np.sum(weights)**2)))
        if np.isfinite(corr):
            return -1 * np.abs(corr)
        return 0.

    neg_weighted_pearson = make_fitness(function=_neg_weighted_pearson,
                                        greater_is_better=False)
    # Minimizing the negated |corr| must evolve the same program as the
    # built-in (maximized) pearson metric.
    c_est_gp = SymbolicTransformer(generations=2, population_size=100,
                                   hall_of_fame=10, n_components=1,
                                   stopping_criteria=-1,
                                   metric=neg_weighted_pearson,
                                   random_state=415)
    c_est_gp.fit(boston.data, boston.target)
    for program in c_est_gp:
        assert_equal(expected_formula, program.__str__(), True)
def test_function_in_program():
    """Check that using a custom function in a program works"""
    def _logic(x1, x2, x3, x4):
        # Element-wise ternary: x3 where x1 > x2, otherwise x4.
        return np.where(x1 > x2, x3, x4)

    logical = make_function(function=_logic, name='logical', arity=4)
    est = SymbolicTransformer(generations=2, population_size=2000,
                              hall_of_fame=100, n_components=10,
                              function_set=['add', 'sub', 'mul', 'div',
                                            logical],
                              parsimony_coefficient=0.0005,
                              max_samples=0.9, random_state=0)
    est.fit(boston.data[:300, :], boston.target[:300])
    # A known program on this seed should embed the custom 'logical' op.
    expected_formula = 'sub(logical(X6, add(X11, 0.898), X10, X2), X5)'
    assert_equal(expected_formula, est._programs[0][906].__str__(), True)
def test_custom_functions():
    """Test the custom programs example works"""
    # Deterministically shuffle the Boston data before fitting.
    rng = check_random_state(0)
    boston = load_boston()
    shuffled = rng.permutation(boston.target.size)
    boston.data = boston.data[shuffled]
    boston.target = boston.target[shuffled]

    def _logic(x1, x2, x3, x4):
        # Element-wise ternary: x3 where x1 > x2, otherwise x4.
        return np.where(x1 > x2, x3, x4)

    logical = make_function(function=_logic, name='logical', arity=4)
    gp = SymbolicTransformer(generations=2, population_size=2000,
                             hall_of_fame=100, n_components=10,
                             function_set=['add', 'sub', 'mul', 'div',
                                           logical],
                             parsimony_coefficient=0.0005,
                             max_samples=0.9, random_state=0)
    gp.fit(boston.data[:300, :], boston.target[:300])
    assert_equal(gp._programs[0][906].__str__(),
                 'sub(logical(X6, add(X11, 0.898), X10, X2), X5)')
    # The exported Graphviz source must match the example byte-for-byte.
    dot_data = gp._programs[0][906].export_graphviz()
    expected = ('digraph program {\nnode [style=filled]\n0 [label="sub", '
                'fillcolor="#136ed4"] ;\n1 [label="logical", '
                'fillcolor="#136ed4"] ;\n2 [label="X6", fillcolor="#60a6f6"] '
                ';\n3 [label="add", fillcolor="#136ed4"] ;\n4 [label="X11", '
                'fillcolor="#60a6f6"] ;\n5 [label="0.898", '
                'fillcolor="#60a6f6"] ;\n3 -> 5 ;\n3 -> 4 ;\n6 [label="X10", '
                'fillcolor="#60a6f6"] ;\n7 [label="X2", fillcolor="#60a6f6"] '
                ';\n1 -> 7 ;\n1 -> 6 ;\n1 -> 3 ;\n1 -> 2 ;\n8 [label="X5", '
                'fillcolor="#60a6f6"] ;\n0 -> 8 ;\n0 -> 1 ;\n}')
    assert_equal(dot_data, expected)
def test_custom_classifier_metrics():
    """Check whether greater_is_better works for SymbolicClassifier."""
    # Binary target: inside/outside the mean radius of x0^2 + x1^2.
    x_data = check_random_state(0).uniform(-1, 1, 100).reshape(50, 2)
    y_true = x_data[:, 0]**2 + x_data[:, 1]**2
    y_true = (y_true < y_true.mean()).astype(int)
    est_gp = SymbolicClassifier(metric='log loss',
                                stopping_criteria=0.000001,
                                random_state=415,
                                parsimony_coefficient=0.01,
                                init_method='full', init_depth=(2, 4))
    est_gp.fit(x_data, y_true)
    expected_formula = 'sub(0.364, mul(add(X0, X0), add(X0, X0)))'
    assert_equal(expected_formula, str(est_gp), True)

    def negative_log_loss(y, y_pred, w):
        """Calculate the log loss."""
        eps = 1e-15
        # Clip probabilities away from 0/1 so the logs stay finite.
        clipped = np.clip(y_pred, eps, 1 - eps)
        score = y * np.log(clipped) + (1 - y) * np.log(1 - clipped)
        return np.average(score, weights=w)

    customized_fitness = make_fitness(negative_log_loss,
                                      greater_is_better=True)
    # Maximizing negated log loss must reproduce the built-in result.
    c_est_gp = SymbolicClassifier(metric=customized_fitness,
                                  stopping_criteria=0.000001,
                                  random_state=415,
                                  parsimony_coefficient=0.01,
                                  init_method='full', init_depth=(2, 4))
    c_est_gp.fit(x_data, y_true)
    assert_equal(expected_formula, str(c_est_gp), True)
def test_symbolic_regressor():
    """Check that SymbolicRegressor example works"""
    # Synthetic target: x0^2 - x1^2 + x1 - 1, with a held-out test split
    # drawn from the same seeded generator.
    rng = check_random_state(0)
    X_train = rng.uniform(-1, 1, 100).reshape(50, 2)
    y_train = X_train[:, 0]**2 - X_train[:, 1]**2 + X_train[:, 1] - 1
    X_test = rng.uniform(-1, 1, 100).reshape(50, 2)
    y_test = X_test[:, 0]**2 - X_test[:, 1]**2 + X_test[:, 1] - 1
    est_gp = SymbolicRegressor(population_size=5000, generations=20,
                               stopping_criteria=0.01, p_crossover=0.7,
                               p_subtree_mutation=0.1, p_hoist_mutation=0.05,
                               p_point_mutation=0.1, max_samples=0.9,
                               parsimony_coefficient=0.01, random_state=0)
    est_gp.fit(X_train, y_train)
    # Evolution should halt after seven generations on this seed.
    assert_equal(len(est_gp._programs), 7)
    assert_equal(str(est_gp),
                 'sub(add(-0.999, X1), mul(sub(X1, X0), add(X0, X1)))')
    assert_almost_equal(est_gp.score(X_test, y_test), 0.99999, decimal=5)
    # Winning program's Graphviz export matches the example exactly.
    graph = est_gp._program.export_graphviz()
    expected = ('digraph program {\nnode [style=filled]\n0 [label="sub", '
                'fillcolor="#136ed4"] ;\n1 [label="add", fillcolor="#136ed4"] '
                ';\n2 [label="-0.999", fillcolor="#60a6f6"] ;\n3 [label="X1", '
                'fillcolor="#60a6f6"] ;\n1 -> 3 ;\n1 -> 2 ;\n4 [label="mul", '
                'fillcolor="#136ed4"] ;\n5 [label="sub", fillcolor="#136ed4"] '
                ';\n6 [label="X1", fillcolor="#60a6f6"] ;\n7 [label="X0", '
                'fillcolor="#60a6f6"] ;\n5 -> 7 ;\n5 -> 6 ;\n8 [label="add", '
                'fillcolor="#136ed4"] ;\n9 [label="X0", fillcolor="#60a6f6"] '
                ';\n10 [label="X1", fillcolor="#60a6f6"] ;\n8 -> 10 ;\n8 -> 9 '
                ';\n4 -> 8 ;\n4 -> 5 ;\n0 -> 4 ;\n0 -> 1 ;\n}')
    assert_equal(graph, expected)
    # The winning program's recorded lineage (crossover of a specific
    # parent and donor from the previous generation).
    assert_equal(est_gp._program.parents,
                 {'method': 'Crossover',
                  'parent_idx': 1555,
                  'parent_nodes': range(1, 4),
                  'donor_idx': 78,
                  'donor_nodes': []})
    # Donor side of the crossover.
    idx = est_gp._program.parents['donor_idx']
    fade_nodes = est_gp._program.parents['donor_nodes']
    assert_equal(est_gp._programs[-2][idx].__str__(), 'add(-0.999, X1)')
    assert_almost_equal(est_gp._programs[-2][idx].fitness_, 0.351803319075)
    graph = est_gp._programs[-2][idx].export_graphviz(fade_nodes=fade_nodes)
    expected = ('digraph program {\nnode [style=filled]\n0 [label="add", '
                'fillcolor="#136ed4"] ;\n1 [label="-0.999", '
                'fillcolor="#60a6f6"] ;\n2 [label="X1", fillcolor="#60a6f6"] '
                ';\n0 -> 2 ;\n0 -> 1 ;\n}')
    assert_equal(graph, expected)
    # Parent side of the crossover, with the replaced subtree faded.
    idx = est_gp._program.parents['parent_idx']
    fade_nodes = est_gp._program.parents['parent_nodes']
    assert_equal(est_gp._programs[-2][idx].__str__(),
                 'sub(sub(X1, 0.939), mul(sub(X1, X0), add(X0, X1)))')
    assert_almost_equal(est_gp._programs[-2][idx].fitness_, 0.17080204042)
    graph = est_gp._programs[-2][idx].export_graphviz(fade_nodes=fade_nodes)
    expected = ('digraph program {\nnode [style=filled]\n0 [label="sub", '
                'fillcolor="#136ed4"] ;\n1 [label="sub", fillcolor="#cecece"] '
                ';\n2 [label="X1", fillcolor="#cecece"] ;\n3 [label="0.939", '
                'fillcolor="#cecece"] ;\n1 -> 3 ;\n1 -> 2 ;\n4 [label="mul", '
                'fillcolor="#136ed4"] ;\n5 [label="sub", fillcolor="#136ed4"] '
                ';\n6 [label="X1", fillcolor="#60a6f6"] ;\n7 [label="X0", '
                'fillcolor="#60a6f6"] ;\n5 -> 7 ;\n5 -> 6 ;\n8 [label="add", '
                'fillcolor="#136ed4"] ;\n9 [label="X0", fillcolor="#60a6f6"] '
                ';\n10 [label="X1", fillcolor="#60a6f6"] ;\n8 -> 10 ;\n8 -> 9 '
                ';\n4 -> 8 ;\n4 -> 5 ;\n0 -> 4 ;\n0 -> 1 ;\n}')
    assert_equal(graph, expected)