def test_print_overloading():
    """Check that print() on a program emits its readable expression form"""

    params = {
        'function_set': ['add2', 'sub2', 'mul2', 'div2'],
        'arities': {2: ['add2', 'sub2', 'mul2', 'div2']},
        'init_depth': (2, 6),
        'init_method': 'half and half',
        'n_features': 10,
        'const_range': (-1.0, 1.0),
        'metric': 'mean absolute error',
        'p_point_replace': 0.05,
        'parsimony_coefficient': 0.1,
    }
    rng = check_random_state(415)

    # Hand-written program; the assertion below pins how it must be rendered
    nodes = ['mul2', 'div2', 8, 1, 'sub2', 9, .5]
    program = _Program(random_state=rng, program=nodes, **params)
    expected = "mul(div(X8, X1), sub(X9, 0.500))"

    # Temporarily point stdout at an in-memory buffer; always restore it
    saved_stdout = sys.stdout
    try:
        buffer = StringIO()
        sys.stdout = buffer
        print(program)
        printed = buffer.getvalue().strip()
    finally:
        sys.stdout = saved_stdout

    assert_true(printed == expected)
def test_deprecated():
    # The deprecated decorator must raise exactly one DeprecationWarning
    # whose message mentions "deprecated", for a function and for a class.
    # Adapted from http://docs.python.org/library/warnings.html

    # Case 1: a decorated function
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")

        @deprecated()
        def ham():
            return "spam"

        result = ham()

        # the wrapped function still has to return its value
        assert_equal(result, "spam")

        assert_equal(len(caught), 1)
        assert_true(issubclass(caught[0].category, DeprecationWarning))
        assert_true("deprecated" in str(caught[0].message).lower())

    # Case 2: a decorated class
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")

        @deprecated("don't use this")
        class Ham(object):
            SPAM = 1

        instance = Ham()

        # class attributes must survive the decoration
        assert_true(hasattr(instance, "SPAM"))

        assert_equal(len(caught), 1)
        assert_true(issubclass(caught[0].category, DeprecationWarning))
        assert_true("deprecated" in str(caught[0].message).lower())
def test_print_overloading():
    """Check that print() on a program emits its readable expression form"""

    params = {
        'function_set': [add2, sub2, mul2, div2],
        'arities': {2: [add2, sub2, mul2, div2]},
        'init_depth': (2, 6),
        'init_method': 'half and half',
        'n_features': 10,
        'const_range': (-1.0, 1.0),
        'metric': 'mean absolute error',
        'p_point_replace': 0.05,
        'parsimony_coefficient': 0.1,
    }
    rng = check_random_state(415)

    # Hand-written program; the assertion below pins how it must be rendered
    nodes = [mul2, div2, 8, 1, sub2, 9, .5]
    program = _Program(random_state=rng, program=nodes, **params)
    expected = "mul(div(X8, X1), sub(X9, 0.500))"

    # Temporarily point stdout at an in-memory buffer; always restore it
    saved_stdout = sys.stdout
    try:
        buffer = StringIO()
        sys.stdout = buffer
        print(program)
        printed = buffer.getvalue().strip()
    finally:
        sys.stdout = saved_stdout

    assert_true(printed == expected)
def test_program_init_depth():
    """Only 'full' should put every program at the deepest depth for (6, 6)"""

    params = {
        'function_set': ['add2', 'sub2', 'mul2', 'div2', 'sqrt1', 'log1',
                         'abs1', 'max2', 'min2'],
        'arities': {
            1: ['sqrt1', 'log1', 'abs1'],
            2: ['add2', 'sub2', 'mul2', 'div2', 'max2', 'min2'],
        },
        'init_depth': (6, 6),
        'n_features': 10,
        'const_range': (-1.0, 1.0),
        'metric': 'mean absolute error',
        'p_point_replace': 0.05,
        'parsimony_coefficient': 0.1,
    }
    rng = check_random_state(415)
    n_programs = 20

    def depth_histogram(init_method):
        # Build the whole population first, then count programs per depth.
        # All calls share one rng, so the call order below matters.
        population = [_Program(init_method=init_method,
                               random_state=rng, **params)
                      for _ in range(n_programs)]
        return np.bincount([member.depth_ for member in population])

    full_depth = depth_histogram('full')
    hnh_depth = depth_histogram('half and half')
    grow_depth = depth_histogram('grow')

    # The last bin counts the programs at the deepest depth observed
    assert_true(full_depth[-1] == n_programs)
    assert_false(hnh_depth[-1] == n_programs)
    assert_false(grow_depth[-1] == n_programs)
def test_weighted_correlations():
    """Check weighted Pearson and Spearman coefficients against scipy

    With constant weights the weighted coefficients must match scipy's
    unweighted ones; with irregular weights they must differ from them.
    """

    random_state = check_random_state(415)
    x1 = random_state.uniform(size=500)
    x2 = random_state.uniform(size=500)
    w1 = np.ones(500)
    w2 = random_state.uniform(size=500)

    # Pearson's correlation coefficient
    scipy_pearson = pearsonr(x1, x2)[0]
    # Check with constant weights (should be equal)
    gplearn_pearson = weighted_pearson(x1, x2, w1)
    assert_almost_equal(scipy_pearson, gplearn_pearson)
    # Check with irregular weights (should be different)
    gplearn_pearson = weighted_pearson(x1, x2, w2)
    assert_true(abs(scipy_pearson - gplearn_pearson) > 0.01)

    # Spearman's correlation coefficient
    scipy_spearman = spearmanr(x1, x2)[0]
    # Check with constant weights (should be equal)
    gplearn_spearman = weighted_spearman(x1, x2, w1)
    assert_almost_equal(scipy_spearman, gplearn_spearman)
    # Check with irregular weights (should be different).
    # This must call weighted_spearman: calling weighted_pearson here would
    # leave the Spearman implementation untested with non-constant weights.
    gplearn_spearman = weighted_spearman(x1, x2, w2)
    assert_true(abs(scipy_spearman - gplearn_spearman) > 0.01)
def test_compute_class_weight():
    # Test (and demo) compute_class_weight in "auto" mode.
    labels = np.asarray([2, 2, 2, 3, 3, 4])
    unique_labels = np.unique(labels)
    weights = compute_class_weight("auto", unique_labels, labels)

    # the weights are compared against the number of distinct classes
    assert_almost_equal(weights.sum(), unique_labels.shape)
    # classes 2, 3 and 4 occur 3, 2 and 1 times: weights must increase
    assert_true(weights[0] < weights[1] < weights[2])
def test_early_stopping():
    """Check that a met stopping criterion leaves one entry in _programs"""

    train_X = boston.data[:400, :]
    train_y = boston.target[:400]

    regressor = SymbolicRegressor(stopping_criteria=10, random_state=0)
    regressor.fit(train_X, train_y)
    assert_true(len(regressor._programs) == 1)

    transformer = SymbolicTransformer(stopping_criteria=0.5, random_state=0)
    transformer.fit(train_X, train_y)
    assert_true(len(transformer._programs) == 1)
def test_output_shape():
    """Check that transform returns (n_samples, n_components) output"""

    rng = check_random_state(415)
    X = rng.uniform(size=50).reshape((5, 10))
    y = rng.uniform(size=5)

    # Only the transformer is exercised here
    transformer = SymbolicTransformer(n_components=5, generations=2,
                                      random_state=0)
    transformer.fit(X, y)
    transformed = transformer.transform(X)
    assert_true(transformed.shape == (5, 5))
def test_transformer_iterable():
    """Check len(), iteration and indexing on the transformer"""

    rng = check_random_state(415)
    X = rng.uniform(size=50).reshape((5, 10))
    y = rng.uniform(size=5)
    transformer = SymbolicTransformer(generations=2, random_state=0)

    # Before fitting the transformer must behave like an empty sequence
    size_before = len(transformer)
    lengths_before = [gp.length_ for gp in transformer]
    assert_true(size_before == 0)
    assert_true(lengths_before == [])

    # After fitting it exposes its programs; their lengths are pinned
    transformer.fit(X, y)
    size_after = len(transformer)
    lengths_after = [gp.length_ for gp in transformer]
    pinned_lengths = [15, 19, 19, 12, 9, 10, 7, 14, 6, 21]
    assert_true(size_after == 10)
    assert_true(lengths_after == pinned_lengths)

    # Indexing past the last program must raise IndexError
    assert_raises(IndexError, transformer.__getitem__, 10)
def test_subsample():
    """Check that max_samples is accepted and changes the hold-out error"""

    def holdout_mae(model):
        # Fit on the first 400 rows, score on the remainder
        model.fit(boston.data[:400, :], boston.target[:400])
        return mean_absolute_error(model.predict(boston.data[400:, :]),
                                   boston.target[400:])

    full_error = holdout_mae(
        SymbolicRegressor(max_samples=1.0, random_state=0))
    subsampled_error = holdout_mae(
        SymbolicRegressor(max_samples=0.7, random_state=0))

    assert_true(abs(full_error - subsampled_error) > 0.01)
def test_transformer_iterable():
    """Check len(), iteration and indexing on the transformer"""

    rng = check_random_state(415)
    X = rng.uniform(size=50).reshape((5, 10))
    y = rng.uniform(size=5)
    function_set = ['add', 'sub', 'mul', 'div', 'sqrt', 'log', 'abs', 'neg',
                    'inv', 'max', 'min']
    transformer = SymbolicTransformer(population_size=500,
                                      generations=2,
                                      function_set=function_set,
                                      random_state=0)

    # Before fitting the transformer must behave like an empty sequence
    size_before = len(transformer)
    lengths_before = [gp.length_ for gp in transformer]
    assert_true(size_before == 0)
    assert_true(lengths_before == [])

    # After fitting it exposes its programs; their lengths are pinned
    transformer.fit(X, y)
    size_after = len(transformer)
    lengths_after = [gp.length_ for gp in transformer]
    pinned_lengths = [15, 19, 19, 12, 9, 10, 7, 14, 6, 21]
    assert_true(size_after == 10)
    assert_true(lengths_after == pinned_lengths)

    # Indexing past the last program must raise IndexError
    assert_raises(IndexError, transformer.__getitem__, 10)
def test_trigonometric():
    """Check that trigonometric=True is accepted and changes the error"""

    def holdout_mae(model):
        # Fit on the first 400 rows, score on the remainder
        model.fit(boston.data[:400, :], boston.target[:400])
        return mean_absolute_error(model.predict(boston.data[400:, :]),
                                   boston.target[400:])

    default_error = holdout_mae(SymbolicRegressor(random_state=0))
    trig_error = holdout_mae(
        SymbolicRegressor(trigonometric=True, random_state=0))

    assert_true(abs(default_error - trig_error) > 0.01)
def test_make_rng():
    # Check the check_random_state utility function behavior

    # None and the np.random module both map to numpy's global RandomState
    global_rng = np.random.mtrand._rand
    assert_true(check_random_state(None) is global_rng)
    assert_true(check_random_state(np.random) is global_rng)

    # An int seed gives the same first draw as RandomState(seed)
    reference = np.random.RandomState(42)
    assert_true(check_random_state(42).randint(100) == reference.randint(100))

    # An existing RandomState instance is passed through untouched
    reference = np.random.RandomState(42)
    assert_true(check_random_state(reference) is reference)

    # The first draw for seed 43 differs from the one for seed 42
    reference = np.random.RandomState(42)
    assert_true(check_random_state(43).randint(100) != reference.randint(100))

    # Anything else is rejected
    assert_raises(ValueError, check_random_state, "some invalid seed")
def test_trigonometric():
    """Check that trig functions can be used and change the error"""

    def holdout_mae(model):
        # Fit on the first 400 rows, score on the remainder
        model.fit(boston.data[:400, :], boston.target[:400])
        return mean_absolute_error(model.predict(boston.data[400:, :]),
                                   boston.target[400:])

    default_error = holdout_mae(SymbolicRegressor(random_state=0))
    trig_error = holdout_mae(
        SymbolicRegressor(
            function_set=['add', 'sub', 'mul', 'div', 'sin', 'cos', 'tan'],
            random_state=0))

    assert_true(abs(default_error - trig_error) > 0.01)
def check_fit_score_takes_y(name, Estimator):
    # check that all estimators accept an optional y
    # in fit and score so they can be used in pipelines
    #
    # name      : estimator name, forwarded to
    #             multioutput_estimator_convert_y_2d
    # Estimator : estimator class, instantiated with no arguments
    rnd = np.random.RandomState(0)
    X = rnd.uniform(size=(10, 3))
    y = np.arange(10) % 3
    y = multioutput_estimator_convert_y_2d(name, y)
    estimator = Estimator()
    set_fast_parameters(estimator)
    set_random_state(estimator)

    # inspect.getargspec was removed in Python 3.11; getfullargspec is its
    # replacement and, like getargspec, lists the bound `self` first for
    # bound methods, so the positional index used below stays valid.
    getargspec = getattr(inspect, "getfullargspec", None)
    if getargspec is None:
        getargspec = inspect.getargspec

    funcs = ["fit", "score", "partial_fit", "fit_predict", "fit_transform"]

    for func_name in funcs:
        func = getattr(estimator, func_name, None)
        if func is None:
            # the estimator does not implement this method
            continue
        # the call itself must succeed with (X, y) ...
        func(X, y)
        # ... and the argument after (self, X) must be named y / Y
        args = getargspec(func).args
        assert_true(args[2] in ["y", "Y"])
def check_parameters_default_constructible(name, Estimator):
    # test default-constructibility
    #
    # name      : estimator name (not used in this check)
    # Estimator : estimator class, instantiated with no arguments
    #
    # get rid of deprecation warnings
    with warnings.catch_warnings(record=True):
        estimator = Estimator()
        # test cloning
        clone(estimator)
        # test __repr__
        repr(estimator)
        # test that set_params returns self
        assert_true(isinstance(estimator.set_params(), Estimator))

        # test if init does nothing but set parameters
        # this is important for grid_search etc.
        # We get the default parameters from init and then
        # compare these against the actual values of the attributes.

        # this comes from getattr. Gets rid of deprecation decorator.
        init = getattr(estimator.__init__, 'deprecated_original',
                       estimator.__init__)

        # inspect.getargspec was removed in Python 3.11; prefer
        # getfullargspec when it exists. Both return (args, varargs,
        # keywords, defaults) as their first four fields.
        getargspec = getattr(inspect, "getfullargspec", None)
        if getargspec is None:
            getargspec = inspect.getargspec

        try:
            args, varargs, kws, defaults = getargspec(init)[:4]
        except TypeError:
            # init is not a python function.
            # true for mixins
            return

        params = estimator.get_params()
        # drop the leading `self`
        args = args[1:]
        if not args:
            # nothing but self: no defaults to compare
            return
        # every remaining constructor argument must have a default
        assert_equal(len(args), len(defaults))

        for arg, default in zip(args, defaults):
            if arg not in params.keys():
                # deprecated parameter, not in get_params
                assert_true(default is None)
                continue

            if isinstance(params[arg], np.ndarray):
                assert_array_equal(params[arg], default)
            else:
                assert_equal(params[arg], default)
def check_sparsify_coefficients(name, Estimator):
    # After sparsify(), coef_ must be sparse and predictions unchanged,
    # both directly and after a pickle round trip.
    X = np.array([[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1],
                  [-1, -2], [2, 2], [-2, -2]])
    y = [1, 1, 1, 2, 2, 2, 3, 3, 3]
    model = Estimator()

    model.fit(X, y)
    baseline = model.predict(X)

    # test sparsify with dense inputs
    model.sparsify()
    assert_true(sparse.issparse(model.coef_))
    after_sparsify = model.predict(X)
    assert_array_equal(after_sparsify, baseline)

    # pickle and unpickle with sparse coef_
    model = pickle.loads(pickle.dumps(model))
    assert_true(sparse.issparse(model.coef_))
    after_pickle = model.predict(X)
    assert_array_equal(after_pickle, baseline)
def test_program_init_depth():
    """Only 'full' should put every program at the deepest depth for (6, 6)"""

    params = {
        'function_set': [add2, sub2, mul2, div2, sqrt1, log1, abs1, max2,
                         min2],
        'arities': {
            1: [sqrt1, log1, abs1],
            2: [add2, sub2, mul2, div2, max2, min2],
        },
        'init_depth': (6, 6),
        'n_features': 10,
        'const_range': (-1.0, 1.0),
        'metric': 'mean absolute error',
        'p_point_replace': 0.05,
        'parsimony_coefficient': 0.1,
    }
    rng = check_random_state(415)
    n_programs = 20

    def depth_histogram(init_method):
        # Build the whole population first, then count programs per depth.
        # All calls share one rng, so the call order below matters.
        population = [_Program(init_method=init_method,
                               random_state=rng, **params)
                      for _ in range(n_programs)]
        return np.bincount([member.depth_ for member in population])

    full_depth = depth_histogram('full')
    hnh_depth = depth_histogram('half and half')
    grow_depth = depth_histogram('grow')

    # The last bin counts the programs at the deepest depth observed
    assert_true(full_depth[-1] == n_programs)
    assert_false(hnh_depth[-1] == n_programs)
    assert_false(grow_depth[-1] == n_programs)
def fit(self, X, y):
    # Validate the inputs and return self; nothing is learned here.
    # X and y must have equal length, and each optional predicate
    # (self.check_X / self.check_y), when set, must accept its input.
    same_length = len(X) == len(y)
    assert_true(same_length)

    if self.check_X is not None:
        x_ok = self.check_X(X)
        assert_true(x_ok)

    if self.check_y is not None:
        y_ok = self.check_y(y)
        assert_true(y_ok)

    return self
def test_export_graphviz():
    """Check the Graphviz export of a small program against pinned output"""

    params = {
        'function_set': [add2, sub2, mul2, div2],
        'arities': {2: [add2, sub2, mul2, div2]},
        'init_depth': (2, 6),
        'init_method': 'half and half',
        'n_features': 10,
        'const_range': (-1.0, 1.0),
        'metric': 'mean absolute error',
        'p_point_replace': 0.05,
        'parsimony_coefficient': 0.1,
    }
    rng = check_random_state(415)

    # Case 1: a small hand-written program, default colours
    nodes = [mul2, div2, 8, 1, sub2, 9, .5]
    program = _Program(random_state=rng, program=nodes, **params)
    exported = program.export_graphviz()
    expected = (
        'digraph program {\n'
        'node [style=filled]0 [label="mul", fillcolor="#136ed4"] ;\n'
        '1 [label="div", fillcolor="#136ed4"] ;\n'
        '2 [label="X8", fillcolor="#60a6f6"] ;\n'
        '3 [label="X1", fillcolor="#60a6f6"] ;\n'
        '1 -> 3 ;\n1 -> 2 ;\n'
        '4 [label="sub", fillcolor="#136ed4"] ;\n'
        '5 [label="X9", fillcolor="#60a6f6"] ;\n'
        '6 [label="0.500", fillcolor="#60a6f6"] ;\n'
        '4 -> 6 ;\n4 -> 5 ;\n0 -> 4 ;\n0 -> 1 ;\n}'
    )
    assert_true(exported == expected)

    # Case 2: same program, nodes 0-3 listed in fade_nodes
    exported = program.export_graphviz(fade_nodes=[0, 1, 2, 3])
    expected = (
        'digraph program {\n'
        'node [style=filled]0 [label="mul", fillcolor="#cecece"] ;\n'
        '1 [label="div", fillcolor="#cecece"] ;\n'
        '2 [label="X8", fillcolor="#cecece"] ;\n'
        '3 [label="X1", fillcolor="#cecece"] ;\n'
        '1 -> 3 ;\n1 -> 2 ;\n'
        '4 [label="sub", fillcolor="#136ed4"] ;\n'
        '5 [label="X9", fillcolor="#60a6f6"] ;\n'
        '6 [label="0.500", fillcolor="#60a6f6"] ;\n'
        '4 -> 6 ;\n4 -> 5 ;\n0 -> 4 ;\n0 -> 1 ;\n}'
    )
    assert_true(exported == expected)

    # Case 3: a degenerate program consisting of a single node
    nodes = [1]
    program = _Program(random_state=rng, program=nodes, **params)
    exported = program.export_graphviz()
    expected = (
        'digraph program {\n'
        'node [style=filled]0 [label="X1", fillcolor="#60a6f6"] ;\n}'
    )
    assert_true(exported == expected)
def test_export_graphviz():
    """Check the Graphviz export of a small program against pinned output"""

    params = {
        'function_set': ['add2', 'sub2', 'mul2', 'div2'],
        'arities': {2: ['add2', 'sub2', 'mul2', 'div2']},
        'init_depth': (2, 6),
        'init_method': 'half and half',
        'n_features': 10,
        'const_range': (-1.0, 1.0),
        'metric': 'mean absolute error',
        'p_point_replace': 0.05,
        'parsimony_coefficient': 0.1,
    }
    rng = check_random_state(415)

    # Case 1: a small hand-written program, default colours
    nodes = ['mul2', 'div2', 8, 1, 'sub2', 9, .5]
    program = _Program(random_state=rng, program=nodes, **params)
    exported = program.export_graphviz()
    expected = (
        'digraph program {\n'
        'node [style=filled]0 [label="mul", fillcolor="#136ed4"] ;\n'
        '1 [label="div", fillcolor="#136ed4"] ;\n'
        '2 [label="X8", fillcolor="#60a6f6"] ;\n'
        '3 [label="X1", fillcolor="#60a6f6"] ;\n'
        '1 -> 3 ;\n1 -> 2 ;\n'
        '4 [label="sub", fillcolor="#136ed4"] ;\n'
        '5 [label="X9", fillcolor="#60a6f6"] ;\n'
        '6 [label="0.500", fillcolor="#60a6f6"] ;\n'
        '4 -> 6 ;\n4 -> 5 ;\n0 -> 4 ;\n0 -> 1 ;\n}'
    )
    assert_true(exported == expected)

    # Case 2: same program, nodes 0-3 listed in fade_nodes
    exported = program.export_graphviz(fade_nodes=[0, 1, 2, 3])
    expected = (
        'digraph program {\n'
        'node [style=filled]0 [label="mul", fillcolor="#cecece"] ;\n'
        '1 [label="div", fillcolor="#cecece"] ;\n'
        '2 [label="X8", fillcolor="#cecece"] ;\n'
        '3 [label="X1", fillcolor="#cecece"] ;\n'
        '1 -> 3 ;\n1 -> 2 ;\n'
        '4 [label="sub", fillcolor="#136ed4"] ;\n'
        '5 [label="X9", fillcolor="#60a6f6"] ;\n'
        '6 [label="0.500", fillcolor="#60a6f6"] ;\n'
        '4 -> 6 ;\n4 -> 5 ;\n0 -> 4 ;\n0 -> 1 ;\n}'
    )
    assert_true(exported == expected)

    # Case 3: a degenerate program consisting of a single node
    nodes = [1]
    program = _Program(random_state=rng, program=nodes, **params)
    exported = program.export_graphviz()
    expected = (
        'digraph program {\n'
        'node [style=filled]0 [label="X1", fillcolor="#60a6f6"] ;\n}'
    )
    assert_true(exported == expected)
def test_parsimony_coefficient():
    """Check that different parsimony coefficients give different errors"""

    def holdout_mae(coefficient):
        # Fit on the first 400 rows, score on the remainder
        model = SymbolicRegressor(parsimony_coefficient=coefficient,
                                  random_state=0)
        model.fit(boston.data[:400, :], boston.target[:400])
        return mean_absolute_error(model.predict(boston.data[400:, :]),
                                   boston.target[400:])

    small_error = holdout_mae(0.001)
    large_error = holdout_mae(0.1)
    auto_error = holdout_mae('auto')

    # every pair of settings must differ noticeably
    assert_true(abs(small_error - large_error) > 0.01)
    assert_true(abs(small_error - auto_error) > 0.01)
    assert_true(abs(large_error - auto_error) > 0.01)
def check_classifiers_train(name, Classifier):
    # Train a classifier on a 3-class blob problem and on the binary
    # problem derived from it, and check the shape and consistency of
    # predict / decision_function / predict_proba, plus input validation.
    #
    # name       : classifier name; 'BernoulliNB' and 'MultinomialNB' get
    #              shifted inputs and skip the accuracy threshold
    # Classifier : classifier class, instantiated with no arguments
    X_m, y_m = make_blobs(random_state=0)
    X_m, y_m = shuffle(X_m, y_m, random_state=7)
    X_m = StandardScaler().fit_transform(X_m)
    # generate binary problem from multi-class one
    y_b = y_m[y_m != 2]
    X_b = X_m[y_m != 2]
    for (X, y) in [(X_m, y_m), (X_b, y_b)]:
        classes = np.unique(y)
        n_classes = len(classes)
        n_samples, _ = X.shape
        # catch deprecation warnings
        with warnings.catch_warnings(record=True):
            classifier = Classifier()
        if name in ['BernoulliNB', 'MultinomialNB']:
            # shift the data so that its minimum is zero
            X -= X.min()
        set_fast_parameters(classifier)
        set_random_state(classifier)
        # raises error on malformed input for fit
        assert_raises(ValueError, classifier.fit, X, y[:-1])

        # fit
        classifier.fit(X, y)
        # with lists
        classifier.fit(X.tolist(), y.tolist())
        assert_true(hasattr(classifier, "classes_"))
        y_pred = classifier.predict(X)
        assert_equal(y_pred.shape, (n_samples,))
        # training set performance
        if name not in ['BernoulliNB', 'MultinomialNB']:
            assert_greater(accuracy_score(y, y_pred), 0.83)

        # raises error on malformed input for predict
        assert_raises(ValueError, classifier.predict, X.T)
        if hasattr(classifier, "decision_function"):
            try:
                # decision_function agrees with predict
                decision = classifier.decision_function(X)
                # Compare by value (==): `is` on ints tests object identity
                # and only works by accident of small-int caching.
                if n_classes == 2:
                    assert_equal(decision.shape, (n_samples,))
                    # builtin int replaces np.int, which was a plain alias
                    # for it and was removed in NumPy 1.24
                    dec_pred = (decision.ravel() > 0).astype(int)
                    assert_array_equal(dec_pred, y_pred)
                if n_classes == 3:
                    assert_equal(decision.shape, (n_samples, n_classes))
                    assert_array_equal(np.argmax(decision, axis=1), y_pred)

                # raises error on malformed input for decision_function
                assert_raises(ValueError,
                              classifier.decision_function, X.T)
            except NotImplementedError:
                # decision_function exists but is not implemented: skip
                pass
        if hasattr(classifier, "predict_proba"):
            # predict_proba agrees with predict
            y_prob = classifier.predict_proba(X)
            assert_equal(y_prob.shape, (n_samples, n_classes))
            assert_array_equal(np.argmax(y_prob, axis=1), y_pred)
            # check that probas for all classes sum to one
            assert_array_almost_equal(np.sum(y_prob, axis=1),
                                      np.ones(n_samples))
            # raises error on malformed input for predict_proba
            assert_raises(ValueError, classifier.predict_proba, X.T)
def predict(self, T):
    # Optionally validate T with self.check_X, then return the size of
    # T's first axis.
    if self.check_X is not None:
        accepted = self.check_X(T)
        assert_true(accepted)
    first_axis_size = T.shape[0]
    return first_axis_size
def test_print_overloading_estimator():
    """Check print() output of estimators before and after fitting

    Unfitted, the printed text must equal the estimator's repr; fitted, it
    must equal the printed program(s) and differ from the unfitted text.
    """

    def printed_form(obj):
        # Return what print(obj) writes to stdout, stripped; stdout is
        # always restored, even if printing raises.
        saved_stdout = sys.stdout
        try:
            buffer = StringIO()
            sys.stdout = buffer
            print(obj)
            return buffer.getvalue().strip()
        finally:
            sys.stdout = saved_stdout

    rng = check_random_state(415)
    X = rng.uniform(size=50).reshape((5, 10))
    y = rng.uniform(size=5)

    # Check the regressor
    regressor = SymbolicRegressor(generations=2, random_state=0)
    text_unfitted = printed_form(regressor)
    regressor.fit(X, y)
    text_fitted = printed_form(regressor)
    text_program = printed_form(regressor._program)

    assert_true(text_unfitted != text_fitted)
    assert_true(text_unfitted == regressor.__repr__())
    assert_true(text_fitted == text_program)

    # Check the transformer
    transformer = SymbolicTransformer(generations=2, random_state=0)
    text_unfitted = printed_form(transformer)
    transformer.fit(X, y)
    text_fitted = printed_form(transformer)
    # The expected text is the list of program strings with the quotes
    # removed and one program per line
    listing = str([gp.__str__() for gp in transformer])
    text_program = printed_form(
        listing.replace("',", ",\n").replace("'", ""))

    assert_true(text_unfitted != text_fitted)
    assert_true(text_unfitted == transformer.__repr__())
    assert_true(text_fitted == text_program)
def test_resample_noarg():
    # resample() called with no arrays must return None; a border case
    # too minor to be worth showing in the doctests.
    outcome = resample()
    assert_true(outcome is None)