def test_symbolic_classifier_comparison():
    """Test the classifier comparison example works"""
    # Build one linearly-separable dataset with a little added noise...
    X, y = make_classification(n_features=2, n_redundant=0, n_informative=2,
                               random_state=1, n_clusters_per_class=1)
    rng = np.random.RandomState(2)
    X += 2 * rng.uniform(size=X.shape)
    # ...and combine it with two classic non-linear toy datasets.
    datasets = [make_moons(noise=0.3, random_state=0),
                make_circles(noise=0.2, factor=0.5, random_state=1),
                (X, y)]
    scores = []
    for features, labels in datasets:
        features = StandardScaler().fit_transform(features)
        X_train, X_test, y_train, y_test = train_test_split(
            features, labels, test_size=.4, random_state=42)
        clf = SymbolicClassifier(random_state=0)
        clf.fit(X_train, y_train)
        # Format like the docs example: two decimals, no leading zero.
        scores.append(('%.2f' % clf.score(X_test, y_test)).lstrip('0'))
    assert_equal(scores, ['.95', '.93', '.95'])
def test_symbolic_classifier():
    """Check that SymbolicClassifier example works"""
    # Shuffle the breast-cancer data deterministically so the simple
    # positional train/test split below is reproducible.
    rng = check_random_state(0)
    cancer = load_breast_cancer()
    perm = rng.permutation(cancer.target.size)
    cancer.data = cancer.data[perm]
    cancer.target = cancer.target[perm]
    est = SymbolicClassifier(parsimony_coefficient=.01,
                             feature_names=cancer.feature_names,
                             random_state=1)
    # Train on the first 400 samples, evaluate on the remainder.
    est.fit(cancer.data[:400], cancer.target[:400])
    y_true = cancer.target[400:]
    y_score = est.predict_proba(cancer.data[400:])[:, 1]
    # Fixed seeds make the evolved program, and hence the AUC, deterministic.
    assert_almost_equal(roc_auc_score(y_true, y_score), 0.96937869822485212)
    # The graphviz export of the winning program must match byte-for-byte.
    dot_data = est._program.export_graphviz()
    expected = ('digraph program {\nnode [style=filled]\n0 [label="sub", '
                'fillcolor="#136ed4"] ;\n1 [label="div", fillcolor="#136ed4"] '
                ';\n2 [label="worst fractal dimension", fillcolor="#60a6f6"] '
                ';\n3 [label="mean concave points", fillcolor="#60a6f6"] '
                ';\n1 -> 3 ;\n1 -> 2 ;\n4 [label="mul", fillcolor="#136ed4"] '
                ';\n5 [label="mean concave points", fillcolor="#60a6f6"] ;\n6 '
                '[label="area error", fillcolor="#60a6f6"] ;\n4 -> 6 ;\n4 -> '
                '5 ;\n0 -> 4 ;\n0 -> 1 ;\n}')
    assert_equal(dot_data, expected)
def test_pickle():
    """Check pickability"""
    def _roundtrip(estimator):
        # Serialize and deserialize, checking the type survives intact.
        clone = pickle.loads(pickle.dumps(estimator))
        assert_equal(type(clone), estimator.__class__)
        return clone

    # Regressor: the test-set score must survive a pickle round-trip
    est = SymbolicRegressor(generations=2, random_state=0)
    est.fit(boston.data[:100, :], boston.target[:100])
    score = est.score(boston.data[500:, :], boston.target[500:])
    est2 = _roundtrip(est)
    assert_equal(score, est2.score(boston.data[500:, :], boston.target[500:]))

    # Transformer: the transformed features must survive a round-trip
    est = SymbolicTransformer(generations=2, random_state=0)
    est.fit(boston.data[:100, :], boston.target[:100])
    X_new = est.transform(boston.data[500:, :])
    est2 = _roundtrip(est)
    assert_array_almost_equal(X_new, est2.transform(boston.data[500:, :]))

    # Classifier: the test-set score must survive a round-trip
    est = SymbolicClassifier(generations=2, random_state=0)
    est.fit(cancer.data[:100, :], cancer.target[:100])
    score = est.score(cancer.data[500:, :], cancer.target[500:])
    est2 = _roundtrip(est)
    assert_equal(score, est2.score(cancer.data[500:, :], cancer.target[500:]))
def test_pipeline():
    """Check that SymbolicRegressor/Transformer can work in a pipeline"""
    # Regressor preceded by feature scaling
    pipe = make_pipeline(StandardScaler(),
                         SymbolicRegressor(population_size=50,
                                           generations=5,
                                           tournament_size=5,
                                           random_state=0))
    pipe.fit(boston.data, boston.target)
    assert_almost_equal(pipe.score(boston.data, boston.target), -4.00270923)

    # Classifier preceded by feature scaling
    pipe = make_pipeline(StandardScaler(),
                         SymbolicClassifier(population_size=50,
                                            generations=5,
                                            tournament_size=5,
                                            random_state=0))
    pipe.fit(cancer.data, cancer.target)
    assert_almost_equal(pipe.score(cancer.data, cancer.target),
                        0.934973637961)

    # Transformer feeding engineered features into a decision tree
    pipe = make_pipeline(SymbolicTransformer(population_size=50,
                                             hall_of_fame=20,
                                             generations=5,
                                             tournament_size=5,
                                             random_state=0),
                         DecisionTreeRegressor())
    pipe.fit(boston.data, boston.target)
    assert_almost_equal(pipe.score(boston.data, boston.target), 1.0)
def test_validate_functions():
    """Check that valid functions are accepted & invalid ones raise error"""
    # Each estimator class is validated against its matching dataset; the
    # iteration order (regressor, transformer, classifier) mirrors the
    # original explicit sequence.
    cases = [(SymbolicRegressor, boston),
             (SymbolicTransformer, boston),
             (SymbolicClassifier, cancer)]
    for Symbolic, data in cases:
        # Valid: a tuple of Function objects, and a mix of builtin names
        # with a Function object.
        for good_set in [(add2, sub2, mul2, div2),
                         ('add', 'sub', 'mul', div2)]:
            est = Symbolic(generations=2, random_state=0,
                           function_set=good_set)
            est.fit(data.data, data.target)
        # Invalid: an unknown name, a non-string/non-Function entry, and
        # an empty function set.
        for bad_set in [('ni', 'sub', 'mul', div2),
                        (7, 'sub', 'mul', div2),
                        ()]:
            est = Symbolic(generations=2, random_state=0,
                           function_set=bad_set)
            assert_raises(ValueError, est.fit, data.data, data.target)
def test_sample_weight():
    """Check sample_weight param works"""
    # Regressor: all-ones weights, and a uniformly scaled copy of them,
    # must leave the evolved program's fitness unchanged.
    weights = np.ones(boston.target.shape[0])
    est1 = SymbolicRegressor(population_size=100, generations=2,
                             random_state=0)
    est1.fit(boston.data, boston.target)
    est2 = SymbolicRegressor(population_size=100, generations=2,
                             random_state=0)
    est2.fit(boston.data, boston.target, sample_weight=weights)
    est3 = SymbolicRegressor(population_size=100, generations=2,
                             random_state=0)
    est3.fit(boston.data, boston.target, sample_weight=weights * 1.1)
    assert_almost_equal(est1._program.fitness_, est2._program.fitness_)
    assert_almost_equal(est1._program.fitness_, est3._program.fitness_)

    # Classifier: same invariance on the cancer data.
    weights = np.ones(cancer.target.shape[0])
    est1 = SymbolicClassifier(population_size=100, generations=2,
                              random_state=0)
    est1.fit(cancer.data, cancer.target)
    est2 = SymbolicClassifier(population_size=100, generations=2,
                              random_state=0)
    est2.fit(cancer.data, cancer.target, sample_weight=weights)
    est3 = SymbolicClassifier(population_size=100, generations=2,
                              random_state=0)
    est3.fit(cancer.data, cancer.target, sample_weight=weights * 1.1)
    assert_almost_equal(est1._program.fitness_, est2._program.fitness_)
    assert_almost_equal(est1._program.fitness_, est3._program.fitness_)

    # Transformer: transformed output must match with constant weights.
    weights = np.ones(boston.target.shape[0])
    trans1 = SymbolicTransformer(population_size=100, generations=2,
                                 random_state=0)
    out1 = trans1.fit_transform(boston.data, boston.target)
    trans2 = SymbolicTransformer(population_size=100, generations=2,
                                 random_state=0)
    out2 = trans2.fit_transform(boston.data, boston.target,
                                sample_weight=weights)
    assert_array_almost_equal(out1, out2)
def Symbolic_reg_expr(X, y):
    """Fit a SymbolicRegressor and return its simplified sympy expression
    together with the gradients with respect to each input variable.

    Parameters
    ----------
    X : array-like of shape (n_samples, 10)
        Training features; the evolved program is expressed in terms of
        X0..X9, so exactly ten feature columns are assumed.
    y : array-like of shape (n_samples,)
        Training target.

    Returns
    -------
    sym_reg : sympy expression
        Simplified symbolic form of the best evolved program.
    gradients_ : list of sympy expressions
        d(sym_reg)/d(Xi) for each of X0..X9, in order.
    """
    # BUG FIX: the original constructed a SymbolicClassifier here and
    # immediately overwrote the binding with the regressor below — dead
    # work, removed.
    est_gp = SymbolicRegressor(population_size=5000,
                               generations=20,
                               stopping_criteria=0.01,
                               p_crossover=0.7,
                               p_subtree_mutation=0.1,
                               p_hoist_mutation=0.05,
                               p_point_mutation=0.1,
                               max_samples=0.9,
                               verbose=0,
                               parsimony_coefficient=0.01,
                               random_state=0)
    est_gp.fit(X, y)
    sym_expr = str(est_gp._program)
    vars_ = symbols('X0 X1 X2 X3 X4 X5 X6 X7 X8 X9')
    # Map gplearn's function names onto sympy operations before parsing.
    converter = {
        'sub': lambda x, y: x - y,
        'div': lambda x, y: x / y,
        'mul': lambda x, y: x * y,
        'add': lambda x, y: x + y,
        'neg': lambda x: -x,
        'pow': lambda x, y: x**y
    }
    sym_reg = simplify(sympify(sym_expr, locals=converter))
    # BUG FIX: the original called sym_reg.subs(...) substituting each
    # variable for itself — a no-op — so the call was removed.
    gradients_ = [diff(sym_reg, var) for var in vars_]
    return sym_reg, gradients_
def main():
    """End-to-end pipeline: load the project dataset, engineer features,
    fit a SymbolicClassifier and report profit on the held-out split."""
    seed = 0
    np.random.seed(seed)
    # rm_df: presumably the cleaned dataframe exposed by Dataset — TODO
    # confirm against the Dataset class.
    df = Dataset('ml_project1_data.xlsx').rm_df
    y = df['Response']
    X = df.drop(columns='Response')
    training, testing, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42)
    # Re-attach the target so the processors below see a single frame.
    training['Response'] = y_train
    testing['Response'] = y_test
    pr = Processor(training, testing, seed=0)
    fe = FeatureEngineer(pr.training, pr.unseen, seed=0)
    training = fe.training
    testing = fe.unseen
    est = SymbolicClassifier(generations=200, random_state=0)
    est.fit(training.drop('Response', axis=1), training['Response'])
    # NOTE(review): third positional arg True likely toggles plotting —
    # confirm against assess_generalization_auroc's signature.
    assess_generalization_auroc(est, testing, True)
    y_pred = est.predict_proba(testing.drop('Response', axis=1))[:, 1]
    y_true = testing['Response']
    print(profit(y_true, y_pred))
    #+++++++++++++++++ 5) modelling
    #Create Optimizer
    '''
    mlp_param_grid = {'mlpc__hidden_layer_sizes': [(3), (6), (3, 3), (5, 5)],
                      'mlpc__learning_rate_init': [0.001, 0.01]}
    mlp_gscv = bayes_optimization_MLP(fe.training,mlp_param_grid, cv = 5,seed = 0)
    #mlp_gscv.fit(training.loc[:, (training.columns != "Response")].values, training["Response"].values)
    print("Best parameter set: ", mlp_gscv.best_params_)
    # pd.DataFrame.from_dict(mlp_gscv.cv_results_).to_excel("D:\\PipeLines\\project_directory\\data\\mlp_gscv.xlsx")
    #+++++++++++++++++ 6) retraining & assessment of generalization ability
    #auprc,precision, recall = assess_generalization_auroc(mlp_gscv.best_estimator_, testing)
    #print("AUPRC: {:.2f}".format(auprc))
    '''
    plt.show()
def test_parallel_train():
    """Check predictions are the same for different n_jobs"""
    n_jobs_values = [1, 2, 3, 8, 16]

    def _check_agreement(fitted, outputs):
        # Every parallelism setting must agree with the previous one, both
        # on predictions/transformations and on program lengths.
        for out1, out2 in zip(outputs, outputs[1:]):
            assert_array_almost_equal(out1, out2)
        lengths = np.array([[gp.length_ for gp in e._programs[-1]]
                            for e in fitted])
        for len1, len2 in zip(lengths, lengths[1:]):
            assert_array_almost_equal(len1, len2)

    # Check the regressor
    ests = [SymbolicRegressor(population_size=100, generations=4,
                              n_jobs=n_jobs,
                              random_state=0).fit(boston.data[:100, :],
                                                  boston.target[:100])
            for n_jobs in n_jobs_values]
    _check_agreement(ests, [e.predict(boston.data[500:, :]) for e in ests])

    # Check the transformer
    ests = [SymbolicTransformer(population_size=100, hall_of_fame=50,
                                generations=4, n_jobs=n_jobs,
                                random_state=0).fit(boston.data[:100, :],
                                                    boston.target[:100])
            for n_jobs in n_jobs_values]
    _check_agreement(ests, [e.transform(boston.data[500:, :]) for e in ests])

    # Check the classifier
    ests = [SymbolicClassifier(population_size=100, generations=4,
                               n_jobs=n_jobs,
                               random_state=0).fit(cancer.data[:100, :],
                                                   cancer.target[:100])
            for n_jobs in n_jobs_values]
    _check_agreement(ests, [e.predict(cancer.data[500:, :]) for e in ests])
def gp(self) -> Pipeline:
    """
    Creates a pipeline for Genetic programming

    :return Pipeline: returns a Pipeline for the best estimator
    """
    # Scale inputs, then evolve a genetic-programming classifier; only the
    # generation count is grid-searched.
    steps = [('scaler', StandardScaler()), ('gp', SymbolicClassifier())]
    grid = {'gp__generations': [10, 50, 100]}
    return self.do_grid_search("gp", Pipeline(steps=steps), grid)
def gp_grid_search(training, param_grid, seed, cv=5):
    """Grid-search a SymbolicClassifier pipeline, scored by custom profit."""
    estimator = Pipeline([("gp", SymbolicClassifier(random_state=seed))])
    search = GridSearchCV(estimator, param_grid, cv=cv, n_jobs=-1,
                          scoring=make_scorer(profit))
    # All columns except the target feed the model.
    features = training.loc[:, training.columns != "Response"].values
    search.fit(features, training["Response"].values)
    return search
def main(train_file_name, valid_file_name, test_file_name):
    """Train a SymbolicClassifier and report train/validation metrics.

    Loads the three data files, fits a genetic-programming classifier,
    tunes the decision threshold on the validation split, and prints
    accuracy / precision / recall / f-beta for train and validation.
    """
    # NOTE(review): X_test is loaded but never used in this function.
    X_train, y_train, X_validation, y_validation, X_test = \
        load_process_data(train_file_name, valid_file_name, test_file_name)
    gp_classifier = SymbolicClassifier(
        population_size=20,
        generations=65,
        tournament_size=3,
        const_range=None,
        init_depth=(4, 12),
        # Effectively disables the parsimony penalty while staying nonzero.
        parsimony_coefficient=0.00000000000000000000000000000001,
        # parsimony_coefficient=0.0,
        # init_method='full',
        function_set=('add', 'sub', 'mul', 'div'),
        # make_function(my_sqr, "sqr", arity=2, wrap=False)),
        transformer='sigmoid',
        #metric=f_beta,
        p_crossover=0.85,
        p_subtree_mutation=0.04,
        p_hoist_mutation=0.01,
        p_point_mutation=0.04,
        p_point_replace=0.005,
        max_samples=1.0,
        feature_names=None,
        warm_start=False,
        low_memory=True,
        n_jobs=8,
        verbose=1,
        random_state=None)
    gp_classifier.fit(X_train, y_train)
    y_val_proba = gp_classifier.predict_proba(X_validation)
    y_train_proba = gp_classifier.predict_proba(X_train)
    # Pick the probability cut-off that performs best on the validation set,
    # then apply the same threshold to both splits.
    best_threshold = get_best_threshold(y_val_proba, y_validation)
    y_train_pred = np.where(y_train_proba[:, 1] > best_threshold, 1, 0)
    y_val_pred = np.where(y_val_proba[:, 1] > best_threshold, 1, 0)
    str_header = "$"*78
    print(str_header)
    print(str_header)
    print('Train accuracy', accuracy_score(y_train, y_train_pred))
    print('Validation accuracy', accuracy_score(y_validation, y_val_pred))
    print('Train precision', precision_score(y_train, y_train_pred))
    print('Validation precision', precision_score(y_validation, y_val_pred))
    print('Train recall', recall_score(y_train, y_train_pred))
    print('Validation recall', recall_score(y_validation, y_val_pred))
    print('Train f-beta score', fbeta_score(y_train, y_train_pred, beta=0.25))
    validation_beta_score = fbeta_score(y_validation, y_val_pred, beta=0.25)
    print(f'Validation f-beta score {validation_beta_score}')
    print(str_header)
    print(str_header)
def test_early_stopping():
    """Check that early stopping works"""
    # Each estimator is configured so that generation 0 already satisfies
    # its stopping criterion, hence exactly one generation is retained.
    cases = [(SymbolicRegressor(stopping_criteria=10, random_state=0),
              boston),
             (SymbolicTransformer(stopping_criteria=0.5, random_state=0),
              boston),
             (SymbolicClassifier(stopping_criteria=.9, random_state=0),
              cancer)]
    for est, data in cases:
        est.fit(data.data[:400, :], data.target[:400])
        assert_true(len(est._programs) == 1)
def test_custom_classifier_metrics():
    """Check whether greater_is_better works for SymbolicClassifier."""
    # A simple radial decision boundary as the learning target.
    features = check_random_state(0).uniform(-1, 1, 100).reshape(50, 2)
    targets = features[:, 0] ** 2 + features[:, 1] ** 2
    targets = (targets < targets.mean()).astype(int)
    expected_formula = 'sub(0.364, mul(add(X0, X0), add(X0, X0)))'

    # Built-in 'log loss' metric (lower is better).
    est_gp = SymbolicClassifier(metric='log loss',
                                stopping_criteria=0.000001,
                                random_state=415,
                                parsimony_coefficient=0.01,
                                init_method='full',
                                init_depth=(2, 4))
    est_gp.fit(features, targets)
    assert_equal(expected_formula, est_gp.__str__(), True)

    def negative_log_loss(y, y_pred, w):
        """Calculate the log loss."""
        eps = 1e-15
        y_pred = np.clip(y_pred, eps, 1 - eps)
        score = y * np.log(y_pred) + (1 - y) * np.log(1 - y_pred)
        return np.average(score, weights=w)

    # The negated metric with greater_is_better=True must evolve the exact
    # same program as the built-in one.
    customized_fitness = make_fitness(negative_log_loss,
                                      greater_is_better=True)
    c_est_gp = SymbolicClassifier(metric=customized_fitness,
                                  stopping_criteria=0.000001,
                                  random_state=415,
                                  parsimony_coefficient=0.01,
                                  init_method='full',
                                  init_depth=(2, 4))
    c_est_gp.fit(features, targets)
    assert_equal(expected_formula, c_est_gp.__str__(), True)
def train():
    """Fit a SymbolicClassifier on the module-level train split and print
    the evolved program with its train/test scores."""
    settings = dict(population_size=250,
                    generations=20,
                    tournament_size=20,
                    stopping_criteria=0.01,
                    parsimony_coefficient=0.001,
                    p_crossover=0.9,
                    p_subtree_mutation=0.05,
                    p_hoist_mutation=0.0025,
                    p_point_mutation=0.01,
                    p_point_replace=0.0025,
                    verbose=1,
                    max_samples=0.9,
                    feature_names=feature_names)
    est_gp = SymbolicClassifier(**settings)
    est_gp.fit(X_train, y_train)
    print(est_gp._program)
    print(est_gp.score(X_train, y_train))
    print(est_gp.score(X_test, y_test))
def test_input_shape():
    """Check changed dimensions cause failure"""
    rng = check_random_state(415)
    X = np.reshape(rng.uniform(size=50), (5, 10))
    y = rng.uniform(size=5)
    yc = np.asarray(['foo', 'bar', 'foo', 'foo', 'bar'])
    # X2 has one column fewer than the training data.
    X2 = np.reshape(rng.uniform(size=45), (5, 9))
    # (estimator, training target, method that must reject narrower input)
    cases = [(SymbolicRegressor(generations=2, random_state=0), y,
              'predict'),
             (SymbolicTransformer(generations=2, random_state=0), y,
              'transform'),
             (SymbolicClassifier(generations=2, random_state=0), yc,
              'predict')]
    for est, target, method in cases:
        est.fit(X, target)
        assert_raises(ValueError, getattr(est, method), X2)
import pandas as pd
from gplearn.genetic import SymbolicClassifier
from sklearn.metrics import roc_auc_score
from sklearn.utils import shuffle

if __name__ == '__main__':
    # creating data structures: pre-split space-separated files with a
    # binary "Target" column.
    train_set = pd.read_csv("training.txt", sep=" ")
    test_set = pd.read_csv("test.txt", sep=" ")
    x_train = train_set.drop("Target", axis=1)
    y_train = train_set["Target"]
    x_test = test_set.drop("Target", axis=1)
    y_test = test_set["Target"]

    est = SymbolicClassifier(parsimony_coefficient=.01,
                             stopping_criteria=0.01,
                             feature_names=list(x_train.columns.values),
                             random_state=3)
    est.fit(x_train, y_train)
    y_true = y_test
    y_score = est.predict_proba(x_test)[:, 1]
    # BUG FIX: the printed value is roc_auc_score, not accuracy — the
    # label was misleading and has been corrected.
    print("ROC AUC:", roc_auc_score(y_true, y_score),
          "Program:", est._program)
# As according to the documentation if the last value is a 2 then the person is known to have a benign tumor if int(data[10]) == 2: # Not cancerous benign.append("benign") else: # Is cancerous benign.append("malignant") classifier = SymbolicClassifier( # Prevents 'bloat' used for large programs when evolution is increasing the size of the program with an # insignificant increase in fitness parsimony_coefficient=.01, # The list of attributes names, used in producing the final equation feature_names=attributes, # Displays each evolutionary state and fitness after each tournament is run # Note: If commented the user will need to be patient before final results are displayed verbose=1, # Stops the program early if the criteria is met. This is to prevent long computation time for minimal gain stopping_criteria=0.15, # When the population is 500 = ~85% 1000 = ~90% 2000 = ~95% population_size=2000, # basic functions are all that is required the inclusion of log functions provides roughly 5% increase in fitness function_set={"mul", "div", "add", "sub", "log"}) # The first 400 values in the file are trained and tested against the first 400 known values to be benign classifier.fit(values[:400], benign[:400]) # Returns the accuracy as a percentage from the fitness function print("Accuracy: " + (classifier.score(values[:400], benign[:400]) * 100).__str__() + "%") # Returns the function that achieves the above fitness to be entered into a tree in a breadth first fashion print("Function: " + str(classifier._program))
y_pred = 1 diffs = np.abs(y - y_pred) # calculate how many different values return 1 - (np.sum(diffs) / len(y_pred)) accuracy = make_fitness(_accuracy, greater_is_better=True) est_gp = SymbolicClassifier( population_size=1000, generations=200, stopping_criteria=0.01, p_crossover=0.7, p_subtree_mutation=0.1, p_hoist_mutation=0.05, p_point_mutation=0.1, max_samples=0.9, verbose=1, feature_names=('V1', 'V2', 'V3', 'V4', 'V5', 'V6', 'V7', 'V8', 'V9', 'V10', 'V11', 'V12', 'V13', 'V14', 'V15', 'V16', 'V17', 'V18', 'V19', 'V20', 'V21', 'V22', 'V23', 'V24', 'V25', 'V26', 'V27', 'V28', 'V29', 'V30', 'V31', 'V32', 'V33', 'V34', 'V35', 'V36'), function_set=('add', 'sub', 'mul', 'div')) est_gp.fit(X_train, y_train) print('The best individual is : ') print(est_gp) print('Training set accuracy is %0.2f%%' % (100 * est_gp.score(X_train, y_train))) Predict_value = est_gp.predict(X_test) count = 0
'param_grid': { 'C': cv_params['clf_svm_c'], 'kernel': cv_params['clf_svm_kern'], 'degree': cv_params['clf_svm_deg'], 'gamma': cv_params['clf_svm_g']}}, 'SGDClassifier': { 'estimator': SGDClassifier(class_weight='balanced', penalty=args.clf_sgd_penalty, random_state=args.random_seed), 'param_grid': { 'alpha': cv_params['clf_sgd_a'], 'loss': cv_params['clf_sgd_loss'], 'l1_ratio': cv_params['clf_sgd_l1r']}}, 'SymbolicClassifier': { 'estimator': SymbolicClassifier(parsimony_coefficient='auto', random_state=args.random_seed, stopping_criteria=0.01), 'param_grid': { 'function_set': cv_params['clf_sym_fs'], 'generations': cv_params['clf_sym_g'], 'p_crossover': cv_params['clf_sym_pcr'], 'p_hoist_mutation': cv_params['clf_sym_phm'], 'p_point_mutation': cv_params['clf_sym_ppm'], 'p_point_replace': cv_params['clf_sym_ppr'], 'p_subtree_mutation': cv_params['clf_sym_psm'], 'population_size': cv_params['clf_sym_ps'], 'tournament_size': cv_params['clf_sym_ts']}}} params_num_xticks = [ 'slr__k', 'clf__degree',
def test_sklearn_classifier_checks():
    """Run the sklearn estimator validation checks on SymbolicClassifier"""
    # Small population/generations keep the check suite fast.
    clf = SymbolicClassifier(population_size=50, generations=5)
    custom_check_estimator(clf)
def test_parallel_custom_transformer():
    """Regression test for running parallel training with custom transformer"""
    def _sigmoid(x1):
        with np.errstate(over='ignore', under='ignore'):
            return 1 / (1 + np.exp(-x1))

    # A wrapped custom transformer pickles fine even with n_jobs > 1.
    wrapped = make_function(function=_sigmoid, name='sig', arity=1)
    est = SymbolicClassifier(generations=2, transformer=wrapped,
                             random_state=0, n_jobs=2)
    est.fit(cancer.data, cancer.target)
    _ = pickle.dumps(est)

    # Unwrapped functions should fail
    unwrapped = make_function(function=_sigmoid, name='sig', arity=1,
                              wrap=False)
    est = SymbolicClassifier(generations=2, transformer=unwrapped,
                             random_state=0, n_jobs=2)
    est.fit(cancer.data, cancer.target)
    assert_raises(AttributeError, pickle.dumps, est)

    # Single threaded will also fail in non-interactive sessions
    est = SymbolicClassifier(generations=2, transformer=unwrapped,
                             random_state=0)
    est.fit(cancer.data, cancer.target)
    assert_raises(AttributeError, pickle.dumps, est)
def test_program_input_validation_classifier():
    """Check that guarded input validation raises errors"""
    # Check too much proba: mutation probabilities must not sum past 1.
    est = SymbolicClassifier(p_point_mutation=.5)
    assert_raises(ValueError, est.fit, cancer.data, cancer.target)
    # Check invalid init_method
    est = SymbolicClassifier(init_method='ni')
    assert_raises(ValueError, est.fit, cancer.data, cancer.target)
    # Check invalid const_ranges: scalar, list, 3-tuple and string all fail.
    est = SymbolicClassifier(const_range=2)
    assert_raises(ValueError, est.fit, cancer.data, cancer.target)
    est = SymbolicClassifier(const_range=[2, 2])
    assert_raises(ValueError, est.fit, cancer.data, cancer.target)
    est = SymbolicClassifier(const_range=(2, 2, 2))
    assert_raises(ValueError, est.fit, cancer.data, cancer.target)
    est = SymbolicClassifier(const_range='ni')
    assert_raises(ValueError, est.fit, cancer.data, cancer.target)
    # And check acceptable, but strange, representations of const_range
    est = SymbolicClassifier(generations=2, const_range=(2, 2))
    est.fit(cancer.data, cancer.target)
    est = SymbolicClassifier(generations=2, const_range=None)
    est.fit(cancer.data, cancer.target)
    est = SymbolicClassifier(generations=2, const_range=(4, 2))
    est.fit(cancer.data, cancer.target)
    # Check invalid init_depth
    # NOTE(review): the scalar init_depth=2 case is checked twice below —
    # looks like an accidental duplicate.
    est = SymbolicClassifier(init_depth=2)
    assert_raises(ValueError, est.fit, cancer.data, cancer.target)
    est = SymbolicClassifier(init_depth=2)
    assert_raises(ValueError, est.fit, cancer.data, cancer.target)
    est = SymbolicClassifier(init_depth=[2, 2])
    assert_raises(ValueError, est.fit, cancer.data, cancer.target)
    est = SymbolicClassifier(init_depth=(2, 2, 2))
    assert_raises(ValueError, est.fit, cancer.data, cancer.target)
    est = SymbolicClassifier(init_depth='ni')
    assert_raises(ValueError, est.fit, cancer.data, cancer.target)
    est = SymbolicClassifier(init_depth=(4, 2))
    assert_raises(ValueError, est.fit, cancer.data, cancer.target)
    # And check acceptable, but strange, representations of init_depth
    est = SymbolicClassifier(generations=2, init_depth=(2, 2))
    est.fit(cancer.data, cancer.target)
    # Check classifier metrics: every built-in name must be accepted.
    for m in ['log loss']:
        est = SymbolicClassifier(generations=2, metric=m)
        est.fit(cancer.data, cancer.target)
    # And check a fake one
    est = SymbolicClassifier(generations=2, metric='the larch')
    assert_raises(ValueError, est.fit, cancer.data, cancer.target)
    # Check classifier transformers: every built-in name must be accepted.
    for t in ['sigmoid']:
        est = SymbolicClassifier(generations=2, transformer=t)
        est.fit(cancer.data, cancer.target)
    # And check an incompatible one with wrong arity
    est = SymbolicClassifier(generations=2, transformer=sub2)
    assert_raises(ValueError, est.fit, cancer.data, cancer.target)
    # And check a fake one
    est = SymbolicClassifier(generations=2, transformer='the larch')
    assert_raises(ValueError, est.fit, cancer.data, cancer.target)
def test_print_overloading_estimator():
    """Check that printing a fitted estimator results in 'pretty' output"""
    random_state = check_random_state(415)
    X = np.reshape(random_state.uniform(size=50), (5, 10))
    y = random_state.uniform(size=5)

    # Check the regressor
    est = SymbolicRegressor(generations=2, random_state=0)
    # Unfitted: capture repr-style output.
    orig_stdout = sys.stdout
    try:
        out = StringIO()
        sys.stdout = out
        print(est)
        output_unfitted = out.getvalue().strip()
    finally:
        sys.stdout = orig_stdout
    # Fitted: printing should now show the evolved program instead.
    est.fit(X, y)
    orig_stdout = sys.stdout
    try:
        out = StringIO()
        sys.stdout = out
        print(est)
        output_fitted = out.getvalue().strip()
    finally:
        sys.stdout = orig_stdout
    # Capture the program's own string form for comparison.
    orig_stdout = sys.stdout
    try:
        out = StringIO()
        sys.stdout = out
        print(est._program)
        output_program = out.getvalue().strip()
    finally:
        sys.stdout = orig_stdout
    assert_true(output_unfitted != output_fitted)
    assert_true(output_unfitted == est.__repr__())
    assert_true(output_fitted == output_program)

    # Check the transformer
    est = SymbolicTransformer(generations=2, random_state=0)
    # Unfitted
    orig_stdout = sys.stdout
    try:
        out = StringIO()
        sys.stdout = out
        print(est)
        output_unfitted = out.getvalue().strip()
    finally:
        sys.stdout = orig_stdout
    # Fitted
    est.fit(X, y)
    orig_stdout = sys.stdout
    try:
        out = StringIO()
        sys.stdout = out
        print(est)
        output_fitted = out.getvalue().strip()
    finally:
        sys.stdout = orig_stdout
    # The transformer prints all hall-of-fame programs, one per line.
    orig_stdout = sys.stdout
    try:
        out = StringIO()
        sys.stdout = out
        output = str([gp.__str__() for gp in est])
        print(output.replace("',", ",\n").replace("'", ""))
        output_program = out.getvalue().strip()
    finally:
        sys.stdout = orig_stdout
    assert_true(output_unfitted != output_fitted)
    assert_true(output_unfitted == est.__repr__())
    assert_true(output_fitted == output_program)

    # Check the classifier: binarize the target first.
    y = (y > .5).astype(int)
    est = SymbolicClassifier(generations=2, random_state=0)
    # Unfitted
    orig_stdout = sys.stdout
    try:
        out = StringIO()
        sys.stdout = out
        print(est)
        output_unfitted = out.getvalue().strip()
    finally:
        sys.stdout = orig_stdout
    # Fitted
    est.fit(X, y)
    orig_stdout = sys.stdout
    try:
        out = StringIO()
        sys.stdout = out
        print(est)
        output_fitted = out.getvalue().strip()
    finally:
        sys.stdout = orig_stdout
    orig_stdout = sys.stdout
    try:
        out = StringIO()
        sys.stdout = out
        print(est._program)
        output_program = out.getvalue().strip()
    finally:
        sys.stdout = orig_stdout
    assert_true(output_unfitted != output_fitted)
    assert_true(output_unfitted == est.__repr__())
    assert_true(output_fitted == output_program)
# NOTE(review): this parsing block reads per-row fields from `data` and
# appends into `values`/`alive`; it appears to be the body of a row loop
# defined earlier in the file — confirm the enclosing loop and nesting.
if len(data) > 9:
    temp = []
    # Columns 1-9 are the attributes; '?' marks a missing value, encoded
    # as -1.
    for i in range(1, 10):
        x = int(data[i]) if data[i] != "?" else -1
        temp.append(x)
    values.append(temp)
    # Column 10: a value of 2 denotes benign, anything else malignant.
    if int(data[10]) == 2:
        alive.append("benign")
    else:
        alive.append("malignant")
est = SymbolicClassifier(parsimony_coefficient=.01,
                         feature_names=attributes,
                         random_state=10000,
                         verbose=1,
                         stopping_criteria=0.15,
                         population_size=2000,
                         function_set={"mul", "div", "add", "sub", "log"})
# Train and score on the same first 400 rows.
est.fit(values[:400], alive[:400])
print("Accuracy: " + est.score(values[:400], alive[:400]).__str__())
# noinspection PyProtectedMember
# print("Function: " + str(est._program))
# noinspection PyProtectedMember
# graph = pydotplus.graphviz.graph_from_dot_data(est._program.export_graphviz())
# Image(graph.create_png())
# graph.write_png("dtree.png")
def test_sklearn_customized_checks():
    """Run custom binary estimator validation checks on SymbolicClassifier"""
    # Small population/generations keep the check suite fast.
    estimator = SymbolicClassifier(population_size=50, generations=5)
    rewritten_check_estimator(estimator)
# https://gplearn.readthedocs.io/en/stable/reference.html#symbolic-classifier sc = SymbolicClassifier( population_size=2000, generations=20, tournament_size=25, const_range=(-1.5, 1.5), # init_depth=(10, 20), # init_method='full', init_method='half and half', function_set=( 'add', 'sub', 'mul', 'div', 'cos', 'log' # 'sin', 'min', 'max', 'sqrt', #'neg', 'tan' ), transformer='sigmoid', # metric=mf_wf, stopping_criteria=2.0, parsimony_coefficient=0.0001, p_crossover=0.7, p_subtree_mutation=0.2, p_hoist_mutation=0.00, p_point_mutation=0.1, p_point_replace=0.05, max_samples=.9, # feature_names=train_x.columns, low_memory=True, n_jobs=-1, verbose=1, random_state=None) pipeline_gp = make_pipeline(sc) param_grid_gp = {}