def train():
    """Fit a SymbolicClassifier on the module-level training data.

    Relies on module-level names ``X_train``, ``y_train``, ``X_test``,
    ``y_test`` and ``feature_names`` being defined before the call.
    Prints the evolved program and the train/test accuracy.

    Returns
    -------
    SymbolicClassifier
        The fitted estimator, so callers can reuse it instead of
        re-training (the original discarded it).
    """
    est_gp = SymbolicClassifier(population_size=250,
                                generations=20,
                                tournament_size=20,
                                stopping_criteria=0.01,
                                parsimony_coefficient=0.001,
                                p_crossover=0.9,
                                p_subtree_mutation=0.05,
                                p_hoist_mutation=0.0025,
                                p_point_mutation=0.01,
                                p_point_replace=0.0025,
                                verbose=1,
                                max_samples=0.9,
                                feature_names=feature_names)
    est_gp.fit(X_train, y_train)
    # _program is the best evolved expression found by the run.
    print(est_gp._program)
    print(est_gp.score(X_train, y_train))
    print(est_gp.score(X_test, y_test))
    return est_gp
def test_symbolic_classifier_comparison():
    """Test the classifier comparison example works"""
    X, y = make_classification(n_features=2, n_redundant=0, n_informative=2,
                               random_state=1, n_clusters_per_class=1)
    rng = np.random.RandomState(2)
    X += 2 * rng.uniform(size=X.shape)
    linearly_separable = (X, y)
    # Three toy datasets: two synthetic shapes plus the jittered
    # linearly-separable one built above.
    datasets = [
        make_moons(noise=0.3, random_state=0),
        make_circles(noise=0.2, factor=0.5, random_state=1),
        linearly_separable,
    ]
    scores = []
    for features, labels in datasets:
        features = StandardScaler().fit_transform(features)
        feat_train, feat_test, lab_train, lab_test = train_test_split(
            features, labels, test_size=.4, random_state=42)
        clf = SymbolicClassifier(random_state=0)
        clf.fit(feat_train, lab_train)
        accuracy = clf.score(feat_test, lab_test)
        # Format as e.g. '.95' (two decimals, leading zero stripped).
        scores.append(('%.2f' % accuracy).lstrip('0'))
    assert_equal(scores, ['.95', '.93', '.95'])
# NOTE(review): this `if` reads one record (`data`) — it most likely sat
# inside a per-row loop whose header is outside this view; confirm against
# the full file before relying on the flat layout below.
if int(data[10]) == 2:
    # Not cancerous
    benign.append("benign")
else:
    # Is cancerous
    benign.append("malignant")

classifier = SymbolicClassifier(
    # Prevents 'bloat' used for large programs when evolution is increasing
    # the size of the program with an insignificant increase in fitness
    parsimony_coefficient=.01,
    # The list of attributes names, used in producing the final equation
    feature_names=attributes,
    # Displays each evolutionary state and fitness after each tournament is run
    # Note: If commented the user will need to be patient before final
    # results are displayed
    verbose=1,
    # Stops the program early if the criteria is met. This is to prevent
    # long computation time for minimal gain
    stopping_criteria=0.15,
    # When the population is 500 = ~85% 1000 = ~90% 2000 = ~95%
    population_size=2000,
    # basic functions are all that is required the inclusion of log
    # functions provides roughly 5% increase in fitness
    function_set={"mul", "div", "add", "sub", "log"})
# The first 400 values in the file are trained and tested against the first
# 400 known values to be benign
classifier.fit(values[:400], benign[:400])
# Returns the accuracy as a percentage from the fitness function.
# str() replaces the original direct __str__() dunder call; output is identical.
print("Accuracy: " + str(classifier.score(values[:400], benign[:400]) * 100) + "%")
# Returns the function that achieves the above fitness to be entered into a
# tree in a breadth first fashion
print("Function: " + str(classifier._program))
# NOTE(review): this `if` reads one record (`data`) — it most likely sat
# inside a per-row loop whose header is outside this view; confirm against
# the full file before relying on the flat layout below.
if len(data) > 9:
    # Columns 1..9 are the attributes; replace missing values ("?") with
    # the sentinel -1. (Comprehension replaces the original append loop.)
    temp = [int(data[i]) if data[i] != "?" else -1 for i in range(1, 10)]
    values.append(temp)
    # Column 10 is the class code: 2 = benign, otherwise malignant.
    if int(data[10]) == 2:
        alive.append("benign")
    else:
        alive.append("malignant")

est = SymbolicClassifier(parsimony_coefficient=.01,
                         feature_names=attributes,
                         random_state=10000,
                         verbose=1,
                         stopping_criteria=0.15,
                         population_size=2000,
                         function_set={"mul", "div", "add", "sub", "log"})
# Train and score on the first 400 records.
est.fit(values[:400], alive[:400])
# str() replaces the original direct __str__() dunder call; output is identical.
print("Accuracy: " + str(est.score(values[:400], alive[:400])))
# noinspection PyProtectedMember
# print("Function: " + str(est._program))
# noinspection PyProtectedMember
# graph = pydotplus.graphviz.graph_from_dot_data(est._program.export_graphviz())
# Image(graph.create_png())
# graph.write_png("dtree.png")
# Wrap the custom accuracy metric so gplearn can use it as a fitness measure.
accuracy = make_fitness(_accuracy, greater_is_better=True)

est_gp = SymbolicClassifier(
    population_size=1000,
    generations=200,
    stopping_criteria=0.01,
    p_crossover=0.7,
    p_subtree_mutation=0.1,
    p_hoist_mutation=0.05,
    p_point_mutation=0.1,
    max_samples=0.9,
    verbose=1,
    # Generates the same ('V1', ..., 'V36') tuple the original hard-coded.
    feature_names=tuple('V%d' % i for i in range(1, 37)),
    function_set=('add', 'sub', 'mul', 'div'))
est_gp.fit(X_train, y_train)
print('The best individual is : ')
print(est_gp)
print('Training set accuracy is %0.2f%%' % (100 * est_gp.score(X_train, y_train)))

# Manual test-set accuracy: count exact label matches.
# zip-based sum replaces the original `for i in range(len(...))` index loop.
predictions = est_gp.predict(X_test)
count = sum(1 for predicted, actual in zip(predictions, y_test)
            if predicted == actual)
print('Test set accuracy is %0.2f%%' % (100 * count / len(predictions)))