Example #1
from gplearn.genetic import SymbolicClassifier

def train():
    # Evolve a classification program with genetic programming.
    # X_train, y_train, X_test, y_test and feature_names are assumed to be
    # defined by the surrounding script.
    est_gp = SymbolicClassifier(population_size=250, generations=20, tournament_size=20,
                                stopping_criteria=0.01, parsimony_coefficient=0.001,
                                p_crossover=0.9, p_subtree_mutation=0.05, p_hoist_mutation=0.0025,
                                p_point_mutation=0.01, p_point_replace=0.0025, verbose=1,
                                max_samples=0.9, feature_names=feature_names)

    est_gp.fit(X_train, y_train)
    # Print the evolved program and its accuracy on the train and test sets.
    print(est_gp._program)
    print(est_gp.score(X_train, y_train))
    print(est_gp.score(X_test, y_test))
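
The snippet above assumes the splits and feature names already exist. A minimal sketch of that setup, assuming scikit-learn's breast-cancer dataset (the dataset choice and variable names are illustrative, not part of the original example):

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

# Hypothetical setup; any binary-classification dataset would do.
data = load_breast_cancer()
feature_names = list(data.feature_names)
X_train, X_test, y_train, y_test = train_test_split(
    data.data, data.target, test_size=0.3, random_state=0)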
Example #2
import numpy as np
from numpy.testing import assert_equal
from sklearn.datasets import make_circles, make_classification, make_moons
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from gplearn.genetic import SymbolicClassifier

def test_symbolic_classifier_comparison():
    """Check that the classifier comparison example works."""

    X, y = make_classification(n_features=2,
                               n_redundant=0,
                               n_informative=2,
                               random_state=1,
                               n_clusters_per_class=1)
    rng = np.random.RandomState(2)
    X += 2 * rng.uniform(size=X.shape)
    linearly_separable = (X, y)
    # Three toy datasets: two moons, concentric circles, and a linearly
    # separable cloud.
    datasets = [
        make_moons(noise=0.3, random_state=0),
        make_circles(noise=0.2, factor=0.5, random_state=1),
        linearly_separable
    ]
    scores = []
    for ds in datasets:
        X, y = ds
        X = StandardScaler().fit_transform(X)
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=.4, random_state=42)
        clf = SymbolicClassifier(random_state=0)
        clf.fit(X_train, y_train)
        score = clf.score(X_test, y_test)
        # Format like scikit-learn's classifier-comparison labels: 0.95 -> '.95'.
        scores.append(('%.2f' % score).lstrip('0'))

    assert_equal(scores, ['.95', '.93', '.95'])
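
Beyond score, a fitted SymbolicClassifier also exposes predict and predict_proba. A minimal sketch on the moons data (the 80/20 split here is illustrative, not from the test above):

from sklearn.datasets import make_moons
from gplearn.genetic import SymbolicClassifier

X, y = make_moons(noise=0.3, random_state=0)
clf = SymbolicClassifier(random_state=0).fit(X[:80], y[:80])
print(clf.predict_proba(X[80:])[:3])  # class probabilities, shape (n, 2)
print(clf.predict(X[80:])[:3])        # hard class labels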
Example #3
    # Inside the data-parsing loop: column 10 of the record holds the class
    # label (2 = benign, anything else = malignant).
    if int(data[10]) == 2:
        # Not cancerous
        benign.append("benign")
    else:
        # Is cancerous
        benign.append("malignant")

classifier = SymbolicClassifier(
    # Penalizes 'bloat': programs that keep growing with only an
    # insignificant gain in fitness.
    parsimony_coefficient=.01,
    # Attribute names used when printing the final equation.
    feature_names=attributes,
    # Print the state of the evolution and fitness after each generation.
    # Note: without this the user sees nothing until the final results appear.
    verbose=1,
    # Stop early once this fitness criterion is met, to avoid long
    # computation time for minimal gain.
    stopping_criteria=0.15,
    # Accuracy scales with population size: 500 -> ~85%, 1000 -> ~90%, 2000 -> ~95%.
    population_size=2000,
    # The basic arithmetic functions suffice; including log gives roughly a
    # 5% increase in fitness.
    function_set={"mul", "div", "add", "sub", "log"})

# Train on the first 400 samples; note the score below is computed on the
# same 400 samples the model was fitted on (training accuracy).
classifier.fit(values[:400], benign[:400])
# Report the training accuracy as a percentage.
print("Accuracy: " +
      str(classifier.score(values[:400], benign[:400]) * 100) + "%")
# The evolved function; reading it breadth-first reproduces the program tree.
print("Function: " + str(classifier._program))
Example #4
    # Inside the parsing loop: keep rows with all 9 attributes;
    # "?" marks a missing value and is mapped to -1.
    if len(data) > 9:
        temp = []
        for i in range(1, 10):
            x = int(data[i]) if data[i] != "?" else -1
            temp.append(x)

        values.append(temp)

        if int(data[10]) == 2:
            alive.append("benign")
        else:
            alive.append("malignant")

est = SymbolicClassifier(parsimony_coefficient=.01,
                         feature_names=attributes,
                         random_state=10000,
                         verbose=1,
                         stopping_criteria=0.15,
                         population_size=2000,
                         function_set={"mul", "div", "add", "sub", "log"})

est.fit(values[:400], alive[:400])
# Training accuracy (scored on the same samples used for fitting).
print("Accuracy: " + str(est.score(values[:400], alive[:400])))
# noinspection PyProtectedMember
# print("Function: " + str(est._program))

# noinspection PyProtectedMember
# graph = pydotplus.graphviz.graph_from_dot_data(est._program.export_graphviz())
# Image(graph.create_png())
# graph.write_png("dtree.png")
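
Examples #3 and #4 both score the model on the same 400 samples used for fitting, so the printed accuracy is a training score. A sketch of a held-out evaluation (the split parameters are illustrative; variable names follow Example #4):

from sklearn.model_selection import train_test_split

# Illustrative held-out evaluation; not part of the original example.
X_tr, X_te, y_tr, y_te = train_test_split(values, alive,
                                          test_size=0.3, random_state=0)
est.fit(X_tr, y_tr)
print("Held-out accuracy:", est.score(X_te, y_te))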
Example #5
from gplearn.fitness import make_fitness

# Wrap a raw fitness function as a gplearn metric; `_accuracy` is assumed to
# be defined elsewhere with the signature _accuracy(y, y_pred, sample_weight).
accuracy = make_fitness(_accuracy, greater_is_better=True)

est_gp = SymbolicClassifier(
    population_size=1000,
    generations=200,
    stopping_criteria=0.01,
    p_crossover=0.7,
    p_subtree_mutation=0.1,
    p_hoist_mutation=0.05,
    p_point_mutation=0.1,
    max_samples=0.9,
    verbose=1,
    feature_names=('V1', 'V2', 'V3', 'V4', 'V5', 'V6', 'V7', 'V8', 'V9', 'V10',
                   'V11', 'V12', 'V13', 'V14', 'V15', 'V16', 'V17', 'V18',
                   'V19', 'V20', 'V21', 'V22', 'V23', 'V24', 'V25', 'V26',
                   'V27', 'V28', 'V29', 'V30', 'V31', 'V32', 'V33', 'V34',
                   'V35', 'V36'),
    function_set=('add', 'sub', 'mul', 'div'))
est_gp.fit(X_train, y_train)
print('The best individual is:')
print(est_gp)
print('Training set accuracy is %0.2f%%' %
      (100 * est_gp.score(X_train, y_train)))

# Compute the test-set accuracy manually from the predictions.
y_pred = est_gp.predict(X_test)
count = 0
for i in range(len(y_pred)):
    if y_pred[i] == y_test[i]:
        count += 1
print('Test set accuracy is %0.2f%%' % (100 * count / len(y_pred)))
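
The accuracy fitness object defined at the top of Example #5 is never actually passed to the classifier in the snippet. A minimal sketch of how it could be wired in, assuming gplearn accepts a custom fitness object through the metric parameter and that _accuracy follows make_fitness's required three-argument signature:

import numpy as np
from gplearn.fitness import make_fitness
from gplearn.genetic import SymbolicClassifier

def _accuracy(y, y_pred, sample_weight):
    # Assumed raw fitness: fraction of correct predictions, thresholding
    # the sigmoid-transformed program output at 0.5.
    return np.average(y == (y_pred > 0.5), weights=sample_weight)

accuracy = make_fitness(_accuracy, greater_is_better=True)
est_gp = SymbolicClassifier(metric=accuracy, population_size=1000,
                            generations=200, verbose=1)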