Example #1
0
def test_basic_binary_classification():
    """Cross-validate a RandomForest on a synthetic two-class dataset.

    Generates 1000 samples, scales the features by 2**16 and casts them to
    integers, then asserts that the mean cross-validated accuracy is
    above 0.7.
    """
    # Seed both the data generator and the forest so the accuracy threshold
    # is checked against the same data and model on every run.
    X, Y = datasets.make_classification(n_classes=2, n_samples=1000,
                                        random_state=1)
    trees = emtrees.RandomForest(n_estimators=10, max_depth=10,
                                 random_state=1)
    X = (X * 2**16).astype(int)  # convert to integer
    scores = model_selection.cross_val_score(trees, X, Y, scoring='accuracy')

    # Include the per-fold scores in the failure message for diagnosis.
    assert numpy.mean(scores) > 0.7, scores
Example #2
0
def test_trees_to_dot():
    """Fit a small forest and write its DOT export to ``tmp/trees.dot``.

    The test passes when fitting, exporting and writing complete without
    raising; the content of the DOT output is not inspected.
    """
    import os

    X, Y = datasets.make_classification(n_classes=2, n_samples=10)
    trees = emtrees.RandomForest(n_estimators=3, max_depth=5)
    X = (X * 2**16).astype(int)  # convert to integer
    trees.fit(X, Y)

    dot = trees.to_dot(name='ffoo')
    # open() does not create missing directories, so make sure the output
    # directory exists before writing (e.g. in a fresh checkout).
    os.makedirs('tmp', exist_ok=True)
    with open('tmp/trees.dot', 'w') as f:
        f.write(dot)
Example #3
0
def test_binary_classification_compiled():
    """Check a compiled classifier against the data it was trained on.

    Fits a 3-tree forest on a synthetic two-class dataset, passes the model
    through ``build_classifier`` / ``run_classifier`` and asserts that the
    resulting predictions reach more than 0.9 accuracy on the training data.
    """
    # Fixed seeds keep the dataset and the fitted forest identical between
    # runs, so the strict threshold below cannot fail by chance.
    X, Y = datasets.make_classification(n_classes=2, random_state=1)
    trees = emtrees.RandomForest(n_estimators=3, max_depth=5, random_state=1)
    X = (X * 2**16).astype(int)  # convert to integer
    trees.fit(X, Y)

    p = build_classifier(trees)
    predicted = run_classifier(p, X)
    accuracy = metrics.accuracy_score(Y, predicted)

    assert accuracy > 0.9  # testing on training data
Example #4
0
def test_inline_compiled():
    """Check a classifier built with a custom name and predict expression.

    A seeded 3-tree forest is fitted on seeded synthetic two-class data,
    built via ``build_classifier`` with the name ``'myinline'`` and an
    explicit ``func`` expression, and must exceed 0.9 accuracy when run on
    the same data it was trained on.
    """
    scale = 2**16

    features, labels = datasets.make_classification(n_classes=2,
                                                    random_state=1)
    # Scale the float features and cast them to integers
    features = (features * scale).astype(int)

    forest = emtrees.RandomForest(n_estimators=3, max_depth=5, random_state=1)
    forest.fit(features, labels)

    program = build_classifier(
        forest,
        'myinline',
        func='myinline_predict(values, length)',
    )
    predictions = run_classifier(program, features)

    # Evaluated on the training data itself
    assert metrics.accuracy_score(labels, predictions) > 0.9
Example #5
0
# Train a RandomForest on the digits dataset and export it as C code.

# Announce the load before it happens so the message matches the work done.
print('Loading digits dataset. 8x8=64 features')
digits = datasets.load_digits()
Xtrain, Xtest, ytrain, ytest = train_test_split(digits.data,
                                                digits.target,
                                                random_state=rnd)
# Scale the features by 2**16 and store them as 32-bit integers.
Xtrain = (Xtrain * 2**16).astype(numpy.int32)
Xtest = (Xtest * 2**16).astype(numpy.int32)

# 0.95+ with n_estimators=40, max_depth=20
# 0.90+ with n_estimators=10, max_depth=10
trees = 40
max_depth = 20
print('Training {} trees with max_depth {}'.format(trees, max_depth))
model = emtrees.RandomForest(n_estimators=trees,
                             max_depth=max_depth,
                             random_state=rnd)
model.fit(Xtrain, ytrain)

# Predict
ypred = model.predict(Xtest)
# accuracy_score is documented as (y_true, y_pred): pass the held-out labels
# first and the predictions second.
print('Accuracy on validation set {:.2f}%'.format(
    metrics.accuracy_score(ytest, ypred) * 100))

# (max, min) of the scaled training features; not read again in this section.
m = numpy.max(Xtrain), numpy.min(Xtrain)

# Write the generated C code for the model to a header file.
code = model.output_c('digits')
filename = 'digits.h'
with open(filename, 'w') as f:
    f.write(code)
print('Wrote C code to', filename)
Example #6
0
# Train a RandomForest on the UCI sonar dataset and export it as C code.

# load and prepare data
filename = 'https://archive.ics.uci.edu/ml/machine-learning-databases/undocumented/connectionist-bench/sonar/sonar.all-data'
df = pandas.read_csv(filename, header=None)
target_columns = [60]
# Keep the feature columns in file order. Going through a set does not
# guarantee any ordering, and the column order fixes the feature indices
# the trained model (and the generated C code) refers to.
data_columns = [c for c in df.columns if c not in target_columns]

# convert floats to integer
df[data_columns] = (df[data_columns] * 2**16).astype(int)
# Replace the class labels with integer category codes. Assign through the
# single column label rather than a one-element list so a Series value maps
# onto exactly one column.
df[target_columns[0]] = df[target_columns[0]].astype('category').cat.codes
X_train, X_test, Y_train, Y_test = train_test_split(df[data_columns],
                                                    df[target_columns])

# Run training
n_trees = 5
estimator = emtrees.RandomForest(n_estimators=n_trees, max_depth=10)
estimator.fit(X_train, Y_train)
Y_pred = estimator.predict(X_test)
a = accuracy_score(Y_test, Y_pred)

print('Trees: %d' % n_trees)
print('Mean Accuracy: %.3f%%' % (a * 100))

# Output C code
code = estimator.output_c('sonar')
outfile = 'sonar.h'
with open(outfile, 'w') as f:
    f.write(code)

print('Wrote C code to', outfile)