def test_bbclassifier():
    """Check that BBClassifier learns a trivial one-feature binary mapping.

    The classifier is fit on 30 samples of a single feature that alternates
    between 0.0 and 1.0, labelled 0 and 1 respectively, and must then predict
    label 0 for the input 0.0 and label 1 for the input 1.0.

    Relies on the module-level ``rand_seed`` for reproducibility.
    """
    num_pairs = 15  # 15 (0, 1) pairs -> 30 training samples

    # One feature column alternating 0.0, 1.0, ... and the matching labels.
    train_data = np.tile([0.0, 1.0], num_pairs).reshape(-1, 1)
    train_labels = np.tile([0, 1], num_pairs).reshape(-1)

    clf = BBClassifier(
        num_epochs=3,
        use_normal_dist_bases=True,
        use_evenly_spaced_periods=True,
        random_state=rand_seed,
    )
    clf.fit(train_data, train_labels)

    # Each training value must map back to its own label.
    for sample, label in ((0.0, 0), (1.0, 1)):
        actual_preds = np.array(clf.predict([[sample]]))
        expect_preds = np.array([label])
        np.testing.assert_array_equal(actual_preds, expect_preds)
# h = .05 # step size in the mesh h = .02 # step size in the mesh num_samples = 500 rand_seed = 42 color_maps = ['Reds', 'Blues', 'Greens', 'Oranges', 'Purples', 'Greys'] color_list = ['r', 'b', 'g', 'orange', 'purple', 'grey'] names = [ "BBClassifier", "Nearest Neighbors", "Decision Tree", "Neural Net", "Naive Bayes" ] classifiers = [ BBClassifier(num_epochs=3, use_normal_dist_bases=True, use_evenly_spaced_periods=True, random_state=rand_seed), KNeighborsClassifier(3), DecisionTreeClassifier(max_depth=5, random_state=rand_seed), MLPClassifier(alpha=1, max_iter=1000, hidden_layer_sizes=(100, 100), random_state=rand_seed), GaussianNB() ] noise = 0.1 X, y = make_classification(n_samples=num_samples, n_features=2, n_redundant=0,
# ==============================================================================
# sklearn_style_classifier.py
# ==============================================================================
import time

import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

from brainblocks.tools import BBClassifier

# ------------------------------------------------------------------------------
# Experiment configuration
# ------------------------------------------------------------------------------
h = .02  # step size in the mesh
num_samples = 500
rand_seed = 42
noise = 0.1

# Classifier under test; seeded so that repeated runs are reproducible.
clf = BBClassifier(num_epochs=3, random_state=rand_seed)

# ------------------------------------------------------------------------------
# Synthetic data: two informative features, two classes, one cluster per class
# ------------------------------------------------------------------------------
X, y = make_classification(
    n_samples=num_samples,
    n_features=2,
    n_redundant=0,
    n_informative=2,
    random_state=rand_seed,
    n_clusters_per_class=1,
    n_classes=2,
)

# Shift every feature value by a uniform random offset in [0, 2 * noise).
rng = np.random.RandomState(rand_seed)
X += 2 * rng.uniform(low=0.0, high=noise, size=X.shape)

# scale the data
X = MinMaxScaler().fit_transform(X)
def evaluate(datasets, classifier_configs, test_size=.1, random_state=42):
    """Train and score every configured BBClassifier on every data set.

    Each data set is min-max scaled and split into train and test partitions.
    For every classifier configuration a fresh ``BBClassifier`` is then fit on
    the training partition and scored on the test partition.

    Args:
        datasets: Iterable of ``(X, y, X_plot)`` triples. ``X`` must be
            two-dimensional (its second axis gives the number of input
            dimensions); ``y`` holds the labels; ``X_plot`` is scaled and
            split alongside ``X`` and returned untouched otherwise.
        classifier_configs: Iterable of ``(name, bb_config)`` pairs, where
            ``bb_config`` is a dict of keyword arguments for ``BBClassifier``.
            The dicts are NOT modified; ``num_input_dims`` is injected into a
            per-classifier copy (overriding any value already present).
        test_size: Forwarded to ``train_test_split``. Defaults to ``.1``.
        random_state: Forwarded to ``train_test_split`` so the split is
            reproducible. Defaults to ``42``.

    Returns:
        A list with one dict per data set, in input order. Each dict has:
            ``'data'``: the tuple ``(X, y, X_plot, X_train, X_test, y_train,
                y_test, X_plot_train, X_plot_test)``;
            ``'results'``: a list of ``(name, score)`` pairs, one per
                classifier configuration.
    """
    results_per_dataset = []

    for ds_cnt, ds in enumerate(datasets):
        print("Data Set %d" % ds_cnt)

        # preprocess dataset, split into training and test part
        X, y, X_plot = ds
        X = MinMaxScaler().fit_transform(X)
        X_plot = MinMaxScaler().fit_transform(X_plot)

        X_train, X_test, y_train, y_test, X_plot_train, X_plot_test = \
            train_test_split(X, y, X_plot,
                             test_size=test_size, random_state=random_state)

        # all data in a tuple for comparison across classifiers and for plotting
        split_data = (X, y, X_plot,
                      X_train, X_test, y_train, y_test,
                      X_plot_train, X_plot_test)

        # The input width depends only on the data set, not on the classifier.
        num_input_dims = X_train.shape[1]

        # collect results for each classifier experiment on this data
        results_per_classifier = []
        for name, bb_config in classifier_configs:
            # Build the keyword arguments on a copy so the caller's
            # configuration dict is left untouched between data sets.
            clf_kwargs = dict(bb_config, num_input_dims=num_input_dims)
            clf = BBClassifier(**clf_kwargs)

            print("Train %s" % name)
            clf.fit(X_train, y_train)

            # compute accuracy on test values
            score = clf.score(X_test, y_test)
            results_per_classifier.append((name, score))

            # Drop the classifier before the next one is constructed.
            del clf

        results_per_dataset.append(
            {'data': split_data, 'results': results_per_classifier})

    return results_per_dataset