def test_iris_neighbor():
    """Train a nearest-neighbor classifier on the iris dataset and assert accuracy.

    Saves the trained model to 'neighbor_iris_classifier.pkl' and plots a
    confusion matrix as side effects.
    """
    # numpy was imported here but never used; dropped.
    import pykitml as pk
    from pykitml.datasets import iris

    # Load iris data set
    inputs_train, outputs_train, inputs_test, outputs_test = iris.load()

    # Create model: 4 input features, 3 output classes
    neighbor_iris_classifier = pk.NearestNeighbor(4, 3)

    # Train the model
    neighbor_iris_classifier.train(
        training_data=inputs_train,
        targets=outputs_train,
    )

    # Save it
    pk.save(neighbor_iris_classifier, 'neighbor_iris_classifier.pkl')

    # Print accuracy; keep the train value so the assert below does not
    # recompute it.
    train_accuracy = neighbor_iris_classifier.accuracy(inputs_train, outputs_train)
    print('Train accuracy:', train_accuracy)
    test_accuracy = neighbor_iris_classifier.accuracy(inputs_test, outputs_test)
    print('Test accuracy:', test_accuracy)

    # Plot confusion matrix
    neighbor_iris_classifier.confusion_matrix(
        inputs_test, outputs_test,
        gnames=['Setosa', 'Versicolor', 'Virginica'])

    # Assert if it has enough accuracy (a nearest-neighbor model memorizes
    # the training set, so 100% train accuracy is expected)
    assert train_accuracy >= 100
def test_iris_bayes():
    """Train a Gaussian naive Bayes classifier on the iris dataset and assert accuracy.

    Saves the trained model to 'bayes_iris_classifier.pkl' and plots a
    confusion matrix as side effects.
    """
    # numpy was imported here but never used; dropped.
    import pykitml as pk
    from pykitml.datasets import iris

    # Load iris data set
    inputs_train, outputs_train, inputs_test, outputs_test = iris.load()

    # Create model: 4 input features, 3 output classes
    bayes_iris_classifier = pk.GaussianNaiveBayes(4, 3)

    # Train
    bayes_iris_classifier.train(inputs_train, outputs_train)

    # Save it
    pk.save(bayes_iris_classifier, 'bayes_iris_classifier.pkl')

    # Print accuracy; keep the train value so the assert below does not
    # recompute it.
    train_accuracy = bayes_iris_classifier.accuracy(inputs_train, outputs_train)
    print('Train accuracy:', train_accuracy)
    test_accuracy = bayes_iris_classifier.accuracy(inputs_test, outputs_test)
    print('Test accuracy:', test_accuracy)

    # Plot confusion matrix
    bayes_iris_classifier.confusion_matrix(
        inputs_test, outputs_test,
        gnames=['Setosa', 'Versicolor', 'Virginica'])

    # Assert accuracy
    assert train_accuracy >= 95
def test_mnist_svm():
    """Train a gaussian-kernel SVM on (a subset of) MNIST and assert accuracy.

    Downloads the dataset if missing, saves the trained model to
    'svm_mnist_classifier.pkl', and plots performance/confusion-matrix
    figures as side effects.
    """
    import os.path
    import numpy as np
    import pykitml as pk
    from pykitml.datasets import mnist

    # Download dataset
    if not os.path.exists('mnist.pkl'):
        mnist.get()

    # Load mnist data set
    inputs_train, outputs_train, inputs_test, outputs_test = mnist.load()

    # Train on only first 10000 examples to keep the kernel matrix tractable
    inputs_train = inputs_train[:10000]
    outputs_train = outputs_train[:10000]

    # Transform inputs using gaussian kernel
    sigma = 3.15
    gaussian_inputs_train = pk.gaussian_kernel(inputs_train, inputs_train, sigma)
    gaussian_inputs_test = pk.gaussian_kernel(inputs_test, inputs_train, sigma)

    # Format the outputs for svm training, zeros to -1
    svm_outputs_train = np.where(outputs_train == 0, -1, 1)
    svm_outputs_test = np.where(outputs_test == 0, -1, 1)

    # Create model: one output per digit class
    svm_mnist_classifier = pk.SVM(gaussian_inputs_train.shape[1], 10)

    # Train the model
    svm_mnist_classifier.train(
        training_data=gaussian_inputs_train,
        targets=svm_outputs_train,
        batch_size=20,
        epochs=1000,
        optimizer=pk.Adam(learning_rate=3.5, decay_rate=0.95),
        testing_data=gaussian_inputs_test,
        testing_targets=svm_outputs_test,
        testing_freq=30,
        decay_freq=10
    )

    # Save it
    pk.save(svm_mnist_classifier, 'svm_mnist_classifier.pkl')

    # Print accuracy; keep the train value so the assert below does not
    # recompute it.
    train_accuracy = svm_mnist_classifier.accuracy(gaussian_inputs_train, outputs_train)
    print('Train accuracy:', train_accuracy)
    test_accuracy = svm_mnist_classifier.accuracy(gaussian_inputs_test, outputs_test)
    print('Test accuracy:', test_accuracy)

    # Plot performance
    svm_mnist_classifier.plot_performance()

    # Plot confusion matrix
    svm_mnist_classifier.confusion_matrix(gaussian_inputs_test, outputs_test)

    # Assert if it has enough accuracy
    assert train_accuracy >= 90
def test_banknote():
    """Train a logistic-regression classifier on the banknote dataset and assert accuracy.

    Downloads the dataset if missing, saves the trained model to
    'banknote_classifier.pkl', and plots performance/confusion-matrix
    figures as side effects.
    """
    # numpy was imported here but never used; dropped.
    import os.path
    import pykitml as pk
    from pykitml.datasets import banknote

    # Download the dataset
    if not os.path.exists('banknote.pkl'):
        banknote.get()

    # Load banknote data set
    inputs_train, outputs_train, inputs_test, outputs_test = banknote.load()

    # Normalize dataset using min/max computed from the TRAINING split only
    array_min, array_max = pk.get_minmax(inputs_train)
    inputs_train = pk.normalize_minmax(inputs_train, array_min, array_max)
    inputs_test = pk.normalize_minmax(inputs_test, array_min, array_max)

    # Create polynomial features
    inputs_train_poly = pk.polynomial(inputs_train)
    inputs_test_poly = pk.polynomial(inputs_test)

    # Create model
    banknote_classifier = pk.LogisticRegression(inputs_train_poly.shape[1], 1)

    # Train the model
    banknote_classifier.train(
        training_data=inputs_train_poly,
        targets=outputs_train,
        batch_size=10,
        epochs=1500,
        optimizer=pk.Adam(learning_rate=0.06, decay_rate=0.99),
        testing_data=inputs_test_poly,
        testing_targets=outputs_test,
        testing_freq=30,
        decay_freq=40
    )

    # Save it
    pk.save(banknote_classifier, 'banknote_classifier.pkl')

    # Plot performance
    banknote_classifier.plot_performance()

    # Print accuracy; keep the test value so the assert below does not
    # recompute it.
    train_accuracy = banknote_classifier.accuracy(inputs_train_poly, outputs_train)
    print('Train accuracy:', train_accuracy)
    test_accuracy = banknote_classifier.accuracy(inputs_test_poly, outputs_test)
    print('Test accuracy:', test_accuracy)

    # Plot confusion matrix
    banknote_classifier.confusion_matrix(inputs_test_poly, outputs_test)

    # Assert if it has enough accuracy
    assert test_accuracy >= 99
def test_adam_fashion():
    """Train a neural network with Adam on Fashion-MNIST and assert accuracy.

    Downloads the dataset if missing, saves the trained model to
    'fashion_classifier_network.pkl', and plots performance/confusion-matrix
    figures as side effects.
    """
    # numpy was imported here but never used; dropped.
    import os
    import pykitml as pk
    from pykitml.datasets import mnist

    # If the dataset is not available then download it
    if not os.path.exists('mnist.pkl'):
        mnist.get(type='fashion')

    # Load dataset
    training_data, training_targets, testing_data, testing_targets = mnist.load()

    # Create a new neural network: 784 inputs, one hidden layer, 10 classes
    fashion_classifier = pk.NeuralNetwork([784, 100, 10])

    # Train it
    fashion_classifier.train(
        training_data=training_data,
        targets=training_targets,
        batch_size=50,
        epochs=1200,
        optimizer=pk.Adam(learning_rate=0.012, decay_rate=0.95),
        testing_data=testing_data,
        testing_targets=testing_targets,
        testing_freq=30,
        decay_freq=10
    )

    # Save it
    pk.save(fashion_classifier, 'fashion_classifier_network.pkl')

    # Show performance; keep the train value so the assert below does not
    # recompute it.
    train_accuracy = fashion_classifier.accuracy(training_data, training_targets)
    print('Train Accuracy:', train_accuracy)
    test_accuracy = fashion_classifier.accuracy(testing_data, testing_targets)
    print('Test Accuracy:', test_accuracy)

    # Plot performance
    fashion_classifier.plot_performance()

    # Show confusion matrix
    fashion_classifier.confusion_matrix(
        training_data, training_targets,
        gnames=[
            'T-shirt/Top', 'Trouser', 'Pullover', 'Dress', 'Coat',
            'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle Boot'
        ])

    # Assert if it has enough accuracy
    assert train_accuracy > 84
def test_search():
    """Random-search hyperparameters for a Fashion-MNIST network and assert accuracy.

    Downloads the dataset if missing and saves the best-scoring model to
    'best.pkl' as side effects.
    """
    # numpy was imported here but never used; dropped.
    import os
    import pykitml as pk
    from pykitml.datasets import mnist

    # If the dataset is not available then download it
    if not os.path.exists('mnist.pkl'):
        mnist.get(type='fashion')

    # Load dataset
    training_data, training_targets, testing_data, testing_targets = mnist.load()

    # Search for hyperparameters:
    #   Learning rate alpha = 10^-4 to 10^-2
    #   Decay rate = 0.8 to 1
    #   Decay frequency = 10 to 30
    #   Batch size = 10 to 100
    search = pk.RandomSearch()
    for alpha, decay, decay_freq, bsize in search.search(
            10, 3, 5,
            [-4, -2, 'log'], [0.8, 1, 'float'], [10, 30, 'int'], [10, 100, 'int']):
        # Create a new neural network for each trial
        fashion_classifier = pk.NeuralNetwork([784, 100, 10])

        # Train it with this trial's hyperparameters
        fashion_classifier.train(
            training_data=training_data,
            targets=training_targets,
            batch_size=bsize,
            epochs=1200,
            optimizer=pk.Adam(learning_rate=alpha, decay_rate=decay),
            testing_freq=100,
            decay_freq=decay_freq
        )

        # Report this trial's cost back to the search
        cost = fashion_classifier.cost(testing_data, testing_targets)
        search.set_cost(cost)

        # Save the best model so far
        if search.best:
            pk.save(fashion_classifier, 'best.pkl')

    # Load the best model
    fashion_classifier = pk.load('best.pkl')

    # Show performance
    accuracy = fashion_classifier.accuracy(testing_data, testing_targets)
    print('Test Accuracy:', accuracy)

    # Assert accuracy
    assert accuracy > 84
def test_iris():
    """Train a logistic-regression classifier on the iris dataset and assert accuracy.

    Saves the trained model to 'iris_classifier.pkl' and plots
    performance/confusion-matrix figures as side effects.
    """
    # numpy was imported here but never used; dropped.
    import pykitml as pk
    from pykitml.datasets import iris

    # Load iris data set
    inputs_train, outputs_train, inputs_test, outputs_test = iris.load()

    # Normalize inputs using min/max computed from the TRAINING split only
    inputs_min, inputs_max = pk.get_minmax(inputs_train)
    inputs_train = pk.normalize_minmax(inputs_train, inputs_min, inputs_max)
    inputs_test = pk.normalize_minmax(inputs_test, inputs_min, inputs_max)

    # Create model: 4 input features, 3 output classes
    iris_classifier = pk.LogisticRegression(4, 3)

    # Train the model
    iris_classifier.train(
        training_data=inputs_train,
        targets=outputs_train,
        batch_size=10,
        epochs=1500,
        optimizer=pk.Adam(learning_rate=0.4, decay_rate=0.99),
        testing_data=inputs_test,
        testing_targets=outputs_test,
        testing_freq=30,
        decay_freq=20
    )

    # Save it
    pk.save(iris_classifier, 'iris_classifier.pkl')

    # Print accuracy; keep the train value so the assert below does not
    # recompute it.
    train_accuracy = iris_classifier.accuracy(inputs_train, outputs_train)
    print('Train accuracy:', train_accuracy)
    test_accuracy = iris_classifier.accuracy(inputs_test, outputs_test)
    print('Test accuracy:', test_accuracy)

    # Plot performance
    iris_classifier.plot_performance()

    # Plot confusion matrix
    iris_classifier.confusion_matrix(
        inputs_test, outputs_test,
        gnames=['Setosa', 'Versicolor', 'Virginica'])

    # Assert if it has enough accuracy
    assert train_accuracy >= 98
def test_iris_svm():
    """Train an SVM on the iris dataset and assert accuracy.

    Saves the trained model to 'svm_iris_classifier.pkl' and plots
    performance/confusion-matrix figures as side effects.
    """
    import numpy as np
    import pykitml as pk
    from pykitml.datasets import iris

    # Load iris data set
    inputs_train, outputs_train, inputs_test, outputs_test = iris.load()

    # Format the outputs for svm training, zeros to -1
    svm_outputs_train = np.where(outputs_train == 0, -1, 1)
    svm_outputs_test = np.where(outputs_test == 0, -1, 1)

    # Create model: 4 input features, 3 output classes
    svm_iris_classifier = pk.SVM(4, 3)

    # Train the model
    svm_iris_classifier.train(
        training_data=inputs_train,
        targets=svm_outputs_train,
        batch_size=20,
        epochs=1000,
        optimizer=pk.Adam(learning_rate=3, decay_rate=0.95),
        testing_data=inputs_test,
        testing_targets=svm_outputs_test,
        testing_freq=30,
        decay_freq=10
    )

    # Save it
    pk.save(svm_iris_classifier, 'svm_iris_classifier.pkl')

    # Print accuracy; keep the train value so the assert below does not
    # recompute it.
    train_accuracy = svm_iris_classifier.accuracy(inputs_train, outputs_train)
    print('Train accuracy:', train_accuracy)
    test_accuracy = svm_iris_classifier.accuracy(inputs_test, outputs_test)
    print('Test accuracy:', test_accuracy)

    # Plot performance
    svm_iris_classifier.plot_performance()

    # Plot confusion matrix
    svm_iris_classifier.confusion_matrix(
        inputs_test, outputs_test,
        gnames=['Setosa', 'Versicolor', 'Virginica'])

    # Assert if it has enough accuracy
    assert train_accuracy >= 97
def test_heart():
    """Train a logistic-regression classifier on the heart-disease dataset and assert accuracy.

    Downloads the dataset if missing, saves the trained model to
    'heart_classifier.pkl', and plots performance/confusion-matrix figures
    as side effects. Note: trains and evaluates on the same (single) split.
    """
    # numpy was imported here but never used; dropped.
    import os.path
    import pykitml as pk
    from pykitml.datasets import heartdisease

    # Download the dataset
    if not os.path.exists('heartdisease.pkl'):
        heartdisease.get()

    # Load heartdisease data set
    inputs, outputs = heartdisease.load()

    # Normalize only the continuous columns of the dataset
    inputs_min, inputs_max = pk.get_minmax(inputs)
    inputs = pk.normalize_minmax(inputs, inputs_min, inputs_max,
                                 cols=[0, 3, 4, 7, 9])

    # Change categorical values to onehot values
    inputs = pk.onehot_cols(inputs, [1, 2, 5, 6, 8, 10, 11, 12])

    # Create model: 35 features after one-hot expansion, 1 output
    heart_classifier = pk.LogisticRegression(35, 1)

    # Train the model
    heart_classifier.train(
        training_data=inputs,
        targets=outputs,
        batch_size=10,
        epochs=1500,
        optimizer=pk.Adam(learning_rate=0.015, decay_rate=0.99),
        testing_freq=30,
        decay_freq=40
    )

    # Save it
    pk.save(heart_classifier, 'heart_classifier.pkl')

    # Print accuracy and plot performance; keep the value so the assert
    # below does not recompute it.
    heart_classifier.plot_performance()
    accuracy = heart_classifier.accuracy(inputs, outputs)
    print('Accuracy:', accuracy)

    # Plot confusion matrix
    heart_classifier.confusion_matrix(inputs, outputs)

    # Assert if it has enough accuracy
    assert accuracy >= 87
def test_adult_tree():
    """Train a decision tree on the adult (census income) dataset and assert accuracy.

    Downloads the dataset if missing, saves the trained model to
    'tree_adult_classifier.pkl', and plots a confusion matrix as side effects.
    """
    # numpy was imported here but never used; dropped.
    import os
    import pykitml as pk
    from pykitml.datasets import adult

    # Download the dataset
    if not os.path.exists('adult.data.pkl'):
        adult.get()

    # Load adult data set and one-hot encode the binary targets
    inputs_train, outputs_train, inputs_test, outputs_test = adult.load()
    outputs_train = pk.onehot(outputs_train)
    outputs_test = pk.onehot(outputs_test)

    # Create model. NOTE: 'continues' is the spelling this library's API
    # expects for continuous features — do not "fix" it here.
    ftypes = [
        'continues', 'categorical', 'continues', 'categorical', 'categorical',
        'categorical', 'categorical', 'categorical', 'categorical', 'continues',
        'continues', 'continues', 'categorical'
    ]
    tree_adult_classifier = pk.DecisionTree(13, 2, max_depth=100, min_split=100,
                                            feature_type=ftypes)

    # Train
    tree_adult_classifier.train(inputs_train, outputs_train)

    # Save it
    pk.save(tree_adult_classifier, 'tree_adult_classifier.pkl')

    # Print accuracy; keep the test value so the assert below does not
    # recompute it.
    train_accuracy = tree_adult_classifier.accuracy(inputs_train, outputs_train)
    print('Train accuracy:', train_accuracy)
    test_accuracy = tree_adult_classifier.accuracy(inputs_test, outputs_test)
    print('Test accuracy:', test_accuracy)

    # Plot confusion matrix
    tree_adult_classifier.confusion_matrix(inputs_test, outputs_test,
                                           gnames=['False', 'True'])

    # Assert accuracy
    assert test_accuracy >= 84
def test_banknote_forest():
    """Train a random forest on the banknote dataset and assert accuracy.

    Downloads the dataset if missing, saves the trained model to
    'forest_banknote_classifier.pkl', and plots a confusion matrix as
    side effects.
    """
    # numpy was imported here but never used; dropped.
    import os
    import pykitml as pk
    from pykitml.datasets import banknote

    # Download the dataset
    if not os.path.exists('banknote.pkl'):
        banknote.get()

    # Load banknote data set
    inputs_train, outputs_train, inputs_test, outputs_test = banknote.load()

    # Change 0/False to [1, 0]
    # Change 1/True to [0, 1]
    outputs_train = pk.onehot(outputs_train)
    outputs_test = pk.onehot(outputs_test)

    # Create model. NOTE: 'continues' is the spelling this library's API
    # expects for continuous features — do not "fix" it here.
    ftypes = ['continues'] * 4
    forest_banknote_classifier = pk.RandomForest(4, 2, max_depth=9,
                                                 feature_type=ftypes)

    # Train
    forest_banknote_classifier.train(inputs_train, outputs_train)

    # Save it
    pk.save(forest_banknote_classifier, 'forest_banknote_classifier.pkl')

    # Print accuracy; keep the test value so the assert below does not
    # recompute it.
    train_accuracy = forest_banknote_classifier.accuracy(inputs_train, outputs_train)
    print('Train accuracy:', train_accuracy)
    test_accuracy = forest_banknote_classifier.accuracy(inputs_test, outputs_test)
    print('Test accuracy:', test_accuracy)

    # Plot confusion matrix
    forest_banknote_classifier.confusion_matrix(inputs_test, outputs_test,
                                                gnames=['False', 'True'])

    # Assert accuracy
    assert test_accuracy >= 98
def test_heart_tree():
    """Train a decision tree on the heart-disease dataset and assert accuracy.

    Downloads the dataset if missing, saves the trained model to
    'tree_heart_classifier.pkl', and plots a confusion matrix plus the tree
    itself as side effects. Note: trains and evaluates on the same split.
    """
    # numpy was imported here but never used; dropped.
    import os.path
    import pykitml as pk
    from pykitml.datasets import heartdisease

    # Download the dataset
    if not os.path.exists('heartdisease.pkl'):
        heartdisease.get()

    # Load heart data set and one-hot encode the binary targets
    inputs, outputs = heartdisease.load()
    outputs = pk.onehot(outputs)

    # Create model. NOTE: 'continues' is the spelling this library's API
    # expects for continuous features — do not "fix" it here.
    ftypes = [
        'continues', 'categorical', 'categorical', 'continues', 'continues',
        'categorical', 'categorical', 'continues', 'categorical', 'continues',
        'categorical', 'categorical', 'categorical'
    ]
    tree_heart_classifier = pk.DecisionTree(13, 2, max_depth=7,
                                            feature_type=ftypes)

    # Train
    tree_heart_classifier.train(inputs, outputs)

    # Save it
    pk.save(tree_heart_classifier, 'tree_heart_classifier.pkl')

    # Print accuracy; keep the value so the assert below does not
    # recompute it.
    accuracy = tree_heart_classifier.accuracy(inputs, outputs)
    print('Accuracy:', accuracy)

    # Plot confusion matrix
    tree_heart_classifier.confusion_matrix(inputs, outputs,
                                           gnames=['False', 'True'])

    # Plot decision tree
    tree_heart_classifier.show_tree()

    # Assert accuracy
    assert accuracy >= 94
def on_quit():
    """Convert the collected frames to numpy arrays and save them to disk.

    Closure: reads ``np``, ``inputs``, ``outputs``, ``filename`` and ``save``
    from the enclosing scope — none of them are visible in this chunk.
    Presumably ``inputs`` is a list of per-frame feature vectors and
    ``outputs`` a parallel list of space-separated token strings — TODO
    confirm against the collection code that builds them.
    """
    # Process the collected data
    inputs_numpy = np.array(inputs)
    outputs_numpy = []
    for output in outputs:
        # Each output line is tokenized; tokens at positions 1 and 5 carry
        # 'true'/'false' flags. NOTE(review): the meaning of those positions
        # depends on the recording format — verify against the producer.
        values = output.split()
        if (values[1] == 'true'):
            onehot = [1, 0, 0]
        elif (values[5] == 'true'):
            onehot = [0, 1, 0]
        else:
            # Neither flag set: third class.
            onehot = [0, 0, 1]
        outputs_numpy.append(onehot)
    outputs_numpy = np.array(outputs_numpy)

    print('Collected', inputs_numpy.shape[0], 'frames.')
    # Warn (but still save) when fewer than the expected 2000 frames exist.
    if (inputs_numpy.shape[0] < 2000):
        print('Warning not enough data points.')

    # Truncate to at most 2000 frames and persist under Data/.
    save((inputs_numpy[:2000], outputs_numpy[:2000]), 'Data/' + filename)
def test_heart_bayes():
    """Train a naive Bayes classifier on the heart-disease dataset and assert accuracy.

    Downloads the dataset if missing, saves the trained model to
    'bayes_heart_classifier.pkl', and plots a confusion matrix as side
    effects. Note: trains and evaluates on the same split.
    """
    # numpy was imported here but never used; dropped.
    import os.path
    import pykitml as pk
    from pykitml.datasets import heartdisease

    # Download the dataset
    if not os.path.exists('heartdisease.pkl'):
        heartdisease.get()

    # Load heart data set
    inputs, outputs = heartdisease.load()

    # Change 0/False to [1, 0]
    # Change 1/True to [0, 1]
    outputs = pk.onehot(outputs)

    # Per-feature distribution assumptions (local variable renamed from the
    # original misspelled 'distrbutions')
    distributions = [
        'gaussian', 'binomial', 'multinomial', 'gaussian', 'gaussian',
        'binomial', 'multinomial', 'gaussian', 'binomial', 'gaussian',
        'multinomial', 'multinomial', 'multinomial'
    ]

    # Create model: 13 features, 2 classes
    bayes_heart_classifier = pk.NaiveBayes(13, 2, distributions)

    # Train
    bayes_heart_classifier.train(inputs, outputs)

    # Save it
    pk.save(bayes_heart_classifier, 'bayes_heart_classifier.pkl')

    # Print accuracy; keep the value so the assert below does not
    # recompute it.
    accuracy = bayes_heart_classifier.accuracy(inputs, outputs)
    print('Accuracy:', accuracy)

    # Plot confusion matrix
    bayes_heart_classifier.confusion_matrix(inputs, outputs,
                                            gnames=['False', 'True'])

    # Assert accuracy
    assert accuracy > 84
def test_fishlength():
    """Train a linear-regression model on the fish-length dataset.

    Saves the trained model to 'fish_classifier.pkl' and plots a
    performance figure as side effects.
    """
    # numpy was imported here but never used; dropped.
    import pykitml as pk
    from pykitml.datasets import fishlength

    # Load the dataset
    inputs, outputs = fishlength.load()

    # Normalize inputs
    array_min, array_max = pk.get_minmax(inputs)
    inputs = pk.normalize_minmax(inputs, array_min, array_max)

    # Create polynomial features
    inputs_poly = pk.polynomial(inputs)

    # Normalize outputs
    array_min, array_max = pk.get_minmax(outputs)
    outputs = pk.normalize_minmax(outputs, array_min, array_max)

    # Create model
    fish_classifier = pk.LinearRegression(inputs_poly.shape[1], 1)

    # Train the model
    fish_classifier.train(
        training_data=inputs_poly,
        targets=outputs,
        batch_size=22,
        epochs=200,
        optimizer=pk.Adam(learning_rate=0.02, decay_rate=0.99),
        testing_freq=1,
        decay_freq=10
    )

    # Save model
    pk.save(fish_classifier, 'fish_classifier.pkl')

    # Plot performance
    fish_classifier.plot_performance()

    # Print r2 score
    print('r2score:', fish_classifier.r2score(inputs_poly, outputs))

    # NOTE(review): `cost <= 0` effectively requires the cost to be exactly
    # zero (costs are presumably non-negative) — this looks like it was
    # meant to be a small positive threshold. Kept as-is to preserve the
    # existing test semantics; confirm the intended bound.
    assert fish_classifier.cost(inputs_poly, outputs) <= 0
def test_sonar_forest():
    """Train a random forest on the sonar dataset and report accuracy.

    Downloads the dataset if missing, saves the trained model to
    'forest_sonar_classifier.pkl', and plots a confusion matrix as side
    effects. NOTE(review): unlike its siblings, this test has no final
    accuracy assertion — it only exercises the pipeline. Left unchanged to
    avoid inventing a threshold.
    """
    # numpy was imported here but never used; dropped.
    import os
    import pykitml as pk
    from pykitml.datasets import sonar

    # Download the dataset
    if not os.path.exists('sonar.pkl'):
        sonar.get()

    # Load the sonar dataset and one-hot encode the binary targets
    inputs_train, outputs_train, inputs_test, outputs_test = sonar.load()
    outputs_train = pk.onehot(outputs_train)
    outputs_test = pk.onehot(outputs_test)

    # Create model. NOTE: 'continues' is the spelling this library's API
    # expects for continuous features — do not "fix" it here.
    forest_sonar_classifier = pk.RandomForest(60, 2, max_depth=9,
                                              feature_type=['continues'] * 60)

    # Train the model
    forest_sonar_classifier.train(inputs_train, outputs_train,
                                  num_feature_bag=60)

    # Save it
    pk.save(forest_sonar_classifier, 'forest_sonar_classifier.pkl')

    # Print accuracy
    accuracy = forest_sonar_classifier.accuracy(inputs_train, outputs_train)
    print('Train accuracy:', accuracy)
    accuracy = forest_sonar_classifier.accuracy(inputs_test, outputs_test)
    print('Test accuracy:', accuracy)

    # Plot confusion matrix
    forest_sonar_classifier.confusion_matrix(inputs_test, outputs_test,
                                             gnames=['False', 'True'])
def test_iris_tree():
    """Train a decision tree on the iris dataset and assert accuracy.

    Saves the trained model to 'tree_iris_classifier.pkl' and plots a
    confusion matrix plus the tree itself as side effects.
    """
    # numpy was imported here but never used; dropped.
    import pykitml as pk
    from pykitml.datasets import iris

    # Load iris data set
    inputs_train, outputs_train, inputs_test, outputs_test = iris.load()

    # Create model. NOTE: 'continues' is the spelling this library's API
    # expects for continuous features — do not "fix" it here.
    tree_iris_classifier = pk.DecisionTree(4, 3, max_depth=5,
                                           feature_type=['continues'] * 4)

    # Train
    tree_iris_classifier.train(inputs_train, outputs_train)

    # Save it
    pk.save(tree_iris_classifier, 'tree_iris_classifier.pkl')

    # Print accuracy; keep the train value so the assert below does not
    # recompute it.
    train_accuracy = tree_iris_classifier.accuracy(inputs_train, outputs_train)
    print('Train accuracy:', train_accuracy)
    test_accuracy = tree_iris_classifier.accuracy(inputs_test, outputs_test)
    print('Test accuracy:', test_accuracy)

    # Plot confusion matrix
    tree_iris_classifier.confusion_matrix(
        inputs_test, outputs_test,
        gnames=['Setosa', 'Versicolor', 'Virginica'])

    # Plot decision tree
    tree_iris_classifier.show_tree()

    # Assert accuracy
    assert train_accuracy >= 98
for file in files[1:]: file_inputs, file_outputs = pk.load(file) inputs = np.append(inputs, (file_inputs), axis=0) outputs = np.append(outputs, (file_outputs), axis=0) return inputs, outputs train_files = [f'Data/session{x}.pkl' for x in range(1, 51)] \ + [f'Data/knockout_session{x}.pkl' for x in range(1, 21)] test_files = [f'Data/session{x}.pkl' for x in range(51, 61)] \ + [f'Data/knockout_session{x}.pkl' for x in range(21, 26)] dev_files = [f'Data/session{x}.pkl' for x in range(61, 71)] \ + [f'Data/knockout_session{x}.pkl' for x in range(26, 31)] # Shuffle files random.shuffle(train_files) random.shuffle(test_files) random.shuffle(dev_files) # Combine files into numpy arrays train_inputs, train_outputs = combine(train_files) test_inputs, test_outputs = combine(test_files) dev_inputs, dev_outputs = combine(dev_files) # Save them pk.save((train_inputs, train_outputs, test_inputs, test_outputs, dev_inputs, dev_outputs), 'Data/traindata.pkl')
train_inputs, train_outputs, test_inputs, test_outputs, dev_inputs, dev_outputs = pk.load( 'Data/traindata.pkl') train_inputs, test_inputs, dev_inputs = train_inputs / 255, test_inputs / 255, dev_inputs / 255 # Compress inputs using PCA, pick 1000 random examples for PCA rand_indices = np.random.choice(train_inputs.shape[0], 1000, replace=False) pca = pk.PCA(train_inputs[rand_indices], no_components=64) print('PCA Retention:', pca.retention) # Transform dataset train_inputs = pca.transform(train_inputs) test_inputs = pca.transform(test_inputs) dev_inputs = pca.transform(dev_inputs) # Save pca model pk.save(pca, 'pca.pkl') # Start hyperparameter search search = pk.RandomSearch() for alpha, decay, decay_freq, in search.search(10, 2, 5, [-4, -3, 'log'], [0.9, 1, 'float'], [50, 100, 'int']): model = pk.LSTM([64, 100, 3]) model.train(training_data=train_inputs, targets=train_outputs, batch_size=200, epochs=10000, optimizer=pk.Adam(learning_rate=alpha, decay_rate=decay), testing_data=test_inputs,