def main():
    """Train and evaluate a small neural network on the 8x8 digits dataset.

    Loads the digits data via ``datasets.load_digits()``, min-max scales it,
    trains a ``NeuralNetwork`` with hidden layers of 32 and 16 nodes for
    1000 epochs and prints a classification report for a 20% test split.
    """
    # The 8x8 version of the dataset is used because the whole dataset took
    # too much time to run.
    digits = datasets.load_digits()

    # Convert to floats, then min-max scale.
    data = digits.data.astype("float")
    data = (data - data.min()) / (data.max() - data.min())

    X_train, X_test, y_train, y_test = train_test_split(
        data, digits.target, test_size=0.2)

    # Convert labels from integers to one-hot vectors. The binarizer is
    # fitted on the training labels only and reused for the test labels so
    # both label matrices share the same column order.
    binarizer = LabelBinarizer()
    y_train = binarizer.fit_transform(y_train)
    y_test = binarizer.transform(y_test)

    print("[INFO] training network...")
    # The input layer size is taken from the data instead of being hard-coded.
    nn = NeuralNetwork([X_train.shape[1], 32, 16, 10])
    print("[INFO] {}".format(nn))
    nn.fit(X_train, y_train, epochs=1000)

    print("[INFO] evaluating network...")
    predictions = nn.predict(X_test).argmax(axis=1)
    print(classification_report(y_test.argmax(axis=1), predictions))
def main():
    """Train and evaluate a neural network on the full MNIST dataset.

    Fetches ``mnist_784`` from OpenML, min-max scales the pixel values,
    trains a ``NeuralNetwork`` with hidden layers of 400 and 120 nodes for
    500 epochs and prints a classification report for a 20% test split.
    """
    # X is the dataset and y holds the labels of the data.
    X, y = fetch_openml('mnist_784', version=1, return_X_y=True)

    # Convert to float numpy arrays, because otherwise the model won't
    # converge later. float64 is used rather than np.float128: the latter is
    # a platform-dependent alias that does not exist on every NumPy build
    # and doubles the memory needed for the data.
    X = np.array(X, dtype=np.float64)
    y = np.array(y, dtype=np.float64)

    # Number of distinct labels; used as the size of the output layer.
    nclasses = len(np.unique(y))

    # Min-max scaling.
    X = (X - X.min()) / (X.max() - X.min())

    # 80%/20% train/test split; random_state makes it reproducible.
    X_train, X_test, y_train, y_test = train_test_split(
        X,
        y,
        random_state=9,
        train_size=0.8,
        test_size=0.2)

    # Convert labels to one-hot vectors. Fit on the training labels only and
    # reuse the fitted binarizer so train and test share the column order.
    binarizer = LabelBinarizer()
    y_train = binarizer.fit_transform(y_train)
    y_test = binarizer.transform(y_test)

    # Network goes from one input node per pixel down to one per class.
    print("[INFO] training network...")
    nn = NeuralNetwork([X_train.shape[1], 400, 120, nclasses])
    print("[INFO] {}".format(nn))
    nn.fit(X_train, y_train, epochs=500)

    print("[INFO] evaluating network...")
    predictions = nn.predict(X_test).argmax(axis=1)
    print(classification_report(y_test.argmax(axis=1), predictions))
def nn_run(self):
    """Build, train and optionally save the neural network.

    Reads ``self.args["layers"]`` (list of hidden layer sizes),
    ``self.args["epochs"]`` and ``self.args["save_model_path"]``.
    The fitted network is stored in ``self.nn``. If ``save_model_path`` is
    not the empty string, the network is dumped to ``nn_model.pkl`` in that
    directory.
    """
    n_inputs = self.X_train.shape[1]
    hidden = list(self.args["layers"])

    # Use the requested hidden layers only while their summed size stays
    # below (input size + 10); otherwise fall back to the default [32, 16].
    if sum(hidden) < int(n_inputs) + 10:
        layers = [n_inputs] + hidden + [10]
    else:
        print(
            f"Number of hidden layers should be below {n_inputs+10}. Using default hidden layers: [32,16]"
        )
        layers = [n_inputs, 32, 16, 10]

    print("[INFO] training network...")
    # Build the network from the layers selected above, so the
    # user-supplied sizes actually take effect.
    self.nn = NeuralNetwork(layers)
    print("[INFO] {}".format(self.nn))
    self.nn.fit(self.X_train, self.y_train, epochs=self.args["epochs"])

    if self.args["save_model_path"] != "":
        out_path = os.path.join(self.args["save_model_path"], "nn_model.pkl")
        joblib.dump(self.nn, out_path)
def main(data_path, epochs):
    """Train a neural network on MNIST and print a classification report.

    Args:
        data_path: Passed to ``load_mnist`` to locate the data.
        epochs: Number of training epochs passed to ``NeuralNetwork.fit``.
    """
    # Load data as np arrays.
    img, label = load_mnist(data_path)

    # We assume pixel intensities lie between 0 and 255. A data-driven
    # min-max normalization might see values between say 10 and 230, which
    # would not work well for a new image with pixel values outside that
    # range, so a fixed divisor is used instead.
    img = img / 255.0

    num_classes = len(set(label))

    # Split our data 80/20 - train/test.
    img_train, img_test, label_train, label_test = train_test_split(
        img, label, random_state=1337, test_size=0.2)

    # Convert labels to binary representation
    # (e.g. 2 becomes [0,0,1,0,0,0,0,0,0,0]). The binarizer is fitted on the
    # training labels only and reused for the test labels so both label
    # matrices share the same column order.
    binarizer = LabelBinarizer()
    label_train = binarizer.fit_transform(label_train)
    label_test = binarizer.transform(label_test)

    # 1 input node for every pixel in the images, 1 output node per class.
    neural_network = NeuralNetwork(
        [img_train.shape[1], 32, 16, num_classes])

    neural_network.fit(img_train, label_train, epochs=epochs)

    # Predict all test images and keep the highest-probability label.
    label_pred = neural_network.predict(img_test).argmax(axis=1)

    # Generate comparative metrics with test data.
    classifier_metrics = metrics.classification_report(
        label_test.argmax(axis=1), label_pred)
    print(classifier_metrics)
def train_network(self, X_train, y_train):
    """
    Build a network sized to the data and fit it on the training set
    Input:
      - X_train: Array of preprocessed training images
      - y_train: Array of binarised training labels
    Stores the layer sizes in self.nn_shape and the network in self.nn_trained
    """
    # Input layer has one node per image column, output layer one per label column
    n_features = int(X_train.shape[1])
    n_labels = int(y_train.shape[1])

    # Full layout: input - hidden layers - output
    shape = [n_features] + self.hidden_layers + [n_labels]
    self.nn_shape = shape

    # Create the network first, then train it on the training data
    network = NeuralNetwork(shape)
    self.nn_trained = network
    network.fit(X_train, y_train, epochs=self.epochs, displayUpdate=1)
def main():
    """Train and evaluate an MNIST classifier with 1-3 hidden layers.

    Hidden layer sizes and the number of epochs come from the command line.
    Layers must be given in order: ``-hl2`` requires ``-hl1`` and ``-hl3``
    requires ``-hl2``. A size of 0 means "layer not used". Invalid
    combinations are rejected before any data is downloaded.
    """
    # --------------Defining command line arguments---------------
    ap = argparse.ArgumentParser(
        description="[INFO] creating benchmark classifier")
    ap.add_argument(
        "-hl1",
        "--hidden_layer_1",
        required=False,
        default=32,
        type=int,
        help="Number of nodes in the first hidden layer (default: 32).")
    ap.add_argument(
        "-hl2",
        "--hidden_layer_2",
        required=False,
        default=0,
        type=int,
        help="Number of nodes in the second hidden layer (default: 0, not used).")
    ap.add_argument(
        "-hl3",
        "--hidden_layer_3",
        required=False,
        default=0,
        type=int,
        help="Number of nodes in the third hidden layer (default: 0, not used).")
    ap.add_argument("-epochs",
                    "--number_of_epochs",
                    required=False,
                    default=100,
                    type=int,
                    help="The number of times the data is run through")
    args = vars(ap.parse_args())
    number_of_epochs = args["number_of_epochs"]

    # Collect the requested layers and drop unused (zero) trailing ones.
    hidden_layers = [
        args["hidden_layer_1"],
        args["hidden_layer_2"],
        args["hidden_layer_3"],
    ]
    while hidden_layers and hidden_layers[-1] == 0:
        hidden_layers.pop()

    # What remains must be a gap-free run of positive sizes. Fail here,
    # before the download, instead of silently doing nothing afterwards.
    if not hidden_layers or any(size <= 0 for size in hidden_layers):
        ap.error("hidden layers must be positive and given in order "
                 "(-hl1, then -hl2, then -hl3)")

    # -----------------------Downloading and cleaning data-----------------------
    # X is the images, y is the category.
    X, y = fetch_openml('mnist_784', version=1, return_X_y=True)
    X = np.array(X)
    y = np.array(y)
    X = (X - X.min()) / (X.max() - X.min())

    # Creating training and test data (no fixed random_state: the split
    # differs between runs).
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, train_size=7500, test_size=2500)

    # Convert labels from integers to vectors. Fit on the training labels
    # only and reuse the binarizer so train and test share the column order.
    binarizer = LabelBinarizer()
    y_train = binarizer.fit_transform(y_train)
    y_test = binarizer.transform(y_test)

    # --------------------Training and evaluating the network--------------------
    print("[INFO] training network...")
    nn = NeuralNetwork([X_train.shape[1]] + hidden_layers + [10])
    print("[INFO] {}".format(nn))
    nn.fit(X_train, y_train, epochs=number_of_epochs)
    # Debug marker showing how many CLI arguments were effectively used
    # (hidden layers plus the epoch count).
    print(f"___________ {len(hidden_layers) + 1} args _____________")

    print("[INFO] evaluating network...")
    predictions = nn.predict(X_test).argmax(axis=1)
    print(classification_report(y_test.argmax(axis=1), predictions))
class nn_mnist:
    """Train, evaluate and apply a neural network on MNIST data from a CSV.

    ``args`` is a dict (from argparse) with the keys read by this class:
    ``mnist``, ``test_split``, ``layers``, ``epochs``, ``save_model_path``,
    ``output`` and ``test_image``.
    """

    def __init__(self, args):
        """Store the arguments and load the CSV at ``args["mnist"]``."""
        self.args = args
        self.data = pd.read_csv(self.args["mnist"])

    def data_wrangle(self):
        """Move the data from the dataframe into min-max scaled np.arrays.

        The ``y`` column becomes ``self.y``; the remaining columns become
        ``self.X``. ``self.data`` is replaced by the frame without ``y``.
        """
        self.y = np.array(self.data.y)
        self.data = self.data.drop("y", axis=1)
        self.X = np.array(self.data)
        self.X = (self.X - self.X.min()) / (self.X.max() - self.X.min())

    def split(self):
        """Make the train/test split and binarize the labels.

        The test fraction is taken from ``self.args["test_split"]``.
        """
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
            self.X, self.y, random_state=9, test_size=self.args["test_split"])
        # Fit on the training labels only and reuse the binarizer, so the
        # train and test label matrices share the same column order.
        binarizer = LabelBinarizer()
        self.y_train = binarizer.fit_transform(self.y_train)
        self.y_test = binarizer.transform(self.y_test)

    def nn_run(self):
        """Build, train and optionally save the neural network.

        Hidden layers come from ``self.args["layers"]`` and the epoch count
        from ``self.args["epochs"]``. The model is saved as ``nn_model.pkl``
        when ``self.args["save_model_path"]`` is not the empty string.
        """
        n_inputs = self.X_train.shape[1]
        hidden = list(self.args["layers"])

        # Use the requested hidden layers only while their summed size stays
        # below (input size + 10); otherwise fall back to [32, 16].
        if sum(hidden) < int(n_inputs) + 10:
            layers = [n_inputs] + hidden + [10]
        else:
            print(
                f"Number of hidden layers should be below {n_inputs+10}. Using default hidden layers: [32,16]"
            )
            layers = [n_inputs, 32, 16, 10]

        print("[INFO] training network...")
        # Build the network from the layers selected above, so the
        # user-supplied sizes actually take effect.
        self.nn = NeuralNetwork(layers)
        print("[INFO] {}".format(self.nn))
        self.nn.fit(self.X_train, self.y_train, epochs=self.args["epochs"])

        if self.args["save_model_path"] != "":
            out_path = os.path.join(self.args["save_model_path"],
                                    "nn_model.pkl")
            joblib.dump(self.nn, out_path)

    def results(self):
        """Print the classification report and save it as a .csv file.

        The file ``results_df_nn.csv`` is written to ``self.args["output"]``.
        """
        predictions = self.nn.predict(self.X_test).argmax(axis=1)
        y_true = self.y_test.argmax(axis=1)
        print(classification_report(y_true, predictions))

        results_df = pd.DataFrame(
            classification_report(y_true, predictions,
                                  output_dict=True)).transpose()
        output_path = os.path.join(self.args["output"], "results_df_nn.csv")
        results_df.to_csv(output_path)

    def pred_new_number(self):
        """Predict the digit shown in the image at ``self.args["test_image"]``.

        The image is converted to inverted grayscale, resized to 28x28,
        min-max scaled and flattened to a single row before prediction.
        """
        test_image = cv2.imread(self.args["test_image"])
        gray = cv2.bitwise_not(cv2.cvtColor(test_image, cv2.COLOR_BGR2GRAY))
        compressed = cv2.resize(gray, (28, 28), interpolation=cv2.INTER_AREA)

        # Row-major flatten of the 28x28 image into one pixel vector.
        flat = compressed.flatten()
        flat_scaled = (flat - flat.min()) / (flat.max() - flat.min())

        prediction = self.nn.predict(flat_scaled.reshape(1, -1)).argmax(axis=1)
        print(f"The test image is predicted to show a {str(prediction)}")

    def run(self):
        """Run the whole pipeline; predict the test image only if one is given."""
        self.data_wrangle()
        self.split()
        self.nn_run()
        self.results()
        if self.args["test_image"] != "":
            self.pred_new_number()
class NN_Classifier:
    """Neural network classifier: train, evaluate, report and predict."""

    def __init__(self, hidden_layers, epochs):
        """
        Input:
          - hidden_layers: List of hidden layer sizes
          - epochs: Number of epochs used when fitting the network
        """
        # Variables defined when initialising class
        self.hidden_layers = hidden_layers
        self.epochs = epochs

        # Variables that will be defined throughout the functions
        # Shape of NN
        self.nn_shape = None
        # Trained NN
        self.nn_trained = None
        # Classification report
        self.nn_metrics = None

    def train_network(self, X_train, y_train):
        """
        Training network with hidden layers
        Input:
          - X_train: Array of preprocessed training images
          - y_train: Array of binarised training labels
        Stores nn_shape and nn_trained on self
        """
        # Define size of input layer: size of images
        input_layer = int(X_train.shape[1])
        # Define size of output layer: number of labels
        output_layer = int(y_train.shape[1])

        # Define nn shape with input, hidden and output layers
        self.nn_shape = [input_layer] + self.hidden_layers + [output_layer]

        # Defining neural network from input shape - hidden layers - output labels
        self.nn_trained = NeuralNetwork(self.nn_shape)
        # Fitting neural network on training data
        self.nn_trained.fit(X_train,
                            y_train,
                            epochs=self.epochs,
                            displayUpdate=1)

    def evaluate_network(self, X_test, y_test):
        """
        Evaluating network based on predictions of test labels
        Input:
          - X_test: Array of preprocessed test images
          - y_test: Array of binarised test labels
        Stores nn_metrics on self
        """
        # Predicting labels, getting max
        predictions = self.nn_trained.predict(X_test).argmax(axis=1)
        # Getting classification report
        self.nn_metrics = classification_report(y_test.argmax(axis=1),
                                                predictions)

    def print_metrics(self):
        """
        Printing performance metrics to the command line
        """
        print(
            f"[OUTPUT] Perfomance metrics of the Neural Network Classifier with layers {self.nn_shape}:\n{self.nn_metrics}"
        )

    def save_metrics(self, output_directory, output_filename):
        """
        Saving performance metrics in txt file in defined output path
        Input:
          - Output directory: Directory where the file should be stored;
            created (including missing parent directories) if needed
          - Output filename: Name of the file, should end with .txt
        An existing file with the same name is overwritten
        """
        # makedirs handles nested paths and does not fail when the directory
        # already exists, unlike an exists() check followed by mkdir()
        os.makedirs(output_directory, exist_ok=True)

        # Define output filepath
        output_filepath = os.path.join(output_directory, output_filename)

        # Open file and save classification metrics
        with open(output_filepath, "w") as output_file:
            output_file.write(
                f"Output for {self.nn_trained}:\n\nClassification Metrics:\n{self.nn_metrics}"
            )

    def predict_unseen(self, unseen_image):
        """
        Predicting the label of an unseen image and printing it to command line
        Input:
          - unseen image: Complete path to the unseen image, should be light number on dark background
        """
        # Reading unseen image
        image = cv2.imread(unseen_image)
        # Preprocessing it to be on gray scale
        gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        # Resize image to be same size as MNIST data
        resized_image = cv2.resize(gray_image, (28, 28),
                                   interpolation=cv2.INTER_AREA)
        # Scale image as done to the MNIST data
        scaled_image = (resized_image - resized_image.min()) / (
            resized_image.max() - resized_image.min())

        # Flatten to a single row (1 sample x n pixels): argmax(axis=1)
        # below needs a 2-D result, which should not depend on predict()
        # promoting a 1-D vector by itself
        flattened_image = scaled_image.flatten().reshape(1, -1)

        # Predicting label
        probabilities = self.nn_trained.predict(flattened_image)
        # Getting label index with the max probability
        prediction = probabilities.argmax(axis=1)

        # Printing prediction
        print(
            f"[OUTPUT] The image {unseen_image} is most likely a {prediction}."
        )
def train_class(self):
    '''
    Creating training data and training the classification model

    Reads self.X, self.y, self.train_size, self.epochs and
    self.hidden_layer1 .. self.hidden_layer4 (0 means "layer not used").

    Returns:
      - nn_model: the fitted NeuralNetwork
      - X_test_scaled: test data scaled with the training min/max
      - y_test: binarised test labels
    Raises:
      - ValueError: if the hidden layers are not a gap-free run of
        positive sizes starting at hidden_layer1
    '''
    # Collect the requested layers and drop unused (zero) trailing ones
    hidden_layers = [self.hidden_layer1, self.hidden_layer2,
                     self.hidden_layer3, self.hidden_layer4]
    while hidden_layers and hidden_layers[-1] == 0:
        hidden_layers.pop()

    # Fail early with a clear message instead of ending up with no model
    if not hidden_layers or any(size <= 0 for size in hidden_layers):
        raise ValueError(
            "Hidden layers must be positive and defined in order "
            "(hidden_layer1 first, without gaps)")

    # Create training and test data
    X_train, X_test, y_train, y_test = train_test_split(
        self.X, self.y, random_state=9, train_size=self.train_size)

    # Min-Max scaling. The test data is scaled with the training min/max so
    # both sets go through exactly the same transformation
    train_min = X_train.min()
    train_range = X_train.max() - train_min
    X_train_scaled = (X_train - train_min) / train_range
    X_test_scaled = (X_test - train_min) / train_range

    # Convert labels from integers to vectors; fit on the training labels
    # only so train and test share the same column order
    binarizer = LabelBinarizer()
    y_train = binarizer.fit_transform(y_train)
    y_test = binarizer.transform(y_test)

    # Train network using the NeuralNetwork class from utils
    print("\n[INFO] training network...")

    count_word = {1: "one", 2: "two", 3: "three", 4: "four"}[len(hidden_layers)]
    noun = "layer" if len(hidden_layers) == 1 else "layers"
    sizes = ", ".join(str(size) for size in hidden_layers)
    print(f"\nYour model is using {count_word} hidden {noun} with the size [{sizes}]")

    nn_model = NeuralNetwork(
        [X_train_scaled.shape[1]] + hidden_layers + [10])

    # Printing progress
    print("\n[INFO] {}".format(nn_model))
    nn_model.fit(X_train_scaled, y_train, epochs=self.epochs)

    return nn_model, X_test_scaled, y_test
def main():
    """Train and evaluate a fixed 32-16 neural network on MNIST.

    The train/test sizes and the number of epochs come from the command
    line (``--train_size``, ``--test_size``, ``--epochs_number``).
    """
    # ---------- Parameters ----------
    ap = argparse.ArgumentParser()
    # Size of training data with 80% as default
    ap.add_argument("-trs", "--train_size", required=False, default=0.8, type=float,
                    help="The size of the train data as percent, the default is 0.8")
    # Size of test data with 20% as default
    ap.add_argument("-tes", "--test_size", required=False, default=0.2, type=float,
                    help="The size of the test data as percent, the default is 0.2")
    # Number of epochs with 20 epochs as default
    ap.add_argument("-epo", "--epochs_number", required=False, default=20, type=int,
                    help="The number of epochs, the default is 20")

    args = vars(ap.parse_args())
    trs_size = args["train_size"]
    tes_size = args["test_size"]
    epochs_number = args["epochs_number"]

    # ---------- Neural network model ----------
    print("[INFO] Neural network model...")

    # Fetched like this, X and y are already the data and the labels.
    X, y = fetch_openml('mnist_784', version=1, return_X_y=True)

    # Convert to numpy arrays
    X = np.array(X)
    y = np.array(y)

    # Min-max scaling
    X = (X - X.min()) / (X.max() - X.min())

    print("[INFO] Splitting into train and test...")
    # X contains the data and y the labels; both are split into a training
    # and a test part.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, train_size=trs_size, test_size=tes_size)

    # Convert labels from integers to vectors. Fit on the training labels
    # only and reuse the binarizer so train and test share the column order.
    binarizer = LabelBinarizer()
    y_train = binarizer.fit_transform(y_train)
    y_test = binarizer.transform(y_test)

    # Train the network: hidden layers of 32 and 16 nodes, 10 outputs
    print("[INFO] training network...")
    nn = NeuralNetwork([X_train.shape[1], 32, 16, 10])
    print("[INFO] {}".format(nn))
    nn.fit(X_train, y_train, epochs=epochs_number)

    # Evaluate network
    print("[INFO] Evaluating network...")
    predictions = nn.predict(X_test).argmax(axis=1)
    print(classification_report(y_test.argmax(axis=1), predictions))
def main():
    """Classify MNIST data with 1-3 hidden layers and print a report.

    Test size, number of epochs and the hidden layer sizes come from the
    command line. A layer count outside 1-3 is rejected right after
    argument parsing, before any data is fetched.
    """
    # ---------- Parameters ----------
    ap = argparse.ArgumentParser(description = "[INFO] Classify MNIST data and print out performance report",
                                 formatter_class = RawTextHelpFormatter)
    # Size of test data in percentage
    ap.add_argument("-tes", "--test_size",
                    required = False,
                    default = 0.2,
                    type = float,
                    help =
                    "[INFO] The size of the test data as a float percentage. \n"
                    "[INFO] Training size will be adjusted automatically. \n"
                    "[TYPE] float \n"
                    "[DEFAULT] 0.2 \n"
                    "[EXAMPLE] --test_size 0.1")
    # Number of epochs
    ap.add_argument("-ep", "--epochs",
                    required = False,
                    default = 500,
                    type = int,
                    help =
                    "[INFO] The number of epochs that should run \n"
                    "[TYPE] int \n"
                    "[DEFAULT] 500 \n"
                    "[EXAMPLE] --epochs 250")
    # Hidden layers
    ap.add_argument("-l", "--hidden_layers",
                    required = False,
                    default = [32, 16],
                    nargs = "*",
                    type = int,
                    help =
                    "[INFO] Hidden layers as a list of ints \n"
                    "[INFO] There can be between 1 and 3 layers \n"
                    "[TYPE] int \n"
                    "[DEFAULT] 32 16 \n"
                    "[EXAMPLE] --hidden_layers 8 16 8")

    args = vars(ap.parse_args())

    # Save in variables for readability
    epoch_n = args["epochs"]
    layers = [int(size) for size in args["hidden_layers"]]
    tes = args["test_size"]

    # nargs="*" also accepts zero or more than three values; reject those
    # here instead of failing later with an undefined network.
    if not 1 <= len(layers) <= 3:
        ap.error("--hidden_layers takes between 1 and 3 layer sizes")

    # ---------- Get and transform data ----------
    print("Fetching MNIST data ...")
    X, y = fetch_openml("mnist_784", version=1, return_X_y=True)
    # Convert to numpy arrays
    X = np.array(X)  # data
    y = np.array(y)  # labels

    print("Preprocessing data ...")
    # Min-max scaling of the pixel values
    X = (X - X.min())/(X.max() - X.min())

    # Create training data and test dataset
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = tes)

    # Convert labels from integers to vectors (binary). Fit on the training
    # labels only and reuse the binarizer so train and test share the
    # column order.
    binarizer = LabelBinarizer()
    y_train = binarizer.fit_transform(y_train)
    y_test = binarizer.transform(y_test)

    # ----------- Train network -----------
    print("[INFO] training network...")
    nn = NeuralNetwork([X_train.shape[1]] + layers + [10])
    print("[INFO] {}".format(nn))
    nn.fit(X_train, y_train, epochs = epoch_n)

    # ------------ Evaluate network ------------
    print("[INFO] evaluating network...")
    # We take the model and predict the test class
    predictions = nn.predict(X_test).argmax(axis=1)
    print(classification_report(y_test.argmax(axis=1), predictions))