import csv
import time

import numpy
import pandas as pd
from sklearn.decomposition import PCA

# Project-local helpers (import_all_training, load_evaluation_data, CNN,
# Kmeans, Hierarchical, scatter_3D, etc.) are assumed to be defined
# elsewhere in this package.


def unpack_data():
    source = ['mercedes', '5577', '2020', 'Aug']
    training_data = import_all_training(source, shuffle=True)
    print("Quantity of training data:", len(training_data))
    # The number of classifier outputs is the number of distinct labels
    # found in column 0 of the training data.
    classifier_outputs = len(
        numpy.unique(numpy.array(training_data, dtype=object)[:, 0]))
    X = []
    y = []
    for sample in training_data:
        # For each sample, the inputs become a normalised, flattened array
        # of the original image data, rescaled from [0, 255] into [0.01, 1.0].
        inputs = sample[1]
        inputs = (inputs / 255 * 0.99) + 0.01
        inputs = inputs.flatten()
        X.append(inputs)
        # A one-hot target array is built from the sample type specified at
        # the start of each line; note only the integer label is returned.
        targets = numpy.zeros(classifier_outputs) + 0.01
        targets[int(sample[0])] = 0.99
        y.append(int(sample[0]))
    X = numpy.array(X)
    y = numpy.array(y)
    return X, y
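# A minimal sanity-check sketch (_demo_normalisation is a hypothetical
# helper, not part of the original module): the normalisation in
# unpack_data maps uint8 pixels through (v / 255 * 0.99) + 0.01, bounding
# every input to [0.01, 1.0] so that no input is exactly zero.
def _demo_normalisation():
    pixels = numpy.array([0, 128, 255], dtype=float)
    scaled = (pixels / 255 * 0.99) + 0.01
    print(scaled)  # [0.01, ~0.5069, 1.0]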
def run_test_harness():
    # Load the dataset
    source = ["mcdonald", "5577", "2018", "Jul"]
    train_test_split = 1000
    img_sz = 128
    trainingdata = import_all_training(source, shuffle=True, img_size=img_sz)
    # Sanity-check the split against the dataset size
    if train_test_split > len(trainingdata):
        train_test_split = len(trainingdata) - 2
    classifier_outputs = len(
        numpy.unique(numpy.array(trainingdata, dtype=object)[:, 0]))
    trainX, trainY, testX, testY = load_evaluation_data(
        trainingdata, train_test_split, classifier_outputs)
    #### Test Model
    scores, histories, times = kfold_evaluate_model(
        trainX, trainY, img_sz, classifier_outputs, n_folds=5)
    #scores, histories = evaluate_model(trainX, trainY, testX, testY, img_sz, num_outputs)
    # Learning curves
    summarize_accuracy(histories)
    # Summarize estimated performance
    #summarize_performance(scores)
    # Log the per-fold timings and accuracies
    with open("CNN_time_log.csv", "w", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(times)
    with open("CNN_acc_log.csv", "w", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(scores)
    return
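# A hedged sketch of the cross-validation loop kfold_evaluate_model is
# assumed to run (build_and_fit_model is a hypothetical stand-in for the
# project's CNN construction and training step). sklearn's KFold yields
# train/validation index pairs so every sample is held out exactly once
# across the folds.
def _kfold_sketch(dataX, dataY, n_folds=5):
    from sklearn.model_selection import KFold
    scores, times = [], []
    for train_ix, val_ix in KFold(n_folds, shuffle=True, random_state=1).split(dataX):
        fold_start = time.time()
        model = build_and_fit_model(dataX[train_ix], dataY[train_ix])  # hypothetical
        _, acc = model.evaluate(dataX[val_ix], dataY[val_ix], verbose=0)
        scores.append(acc)
        times.append(time.time() - fold_start)
    return scores, times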
def run_prediction_model(source):
    # Import the training data for the CNN to learn from
    num_outputs = 7
    train_test_split = 500
    img_sz = 32
    trainingdata = import_all_training(source, shuffle=True, img_size=img_sz)
    if train_test_split > len(trainingdata):
        train_test_split = len(trainingdata) - 2
    trainX, trainY, testX, testY = load_evaluation_data(
        trainingdata, train_test_split, num_outputs)
    # ~~~ Make Classification Predictions ~~~
    unlabelled_data = import_prediction(source)
    predictX = load_unlabelled_data(unlabelled_data)
    predictions = prediction_model(trainX, trainY, predictX, img_sz, num_outputs)
    labels = simplify_predictions(predictions, unlabelled_data)
    organise_data(labels, source)
    return
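# A hedged guess at the core of simplify_predictions: prediction_model is
# assumed to return one probability vector per unlabelled sample, so the
# predicted class is the argmax of each row (_argmax_sketch is illustrative,
# not the project's actual implementation).
def _argmax_sketch(predictions):
    return numpy.argmax(numpy.asarray(predictions), axis=1)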
def run_hierarch(num_clusters, samples, testmode):
    if testmode:
        data_source = ["mcdonald", "5577", "2018", "Jul"]
        data = import_all_training(data_source)
    else:
        data = import_all(samples)
    # Time the clustering run
    time_start = time.time()
    hierarch_obj = Hierarchical(num_clusters)
    hierarch_obj.load_evaluation_data(data)
    labels = hierarch_obj.cluster()
    timetest = time.time() - time_start
    if testmode:
        with open("hierarchical_labels.csv", "w", newline="") as f:
            writer = csv.writer(f)
            writer.writerows(labels)
    else:
        #hierarch_obj.move_to_new_dir(labels)
        pass
    return timetest
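# For reference, a minimal standalone sketch of agglomerative clustering
# with scikit-learn; the project's Hierarchical class is assumed to wrap
# similar behaviour, but this is not its actual implementation.
def _hierarchical_sketch(X, num_clusters):
    from sklearn.cluster import AgglomerativeClustering
    model = AgglomerativeClustering(n_clusters=num_clusters)
    return model.fit_predict(X)  # one integer cluster label per sample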
def run_kmeans(outputs, samples, testmode):
    if testmode:
        data_source = ["mcdonald", "5577", "2018", "Jul"]
        data = import_all_training(data_source)
    else:
        data = import_all(samples)
    # Time the clustering run
    time_start = time.time()
    kmeans_ob = Kmeans(outputs)
    kmeans_ob.load_evaluation_data(data)
    labels = kmeans_ob.cluster()
    timetest = time.time() - time_start
    #kmeans_ob.elbow_method(10)
    if testmode:
        with open("k-means_labels.csv", "w", newline="") as f:
            writer = csv.writer(f)
            writer.writerows(labels)
    else:
        kmeans_ob.move_to_new_dir(labels)
    return timetest
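# A minimal sketch of the elbow method hinted at by the commented-out
# kmeans_ob.elbow_method(10) call above: fit KMeans for increasing k and
# inspect inertia_ (within-cluster sum of squares); the "elbow" where
# inertia stops falling sharply suggests a good cluster count.
def _elbow_sketch(X, max_k=10):
    from sklearn.cluster import KMeans
    return [KMeans(n_clusters=k, n_init=10).fit(X).inertia_
            for k in range(1, max_k + 1)]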
def run_CNN(num_of_outputs, train_test_split, dropout_rate=0.5,
            num_of_filters=(64, 128), num_of_dense_units=128, act_func='relu'):
    img_sze = 64
    # Import all training data and complete a sanity check on the split value.
    training_dataset = import_all_training("True", img_size=img_sze)
    if train_test_split > len(training_dataset):
        raise Exception("Split must be smaller than dataset size!")
    newCNN = CNN(dropout_rate, num_of_filters, num_of_dense_units,
                 act_func, num_of_outputs)
    newCNN.load_evaluation_data(training_dataset, train_test_split)
    score = newCNN.CNN_2D_evaluate(img_sze)
    accuracy = score[1] * 100.0
    test_loss = score[0]
    return accuracy, test_loss
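# A hedged Keras sketch of the architecture run_CNN's parameters imply
# (the project's CNN class is defined elsewhere; this is an assumption,
# not its actual implementation): one Conv2D + MaxPooling2D stage per
# entry in num_of_filters, then Dropout and a Dense head sized by
# num_of_dense_units, finishing in a softmax over the output classes.
def _cnn_sketch(img_size, num_outputs, filters=(64, 128),
                dense_units=128, act='relu', dropout=0.5):
    from tensorflow.keras import layers, models
    model = models.Sequential()
    model.add(layers.Input(shape=(img_size, img_size, 1)))
    for nf in filters:
        model.add(layers.Conv2D(nf, (3, 3), activation=act))
        model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Flatten())
    model.add(layers.Dropout(dropout))
    model.add(layers.Dense(dense_units, activation=act))
    model.add(layers.Dense(num_outputs, activation='softmax'))
    model.compile(optimizer='adam', loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model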
data_source = ["mcdonald", "5577", "2018", "Jul"]
training_data = import_all_training(data_source, shuffle=True)
print("Quantity of training data:", len(training_data))
train_test_split = 350
classifier_outputs = len(
    numpy.unique(numpy.array(training_data, dtype=object)[:, 0]))

# -------------- MLP TEST AREA ---------------------------
# mlp_outputs = classifier_outputs
# hidden_nodes = [20, 40, 60, 80, 100, 120, 140]
# acc_list = []
# for i in range(3 * len(hidden_nodes) - 1):
#     hid_node = hidden_nodes[round(i / 3)]
#     f1score = run_MLP(training_data, mlp_outputs, hidden_nodes=hid_node)
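# A hedged sketch of what the run_MLP call in the test area above is
# assumed to do: fit a one-hidden-layer MLP and report a macro F1 score
# (_mlp_sketch and its arguments are illustrative, not the project's API).
def _mlp_sketch(X_train, y_train, X_test, y_test, hidden_nodes=60):
    from sklearn.neural_network import MLPClassifier
    from sklearn.metrics import f1_score
    clf = MLPClassifier(hidden_layer_sizes=(hidden_nodes,), max_iter=500)
    clf.fit(X_train, y_train)
    return f1_score(y_test, clf.predict(X_test), average='macro')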
def _PCA_3D(x_data, y_data):
    time_start = time.time()
    pca = PCA(n_components=3)
    # Project the inputs onto the first three principal components.
    pca_result = pca.fit_transform(x_data)
    print('PCA done! Time elapsed: {} seconds'.format(time.time() - time_start))
    pca_df = pd.DataFrame(columns=['pca1', 'pca2', 'pca3'])
    pca_df['pca1'] = pca_result[:, 0]
    pca_df['pca2'] = pca_result[:, 1]
    pca_df['pca3'] = pca_result[:, 2]
    top_three_comp = pca_df[['pca1', 'pca2', 'pca3']]
    # Visualising the PCA output
    scatter_3D(top_three_comp.values, y_data)
    return


data_source = ["mcdonald", "5577", "2018", "Jul"]
training_dataset = import_all_training(data_source)
x_subset, y_subset = convert_data_format(training_dataset)
#_tSNE(x_subset, y_subset)
_PCA(x_subset, y_subset)
#_PCA_3D(x_subset, y_subset)
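# A small hedged companion to _PCA_3D: sklearn's explained_variance_ratio_
# reports the fraction of total variance each principal component retains,
# which is worth checking before trusting a 3-D scatter of the projection
# (_pca_variance_sketch is illustrative, not part of the original module).
def _pca_variance_sketch(x_data, n_components=3):
    pca = PCA(n_components=n_components).fit(x_data)
    return pca.explained_variance_ratio_  # one fraction per component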