def main():
    """Driver.py entry point: train, validate, test and report on both datasets.

    Dataset 1 is labelled "Uppercase Letters" and dataset 2 "Greek Letters".
    For each dataset a base and a "best" decision tree are generated from the
    training CSV, run against the validation CSV and the labelled test CSV,
    and the test results are handed to ``utils.writeMLResults``. Instance
    plots, confusion matrices and classification reports are produced through
    the corresponding ``utils`` helpers.

    Takes no arguments and returns nothing; all output goes through ``print``
    and the ``utils`` helpers.
    """
    # Input files.
    upperCaseLettersInfoFile = 'info_1.csv'
    greekLettersInfoFile = 'info_2.csv'
    trainingData1File = 'train_1.csv'
    trainingData2File = 'train_2.csv'
    val1File = 'val_1.csv'
    val2File = 'val_2.csv'
    testWithLabel1File = 'test_with_label_1.csv'
    testWithLabel2File = 'test_with_label_2.csv'

    # Output files for the test-set results.
    baseDTFile1 = 'Base-DT-DS1.csv'
    baseDTFile2 = 'Base-DT-DS2.csv'
    bestDTFile1 = 'Best-DT-DS1.csv'
    bestDTFile2 = 'Best-DT-DS2.csv'

    upperCaseLettersDict = utils.getInfo(upperCaseLettersInfoFile)
    greekLettersDict = utils.getInfo(greekLettersInfoFile)

    # Get data uses Pandas library. Returns 2d array with column headers in first row
    trainingData1 = utils.getData(trainingData1File)
    trainingData2 = utils.getData(trainingData2File)

    # Generating our ML Models
    baseDTUpperCase = decisionTree.generateBaseDT(trainingData1)
    baseDTGreek = decisionTree.generateBaseDT(trainingData2)
    bestDTUpperCase = decisionTree.generateBestDT(trainingData1)
    bestDTGreek = decisionTree.generateBestDT(trainingData2)

    val1Data = utils.getData(val1File)
    val2Data = utils.getData(val2File)
    testWithLabel1 = utils.getData(testWithLabel1File)
    testWithLabel2 = utils.getData(testWithLabel2File)

    # Plot the instances of every split of both datasets.
    utils.plotInstances(trainingData1, upperCaseLettersDict, 'Uppercase Letters', 'Training', 'trainingUppercase')
    utils.plotInstances(trainingData2, greekLettersDict, 'Greek Letters', 'Training', 'trainingGreek')
    utils.plotInstances(val1Data, upperCaseLettersDict, 'Uppercase Letters', 'Validation', 'validationUppercase')
    utils.plotInstances(val2Data, greekLettersDict, 'Greek Letters', 'Validation', 'validationGreek')
    utils.plotInstances(testWithLabel1, upperCaseLettersDict, 'Uppercase Letters', 'Test', 'testUppercase')
    utils.plotInstances(testWithLabel2, greekLettersDict, 'Greek Letters', 'Test', 'testGreek')

    # Dataset 1: validation runs (results are not stored), then test runs
    # whose results are written out.
    print('Running Validation for Base DT - Upper Case Letters...')
    utils.testModel(baseDTUpperCase, val1Data)
    print('Running Validation for Best DT - Upper Case Letters...')
    utils.testModel(bestDTUpperCase, val1Data)

    print('\nRunning Tests for Base DT - Upper Case Letters...')
    baseDTRes1 = utils.testModel(baseDTUpperCase, testWithLabel1)
    utils.writeMLResults(baseDTRes1, baseDTFile1)
    print('Running Tests for Best DT - Upper Case Letters...')
    bestDTRes1 = utils.testModel(bestDTUpperCase, testWithLabel1)
    utils.writeMLResults(bestDTRes1, bestDTFile1)

    # Dataset 2: same sequence for the Greek-letter models.
    print('\nRunning Validation for Base DT - Greek Letters...')
    utils.testModel(baseDTGreek, val2Data)
    print('Running Validation for Best DT - Greek Letters...')
    utils.testModel(bestDTGreek, val2Data)

    print('\nRunning Tests for Base DT - Greek Letters...')
    baseDTRes2 = utils.testModel(baseDTGreek, testWithLabel2)
    utils.writeMLResults(baseDTRes2, baseDTFile2)
    print('Running Tests for Best DT - Greek Letters...')
    bestDTRes2 = utils.testModel(bestDTGreek, testWithLabel2)
    utils.writeMLResults(bestDTRes2, bestDTFile2)

    # Confusion matrices on the labelled test sets.
    utils.plotConfusionMatrix(baseDTUpperCase, testWithLabel1, upperCaseLettersDict, 'Base DT Uppercase Letters', 'Base-DT-DS1-CM')
    utils.plotConfusionMatrix(baseDTGreek, testWithLabel2, greekLettersDict, 'Base DT Greek Letters', 'Base-DT-DS2-CM')
    utils.plotConfusionMatrix(bestDTUpperCase, testWithLabel1, upperCaseLettersDict, 'Best DT Uppercase Letters', 'Best-DT-DS1-CM')
    utils.plotConfusionMatrix(bestDTGreek, testWithLabel2, greekLettersDict, 'Best DT Greek Letters', 'Best-DT-DS2-CM')

    # Classification reports on the labelled test sets. The title passed for
    # each report names the model being reported, so the Best-DT reports are
    # titled 'Best DT ...' to match their 'Best-DT-*' output names.
    utils.getClassificationReport(baseDTUpperCase, testWithLabel1, upperCaseLettersDict, 'Base DT Uppercase Letters', 'Base-DT-DS1-Report')
    utils.getClassificationReport(baseDTGreek, testWithLabel2, greekLettersDict, 'Base DT Greek Letters', 'Base-DT-DS2-Report')
    utils.getClassificationReport(bestDTUpperCase, testWithLabel1, upperCaseLettersDict, 'Best DT Uppercase Letters', 'Best-DT-DS1-Report')
    utils.getClassificationReport(bestDTGreek, testWithLabel2, greekLettersDict, 'Best DT Greek Letters', 'Best-DT-DS2-Report')
"score_test": high_score_test, "accuracy_even": acc_even, "accuracy_disc": acc_disc, "accuracy_logr": acc_even }) dffilename = os.path.join(result_path, "svm_joint.csv") if not os.path.exists(os.path.dirname(dffilename)): os.makedirs(os.path.dirname(dffilename)) df.to_csv(dffilename) # Plot confusion matrix target_names = ["intent"]*9 for i in range(9): target_names[i] += str(i+1) cnf_matrix = confusion_matrix(test_labels, high_prediction) utils.plotConfusionMatrix(cnf_matrix, target_names) # Plot trates accuracies utils.plotAccBar(high_score_train, high_score_test, num_frames) # Save predictions to files # Save even weighted predictions predevenfilename = os.path.join(result_path, "svm_joint_even.txt") if not os.path.exists(os.path.dirname(predevenfilename)): os.makedirs(os.path.dirname(predevenfilename)) np.savetxt(predevenfilename, pred_even, fmt="%d") # Save discont weighted predictions preddiscfilename = os.path.join(result_path, "svm_joint_disc.txt") if not os.path.exists(os.path.dirname(preddiscfilename)): os.makedirs(os.path.dirname(preddiscfilename)) np.savetxt(preddiscfilename, pred_disc, fmt="%d")
import keras
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix

from utils import plotConfusionMatrix

# Fixed seed for NumPy's global random state.
np.random.seed(1588390)

# MARK: - load data
# Each 28x28 image is flattened into one row of 784 values and divided by 255.
(xTrain, yTrain), (xTest, yTest) = keras.datasets.mnist.load_data()
xTrain = xTrain.reshape((-1, 28 * 28)) / 255.
xTest = xTest.reshape((-1, 28 * 28)) / 255.

# MARK: - logistic regression
model = LogisticRegression(
    solver='saga',
    multi_class='multinomial',
    max_iter=100,
    verbose=2,
)
model.fit(xTrain, yTrain)
yPred = model.predict(xTest)

# Confusion matrix of true vs. predicted test labels, with the ten class
# indices 0..9 passed as labels and 'problem1.png' as the third argument.
confusionMatrix = confusion_matrix(yTest, yPred)
plotConfusionMatrix(confusionMatrix, list(range(10)), 'problem1.png')
### model.evaluate(x_test, y_test) val_loss = (history1.history['val_loss']) train_loss = (history1.history['loss']) plotLoss(val_loss, train_loss) val_accuracy = history1.history['val_accuracy'] train_accuracy = history1.history['accuracy'] plotAccuracy(val_accuracy, train_accuracy) y_pred = model.predict(x_test) y_test_label = [class_labels[i] for i in np.argmax(y_test, axis=1)] y_pred_label = [class_labels[i] for i in np.argmax(y_pred, axis=1)] plotConfusionMatrix(class_labels, y_test_label, y_pred_label, title='Confusion matrix - vgg16') #--- ### train model with data augmentation opt2 = keras.optimizers.Adam(learning_rate=1e-5) model_augmented = VggModel() model_augmented.compile(optimizer=opt2, loss='categorical_crossentropy', metrics=['accuracy']) model_checkpoint2 = ModelCheckpoint('vgg_model_augmented.h5', 'val_accuracy', save_best_only=True) my_callbacks2 = [model_checkpoint2, early_stop, reduce_lr] history2 = model_augmented.fit(train_batches, steps_per_epoch=len(x_train) // batch_size,
# The return value is not captured here, so zScoreNormalization presumably
# normalises both input sets in place -- TODO confirm against its definition.
zScoreNormalization(trainInputs, testInputs)
# print(trainInputs[:5])
# print(testInputs[:5])

# Two hidden layers of 5 units each, trained with SGD; random_state is fixed.
mlpSettings = dict(
    hidden_layer_sizes=(5, 5),
    activation='relu',
    max_iter=1000,
    solver='sgd',
    verbose=10,
    random_state=10,
    learning_rate_init=.1,
)
classifier = MLPClassifier(**mlpSettings)
classifier.fit(trainInputs, trainOutputs)

computedOutputs = classifier.predict(testInputs)
computedList = list(computedOutputs)
print("Computed: " + str(computedList))
print("Real: " + str(testOutputs))

# evalMultiClass yields four values: accuracy, precision, recall and the
# confusion matrix, in that order.
accuracy, precision, recall, confusionMatrix = evalMultiClass(
    testOutputs, computedList, outputLabels)
for metricLabel, metricValue in (('accuracy: ', accuracy),
                                 ('precision: ', precision),
                                 ('recall: ', recall)):
    print(metricLabel, metricValue)

plotConfusionMatrix(confusionMatrix, outputLabels,
                    "Sepia vs Non-Sepia classification")
def main(): actionDatabaseDir = config.actionDatabaseDir categories = config.actionCateogory ## Step.1 Data loading and features extraction # Get features from training data over all categories print "Data loading and feature extraction ..." allFeatures = np.array([]) trainActionSequence = [] testActionSequence = [] if os.path.exists('allFeatures.npy') \ and os.path.exists('trainActionSequence.npy') \ and os.path.exists('testActionSequence.npy'): allFeatures = np.load('allFeatures.npy') trainActionSequence = np.load('trainActionSequence.npy') testActionSequence = np.load('testActionSequence.npy') else: for category in categories: categoryPath = os.path.join(actionDatabaseDir, category) allData = os.listdir(categoryPath) # Train data and test data loading for data in allData: filePath = os.path.join(categoryPath, data) actionSequence = ActionSequence(filePath) actionSequence.extractStip() # extract STIP subject = actionSequence.subject if subject in config.trainDataSubjects: trainActionSequence.append(actionSequence) if allFeatures.size == 0: allFeatures = actionSequence.stipFeatures else: allFeatures = np.vstack((allFeatures, actionSequence.stipFeatures)) elif subject in config.testDataSubjects: testActionSequence.append(actionSequence) np.save('allFeatures', allFeatures) np.save('trainActionSequence', trainActionSequence) np.save('testActionSequence', testActionSequence) ## Step.2 Codebook generation print "Codebook generation ..." 
bovw = BagOfWords(featureEncodingMethod = 'sparse-coding', poolingMethod = 'max-pooling', normalizationMethod = 'L2-norm') # bovw = BagOfWords(featureEncodingMethod = 'vector-quantization', # poolingMethod = 'sum-pooling', # normalizationMethod = 'L1-norm') if os.path.exists('codebook.npy'): codebook = np.load('codebook.npy') bovw.codebook = codebook else: bovw.generateCodebook(allFeatures) np.save('codebook', bovw.codebook) ## Step.3 Feature encoding for train data train_y = [] train_X = [] for actionSequence in trainActionSequence: # Feature encoding, pooling, normalization actionSequence.generateFinalFeatures(bovw) # Format train data train_y.append(actionSequence.categoryId) train_X.append(actionSequence.finalFeatures) # Cross validation if config.is_cv: # cross validation crossValidate(train_y, train_X) ## Step.4 Classification # Learning using SVM svmTool = SvmTool() print "Model learning ..." svmTool.learnModel(train_y, train_X) # Feature encoding for test data and classify data using learned model print "Predicating ..." 
numCorrect = 0 trueLabels = [] testPredLabels = [] removeall(config.predctDir) for actionSequence in testActionSequence: # Feature encoding, pooling, normalization actionSequence.generateFinalFeatures(bovw) # Format train data test_y = [actionSequence.categoryId] test_X = [actionSequence.finalFeatures] p_label, _ = svmTool.doPredication(test_y, test_X) predCagtegoryId = int(p_label[0]) predCategoryName = categories[predCagtegoryId] # Write predicated action to predicated category predCategoryPath = os.path.join(config.predctDir, predCategoryName) if not os.path.exists(predCategoryPath): os.makedirs(predCategoryPath) predFilePath = os.path.join(predCategoryPath, actionSequence.filename) f = open(predFilePath, 'w') f.close() if predCagtegoryId == actionSequence.categoryId: numCorrect += 1 trueLabels.append(actionSequence.categoryName) testPredLabels.append(predCategoryName) # Calculate results accuracy = numCorrect / float(len(testActionSequence)) print "accuracy: ", accuracy # Plot confusion matrix saveFilename = 'confusion_matrix.png' plotConfusionMatrix(trueLabels, testPredLabels, saveFilename, normalization = False) saveFilename = 'confusion_matrix_norm.png' plotConfusionMatrix(trueLabels, testPredLabels, saveFilename, normalization = True)
# Load the stored training history of the base model. The file is opened in
# a ``with`` block so the handle is closed as soon as unpickling finishes.
# NOTE(review): pickle.load can execute arbitrary code; this assumes the
# history files were produced locally by the training run -- confirm.
with open('base_history.pkl', 'rb') as historyFile:
    history1 = pickle.load(historyFile)
model.evaluate(x_test, y_test)

# Loss curves; history1 is indexed by metric name.
val_loss = (history1['val_loss'])
train_loss = (history1['loss'])
plotLoss(val_loss, train_loss)

# Accuracy curves.
val_accuracy = history1['val_accuracy']
train_accuracy = history1['accuracy']
plotAccuracy(val_accuracy, train_accuracy)

# Map the arg-max index along axis 1 of the targets and of the predictions
# to class labels, then plot the confusion matrix.
y_pred = model.predict(x_test)
y_test_label = [class_labels[i] for i in np.argmax(y_test, axis=1)]
y_pred_label = [class_labels[i] for i in np.argmax(y_pred, axis=1)]
plotConfusionMatrix(class_labels, y_test_label, y_pred_label)

###
# Same evaluation and curve plots for the model stored in
# 'base_model_augmented.h5'; no confusion matrix is plotted for this one.
model_augmented = keras.models.load_model('base_model_augmented.h5')
with open('base_history_augmented.pkl', 'rb') as historyFile:
    history2 = pickle.load(historyFile)
model_augmented.evaluate(x_test, y_test)

val_loss = (history2['val_loss'])
train_loss = (history2['loss'])
plotLoss(val_loss, train_loss)

val_accuracy = history2['val_accuracy']
train_accuracy = history2['accuracy']
plotAccuracy(val_accuracy, train_accuracy)