Example #1
0
def main():
    """Train, validate and test the base/best decision trees on both datasets.

    For the two datasets (uppercase letters = DS1, Greek letters = DS2) this
    driver:

    1. loads the label-info dictionaries and the training data via ``utils``;
    2. builds a "base" and a "best" decision tree per dataset via
       ``decisionTree``;
    3. loads the validation and labelled test data;
    4. plots the instances of every split;
    5. runs each model on its validation and test split, writing the test
       results to the ``Base-DT-DS*.csv`` / ``Best-DT-DS*.csv`` files;
    6. plots a confusion matrix and produces a classification report for each
       of the four models on its test split.

    Returns:
        None. All output goes through ``print`` and the ``utils`` helpers.
    """
    upperCaseLettersInfoFile = 'info_1.csv'
    greekLettersInfoFile = 'info_2.csv'

    trainingData1File = 'train_1.csv'
    trainingData2File = 'train_2.csv'

    val1File = 'val_1.csv'
    val2File = 'val_2.csv'

    testWithLabel1File = 'test_with_label_1.csv'
    testWithLabel2File = 'test_with_label_2.csv'

    baseDTFile1 = 'Base-DT-DS1.csv'
    baseDTFile2 = 'Base-DT-DS2.csv'

    bestDTFile1 = 'Best-DT-DS1.csv'
    bestDTFile2 = 'Best-DT-DS2.csv'

    upperCaseLettersDict = utils.getInfo(upperCaseLettersInfoFile)
    greekLettersDict = utils.getInfo(greekLettersInfoFile)

    # Per the original author's note: getData uses the Pandas library and
    # returns a 2d array with the column headers in the first row.
    trainingData1 = utils.getData(trainingData1File)
    trainingData2 = utils.getData(trainingData2File)

    # Generate the ML models (one base and one best tree per dataset).
    baseDTUpperCase = decisionTree.generateBaseDT(trainingData1)
    baseDTGreek = decisionTree.generateBaseDT(trainingData2)
    bestDTUpperCase = decisionTree.generateBestDT(trainingData1)
    bestDTGreek = decisionTree.generateBestDT(trainingData2)

    val1Data = utils.getData(val1File)
    val2Data = utils.getData(val2File)
    testWithLabel1 = utils.getData(testWithLabel1File)
    testWithLabel2 = utils.getData(testWithLabel2File)

    # Plot the instances of every split of both datasets.
    utils.plotInstances(trainingData1, upperCaseLettersDict, 'Uppercase Letters', 'Training', 'trainingUppercase')
    utils.plotInstances(trainingData2, greekLettersDict, 'Greek Letters', 'Training', 'trainingGreek')
    utils.plotInstances(val1Data, upperCaseLettersDict, 'Uppercase Letters', 'Validation', 'validationUppercase')
    utils.plotInstances(val2Data, greekLettersDict, 'Greek Letters', 'Validation', 'validationGreek')
    utils.plotInstances(testWithLabel1, upperCaseLettersDict, 'Uppercase Letters', 'Test', 'testUppercase')
    utils.plotInstances(testWithLabel2, greekLettersDict, 'Greek Letters', 'Test', 'testGreek')

    # --- Dataset 1: uppercase letters ---
    print('Running Validation for Base DT - Upper Case Letters...')
    utils.testModel(baseDTUpperCase, val1Data)
    print('Running Validation for Best DT - Upper Case Letters...')
    utils.testModel(bestDTUpperCase, val1Data)

    print('\nRunning Tests for Base DT - Upper Case Letters...')
    baseDTRes1 = utils.testModel(baseDTUpperCase, testWithLabel1)
    utils.writeMLResults(baseDTRes1, baseDTFile1)
    print('Running Tests for Best DT - Upper Case Letters...')
    bestDTRes1 = utils.testModel(bestDTUpperCase, testWithLabel1)
    utils.writeMLResults(bestDTRes1, bestDTFile1)

    # --- Dataset 2: Greek letters ---
    print('\nRunning Validation for Base DT - Greek Letters...')
    utils.testModel(baseDTGreek, val2Data)
    print('Running Validation for Best DT - Greek Letters...')
    utils.testModel(bestDTGreek, val2Data)

    print('\nRunning Tests for Base DT - Greek Letters...')
    baseDTRes2 = utils.testModel(baseDTGreek, testWithLabel2)
    utils.writeMLResults(baseDTRes2, baseDTFile2)
    print('Running Tests for Best DT - Greek Letters...')
    bestDTRes2 = utils.testModel(bestDTGreek, testWithLabel2)
    utils.writeMLResults(bestDTRes2, bestDTFile2)

    # Confusion matrices of all four models on their test split.
    utils.plotConfusionMatrix(baseDTUpperCase, testWithLabel1, upperCaseLettersDict, 'Base DT Uppercase Letters', 'Base-DT-DS1-CM')
    utils.plotConfusionMatrix(baseDTGreek, testWithLabel2, greekLettersDict, 'Base DT Greek Letters', 'Base-DT-DS2-CM')
    utils.plotConfusionMatrix(bestDTUpperCase, testWithLabel1, upperCaseLettersDict, 'Best DT Uppercase Letters', 'Best-DT-DS1-CM')
    utils.plotConfusionMatrix(bestDTGreek, testWithLabel2, greekLettersDict, 'Best DT Greek Letters', 'Best-DT-DS2-CM')

    # Classification reports of all four models on their test split.
    # The "Best" reports are titled 'Best DT ...' so they are not confused
    # with the base-model reports (they were previously mislabelled 'Base').
    utils.getClassificationReport(baseDTUpperCase, testWithLabel1, upperCaseLettersDict, 'Base DT Uppercase Letters', 'Base-DT-DS1-Report')
    utils.getClassificationReport(baseDTGreek, testWithLabel2, greekLettersDict, 'Base DT Greek Letters', 'Base-DT-DS2-Report')
    utils.getClassificationReport(bestDTUpperCase, testWithLabel1, upperCaseLettersDict, 'Best DT Uppercase Letters', 'Best-DT-DS1-Report')
    utils.getClassificationReport(bestDTGreek, testWithLabel2, greekLettersDict, 'Best DT Greek Letters', 'Best-DT-DS2-Report')
Example #2
0
  "score_test": high_score_test,
  "accuracy_even": acc_even,
  "accuracy_disc": acc_disc,
  "accuracy_logr": acc_even
  })
# Write the results table to <result_path>/svm_joint.csv, creating the
# containing directory first when it does not exist yet.
dffilename = os.path.join(result_path, "svm_joint.csv")
df_dir = os.path.dirname(dffilename)
if not os.path.exists(df_dir):
  os.makedirs(df_dir)
df.to_csv(dffilename)

# Plot confusion matrix
# Class names are "intent1" .. "intent9".
target_names = []
for i in range(9):
  target_names.append("intent" + str(i+1))
cnf_matrix = confusion_matrix(test_labels, high_prediction)
utils.plotConfusionMatrix(cnf_matrix, target_names)

# Bar plot of the train / test scores.
utils.plotAccBar(high_score_train, high_score_test, num_frames)

# Save predictions to files (integer format, one text file per weighting).
# Even-weighted predictions
predevenfilename = os.path.join(result_path, "svm_joint_even.txt")
pred_even_dir = os.path.dirname(predevenfilename)
if not os.path.exists(pred_even_dir):
  os.makedirs(pred_even_dir)
np.savetxt(predevenfilename, pred_even, fmt="%d")
# Discount-weighted predictions
preddiscfilename = os.path.join(result_path, "svm_joint_disc.txt")
pred_disc_dir = os.path.dirname(preddiscfilename)
if not os.path.exists(pred_disc_dir):
  os.makedirs(pred_disc_dir)
np.savetxt(preddiscfilename, pred_disc, fmt="%d")
Example #3
0
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix
from utils import plotConfusionMatrix
import numpy as np
import keras

# Seed NumPy's global RNG so repeated runs use the same random stream.
np.random.seed(1588390)


# MARK: - load data
(xTrain, yTrain), (xTest, yTest) = keras.datasets.mnist.load_data()

# Flatten every sample to a vector of 28*28 values and divide by 255.
xTrain = xTrain.reshape(-1, 28*28) / 255.
xTest = xTest.reshape(-1, 28*28) / 255.


# MARK: - logistic regression
model = LogisticRegression(
    solver='saga',
    multi_class='multinomial',
    max_iter=100,
    verbose=2,
)
model.fit(xTrain, yTrain)

# Predict the test split and plot its confusion matrix with labels 0..9.
yPred = model.predict(xTest)
confusionMatrix = confusion_matrix(yTest, yPred)
plotConfusionMatrix(confusionMatrix, list(range(10)), 'problem1.png')
###
# Evaluate the trained model on the held-out test data.
model.evaluate(x_test, y_test)

# Loss curves recorded during training (validation vs. training).
val_loss = history1.history['val_loss']
train_loss = history1.history['loss']
plotLoss(val_loss, train_loss)

# Accuracy curves recorded during training (validation vs. training).
val_accuracy = history1.history['val_accuracy']
train_accuracy = history1.history['accuracy']
plotAccuracy(val_accuracy, train_accuracy)

# Map the arg-max index of every true / predicted row to its class label
# and plot the resulting confusion matrix.
y_pred = model.predict(x_test)
true_indices = np.argmax(y_test, axis=1)
pred_indices = np.argmax(y_pred, axis=1)
y_test_label = [class_labels[idx] for idx in true_indices]
y_pred_label = [class_labels[idx] for idx in pred_indices]
plotConfusionMatrix(
    class_labels, y_test_label, y_pred_label, title='Confusion matrix - vgg16'
)

#---
### train model with data augmentation
# Fresh optimizer and model instance for the augmented run.
opt2 = keras.optimizers.Adam(learning_rate=1e-5)
model_augmented = VggModel()
model_augmented.compile(
    optimizer=opt2,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
# Checkpoint to 'vgg_model_augmented.h5', monitoring validation accuracy and
# keeping only the best model.
model_checkpoint2 = ModelCheckpoint(
    'vgg_model_augmented.h5',
    monitor='val_accuracy',
    save_best_only=True,
)
my_callbacks2 = [model_checkpoint2, early_stop, reduce_lr]
history2 = model_augmented.fit(train_batches,
                               steps_per_epoch=len(x_train) // batch_size,
Example #5
0
        zScoreNormalization(trainInputs, testInputs)
    # print(trainInputs[:5])
    # print(testInputs[:5])

    # Multi-layer perceptron with two hidden layers of 5 units each, ReLU
    # activation, trained with SGD. random_state is fixed so runs are
    # repeatable; verbose=10 makes the classifier print training progress.
    classifier = MLPClassifier(hidden_layer_sizes=(
        5,
        5,
    ),
                               activation='relu',
                               max_iter=1000,
                               solver='sgd',
                               verbose=10,
                               random_state=10,
                               learning_rate_init=.1)

    # Fit on the training split.
    classifier.fit(trainInputs, trainOutputs)

    # Predict labels for the test split.
    computedOutputs = classifier.predict(testInputs)

    # Show predicted and real labels one above the other for comparison.
    print("Computed: " + str(list(computedOutputs)))
    print("Real:     " + str(testOutputs))

    # evalMultiClass is defined elsewhere; it is unpacked here into accuracy,
    # precision, recall and a confusion matrix.
    accuracy, precision, recall, confusionMatrix = evalMultiClass(
        testOutputs, list(computedOutputs), outputLabels)
    print('accuracy: ', accuracy)
    print('precision: ', precision)
    print('recall: ', recall)

    # Plot the confusion matrix under the given title.
    plotConfusionMatrix(confusionMatrix, outputLabels,
                        "Sepia vs Non-Sepia classification")
Example #6
0
def main():
    """Run the action-recognition pipeline end to end.

    Steps, in order:

    1. Load every action sequence under ``config.actionDatabaseDir`` (one
       sub-directory per category in ``config.actionCateogory``), extract its
       STIP features and split the sequences into train / test by subject.
       The results are cached in ``allFeatures.npy``,
       ``trainActionSequence.npy`` and ``testActionSequence.npy`` and reused
       when all three files exist.
    2. Build the bag-of-words codebook from the training features, or load it
       from ``codebook.npy`` when that file exists.
    3. Encode the training sequences and, when ``config.is_cv`` is set, run
       cross validation on them.
    4. Train the SVM, classify every test sequence, create an empty marker
       file named after the sequence under
       ``config.predctDir/<predicted category>``, print the accuracy and plot
       a raw and a normalized confusion matrix.

    The ``print`` calls use the single-argument parenthesized form so the
    function produces the same output on Python 2 and Python 3.

    Returns:
        None.
    """
    actionDatabaseDir = config.actionDatabaseDir
    categories = config.actionCateogory

    ## Step.1 Data loading and features extraction
    # Get features from training data over all categories
    print("Data loading and feature extraction ...")

    allFeatures = np.array([])
    trainActionSequence = []
    testActionSequence = []

    cacheFiles = ('allFeatures.npy',
                  'trainActionSequence.npy',
                  'testActionSequence.npy')

    if all(os.path.exists(cacheFile) for cacheFile in cacheFiles):
        # NOTE(review): the two sequence caches are saved from lists of
        # ActionSequence objects; recent NumPy releases may require
        # allow_pickle=True to load such arrays - confirm against the NumPy
        # version in use.
        allFeatures = np.load('allFeatures.npy')
        trainActionSequence = np.load('trainActionSequence.npy')
        testActionSequence = np.load('testActionSequence.npy')
    else:
        for category in categories:
            categoryPath = os.path.join(actionDatabaseDir, category)

            # Train data and test data loading
            for data in os.listdir(categoryPath):
                filePath = os.path.join(categoryPath, data)
                actionSequence = ActionSequence(filePath)
                actionSequence.extractStip()    # extract STIP

                subject = actionSequence.subject
                if subject in config.trainDataSubjects:
                    trainActionSequence.append(actionSequence)

                    # Only training features feed the codebook; stack them
                    # row-wise into a single array.
                    if allFeatures.size == 0:
                        allFeatures = actionSequence.stipFeatures
                    else:
                        allFeatures = np.vstack((allFeatures,
                                                 actionSequence.stipFeatures))

                elif subject in config.testDataSubjects:
                    testActionSequence.append(actionSequence)

        np.save('allFeatures', allFeatures)
        np.save('trainActionSequence', trainActionSequence)
        np.save('testActionSequence', testActionSequence)

    ## Step.2 Codebook generation
    print("Codebook generation ...")
    bovw = BagOfWords(featureEncodingMethod = 'sparse-coding',
                      poolingMethod = 'max-pooling',
                      normalizationMethod = 'L2-norm')

    if os.path.exists('codebook.npy'):
        bovw.codebook = np.load('codebook.npy')
    else:
        bovw.generateCodebook(allFeatures)
        np.save('codebook', bovw.codebook)

    ## Step.3 Feature encoding for train data
    train_y = []
    train_X = []

    for actionSequence in trainActionSequence:
        # Feature encoding, pooling, normalization
        actionSequence.generateFinalFeatures(bovw)

        # Format train data
        train_y.append(actionSequence.categoryId)
        train_X.append(actionSequence.finalFeatures)

    # Optional cross validation on the training set
    if config.is_cv:
        crossValidate(train_y, train_X)

    ## Step.4 Classification
    # Learning using SVM
    svmTool = SvmTool()
    print("Model learning ...")
    svmTool.learnModel(train_y, train_X)

    # Feature encoding for test data and classify data using learned model
    print("Predicating ...")
    numCorrect = 0
    trueLabels = []
    testPredLabels = []
    removeall(config.predctDir)

    for actionSequence in testActionSequence:
        # Feature encoding, pooling, normalization
        actionSequence.generateFinalFeatures(bovw)

        # Format test data (one sample per prediction call)
        test_y = [actionSequence.categoryId]
        test_X = [actionSequence.finalFeatures]

        p_label, _ = svmTool.doPredication(test_y, test_X)
        predCategoryId = int(p_label[0])
        predCategoryName = categories[predCategoryId]

        # Record the prediction as an empty file named after the sequence,
        # placed in the directory of the predicted category.
        predCategoryPath = os.path.join(config.predctDir, predCategoryName)
        if not os.path.exists(predCategoryPath):
            os.makedirs(predCategoryPath)

        predFilePath = os.path.join(predCategoryPath, actionSequence.filename)

        # Context manager guarantees the handle is closed even on error.
        with open(predFilePath, 'w'):
            pass

        if predCategoryId == actionSequence.categoryId:
            numCorrect += 1

        trueLabels.append(actionSequence.categoryName)
        testPredLabels.append(predCategoryName)

    # Calculate results.
    # len() is used instead of truthiness because testActionSequence may be
    # a NumPy array when it was loaded from the cache. With no test
    # sequences the accuracy is reported as 0.0 instead of raising
    # ZeroDivisionError.
    numTest = len(testActionSequence)
    accuracy = numCorrect / float(numTest) if numTest else 0.0
    # Two spaces after the colon reproduce the output of the former
    # Python 2 statement `print "accuracy: ", accuracy`.
    print("accuracy:  %s" % (accuracy,))

    # Plot confusion matrix (raw counts, then normalized)
    saveFilename = 'confusion_matrix.png'
    plotConfusionMatrix(trueLabels, testPredLabels,
                        saveFilename, normalization = False)

    saveFilename = 'confusion_matrix_norm.png'
    plotConfusionMatrix(trueLabels, testPredLabels,
                        saveFilename, normalization = True)
# Load the pickled training history of the base model. The context manager
# closes the file handle, which the bare pickle.load(open(...)) left open.
# NOTE: pickle can execute arbitrary code while loading; only load history
# files that were produced by this project.
with open('base_history.pkl', 'rb') as history_file:
    history1 = pickle.load(history_file)

# Evaluate the model on the held-out test data.
model.evaluate(x_test, y_test)

# Loss curves (validation vs. training).
val_loss = (history1['val_loss'])
train_loss = (history1['loss'])
plotLoss(val_loss, train_loss)

# Accuracy curves (validation vs. training).
val_accuracy = history1['val_accuracy']
train_accuracy = history1['accuracy']
plotAccuracy(val_accuracy, train_accuracy)

# Map the arg-max index of every true / predicted row to its class label
# and plot the confusion matrix.
y_pred = model.predict(x_test)
y_test_label = [class_labels[i] for i in np.argmax(y_test, axis=1)]
y_pred_label = [class_labels[i] for i in np.argmax(y_pred, axis=1)]
plotConfusionMatrix(class_labels, y_test_label, y_pred_label)

###
# Load the saved augmented model and its pickled training history.
model_augmented = keras.models.load_model('base_model_augmented.h5')
# The context manager closes the file handle, which the bare
# pickle.load(open(...)) left open.
# NOTE: pickle can execute arbitrary code while loading; only load history
# files that were produced by this project.
with open('base_history_augmented.pkl', 'rb') as history_file:
    history2 = pickle.load(history_file)

# Evaluate the augmented model on the held-out test data.
model_augmented.evaluate(x_test, y_test)

# Loss curves (validation vs. training).
val_loss = (history2['val_loss'])
train_loss = (history2['loss'])
plotLoss(val_loss, train_loss)

# Accuracy curves (validation vs. training).
val_accuracy = history2['val_accuracy']
train_accuracy = history2['accuracy']
plotAccuracy(val_accuracy, train_accuracy)