def kNN_Validate(dataName, grpName, folds, k=3, d=2, trans=None):
    """
    Cross-validate a k-nearest-neighbour classifier.

    params: dataName := file with the data set
            grpName := file with the different groupings
            folds := number of folds
            k := number of neighbours used for the vote, default 3
            d := order of the Minkowski distance, default 2
            trans := optional transformation function fitted on the training
                     fold and applied to both the training and the test fold
    objective: runs kNN on every fold, then aggregates the per-fold
               (predicted, groundTruth) pairs through the ev helpers
    returns: the value produced by ev.rocData; its "Acc" entry is printed
    """
    splitter = vd.Validate(grpName, folds)
    data, labels = bd(dataName)
    perFold = []  # one (predicted, groundTruth) tuple per fold

    for fold in range(splitter.getFoldCount()):
        print("kNN iteration %d" % fold)

        # row indices belonging to this fold's test and training partitions
        testIndex = splitter.getTest(fold)
        trainIndex = splitter.getTrain(fold)

        testSet = data[testIndex, :]
        testLabels = labels[testIndex]
        trainSet = data[trainIndex, :]
        trainLabels = labels[trainIndex]

        if trans is not None:
            # fld is supervised (needs the labels); any other transformation
            # is fitted on the samples alone and its result is transposed
            supervised = trans is fld
            if supervised:
                projection = trans(trainSet, trainLabels)
            else:
                projection = trans(trainSet).transpose()

            trainSet = np.matmul(trainSet, projection)
            testSet = np.matmul(testSet, projection)

            if supervised:
                # the fld projection is forced into a single float64 column
                trainSet = trainSet.reshape(-1, 1).astype(np.float64)
                testSet = testSet.reshape(-1, 1).astype(np.float64)

        # standardize the training and test set together
        trainSet, testSet = standard(trainSet, testSet)

        predicted = knn.kNN(trainSet, testSet, trainLabels, k, d)
        perFold.append((predicted, testLabels))

    confusion = ev.buildConfusionMatrices(perFold)
    normalized = ev.normalizeConfMat(confusion)
    averaged = ev.getAvgProbMatrix(normalized)
    print("knn results", averaged)
    summary = ev.rocData(averaged)
    print("%d-NN Accuracy: %f" % (k, summary["Acc"]))
    return summary
Exemple #2
0
def bpnn_mpp_fusion(dataName, grpName, folds, trans=None):
    """
    Cross-validate a fusion of the neural-net and MPP classifiers.

    params:
        dataName := file with the data set
        grpName := file with the different groupings
        folds := number of folds
        trans := optional transformation function fitted on the training
                 fold and applied to both the training and the test fold
    objective: for every fold, classifies the test set with bpnn.nn and with
               mpp (case 2), merges both predictions through bind, and then
               aggregates the per-fold results through the ev helpers
    returns: the value produced by ev.rocData; its "Acc" entry is printed
    """
    valid = vd.Validate(grpName, folds)
    data, labels = bd(dataName)
    results = []  # stores tuples: (list_predicted, list_groundTruth)
    for i in range(valid.getFoldCount()):
        # get the train and test indices of the data set
        testIndex, trainIndex = valid.getTest(i), valid.getTrain(i)
        # build the test set and test labels
        testSet, testLabels = data[testIndex, :], labels[testIndex]
        # build the train set and training labels
        trainSet, trainLabels = data[trainIndex, :], labels[trainIndex]
        # if the data is to be transformed
        if trans is not None:
            if trans is fld:
                # fld is supervised: it is fitted with the training labels and
                # its projection is forced into a single float64 column
                tmp = trans(trainSet, trainLabels)
                trainSet = np.matmul(trainSet, tmp)
                trainSet = trainSet.reshape(-1, 1).astype(np.float64)
                testSet = np.matmul(testSet, tmp)
                testSet = testSet.reshape(-1, 1).astype(np.float64)
            else:
                # any other transformation is fitted on the samples alone
                tmp = trans(trainSet).transpose()
                trainSet = np.matmul(trainSet, tmp)
                testSet = np.matmul(testSet, tmp)
        # standardize the training and test set
        trainSet, testSet = standard(trainSet, testSet)

        # classify the test set with both classifiers and fuse the outputs
        pred0 = bpnn.nn(trainSet, testSet, trainLabels)
        pred1 = mpp(trainSet, testSet, trainLabels, 2)
        # NOTE(review): bind is handed the ground-truth test labels; confirm
        # it does not use them to choose between the two predictions.
        pred = bind(pred0, pred1, testLabels)
        # np.int was only an alias of the builtin int and no longer exists in
        # NumPy >= 1.24, so the builtin is used directly (same dtype).
        results.append((np.array(pred).astype(int), testLabels))
    results = ev.buildConfusionMatrices(results)
    results = ev.normalizeConfMat(results)
    results = ev.getAvgProbMatrix(results)
    results = ev.rocData(results)
    print("bpnn_mpp2_fusion Accuracy: %f" % (results["Acc"]))
    return results
Exemple #3
0
def MPP_Validate(dataName, grpName, folds, case=3, priors=None, trans=None):
    """
    Cross-validate the MPP classifier with a chosen discriminant case.

    params: dataName := file with the data set
            grpName := file with the different groupings
            folds := number of folds
            case := case of the discriminant function to use, default 3
            priors := the prior probabilities for the two classes,
                      default None
            trans := transformation function to do dimensionality reduction
    objective: performs cross validation using mpp as classifier
               with the discriminant function cases
    returns: per the original description, a dictionary with performance
             evaluation data

    NOTE(review): the visible body stops after the fld projection of the
    training set; the classification, aggregation and return statements are
    not part of this excerpt, so case and priors are never used here.
    """
    valid = vd.Validate(grpName, folds)
    data, labels = bd(dataName)
    results = []  #stores tuples: (list_predicted, list_groundTruth)
    for i in range(valid.getFoldCount()):
        print("Iteration %d" % i)
        #get the train and test indices of the data set
        testIndex, trainIndex = valid.getTest(i), valid.getTrain(i)
        #build the test set and test labels
        testSet, testLabels = data[testIndex, :], labels[testIndex]
        #build the train set and training labels
        trainSet, trainLabels = data[trainIndex, :], labels[trainIndex]
        #if the data is to be transformed
        if trans is not None:
            #fld is supervised: it is fitted with the training labels
            if trans is fld:
                tmp = trans(trainSet, trainLabels)
                #project the training samples and force a single float64 column
                trainSet = np.matmul(trainSet,
                                     tmp).reshape(-1, 1).astype(np.float64)
Exemple #4
0
#!/usr/local/bin/python3

from buildData import buildData as bd
import numpy as np


def pca(tr, tol=.1, n_components=3):
    '''
    Return the leading principal directions of a data set.

    params: tr := 2-D data with one sample per row, one feature per column
            tol := kept for backward compatibility; currently unused
            n_components := number of principal directions to keep,
                            default 3
    returns: array of shape (min(n_components, n_features), n_features)
             whose rows are unit-length eigenvectors of the covariance
             matrix, ordered by decreasing eigenvalue, so that
             np.matmul(tr, pca(tr).transpose()) projects the samples
    '''
    # np.cov yields a 0-d array for a single feature; keep it a matrix
    cov = np.atleast_2d(np.cov(tr, rowvar=False))
    # a covariance matrix is symmetric, so eigh applies and keeps the
    # result real; it returns the eigenvalues in ascending order
    eigvals, eigvecs = np.linalg.eigh(cov)
    leading = np.argsort(eigvals)[::-1][:n_components]
    # eigenvectors are the COLUMNS of eigvecs: select columns, then
    # transpose so each returned row is one principal direction
    return eigvecs[:, leading].transpose()


# Load the EEG samples and project them onto the directions returned by pca.
data, labels = bd("../data/EEG_dropcat.csv")
mat = np.matmul(data, pca(data).T)

# Emit one comma-separated line per sample: projected values, then the label.
for projected, label in zip(mat, labels):
    print(*projected, label, sep=",")