def calculateAndWriteExecutionTime(classifierstring = 'AllFeatures', shift = False, windowLength = 250, Sort = False):
    """Benchmark every feature function on one sample window and write the timings.

    Loads and sorts "data.txt", takes the first window of X1[0] .. X1[7]
    (one per channel, according to the original comment) as benchmark input,
    and for every index in range(len(featuremask)) stores the fastest of
    10000 single-shot timeit runs. The timings are passed to writeToFile
    together with windowLength.

    classifierstring -- name handed to features.readFeatureMask.
    shift, windowLength -- forwarded to dataset.loadDataset.
    Sort -- when true, the feature names and the timings, both ordered by
            ascending time, are additionally passed to printSortedValues.
    """
    featuremask = features.readFeatureMask(classifierstring)
    featureCount = len(featuremask)
    timingByFeature = {}
    timings = []

    rawX, rawY = dataset.loadDataset(filename="data.txt",
                                     filterCondition=True,
                                     filterType="DcNotch",
                                     removePadding=True,
                                     shift=shift,
                                     windowLength=windowLength)
    sortedX, sortedY = dataset.sortDataset(rawX, rawY,
                                           length=130,
                                           classes=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
                                           merge=True)

    # Some movement from each of the eight channels serves as input data.
    benchmarkInput = [sortedX[channel][0] for channel in range(8)]

    for index in range(featureCount):
        timedCall = wrapper(calculateFeature, index, benchmarkInput)
        # The second space-separated token of str(<mapped function>) is used
        # as the feature name (with a trailing space appended).
        nameParts = str(features.FUNC_MAP.get(index)).split(" ")
        featureName = nameParts[1] + " "
        fastest = min(timeit.repeat(timedCall, repeat=10000, number=1))
        timingByFeature[featureName] = fastest
        print("finished with feature %d" % index)
        timings.append(fastest)

    writeToFile(timings, windowLength)

    # Optionally report the features ordered from fastest to slowest.
    if Sort:
        timesAscending = sorted(timingByFeature.values())
        namesByTime = sorted(timingByFeature, key=timingByFeature.get)
        printSortedValues(namesByTime, timesAscending)
def startLearning():
    """Train a Keras classifier on features from "data.txt" and print its accuracy.

    Seeds NumPy, loads and sorts the dataset, extracts the features selected
    by the default feature mask, scales/splits them with
    classifier.scaleAndSplit and fits a KerasClassifier built by createModel.
    The 50-fold cross-validated accuracy on the training part is printed.
    Returns None.
    """
    # Fixed seed so that the result can be reproduced.
    np.random.seed(7)

    X, y = dataset.loadDataset(filename="data.txt",
                               filterCondition=True,
                               filterType="DcNotch",
                               removePadding=True,
                               shift=False,
                               windowLength=250)
    X, y = dataset.sortDataset(X, y,
                               length=10000,
                               classes=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
                               merge=True)

    channelIndex = 0
    featuremask = features.readFeatureMask()
    XL = features.extractFeaturesWithMask(X, channelIndex,
                                          featuremask=featuremask,
                                          printTime=False)
    # Only the training part of the split is used below.
    XLtrain, XLtest, yTrain, yTest, XL, scaler = classifier.scaleAndSplit(
        XL, y[0])

    # Labels are passed as-is; the one-hot encoding step
    # (np_utils.to_categorical) was disabled in the original.
    clf = KerasClassifier(build_fn=createModel,
                          epochs=10,
                          batch_size=50,
                          verbose=0)
    clf.fit(XLtrain, yTrain)

    scores = cross_val_score(clf, XLtrain, yTrain, cv=50, scoring='accuracy')
    print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))
    # print("") gives a blank line on both Python 2 and 3; a bare print()
    # prints "()" under Python 2.
    print("")
    print("Scores")
    print(scores)
def createPredictor(name, windowLength, datasetnum=1, shift=None, bruteForcemask=None, zeroClassMultiplier=2, datasetLength=130, kernel='rbf'):
    """Train an SVM predictor on one or more dataset folders and save it.

    The parameters are pickled to "Parameters/<name>.pkl", a feature mask is
    created (features.compareFeatures2) or copied from bruteForcemask, the
    features are extracted, scaled and split per dataset, and the classifier
    and scaler are saved under name. Cross-validated recall and the test-set
    scores returned by classifier.predict are printed.

    name -- identifier used for the parameter file, feature mask, machine
            state and scaler.
    windowLength -- forwarded to dataset.loadDataset; when shift is None,
            shifting is enabled for window lengths below 250.
    datasetnum -- a single int or an iterable of values passed to
            dataset.setDatasetFolder.
    shift -- forwarded to dataset.loadDataset; None selects it automatically.
    bruteForcemask -- name of an existing feature mask to reuse; None runs
            features.compareFeatures2 to create one.
    zeroClassMultiplier, datasetLength -- forwarded to dataset.sortDataset.
    kernel -- 'rbf', 'linear' (both svm.SVC) or 'linearSVC' (svm.LinearSVC).

    Returns None. Raises ValueError for an unknown kernel.
    """
    # Fail before any file is written or data is loaded; previously an unknown
    # kernel only surfaced as a NameError after feature extraction.
    if kernel not in ('rbf', 'linear', 'linearSVC'):
        raise ValueError("Unknown kernel: %r" % (kernel,))

    ##### Parameters
    if shift is None:
        if windowLength < 250:
            shift = True
            print("Shift is true")
        else:
            shift = False
            print("Shift is false")
    else:
        print("Shift is: %r" % shift)

    ##### Save parameters
    parameters = {
        'windowLength': windowLength,
        'shift': shift,
        'dataset': datasetnum,
        'datasetLength': datasetLength,
        'kernel': kernel
    }
    with open("Parameters" + slash + name + ".pkl", "wb") as parameterFile:
        pickle.dump(parameters, parameterFile)

    ##### Declarations
    bestParams = []  # never filled here; printed below to keep the report layout
    XL = [[] for _ in range(8)]
    y = [[] for _ in range(8)]
    XLlist = []
    ylist = []
    XLtrain = None
    XLtest = None
    yTrain = None
    yTest = None

    ##### Code
    if isinstance(datasetnum, int):
        datasetnum = [datasetnum]
    print(datasetnum)

    for i in datasetnum:
        print(i)
        dataset.setDatasetFolder(i)
        X, Y = dataset.loadDataset(filename="data.txt",
                                   filterCondition=True,
                                   filterType="DcNotch",
                                   removePadding=True,
                                   shift=shift,
                                   windowLength=windowLength)
        Xl, Y = dataset.sortDataset(X, Y,
                                    length=datasetLength,
                                    classes=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
                                    merge=True,
                                    zeroClassMultiplier=zeroClassMultiplier)
        XLlist.append(Xl)
        ylist.append(Y)
        XL, y = dataset.mergeDatasets(XL, Xl, y, Y)

    # "is None" instead of "== None": the comparison must not be evaluated
    # element-wise if an array-like mask is ever passed.
    if bruteForcemask is None:
        features.compareFeatures2(name, shift, windowLength, X=XL, y=y,
                                  plot=False)
        featuremask = features.readFeatureMask(name)
    else:
        # NOTE(review): the source indentation was lost; writing the copied
        # mask is assumed to belong to this branch only -- confirm.
        featuremask = features.readFeatureMask(bruteForcemask)
        features.writeFeatureMask(featuremask, name)

    for i in range(len(XLlist)):
        XLlist[i] = features.extractFeaturesWithMask(XLlist[i],
                                                     featuremask=featuremask,
                                                     printTime=False)
    print("XL list featureextraction finished")
    XL = features.extractFeaturesWithMask(XL,
                                          featuremask=featuremask,
                                          printTime=False)
    print("XL featureextraction finished")

    # The scaler is built from the merged features and then applied to every
    # dataset separately.
    scaler = classifier.makeScaler(XL)
    for i in range(len(XLlist)):
        XLtrain1, XLtest1, yTrain1, yTest1, XLlist[i] = classifier.scaleAndSplit(
            XLlist[i], ylist[i][0], scaler)
        if i == 0:
            XLtrain = XLtrain1
            yTrain = yTrain1
            XLtest = XLtest1
            yTest = yTest1
        else:
            XLtrain, yTrain = dataset.mergeDatasets(XLtrain, XLtrain1,
                                                    yTrain, yTrain1)
            XLtest, yTest = dataset.mergeDatasets(XLtest, XLtest1,
                                                  yTest, yTest1)
    print("Split fininshed, starting training")

    if kernel == 'rbf':
        clf = svm.SVC(kernel='rbf', gamma=0.01, C=10,
                      decision_function_shape='ovr')
    elif kernel == 'linear':
        clf = svm.SVC(kernel='linear', gamma=0.01, C=10,
                      decision_function_shape='ovr')
    else:  # 'linearSVC', guaranteed by the check at the top
        clf = svm.LinearSVC(penalty='l2', dual=False, C=10, random_state=42)

    clf.fit(XLtrain, yTrain)
    classifier.saveMachinestate(clf, name)
    classifier.saveScaler(scaler, name)

    scores = cross_val_score(clf, XLtrain, yTrain, cv=10,
                             scoring='recall_macro')
    print("Recall: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))
    # print("") gives a blank line on both Python 2 and 3; a bare print()
    # prints "()" under Python 2.
    print("")
    print("Scores")
    print(scores)

    tempAccuracyScore, tempPrecision, tempClassificationReport, tempf1Score = \
        classifier.predict(XLtest, clf, yTest)
    # Kept as one-element lists so the printed report looks as before.
    accuracyScore = [tempAccuracyScore]
    f1Score = [tempf1Score]
    precision = [tempPrecision]
    classificationReport = [tempClassificationReport]

    print("")
    print("The best parameters for the different channels are:")
    print("")
    print(bestParams)
    print("")
    print("The prediction accuracy for the different channels is:")
    print(accuracyScore)
    print("The f1 score which include false negatives etc is:")
    print(f1Score)
    print("The precision score:")
    print(precision)
    print("Classification Report:")
    print(classificationReport[0])
def multiplottestHHT(c):
    """Plot signal, IMFs and residual of four channels for every sample of class c.

    Loads "longdata.txt", keeps the samples of class c (dataset.sortDataset
    with classes=[c], length=10) and draws one figure per sample. Every figure
    has one column per channel; each column shows the raw signal, the rows of
    the array returned by fixedIterationHht except the last (labelled
    imf1, imf2, ...) and the last row in red (labelled "res.").

    c -- class label handed to dataset.sortDataset as the only class.
    Returns None.
    """
    x, y = dataset.loadDataset("longdata.txt", filterCondition=True,
                               filterType="DC")
    x, y = dataset.sortDataset(x, y, length=10, classes=[c])

    channelsToPlot = 4
    N_imfs = 6  # rows reserved per column: N_imfs + 1
    hiddenTicks = dict(which='both', left=False, bottom=False,
                       labelleft=False, labelbottom=False)

    for i in range(len(x[0])):  # iterate over the elements in the dataset
        # The first character of the movement name is dropped for the title.
        title = movements[y[0][i]][1:]
        fig = plt.figure(figsize=(20, 10))
        plt.suptitle(title)
        outer = gridspec.GridSpec(1, channelsToPlot, wspace=0.1, hspace=0.2,
                                  right=0.98, left=0.02, bottom=0.02,
                                  top=0.95)

        for j in range(channelsToPlot):  # iterate over channels
            inner = gridspec.GridSpecFromSubplotSpec(N_imfs + 1, 1,
                                                     subplot_spec=outer[j],
                                                     wspace=0.1, hspace=0.1)
            signal = x[j][i]
            imfs = fixedIterationHht(signal)
            # NOTE(review): the grid has N_imfs + 1 rows, so this presumably
            # relies on fixedIterationHht returning at most that many rows.
            n_imfs = imfs.shape[0]

            # One time stamp per sample. Built from integer sample indices so
            # the axis always has exactly len(signal) points; a float-step
            # arange can be off by one and "length / fs" truncates under
            # Python 2 integer division.
            time_samples = np.arange(len(signal)) / float(glb.fs)

            ax = plt.Subplot(fig, inner[0])
            ax.plot(time_samples, signal)
            ax.axis([time_samples[0], time_samples[-1],
                     signal.min(), signal.max()])
            ax.tick_params(**hiddenTicks)
            ax.grid(False)
            ax.set_ylabel('Signal')
            ax.set_title(channels[j])
            fig.add_subplot(ax)

            # Common symmetric y-range for all IMF rows of this channel.
            axis_extent = max(np.max(np.abs(imfs[:-1, :]), axis=0))
            for k in range(n_imfs - 1):
                ax = plt.Subplot(fig, inner[k + 1])
                ax.plot(time_samples, imfs[k, :])
                ax.axis([time_samples[0], time_samples[-1],
                         -axis_extent, axis_extent])
                ax.tick_params(**hiddenTicks)
                ax.grid(False)
                ax.set_ylabel('imf' + str(k + 1))
                fig.add_subplot(ax)

            # Last row of imfs is drawn in red as the residual.
            ax = plt.Subplot(fig, inner[n_imfs])
            ax.plot(time_samples, imfs[-1, :], 'r')
            ax.axis('tight')
            ax.tick_params(**hiddenTicks)
            ax.grid(False)
            ax.set_ylabel('res.')
            fig.add_subplot(ax)

        # NOTE(review): the source indentation was lost; showing each figure
        # as soon as it is complete is assumed here -- confirm that show()
        # was not meant to run once after all figures were built.
        plt.show()
def startLearning():
    """Train and evaluate a classifier on subject 2 and save it as 'AllFeatures'.

    Loads dataset folders 1 and 2, extracts the features selected by the
    'AllFeatures' mask, builds the scaler from the subject-2 features and
    splits both subjects into train/test parts. Only the subject-2 split is
    used: createAndTrain fits the classifier, the machine state and scaler
    are saved, and the 50-fold cross-validated accuracy plus the scores
    returned by classifier.predictTimer on the test part are printed.
    Returns None.
    """
    bestParams = []  # parameter tuning is disabled; printed below as before
    classifierstring = 'AllFeatures'
    channelIndex = 0

    # Subject 1
    dataset.setDatasetFolder(1)
    X1, y1 = dataset.loadDataset(filename="data.txt",
                                 filterCondition=True,
                                 filterType="DcNotch",
                                 removePadding=True,
                                 shift=False,
                                 windowLength=250)
    X1, y1 = dataset.sortDataset(X1, y1,
                                 length=130,
                                 classes=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
                                 merge=True)

    # Subject 2
    # NOTE(review): windowLength=2 with shift=True differs from every other
    # call in this file (250) -- confirm this is intended.
    dataset.setDatasetFolder(2)
    X2, y2 = dataset.loadDataset(filename="data.txt",
                                 filterCondition=True,
                                 filterType="DcNotch",
                                 removePadding=True,
                                 shift=True,
                                 windowLength=2)
    X2, y2 = dataset.sortDataset(X2, y2,
                                 length=130,
                                 classes=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
                                 merge=True,
                                 zeroClassMultiplier=1)

    featuremask = features.readFeatureMask(classifierstring)
    XL1 = features.extractFeaturesWithMask(X1, featuremask=featuremask,
                                           printTime=False)
    XL2 = features.extractFeaturesWithMask(X2, featuremask=featuremask,
                                           printTime=False)

    # Scale the data and split it into training and testing parts. The scaler
    # is built from subject 2 only; for a combined-subject model the original
    # notes suggest building it from np.append(XL1, XL2, axis=0) instead.
    scaler = classifier.makeScaler(XL2)
    XLtrain1, XLtest1, yTrain1, yTest1, XL1 = classifier.scaleAndSplit(
        XL1, y1[0], scaler)
    XLtrain2, XLtest2, yTrain2, yTest2, XL2 = classifier.scaleAndSplit(
        XL2, y2[0], scaler)

    # To combine the two subjects, append the subject-1 and subject-2 splits
    # (np.append(..., axis=0)) and train on the result. Parameter tuning via
    # classifier.tuneSvmParameters would fill bestParams; with it disabled,
    # None is passed as the parameter set.
    clf, clfPlot = createAndTrain(XLtrain2, yTrain2, None)

    classifier.saveMachinestate(clf, classifierstring)
    classifier.saveScaler(scaler, classifierstring)

    scores = cross_val_score(clf, XLtrain2, yTrain2, cv=50,
                             scoring='accuracy')
    print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))
    # print("") gives a blank line on both Python 2 and 3; a bare print()
    # prints "()" under Python 2.
    print("")
    print("Scores")
    print(scores)

    tempAccuracyScore, tempPrecision, tempClassificationReport, tempf1Score = \
        classifier.predictTimer(XLtest2, clf, yTest2)
    # Kept as one-element lists so the printed report looks as before.
    accuracyScore = [tempAccuracyScore]
    f1Score = [tempf1Score]
    precision = [tempPrecision]
    classificationReport = [tempClassificationReport]

    print("")
    print("The best parameters for the different channels are:")
    print("")
    print(bestParams)
    print("")
    print("The prediction accuracy for the different channels is:")
    print(accuracyScore)
    print("The f1 score which include false negatives etc is:")
    print(f1Score)
    print("The precision score:")
    print(precision)
    print("Classification Report of channel %d:" % channelIndex)
    print(classificationReport[0])
def _parseLoggedCombinations(logText):
    """Return the feature-index tuples stored in a PermutationLog text as a set."""
    import ast  # function-scope import: only this helper needs it

    entries = logText.split(':')
    # Entries are written as ":(0, 1, 2)", so the piece before the first ':'
    # is not an entry.
    entries.pop(0)
    # literal_eval only accepts Python literals, unlike eval, so a damaged or
    # tampered log cannot execute code.
    return set(tuple(ast.literal_eval(entry)) for entry in entries)


def _appendToLog(path, text):
    """Append text to the log file at path and close the file again."""
    with open(path, 'a+') as logfile:
        logfile.write(text)


def _selectFeatureColumns(rows, columns):
    """Return a float array holding only the given feature columns of every row."""
    selected = np.empty([len(rows), len(columns)])
    for j, row in enumerate(rows):
        selected[j] = [row[k] for k in columns]
    return selected


def compareFeatures(n_jobs=1, datasetnum=1, shift=False, windowLength=250, zeroClassMultiplier=1):
    """Brute-force test SVM performance for combinations of features.

    Interactively asks for the maximum/minimum number of features, whether
    this is a debug session, and (for normal sessions) whether to send a mail
    when finished and whether to save logs. Then all features in FUNC_MAP are
    extracted for the requested dataset folders, and for every combination
    size from the maximum down to the minimum each combination is evaluated
    with classifier.tuneSvmParameters.

    If "Logs/PermutationLog<size>.txt" exists, combinations found in it are
    skipped until the first combination that is not logged; from there on
    every combination is trained.

    n_jobs -- forwarded to classifier.tuneSvmParameters.
    datasetnum -- a single int or an iterable of values passed to
            dataset.setDatasetFolder.
    shift, windowLength -- forwarded to dataset.loadDataset.
    zeroClassMultiplier -- forwarded to dataset.sortDataset.

    Returns None (also when the interactive input is invalid).
    """
    # raw_input only exists on Python 2; fall back to input on Python 3.
    try:
        readLine = raw_input
    except NameError:
        readLine = input

    trainings = 0
    # Only filled in debug sessions (this is the advertised memory growth).
    allPermutations = []
    allParams = []
    allPavg = []

    datasetfile = "data.txt"
    skip = False

    ##### Interactive configuration
    print("Setup for brute force testing of all feature combination, features in list: %d" % len(FUNC_MAP))
    print("Enter maximum number of features: (\"all\" for all combinations)")
    inputString = readLine()
    if inputString.isdigit():
        inputval = int(inputString)
        if inputval > len(FUNC_MAP) or inputval < 1:
            print("Invalid input, exiting")
            return
        maxNumFeatures = inputval
    elif inputString == "all":
        maxNumFeatures = len(FUNC_MAP)
        minNumFeatures = 1
        skip = True  # no need to ask for the minimum
    else:
        print("Invalid input, exiting")
        return

    if not skip:
        print("Enter minimum number of features: ")
        inputString = readLine()
        if not inputString.isdigit() or int(inputString) < 1:
            print("Invalid input, exiting")
            return
        minNumFeatures = int(inputString)

    print("Is this a debug session?(Will leak memory) [Y/n]")
    if readLine() == "Y":
        debug = True
        print("Debug session activated, results will not be valid and memory will explode.")
        sendMail = False
        saveLogs = False
    else:
        debug = False
        print("Normal session activated, results will be valid. ")
        print("Do you want to send a mail notification when finished? [Y/n]")
        if readLine() == "Y":
            sendMail = True
            print("Sending mail when script is finished")
        else:
            print("Do not send mail when script is finished")
            sendMail = False
        print("Do you want to save logs? [Y/n]")
        if readLine() == "Y":
            saveLogs = True
            print("Saving logs")
        else:
            print("Do not save logs")
            saveLogs = False

    ##### Load datasets and extract all features
    XL = [[] for _ in range(8)]
    y = [[] for _ in range(8)]
    XLlist = []
    ylist = []
    XLtrain = None
    XLtest = None
    yTrain = None
    yTest = None

    if isinstance(datasetnum, int):
        datasetnum = [datasetnum]
    print(datasetnum)

    for i in datasetnum:
        print(i)
        dataset.setDatasetFolder(i)
        X, Y = dataset.loadDataset(filename=datasetfile,
                                   filterCondition=True,
                                   filterType="DcNotch",
                                   removePadding=True,
                                   shift=shift,
                                   windowLength=windowLength)
        Xl, Y = dataset.sortDataset(X, Y,
                                    length=130,
                                    classes=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
                                    merge=True,
                                    zeroClassMultiplier=zeroClassMultiplier)
        XLlist.append(Xl)
        ylist.append(Y)
        XL, y = dataset.mergeDatasets(XL, Xl, y, Y)

    for i in range(len(XLlist)):
        XLlist[i] = extractFeaturesWithMask(XLlist[i],
                                            featuremask=range(len(FUNC_MAP)),
                                            printTime=False)
    print("XL list featureextraction finished")
    XL = extractFeaturesWithMask(XL,
                                 featuremask=range(len(FUNC_MAP)),
                                 printTime=False)
    print("XL featureextraction finished")

    # The scaler is built from the merged features and then applied to every
    # dataset separately.
    scaler = classifier.makeScaler(XL)
    for i in range(len(XLlist)):
        XLtrain1, XLtest1, yTrain1, yTest1, XLlist[i] = classifier.scaleAndSplit(
            XLlist[i], ylist[i][0], scaler)
        if i == 0:
            XLtrain = XLtrain1
            yTrain = yTrain1
            XLtest = XLtest1
            yTest = yTest1
        else:
            XLtrain, yTrain = dataset.mergeDatasets(XLtrain, XLtrain1,
                                                    yTrain, yTrain1)
            XLtest, yTest = dataset.mergeDatasets(XLtest, XLtest1,
                                                  yTest, yTest1)
    print("Split fininshed, starting training")

    featureIndices = range(len(XL[0]))
    print("Featureextraction finished, number of features to check: %d" % len(XL[0]))

    # Clamp the requested range to what is actually available.
    if len(featureIndices) < maxNumFeatures:
        maxNumFeatures = len(featureIndices)
    elif maxNumFeatures < minNumFeatures:
        maxNumFeatures = minNumFeatures
    if minNumFeatures < 1:
        minNumFeatures = 1
    elif minNumFeatures > maxNumFeatures:
        minNumFeatures = maxNumFeatures
    print("Testing with combinations of %d to %d" % (minNumFeatures, maxNumFeatures))

    # Count with nCr; materialising the combinations would exhaust memory.
    numberOfCombinations = 0
    for i in range(minNumFeatures, maxNumFeatures + 1):
        comb = nCr(len(featureIndices), i)
        print("Number of iterations for combinations of length %d: %d" % (i, comb))
        numberOfCombinations += comb
    print("Number of combinations to test %d" % numberOfCombinations)

    ##### Evaluate the combinations, largest size first
    for i in range(maxNumFeatures, minNumFeatures - 1, -1):
        print("Starting to read PermutationLog")
        loggedCombinations = set()
        # NOTE(review): the log is read relative to the working directory but
        # written below dir_path; resuming only works when both are the same
        # directory -- confirm which one is intended.
        try:
            with open("Logs" + slash + "PermutationLog" + str(i) + ".txt", 'r') as permfile:
                loggedText = permfile.read()
        except IOError:
            print("PermutationLog file does not exist")
            skip = False
        else:
            print("Performing operations on PermutationLog buffer")
            # A set makes the membership test below O(1) per combination.
            loggedCombinations = _parseLoggedCombinations(loggedText)
            print("Finished with operations")
            skip = True

        start = datetime.now()
        print("Finished reading permutations file")
        lastTrainings = 1000

        for p in combinations(featureIndices, i):  # if order matters use permutations
            if skip:
                if p in loggedCombinations:
                    # Measure before printing; the elapsed time used to be
                    # printed before it had been assigned.
                    stop = datetime.now()
                    elapsedTime = stop - start
                    start = stop
                    numberOfCombinations -= 1
                    trainings += 1
                    if trainings == lastTrainings:
                        lastTrainings = trainings + 1000
                        print("Training number: %d" % trainings)
                        print("Remaining combinations: %d" % numberOfCombinations)
                        print("Elapsed time for checking that this combination exists: " + str(elapsedTime))
                else:
                    print("Found Starting point")
                    skip = False

            # Deliberately a second "if": the first unlogged combination is
            # trained in the same iteration that ends the skipping.
            if not skip:
                start = datetime.now()
                XLtrainPerm = _selectFeatureColumns(XLtrain, p)
                XLtestPerm = _selectFeatureColumns(XLtest, p)

                bestParams, presc, r, f1, s, report = classifier.tuneSvmParameters(
                    XLtrainPerm, yTrain, XLtestPerm, yTest,
                    debug=False, fast=True, n_jobs=n_jobs)

                if saveLogs:
                    logPrefix = dir_path + slash + "Logs" + slash
                    logSuffix = str(i) + ".txt"
                    _appendToLog(logPrefix + "PermutationLog" + logSuffix,
                                 ":" + str(p))
                    _appendToLog(logPrefix + "PrecisionLog" + logSuffix,
                                 ":" + "".join(',' + str(v) for v in presc))
                    _appendToLog(logPrefix + "RecallLog" + logSuffix,
                                 ":" + "".join(',' + str(v) for v in r))
                    _appendToLog(logPrefix + "F1Log" + logSuffix,
                                 ":" + "".join(',' + str(v) for v in f1))

                if debug:
                    allPermutations.append(p)
                    allParams.append(bestParams)
                    # Precision averaged with s as weights.
                    allPavg.append(np.average(presc, weights=s))
                    winner = allPavg.index(max(allPavg))
                    print(report)
                    print("Best features so far are: " +
                          convertPermutationToFeatureString(allPermutations[winner]))
                    # Single formatted string: print("...", value) prints a
                    # tuple under Python 2.
                    print("Best result so far are: " + str(allPavg[winner]))

                stop = datetime.now()
                numberOfCombinations -= 1
                trainings += 1
                elapsedTime = stop - start
                remainingTime = elapsedTime * numberOfCombinations
                print("Training number: %d" % trainings)
                print("Remaining combinations: %d" % numberOfCombinations)
                print("Elapsed time for training with this combination: " + str(elapsedTime))
                print("Estimated remaining time: " + str(remainingTime))

    # A final step that retrained an SVM on the winning combination
    # (allPermutations / allParams / allPavg), saved it as
    # "BruteForceClassifier" and wrote "featuremask.txt" was disabled in the
    # original and is not performed here.

    if sendMail:
        # NOTE(review): addresses and credentials are hard-coded here; they
        # should come from configuration instead of the source file.
        mail.sendemail(
            from_addr='*****@*****.**',
            to_addr_list=['*****@*****.**', '*****@*****.**'],
            cc_addr_list=[],
            subject="Training finished with combinations of %d to %d features"
            % (minNumFeatures, maxNumFeatures),
            message="Logs are ready for download ",
            login='******',
            password='******')
def compareFeatures2(name, shift, windowLength, n_jobs=-1, X=None, y=None, plot=True):
    """Select features with recursive feature elimination and save the mask.

    Extracts all features in FUNC_MAP, scales and splits them, and runs RFECV
    (linear SVC, C=10, 10-fold CV, accuracy) on the scaled features. The
    resulting support mask is written with writeFeatureMask under name, and
    the ranking, the per-subset CV scores and the cross-validated accuracy of
    the selected features on the training split are printed.

    name -- name under which the feature mask is written.
    shift, windowLength -- forwarded to dataset.loadDataset when data is
            loaded here.
    n_jobs -- forwarded to RFECV.
    X, y -- data and labels; when either is None, "data.txt" is loaded and
            sorted instead.
    plot -- when true, plot the CV score against the number of features.

    Returns None.
    """
    datasetfile = "data.txt"
    merge = True

    # "is None" instead of "== None": callers pass sequences/arrays, for which
    # an equality test may be evaluated element-wise.
    if X is None or y is None:
        X, y = dataset.loadDataset(filename=datasetfile,
                                   filterCondition=True,
                                   filterType="DcNotch",
                                   removePadding=True,
                                   shift=shift,
                                   windowLength=windowLength)
        if datasetfile == "longdata.txt":
            classes = [0, 5, 6, 4, 2, 8]
        else:
            classes = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
        # NOTE(review): the source indentation was lost; sorting is assumed
        # to apply only to data loaded here, not to X/y passed by the
        # caller -- confirm.
        X, y = dataset.sortDataset(X, y, length=1000, classes=classes,
                                   merge=merge)

    # Calculate every available feature, then scale and split.
    XL = extractFeaturesWithMask(X, featuremask=range(len(FUNC_MAP)))
    scaler = classifier.makeScaler(XL)
    XLtrain, XLtest, yTrain, yTest, XL = classifier.scaleAndSplit(
        XL, y[0], scaler)

    clf = svm.SVC(kernel="linear", C=10, decision_function_shape='ovr')
    rfecv = RFECV(estimator=clf, step=1, cv=10, n_jobs=n_jobs,
                  scoring='accuracy')
    rfecv.fit(XL, y[0])

    # Newer scikit-learn releases no longer provide grid_scores_; the mean
    # test scores are then available in cv_results_.
    cvScores = getattr(rfecv, "grid_scores_", None)
    if cvScores is None:
        cvScores = rfecv.cv_results_["mean_test_score"]

    print("Optimal number of features : %d" % rfecv.n_features_)
    print("Optimal features: ")
    print(rfecv.support_)
    writeFeatureMask(rfecv.support_, name)
    print("The ranking of the features: ")
    print(rfecv.ranking_)
    print("The scores for each feature combination:")
    print(cvScores)

    if plot:
        # Plot number of features VS. cross-validation scores
        plt.figure()
        plt.xlabel("Number of features selected")
        plt.ylabel("Cross validation score (nb of correct classifications)")
        plt.plot(range(1, len(cvScores) + 1), cvScores)
        plt.show()

    print("After feature selection: ")
    # cross_val_score refits a clone of the estimator, so the training data
    # must be reduced to the selected columns; passing the full matrix scored
    # the classifier on all features instead.
    scores = cross_val_score(rfecv.estimator_, rfecv.transform(XLtrain),
                             yTrain, cv=10, scoring='accuracy')
    print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))
    # print("") gives a blank line on both Python 2 and 3; a bare print()
    # prints "()" under Python 2.
    print("")
    print("Scores")
    print(scores)