import csv
import operator
import pdb

import numpy as np
from scipy.stats import itemfreq

# Helper functions such as createClassesDict, createGTArray, createPredictionArray,
# makePrediction, createPrediction, createGTMulti, silencePerClass, removeInvalids
# and confusionMatrixMulti are assumed to be defined elsewhere in this package.


def onlineAccuracy(gtLogFile, predLogFile):
    """
    Evaluate the performance of the online prediction by comparing the ground
    truth log file to the prediction log file.

    @param gtLogFile: Tab-separated ground truth log file; individual parts are
    separated by RECORDING_STARTED entries
    @param predLogFile: Tab-separated prediction log file created by the online
    classifier
    """
    classesToIgnore = ["Home"]
    #classesToIgnore = []

    # Syntax: key = old class name, value = new class name:
    classesToRename = {"Cycling": "Street"}
    #classesToRename = {}

    # Syntax: key = new class name, value = list of old classes that should be merged:
    #classesToMerge = {"Transport": ["Car", "Bus", "Train", "Tram"]}
    classesToMerge = {}

    with open(gtLogFile) as f:
        reader = csv.reader(f, delimiter="\t")
        gtListOriginal = list(reader)

    with open(predLogFile) as f:
        reader = csv.reader(f, delimiter="\t")
        predListOriginal = list(reader)

    # These classes will be completely removed from the prediction and the
    # ground truth lists:
    for ignoreClass in classesToIgnore:
        gtListOriginal = [el for el in gtListOriginal if ignoreClass not in el]
        predListOriginal = [el for el in predListOriginal if ignoreClass not in el]

    # Rename equivalent classes, so that both classes are treated as one:
    for row in gtListOriginal:
        for j in range(len(row)):
            if row[j] in classesToRename:
                row[j] = classesToRename[row[j]]
    for row in predListOriginal:
        for j in range(len(row)):
            if row[j] in classesToRename:
                row[j] = classesToRename[row[j]]

    # Merge multiple classes into a new class. (Iterating over items() instead
    # of indexing keys()/values() also works on Python 3, where dict views are
    # not indexable):
    for row in gtListOriginal:
        for j in range(len(row)):
            for newName, oldNames in classesToMerge.items():
                if row[j] in oldNames:
                    row[j] = newName
    for row in predListOriginal:
        for j in range(len(row)):
            for newName, oldNames in classesToMerge.items():
                if row[j] in oldNames:
                    row[j] = newName

    numRecStartedGT = 0
    numRecStartedPred = 0
    # Indices of all RECORDING_STARTED entries in the GT and the prediction log file:
    recStartedListGT = []
    recStartedListPred = []
    for i in range(len(gtListOriginal)):
        # In the GT file, RECORDING_STARTED lines are the only ones with a single column:
        if len(gtListOriginal[i]) <= 1:
            numRecStartedGT += 1
            recStartedListGT.append(i)
    for i in range(len(predListOriginal)):
        if predListOriginal[i][0] == "RECORDING_STARTED":
            numRecStartedPred += 1
            recStartedListPred.append(i)

    # If the prediction and the ground truth file have a different number of
    # RECORDING_STARTED entries, the two files cannot be aligned and we stop here:
    if numRecStartedPred != numRecStartedGT:
        print("Prediction and ground truth file don't match, cannot evaluate the accuracy:")
        print("Prediction file has " + str(numRecStartedPred) + " RECORDING_STARTED entries "
              + "and GT file has " + str(numRecStartedGT) + " RECORDING_STARTED entries")
        return None

    classesDict = createClassesDict(gtListOriginal, predListOriginal)

    y_GT = []
    y_pred = []

    # Build prediction and ground truth arrays from each RECORDING_STARTED entry
    # until the next one:
    for k in range(numRecStartedPred):
        tmpGT = np.array(gtListOriginal)
        tmpPred = np.array(predListOriginal)
        startIdxGT = recStartedListGT[k] + 1
        startIdxPred = recStartedListPred[k] + 1
        if k < (numRecStartedPred - 1):
            endIdxGT = recStartedListGT[k+1]
            endIdxPred = recStartedListPred[k+1]
        else:
            endIdxGT = len(gtListOriginal)
            endIdxPred = len(predListOriginal)
        gtList = list(tmpGT[startIdxGT:endIdxGT])
        predList = list(tmpPred[startIdxPred:endIdxPred])

        # Round every timestamp to 0.5s:
        for i in range(len(gtList)):
            try:
                gtList[i][0] = round(2 * float(gtList[i][0])) / 2
            except Exception:
                # Drop into the debugger on malformed timestamp entries:
                pdb.set_trace()

        # Find start and stop time, i.e. min and max timestamps:
        tmpArray = np.array(gtList)
        # If there are no more entries after a RECORDING_STARTED line, do nothing:
        if tmpArray.shape[0] != 0:
            start_time_gt = min(tmpArray[:, 0].astype(np.float32, copy=False))
            stop_time_gt = max(tmpArray[:, 0].astype(np.float32, copy=False))
            y_GT_tmp = createGTArray(gtList, classesDict)
            y_pred_tmp = createPredictionArray(predList, start_time_gt, stop_time_gt,
                                               len(y_GT_tmp), classesDict)
            y_GT.extend(y_GT_tmp)
            y_pred.extend(y_pred_tmp)

    y_GT = np.array(y_GT)
    y_pred = np.array(y_pred)

    # Print the class distribution of the ground truth, ignoring -1 entries,
    # i.e. points where no ground truth was provided:
    y_gt_ravel = y_GT.ravel()
    y_gt_ravel = y_gt_ravel[y_gt_ravel != -1]
    freq = itemfreq(y_gt_ravel)
    n_entries = sum(freq[:, 1])
    print("--- GT distribution: ---")
    for i in range(freq.shape[0]):
        perc = round(100 * freq[i, 1] / float(n_entries), 1)
        print(classesDict[int(freq[i, 0])] + " " + str(perc) + "%")
    print("------")

    # Whenever silence is predicted, those parts are ignored in the accuracy
    # calculation, i.e. we delete those entries from the GT and the prediction array:
    silenceClassNum = classesDict["silence"]

    # Calculate what percentage of the predictions is silent; only points where
    # ground truth was provided are considered here:
    freq = itemfreq(y_pred).astype(int)
    silenceIdx = np.where(freq[:, 0] == silenceClassNum)[0]
    # Guard against the case where silence was never predicted:
    silenceCount = freq[silenceIdx[0], 1] if len(silenceIdx) > 0 else 0
    totalCount = freq[:, 1].sum()
    print("In total " + str(round(100 * silenceCount / float(totalCount), 2))
          + "% of all considered samples were silent")
    print("-----")

    # Calculate what percentage of the samples is silent for each class in the
    # ground truth:
    silencePerClass(y_pred, y_GT, classesDict, silenceClassNum)

    # Remove points that were silent or where no ground truth was provided:
    y_pred, y_GT = removeInvalids(y_pred, y_GT, silenceClassNum)

    # Calculate the overall accuracy and print it. A prediction counts as correct
    # if it matches any of the (possibly multiple) ground truth labels:
    correctPred = 0
    for i in range(y_pred.shape[0]):
        if y_pred[i] in y_GT[i, :]:
            correctPred += 1
    accuracy = correctPred / float(y_pred.shape[0])
    print("Overall accuracy: " + str(round(accuracy * 100, 2)) + "%")
    print("-----")

    # The method that plots the confusion matrix needs a classes dictionary that
    # is NOT bidirectional, so we remove all elements whose keys are numbers:
    uniDirectionalClassesDict = {}
    for key in classesDict.keys():
        if type(key) is str:
            uniDirectionalClassesDict[key] = classesDict[key]

    # Adjust the classes dictionary by removing the entry for silence:
    if "silence" in uniDirectionalClassesDict:
        silenceNumber = uniDirectionalClassesDict["silence"]
        # Decrement the numbers of all classes after the position where silence was:
        for key in list(uniDirectionalClassesDict.keys()):
            if uniDirectionalClassesDict[key] > silenceNumber:
                uniDirectionalClassesDict[key] -= 1
        # Delete the silence entry:
        del uniDirectionalClassesDict["silence"]
        # Adjust the ground truth and the prediction array by decrementing all
        # class numbers larger than the silence class number:
        for i in range(silenceNumber + 1, len(uniDirectionalClassesDict) + 1):
            y_GT[y_GT == i] = i - 1
            y_pred[y_pred == i] = i - 1

    confusionMatrixMulti(y_GT, y_pred, uniDirectionalClassesDict)
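
# A minimal, self-contained sketch (not part of the original pipeline; the toy
# arrays are made up for illustration) of the multi-label accuracy rule used
# above: a prediction counts as correct if it matches ANY of the ground truth
# labels assigned to that point, with -1 marking unused label slots.
def _multiLabelAccuracySketch():
    y_gt_toy = np.array([[0, 1, -1],
                         [2, -1, -1],
                         [1, 2, -1]])
    y_pred_toy = np.array([1, 0, 2])
    correct = sum(1 for i in range(y_pred_toy.shape[0])
                  if y_pred_toy[i] in y_gt_toy[i, :])
    # 1 matches {0, 1}; 0 does not match {2}; 2 matches {1, 2} -> accuracy 2/3:
    return correct / float(y_pred_toy.shape[0])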
def evaluateGMM(trainedGMM, evalFeatures, evalAmps, evalLabels, silenceClassNum):
    """
    @param trainedGMM: Trained GMM classifier object
    @param evalFeatures: Evaluation features, not scaled!
    @param evalAmps: Amplitude values
    @param evalLabels: Ground truth label array with multiple labels per data point
    @param silenceClassNum: Class number used to mark silent samples
    @return: Dictionary containing the overall accuracy and a dictionary of
    per-class F1-scores
    """
    # Calculate the predictions on the evaluation features:
    y_pred = makePrediction(trainedGMM, evalFeatures, evalAmps, silenceClassNum)

    n_classes = len(trainedGMM["classesDict"])

    # Delete invalid rows, i.e. points without any ground truth label:
    invalidRow = np.array([-1, -1, -1, -1, -1])
    maskValid = ~np.all(evalLabels == invalidRow, axis=1)
    evalLabels = evalLabels[maskValid]
    y_pred = y_pred[maskValid]

    # Calculate what percentage of the samples is silent and delete silent samples:
    maskNonSilent = (y_pred != silenceClassNum)
    numSilentSamples = np.sum(~maskNonSilent)
    silentPercentage = numSilentSamples / float(y_pred.shape[0])
    print(str(round(silentPercentage * 100, 2)) + "% of all samples are silent")
    evalLabels = evalLabels[maskNonSilent]
    y_pred = y_pred[maskNonSilent]

    # Calculate the overall accuracy and print it. A prediction counts as correct
    # if it matches any of the (possibly multiple) ground truth labels:
    correctPred = 0
    for i in range(y_pred.shape[0]):
        if y_pred[i] in evalLabels[i, :]:
            correctPred += 1
    accuracy = correctPred / float(y_pred.shape[0])
    print("Overall accuracy: " + str(round(accuracy * 100, 2)) + "%")
    print("-----")

    # Calculate the confusion matrix:
    cm = np.zeros((n_classes, n_classes))
    for i in range(y_pred.shape[0]):
        if y_pred[i] in evalLabels[i, :]:
            # If a correct prediction was made, add one to the corresponding
            # diagonal element of the confusion matrix:
            cm[int(y_pred[i]), int(y_pred[i])] += 1
        else:
            # If not predicted correctly, divide by the number of ground truth
            # labels for that point and split the count between the
            # corresponding non-diagonal elements:
            gtLabels = evalLabels[i, :]
            labels = gtLabels[gtLabels != -1]  # only the valid ground truth labels
            n_labels = len(labels)  # number of valid labels assigned to this point
            weight = 1 / float(n_labels)  # added to each assigned (incorrect) label
            for label in labels:
                cm[int(label), int(y_pred[i])] += weight

    # Normalize every row of the confusion matrix:
    normCM = []
    for row in cm:
        rowSum = sum(row)
        normCM.append([round(x / float(rowSum), 2) for x in row])

    # Sort the class labels by their class number. (items() instead of the
    # Python-2-only iteritems() works on both Python versions):
    sortedTmp = sorted(trainedGMM["classesDict"].items(), key=operator.itemgetter(1))
    sortedLabels = [entry[0] for entry in sortedTmp]

    # Calculate precision:
    colSum = np.sum(cm, axis=0)
    precisions = []
    for i in range(n_classes):
        tmpPrecision = cm[i, i] / float(colSum[i])
        # print("Precision " + str(sortedLabels[i]) + ": " + str(tmpPrecision))
        precisions.append(tmpPrecision)

    # Calculate recall (the diagonal of the row-normalized confusion matrix):
    recalls = []
    for i in range(n_classes):
        recalls.append(normCM[i][i])
        # print("Recall " + str(sortedLabels[i]) + ": " + str(normCM[i][i]))

    # Calculate the F1-score:
    F1s = {}
    for i in range(n_classes):
        tmpF1 = 2 * (precisions[i] * recalls[i]) / float(precisions[i] + recalls[i])
        # print("F1 " + str(sortedLabels[i]) + ": " + str(tmpF1))
        F1s[sortedLabels[i]] = tmpF1

    # Plot the confusion matrix:
    confusionMatrixMulti(evalLabels, y_pred, trainedGMM["classesDict"], ssh=True)

    resDict = {"accuracy": accuracy, "F1dict": F1s}
    return resDict
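
# A minimal sketch (toy data only, not part of the original pipeline) of the
# confusion-matrix weighting used in evaluateGMM: a misclassified point with
# several ground truth labels contributes 1/n_labels to each corresponding
# off-diagonal cell, so every data point adds exactly 1.0 to the matrix in total.
def _weightedConfusionSketch():
    cm_toy = np.zeros((3, 3))
    # One point with ground truth labels {0, 2} that was predicted as class 1:
    gt_labels_toy = np.array([0, 2, -1])
    pred_toy = 1
    labels = gt_labels_toy[gt_labels_toy != -1]
    weight = 1 / float(len(labels))  # 0.5 for each of the two valid labels
    for label in labels:
        cm_toy[int(label), int(pred_toy)] += weight
    return cm_toy  # rows 0 and 2 each hold 0.5 in column 1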
def offlineAccuracy(gmm, jsonFileList, gtLogFile):
    """
    Test the classifier on data recorded in the experiment. The features used
    have to be extracted from the individual parts of the file and not from the
    whole file at once.

    @param gmm: GMM classifier object
    @param jsonFileList: List of files containing the extracted features for the
    individual parts of the file
    @param gtLogFile: Text file containing the ground truth. Individual parts
    are separated by RECORDING_STARTED entries
    """
    with open(gtLogFile) as f:
        reader = csv.reader(f, delimiter="\t")
        gtListOriginal = list(reader)

    # Indices of all RECORDING_STARTED entries:
    recStartedList = []
    for i in range(len(gtListOriginal)):
        if len(gtListOriginal[i]) <= 1:
            recStartedList.append(i)

    # The number of given feature files has to match the number of
    # RECORDING_STARTED entries:
    if len(recStartedList) != len(jsonFileList):
        print("Ground truth file does not match the number of provided feature files, "
              + "evaluation will be stopped:")
        print(str(len(jsonFileList)) + " feature files were provided, but the ground truth "
              + "file contains " + str(len(recStartedList)) + " RECORDING_STARTED entries")
        return None

    # The silence class gets the first number not used by a real class. (Defined
    # once before the loop, as it is also needed after the loop):
    silenceClassNum = max(gmm["classesDict"].values()) + 1

    y_pred = []
    y_gt = []

    # Make a prediction for each part between two RECORDING_STARTED entries and
    # compare it to the ground truth:
    for k in range(len(jsonFileList)):
        y_pred_tmp = createPrediction(gmm, jsonFileList[k], silenceClassNum)
        y_pred_tmp = y_pred_tmp.tolist()

        tmpGT = np.array(gtListOriginal)
        startIdx = recStartedList[k] + 1
        if k < (len(recStartedList) - 1):
            endIdx = recStartedList[k+1]
        else:
            endIdx = len(gtListOriginal)
        gtList = list(tmpGT[startIdx:endIdx])

        y_gt_tmp = createGTMulti(gmm["classesDict"], len(y_pred_tmp), gtList)
        y_gt_tmp = y_gt_tmp.tolist()

        y_pred.extend(y_pred_tmp)
        y_gt.extend(y_gt_tmp)

    y_gt = np.array(y_gt)
    y_pred = np.array(y_pred)

    # Delete invalid rows, i.e. points without any ground truth label:
    invalidRow = np.array([-1, -1, -1, -1, -1])
    maskValid = ~np.all(y_gt == invalidRow, axis=1)
    y_gt = y_gt[maskValid]
    y_pred = y_pred[maskValid]

    # Calculate what percentage of the samples is silent and delete silent
    # samples from y_gt and y_pred:
    maskNonSilent = (y_pred != silenceClassNum)
    numSilentSamples = np.sum(~maskNonSilent)
    silentPercentage = numSilentSamples / float(y_pred.shape[0])
    print(str(round(silentPercentage * 100, 2)) + "% of all samples are silent")
    y_gt = y_gt[maskNonSilent]
    y_pred = y_pred[maskNonSilent]

    # Calculate the overall accuracy and print it. A prediction counts as correct
    # if it matches any of the (possibly multiple) ground truth labels:
    correctPred = 0
    for i in range(y_pred.shape[0]):
        if y_pred[i] in y_gt[i, :]:
            correctPred += 1
    accuracy = correctPred / float(y_pred.shape[0])
    print("Overall accuracy: " + str(round(accuracy * 100, 2)) + "%")
    print("-----")

    confusionMatrixMulti(y_gt, y_pred, gmm["classesDict"])
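
# Hedged usage sketch: the file names and the pickled classifier below are
# hypothetical placeholders, and loading the trained GMM with pickle is an
# assumption, not something this module prescribes.
if __name__ == "__main__":
    import pickle
    with open("trainedGMM.p", "rb") as f:  # hypothetical path
        trainedGMM = pickle.load(f)
    onlineAccuracy("gt_log.txt", "prediction_log.txt")  # hypothetical log files
    offlineAccuracy(trainedGMM, ["part0.json", "part1.json"], "gt_log.txt")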