import csv
import operator
import pdb

import numpy as np
from scipy.stats import itemfreq


def onlineAccuracy(gtLogFile, predLogFile):
    """
    Evaluate the performance of the online prediction by comparing 
    the ground truth log file to the prediction log file.
    
    @param gtLogFile: Tab-separated ground truth log file; individual parts are
    separated by RECORDING_STARTED entries
    @param predLogFile: Tab-separated log file containing the online predictions,
    also separated by RECORDING_STARTED entries
    """

    classesToIgnore = ["Home"]
    #classesToIgnore = []

    # Syntax: key = old class name, value = new class name:
    classesToRename = {"Cycling": "Street"}
    #classesToRename = {}

    # Syntax: key = new class name, value = list of old classes that should be merged:
    #classesToMerge = {"Transport": ["Car", "Bus", "Train", "Tram"]}
    classesToMerge = {}

    with open(gtLogFile) as f:
        reader = csv.reader(f, delimiter="\t")
        gtListOriginal = list(reader)
    
    with open(predLogFile) as f:
        reader = csv.reader(f, delimiter="\t")
        predListOriginal = list(reader)
  
    # These classes will be completely removed from the prediction and the ground truth lists:
    for ignoreClass in classesToIgnore:
        # Remove every class that should be ignored:
        gtListOriginal = [el for el in gtListOriginal if ignoreClass not in el]
        predListOriginal = [el for el in predListOriginal if ignoreClass not in el]

    # Rename equivalent classes, so that both classes will be treated as one:
    for i in range(len(gtListOriginal)):
        for j in range(len(gtListOriginal[i])):
            if gtListOriginal[i][j] in classesToRename.keys():
                gtListOriginal[i][j] = classesToRename[gtListOriginal[i][j]]
    for i in range(len(predListOriginal)):
        for j in range(len(predListOriginal[i])):
            if predListOriginal[i][j] in classesToRename.keys():
                predListOriginal[i][j] = classesToRename[predListOriginal[i][j]]

    # Merge multiple classes into a new class:
    for i in range(len(gtListOriginal)):
        for j in range(len(gtListOriginal[i])):
            for newClass, oldClasses in classesToMerge.items():
                if gtListOriginal[i][j] in oldClasses:
                    gtListOriginal[i][j] = newClass
    for i in range(len(predListOriginal)):
        for j in range(len(predListOriginal[i])):
            for newClass, oldClasses in classesToMerge.items():
                if predListOriginal[i][j] in oldClasses:
                    predListOriginal[i][j] = newClass

    numRecStartedGT = 0
    numRecStartedPred = 0

    # List containing the indices of all RECORDING_STARTED entries in the
    # GT and the prediction log file
    recStartedListGT = []
    recStartedListPred = []

    for i in range(len(gtListOriginal)):
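        # Rows with a single column are treated as RECORDING_STARTED markers in the GT log: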
        if len(gtListOriginal[i]) <= 1:
            numRecStartedGT += 1
            recStartedListGT.append(i)

    for i in range(len(predListOriginal)):
        if (predListOriginal[i][0] == "RECORDING_STARTED"):
            numRecStartedPred += 1
            recStartedListPred.append(i)
   
    # If the prediction and the ground truth file have a different number
    # of RECORDING_STARTED entries, the data is useless for us and we stop here:
    if (numRecStartedPred != numRecStartedGT):
        print("Prediction and ground truth file don't match, cannot" +
        " evaluate the accuracy:")
        print("Prediction file has " + str(numRecStartedPred) + " RECORDING_STARTED entries " +
        "and GT file has " + str(numRecStartedGT) + " RECORDING_STARTED entries")
        return None 


    classesDict = createClassesDict(gtListOriginal, predListOriginal)

    y_GT = []
    y_pred = []

    # Now create predictions and ground truth arrays from one RECORDING_STARTED
    # entry until the next one:
    for k in range(numRecStartedPred):

        tmpGT = np.array(gtListOriginal)
        tmpPred = np.array(predListOriginal)

        startIdxGT = recStartedListGT[k]+1
        startIdxPred = recStartedListPred[k]+1
        if (k < (numRecStartedPred-1)):
            endIdxGT = recStartedListGT[k+1]
            endIdxPred = recStartedListPred[k+1]
        else:
            endIdxGT = len(gtListOriginal)
            endIdxPred = len(predListOriginal)

        gtList = list(tmpGT[startIdxGT:endIdxGT])
        predList = list(tmpPred[startIdxPred:endIdxPred])
       
        # Round every entry to 0.5s:
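        # (round(2*x)/2 snaps a timestamp to the nearest multiple of 0.5 seconds)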
        for i in range(len(gtList)):
            try:
                gtList[i][0] = round(2 * float(gtList[i][0]))/2
            except:
                pdb.set_trace()

        # Find start and stop time, i.e. min and max values:
        tmpArray = np.array(gtList)

        # If there are no more entries after a RECORDING_STARTED line, do nothing:
        if tmpArray.shape[0] != 0:

            start_time_gt = min(tmpArray[:,0].astype(np.float32, copy=False))
            stop_time_gt = max(tmpArray[:,0].astype(np.float32, copy=False))

            y_GT_tmp = createGTArray(gtList, classesDict)

            y_pred_tmp = createPredictionArray(predList, start_time_gt, 
            stop_time_gt, len(y_GT_tmp), classesDict)

            y_GT.extend(y_GT_tmp)
            y_pred.extend(y_pred_tmp)

    y_GT = np.array(y_GT)
    y_pred = np.array(y_pred)

    y_gt_ravel = y_GT.ravel()
    y_gt_ravel = y_gt_ravel[y_gt_ravel != -1]
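    # itemfreq returns one row per distinct class: [class number, number of occurrences]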
    freq = itemfreq(y_gt_ravel)
    n_entries = sum(freq[:,1])
    
    print("--- GT distribution: ---")
    for i in range(freq.shape[0]):
        perc = round( 100 * (freq[i,1] / n_entries), 1)
        print(classesDict[int(freq[i,0])] + " " + str(perc) + "%")
    print("------")

    # Whenever silence is predicted, we ignore those parts for the calculation 
    # of the accuracy, i.e. we delete those entries from the GT and the
    # prediction array:

    silenceClassNum = classesDict["silence"]

    # Calculate how many percent of the predictions are silent; only points
    # where ground truth was provided are considered here:
    freq = itemfreq(y_pred).astype(int)

    silenceCount = freq[np.where(freq[:,0] == silenceClassNum)[0][0], 1]
    totalCount = freq[:,1].sum()
    
    print("In total " + str(round(silenceCount/ (float(totalCount)), 2) * 100) + 
    "% of all considered samples were silent")
    print("-----")

    # Calculate how many percent of the samples are silent for each class in the ground truth:
    silencePerClass(y_pred, y_GT, classesDict, silenceClassNum)

    # Remove points that were silent or where no ground truth was provided:
    y_pred, y_GT = removeInvalids(y_pred, y_GT, silenceClassNum)

    # Calculate the overall accuracy and print it:
    correctPred = 0
    for i in range(y_pred.shape[0]):
        if y_pred[i] in y_GT[i,:]:
            correctPred += 1
    
    accuracy = correctPred / float(y_pred.shape[0])
    print("Overall accuracy: " + str(round(accuracy*100,2)) + "%")
    print("-----")

    # The method that plots the confusion matrix needs a classes dictionary
    # that is NOT bidirectional, so we remove all elements where the keys
    # are numbers:
    uniDirectionalClassesDict = {}
    for key in classesDict.keys():
        if type(key) is str:
            uniDirectionalClassesDict[key] = classesDict[key]

    # Adjust the classesDict by removing the entry for silence:
    if "silence" in uniDirectionalClassesDict.keys():
        silenceNumber = uniDirectionalClassesDict["silence"]
        # Decrement the numbers of all classes that come after the position where silence was:
        for key in uniDirectionalClassesDict.keys():
            if uniDirectionalClassesDict[key] > silenceNumber:
                uniDirectionalClassesDict[key] = uniDirectionalClassesDict[key] - 1

        # Delete the silence entry:
        del uniDirectionalClassesDict["silence"]

        # Adjust the ground truth and the prediction array accordingly, by decrementing
        # all class numbers larger than the silence class number:
        for i in range(silenceNumber+1, len(uniDirectionalClassesDict)+1):
            y_GT[y_GT == i] = i-1
            y_pred[y_pred == i] = i-1
    confusionMatrixMulti(y_GT, y_pred, uniDirectionalClassesDict)
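

# A minimal usage sketch (the log file names are hypothetical examples, not files shipped with this code):
# onlineAccuracy("gtLog.txt", "predictionLog.txt")

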
def evaluateGMM(trainedGMM, evalFeatures, evalAmps, evalLabels, silenceClassNum):
    """

    @param trainedGMM:
    @param evalFeatures: not scaled!
    @param evalAmps: Amplitude values
    @param evalLabels: Ground truth label array with multiple labels per data points
    @param silenceClassNum:
    @return:
    """

    """ Calculate the predictions on the evaluation features: """
    y_pred = makePrediction(trainedGMM, evalFeatures, evalAmps, silenceClassNum)

    n_classes = len(trainedGMM["classesDict"])
    
    # Delete invalid rows, i.e. points where all five label columns are -1 (no ground truth given):
    invalidRow = np.array([-1,-1,-1,-1,-1])
    maskValid = ~np.all(evalLabels==invalidRow,axis=1)
    evalLabels = evalLabels[maskValid]
    y_pred = y_pred[maskValid]

    # Calculate how many percent of the samples are silent and delete silent samples:
    maskNonSilent = (y_pred != silenceClassNum)
    numSilentSamples = np.sum(~maskNonSilent)
    silentPercentage = numSilentSamples / float(y_pred.shape[0])
    print(str(round(silentPercentage*100,2)) + "% percent of all samples are silent")

    evalLabels = evalLabels[maskNonSilent]
    y_pred = y_pred[maskNonSilent]
    
    # Calculate the overall accuracy and print it:
    correctPred = 0
    for i in range(y_pred.shape[0]):
        if y_pred[i] in evalLabels[i,:]:
            correctPred += 1

    accuracy = correctPred / float(y_pred.shape[0])
    print("Overall accuracy: " + str(round(accuracy*100,2)) + "%")
    print("-----")

    """ Calculate confusion matrix: """
    cm = np.zeros((n_classes,n_classes))

    for i in range(y_pred.shape[0]):
        if y_pred[i] in evalLabels[i,:]:
            """ If correct prediction made, add one on the corresponding diagonal element in the confusion matrix: """
            cm[int(y_pred[i]),int(y_pred[i])] += 1
        else:
            """ If not predicted correctly, divide by the number of ground truth labels for that point and split
            between corresponding non-diagonal elements: """
            gtLabels = evalLabels[i,:]
            labels = gtLabels[gtLabels != -1] #ground truth labels assigned to that point (only valid ones)
            n_labels = len(labels) #number of valid labels assigned
            weight = 1/float(n_labels) #value that will be added to each assigned (incorrect) label

            for label in labels:
                cm[int(label), int(y_pred[i])] += weight

    normCM = []

    for row in cm:
        rowSum = sum(row)
        normCM.append([round(x/float(rowSum),2) for x in row])

    """ Sort labels: """
    sortedTmp = sorted(trainedGMM["classesDict"].iteritems(), key=operator.itemgetter(1))
    sortedLabels = []
    for j in range(len(sortedTmp)):
        sortedLabels.append(sortedTmp[j][0])


    """ Calculate precision: """
    colSum = np.sum(cm, axis=0)
    precisions = []
    for i in range(n_classes):
        tmpPrecision = cm[i,i] / float(colSum[i])
        # print("Precision " + str(sortedLabels[i]) + ": " + str(tmpPrecision))
        precisions.append(tmpPrecision)

    """ Calculate recall: """
    recalls = []
    for i in range(n_classes):
        recalls.append(normCM[i][i])
        # print("Recall " + str(sortedLabels[i]) + ": " + str(normCM[i][i]))

    """ Calculate F1-score: """
    F1s = {}
    for i in range(n_classes):
        tmpF1 = 2 * (precisions[i] * recalls[i]) / float(precisions[i] + recalls[i])
        # print("F1 " + str(sortedLabels[i]) + ": " + str(tmpF1))
        F1s[sortedLabels[i]] = tmpF1

    # Plot the confusion matrix:
    confusionMatrixMulti(evalLabels, y_pred, trainedGMM["classesDict"], ssh=True)

    resDict = {"accuracy": accuracy, "F1dict": F1s}

    return resDict
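

# A minimal usage sketch (variable names are hypothetical; trainedGMM is assumed to be a dict
# with a "classesDict" entry, as used above):
# result = evaluateGMM(trainedGMM, evalFeatures, evalAmps, evalLabels, silenceClassNum)
# print(result["accuracy"], result["F1dict"])

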
def offlineAccuracy(gmm, jsonFileList, gtLogFile):
    """
    Test classifier on data recorded in the experiment. The features used have to be extracted
    from the individual parts of the file and not from the whole file at once. 
    
    @param gmm: GMM classifier object
    @param jsonFileList: List of files containing the extracted features for the
    individual parts of the file.
    @param gtLogFile: Text file containing the ground truth. Individual parts are separated
    by RECORDING_STARTED entries
    """

    with open(gtLogFile) as f:
        reader = csv.reader(f, delimiter="\t")
        gtListOriginal = list(reader)
    
    # List containing the indices of all RECORDING_STARTED entries
    recStartedList = []
    for i in range(len(gtListOriginal)):
        if len(gtListOriginal[i]) <= 1:
            recStartedList.append(i)

    # The number of given feature files has to match the number of RECORDING_STARTED entries:
    if (len(recStartedList) != len(jsonFileList)):
        print("Ground truth file does not match the number of provided feature files " 
        + "evaluation will be stopped: ")
        print(str(len(jsonFileList)) + " feature files were provided, but ground truth " + 
        "file contains only " + str(len(recStartedList)) + " RECORDING_STARTED entries")
        return None

    y_pred = []
    y_gt = []
    # Make a prediction and compare it to the GT for each part, from one RECORDING_STARTED entry to the next:
    for k in range(len(jsonFileList)):
        
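        # Silence is assigned the next free class number, i.e. one above the largest class index: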
        silenceClassNum = max(gmm["classesDict"].values())+1
        y_pred_tmp = createPrediction(gmm, jsonFileList[k], silenceClassNum)
        y_pred_tmp = y_pred_tmp.tolist()

        tmpGT = np.array(gtListOriginal)
        startIdx = recStartedList[k]+1

        if (k < (len(recStartedList)-1)):
            endIdx = recStartedList[k+1]
        else:
            endIdx = len(gtListOriginal)

        gtList = list(tmpGT[startIdx:endIdx])
        y_gt_tmp = createGTMulti(gmm["classesDict"], len(y_pred_tmp), gtList)
        y_gt_tmp = y_gt_tmp.tolist()

        y_pred.extend(y_pred_tmp)
        y_gt.extend(y_gt_tmp)

    y_gt = np.array(y_gt)
    y_pred = np.array(y_pred)
    
    # Delete invalid rows, i.e. points where all five label columns are -1 (no ground truth given):
    invalidRow = np.array([-1,-1,-1,-1,-1])
    maskValid = ~np.all(y_gt==invalidRow,axis=1)
    y_gt = y_gt[maskValid]
    y_pred = y_pred[maskValid]
  
    # Calculate how many percent of the samples are silent and delete silent samples from
    # y_gt and y_pred:
    maskNonSilent = (y_pred != silenceClassNum)
    numSilentSamples = np.sum(~maskNonSilent)
    silentPercentage = numSilentSamples / float(y_pred.shape[0])
    print(str(round(silentPercentage*100,2)) + "% of all samples are silent")

    y_gt = y_gt[maskNonSilent]
    y_pred = y_pred[maskNonSilent]

    # Calculate the overall accuracy and print it:
    correctPred = 0
    for i in range(y_pred.shape[0]):
        if y_pred[i] in y_gt[i,:]:
            correctPred += 1

    accuracy = correctPred / float(y_pred.shape[0])
    print("Overall accuracy: " + str(round(accuracy*100,2)) + "%")
    print("-----")

    confusionMatrixMulti(y_gt, y_pred, gmm["classesDict"])