Example #1
        # pair each file to be predicted and evaluated (test_fileID) with the classifier (classifierID)
        testFileIDandClassifierIDs = [(test_fileID, classifierID) for test_fileID in test_fileIDs]
        # print('# testFileIDandClassifierIDs =', testFileIDandClassifierIDs)
        y_test_byFile, y_pred_byFile = [], []
        for testFileCnt, testFileIDandClassifierID in enumerate(testFileIDandClassifierIDs):
            epochNumByStage_testL, epochNumByStage_predL, avg_ep_testL, avg_ep_predL = [], [], [], []
            print('testFileIDandClassifierID = ' + str(testFileIDandClassifierID))
            testFileID = testFileIDandClassifierID[0]
            # print('testFileIDandClassifierID[0] =', testFileIDandClassifierID[0])
            predictionTargetDataFilePath = params.pickledDir + '/' + params.eegFilePrefix + '.' + testFileID + '.pkl'
            print('predictionTargetDataFilePath =', predictionTargetDataFilePath)
            with open(predictionTargetDataFilePath, 'rb') as dataFileHandler:
                (eeg, ch2, stageSeq, timeStamps) = pickle.load(dataFileHandler)
            totalEpochNum = len(stageSeq[lstm_length-1:])
            params_for_classifier = ParameterSetup(paramFileName='params.'+classifierID+'.json')
            params_for_classifier.markovOrderForPrediction = markovOrder
            (y_test, y_pred) = classifySequentially(params_for_classifier, paramID, params.pickledDir, testFileIDandClassifierID)
            y_test_byFile.append(y_test)
            y_pred_byFile.append(y_pred)

        testFileIDandClassifierIDs_byBlock.append(testFileIDandClassifierIDs)
        y_test_byBlock.append(y_test_byFile)
        y_pred_byBlock.append(y_pred_byFile)

    testFileIDandClassifierIDs_byMethod.append(testFileIDandClassifierIDs_byBlock)
    y_test_byMethod.append(y_test_byBlock)
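
Each prediction-target pickle loaded above is expected to hold a (eeg, ch2, stageSeq, timeStamps) tuple. Below is a minimal sketch for inspecting one such file; the path and the lstm_length value are placeholders, since neither is defined inside the fragment:

import pickle

# Hypothetical path following the naming scheme used above:
# params.pickledDir + '/' + params.eegFilePrefix + '.' + testFileID + '.pkl'
path = '../data/pickled/PREFIX.FILE-ID.pkl'  # placeholder prefix and fileID

with open(path, 'rb') as f:
    eeg, ch2, stageSeq, timeStamps = pickle.load(f)

lstm_length = 10  # assumed value; the fragment uses lstm_length without defining it
print('scored epochs:', len(stageSeq))
print('epochs usable for evaluation:', len(stageSeq[lstm_length - 1:]))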
Example #2
    train_fileTripletL = readTrainFileIDsUsedForTraining(params, classifierID)
    train_fileIDs = [train_fileID for _, _, train_fileID in train_fileTripletL]
    # print('# train_fileIDs =', train_fileIDs)
    test_fileTripletL = getFilesNotUsedInTrain(params, train_fileIDs)
    all_fileTripletL = test_fileTripletL
    allFileIDandClassifierIDs = [(fileID, classifierID) for _, _, fileID in all_fileTripletL]
    print('# allFileIDandClassifierIDs =', allFileIDandClassifierIDs)

    y_testL, y_predL = [], []
    for fileCnt, fileIDandClassifierID in enumerate(allFileIDandClassifierIDs):
        epochNumByStage_testL, epochNumByStage_predL, avg_ep_testL, avg_ep_predL = [], [], [], []
        print('fileIDandClassifierID = ' + str(fileIDandClassifierID))
        fileID = fileIDandClassifierID[0]
        predictionTargetDataFilePath = params.pickledDir + '/' + params.eegFilePrefix + '.' + fileID + '.pkl'
        print('predictionTargetDataFilePath =', predictionTargetDataFilePath)
        with open(predictionTargetDataFilePath, 'rb') as dataFileHandler:
            (eeg, ch2, stageSeq, timeStamps) = pickle.load(dataFileHandler)
        totalEpochNum = len(stageSeq[lstm_length - 1:])
        params.markovOrderForPrediction = 0
        (y_test, y_pred) = classifySequentially(params, paramID,
                                                params.pickledDir,
                                                fileIDandClassifierID)
        y_testL.append(y_test)
        y_predL.append(y_pred)

    allFileIDandClassifierIDsL.append(allFileIDandClassifierIDs)
    y_predLL.append(y_predL)

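# NOTE: y_testL holds the test labels from the last classifier block only, while
# y_predLL accumulates one y_predL per classifier block (the ground truth is
# presumably identical across blocks).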
with open('../data/pickled/y_test_and_y_pred_for_graphs.pkl', 'wb') as f:
    pickle.dump((allFileIDandClassifierIDsL, y_testL, y_predLL), f)
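
Since the pickle.dump call above fixes the tuple layout of y_test_and_y_pred_for_graphs.pkl, reading it back for plotting is straightforward. A minimal sketch, assuming the lists are parallel as constructed above (the accuracy computation is only an illustration):

import pickle
import numpy as np

with open('../data/pickled/y_test_and_y_pred_for_graphs.pkl', 'rb') as f:
    allFileIDandClassifierIDsL, y_testL, y_predLL = pickle.load(f)

# One y_predL per classifier block; y_testL comes from the last block only.
for blockIdx, y_predL in enumerate(y_predLL):
    for (fileID, classifierID), y_test, y_pred in zip(
            allFileIDandClassifierIDsL[blockIdx], y_testL, y_predL):
        y_test, y_pred = np.asarray(y_test), np.asarray(y_pred)
        print(f'{fileID} / {classifierID}: accuracy = {(y_test == y_pred).mean():.3f}')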
Example #3
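# Assumed imports for this example (not shown in the original snippet):
# numpy as np, pickle, and the project helpers used below, e.g. ParameterSetup,
# readTrainFileIDsUsedForTraining, getFilesNotUsedInTrain, classifySequentially,
# y2sensitivity, y2confusionMat, printConfusionMat, mathewsCorrelationCoefficient,
# multiClassMCC, writePredictionResults, printMetadata, saveStatistics, meanStatistics.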
def test_by_classifierID(params, datasetType, classifierID):
    paramDir = params.pickledDir
    testFileDir = params.pickledDir
    stageLabels = params.stageLabels4evaluation
    labelNum = len(stageLabels)
    resultFileDescription = ''
    paramID = 0
    markovOrder = 0
    fileTripletL = readTrainFileIDsUsedForTraining(params, classifierID)
    train_fileIDs = [fileID for _, _, fileID in fileTripletL]
    # print('# train_fileIDs =', train_fileIDs)
    params_test = params   # NOTE: aliases params rather than copying; changes to params_test also affect params
    if datasetType == 'test':
        params_test.pickledDir = testFileDir
    test_fileTripletL = getFilesNotUsedInTrain(params_test, train_fileIDs)
    testFileIDandClassifierIDs = [(test_fileID, classifierID) for _, _, test_fileID in test_fileTripletL]
    fileNum = len(testFileIDandClassifierIDs)
    print('# testFileIDandClassifierIDs =', testFileIDandClassifierIDs)
    # totalConfusionMat = np.zeros((labelNum, labelNum))

    # for paramID in range(len(classifierParams)):
    #     print('classifier parameter = ' + str(classifierParams[paramID]))
    sensitivityL = [[] for _ in range(labelNum)]
    specificityL = [[] for _ in range(labelNum)]
    accuracyL = [[] for _ in range(labelNum)]
    precisionL = [[] for _ in range(labelNum)]
    f1scoreL = [[] for _ in range(labelNum)]
    mccL = [[] for _ in range(labelNum)]
    mcMCCL = []
    mcAccuracyL = []
    confusionMatL = []

    for testFileIDandClassifierID in testFileIDandClassifierIDs:

        print('testFileIDandClassifierID = ' + str(testFileIDandClassifierID))
        params_for_classifier = ParameterSetup(paramFileName='params.'+classifierID+'.json')
        params_for_classifier.markovOrderForPrediction = markovOrder
        (y_test, y_pred) = classifySequentially(params_for_classifier, paramID, paramDir, testFileIDandClassifierID)

        print('y_test =', y_test)
        print('type(y_test) =', type(y_test))
        y_test = np.array(['W' if elem == 'RW' else elem for elem in y_test])
        print('after replace: y_test =', y_test)
        print('after replace: type(y_test) =', type(y_test))

        # Ignore the '?' labels that appear at the beginning of y_pred,
        # i.e. the effect of: i = 0; while y_pred[i] == '?': i += 1
        if params.classifierType == 'deep':
            i = params.torch_lstm_length - 1   # the LSTM cannot predict the first (torch_lstm_length - 1) elements
        else:
            i = 0
        print('for classifier ', testFileIDandClassifierID, ', first ', i, ' elements are removed.', sep='')

        y_test, y_pred = y_test[i:], y_pred[i:]

        (stageLabels, sensitivity, specificity, accuracy, precision, f1score) = y2sensitivity(y_test, y_pred)
        (stageLabels4confusionMat, confusionMat) = y2confusionMat(y_test, y_pred, params.stageLabels4evaluation)
        printConfusionMat(stageLabels4confusionMat, confusionMat)
        # totalConfusionMat = totalConfusionMat + confusionMat

        # print('y_test = ' + str(y_test[:50]))
        # print('y_pred = ' + str(y_pred[:50]))
        y_length = y_pred.shape[0]
        print('stageLabels =', stageLabels)
        print('labelNum = ' + str(labelNum))
        for labelID in range(labelNum):
            targetLabel = stageLabels[labelID]
            sensitivityL[labelID].append(sensitivity[labelID])
            specificityL[labelID].append(specificity[labelID])
            accuracyL[labelID].append(accuracy[labelID])
            precisionL[labelID].append(precision[labelID])
            f1scoreL[labelID].append(f1score[labelID])
            mcc = mathewsCorrelationCoefficient(stageLabels4confusionMat, confusionMat, targetLabel)
            mccL[labelID].append(mcc)
            print(f'  targetLabel = {targetLabel}, sensitivity = {sensitivity[labelID]:.3f}, specificity = {specificity[labelID]:.3f}, accuracy = {accuracy[labelID]:.3f}, precision = {precision[labelID]:.3f}')
            print(f'     mcc for {targetLabel} = {mcc:.5f}')
        mcMCCL.append(multiClassMCC(confusionMat))
        print(f'  multi-class mcc = {mcMCCL[-1]:.5f}')
        mcAccuracyL.append(sum(y_test == y_pred) / len(y_test))
        print(f'  multi-class accuracy = {mcAccuracyL[-1]:.5f}')
        confusionMatL.append(confusionMat)
        print('')
        writePredictionResults(testFileIDandClassifierID, params, y_test, y_pred, resultFileDescription)

    suffix = '.test' if datasetType == 'test' else ''
    with open(paramDir + '/test_result.' + classifierID + suffix + '.pkl', 'wb') as f:
        pickle.dump((testFileIDandClassifierIDs, sensitivityL, specificityL, accuracyL, precisionL, f1scoreL, mccL, mcMCCL, mcAccuracyL, confusionMatL, stageLabels, fileNum, labelNum), f)

    #-----
    # show the summary (average) of the result
    print('Summary for classifierID ' + classifierID + ':')
    printMetadata(params)
    saveStatistics(params.pickledDir, classifierID, testFileIDandClassifierIDs, sensitivityL, specificityL, accuracyL, precisionL, f1scoreL, mccL, mcMCCL, mcAccuracyL, confusionMatL, stageLabels, fileNum, labelNum, datasetType)
    # print('ch2TimeFrameNum = ' + str(params.ch2TimeFrameNum))
    # print('binWidth4freqHisto = ' + str(params.binWidth4freqHisto))
    sensitivityMeans, specificityMeans, accuracyMeans, precisionMean, f1scoreMean, mccMeans, mcMCCMean, mcAccuracyMean = meanStatistics(sensitivityL, specificityL, accuracyL, precisionL, f1scoreL, mccL, mcMCCL, mcAccuracyL, stageLabels, labelNum, fileNum)
    # sensitivity_by_classifier_L.append(sensitivityMeans)
    # specificity_by_classifier_L.append(specificityMeans)
    # accuracy_by_classifier_L.append(accuracyMeans)
    # precision_by_classifier_L.append(precisionMean)
    # measures_by_classifier_L.append([sensitivityMeans, specificityMeans, accuracyMeans, precisionMean, f1scoreMean, mccMeans, mcMCCMean, mcAccuracyMean])
    return [sensitivityMeans, specificityMeans, accuracyMeans, precisionMean, f1scoreMean, mccMeans, mcMCCMean, mcAccuracyMean]
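
A possible driver for test_by_classifierID; the ParameterSetup() default construction and the classifier IDs below are assumptions made for illustration:

# Hypothetical usage; classifier IDs are placeholders.
params = ParameterSetup()   # assumed default constructor
measures_by_classifier_L = []
for classifierID in ['clf-0001', 'clf-0002']:
    measures = test_by_classifierID(params, 'test', classifierID)
    measures_by_classifier_L.append(measures)
    sensitivityMeans = measures[0]   # per-stage mean sensitivities
    print(classifierID, ': mean sensitivity per stage =', sensitivityMeans)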