train_fileTripletL = readTrainFileIDsUsedForTraining(params, classifierID)
train_fileIDs = [train_fileID for _, _, train_fileID in train_fileTripletL]
# print('# train_fileIDs =', train_fileIDs)
test_fileTripletL = getFilesNotUsedInTrain(params, train_fileIDs)
all_fileTripletL = test_fileTripletL
# derive the prediction-target file IDs from the triplets selected above
fileIDs = [fileID for _, _, fileID in all_fileTripletL]
allFileIDandClassifierIDs = [(fileID, classifierID) for fileID in fileIDs]
print('# allFileIDandClassifierIDs =', allFileIDandClassifierIDs)
y_testL, y_predL = [], []
for fileCnt, fileIDandClassifierID in enumerate(allFileIDandClassifierIDs):
    epochNumByStage_testL, epochNumByStage_predL, avg_ep_testL, avg_ep_predL = [], [], [], []
    print('fileIDandClassifierID = ' + str(fileIDandClassifierID))
    fileID = fileIDandClassifierID[0]
    predictionTargetDataFilePath = params.pickledDir + '/' + params.eegFilePrefix + '.' + fileID + '.pkl'
    print('predictionTargetDataFilePath =', predictionTargetDataFilePath)
    with open(predictionTargetDataFilePath, 'rb') as dataFileHandler:
        (eeg, ch2, stageSeq, timeStamps) = pickle.load(dataFileHandler)
    totalEpochNum = len(stageSeq[lstm_length - 1:])
    params.markovOrderForPrediction = 0
    (y_test, y_pred) = classifySequentially(params, paramID, params.pickledDir, fileIDandClassifierID)
    y_testL.append(y_test)
    y_predL.append(y_pred)
allFileIDandClassifierIDsL.append(allFileIDandClassifierIDs)
y_predLL.append(y_predL)
# note: y_testL is dumped flat, while the predictions are nested one level deeper in y_predLL
with open('../data/pickled/y_test_and_y_pred_for_graphs.pkl', 'wb') as f:
    pickle.dump((allFileIDandClassifierIDsL, y_testL, y_predLL), f)
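# A minimal reload sketch (illustration only; the helper name is hypothetical,
# not part of the original pipeline). It reads the tuple back for graphing in
# the same order it was dumped above.
def _load_results_for_graphs(path='../data/pickled/y_test_and_y_pred_for_graphs.pkl'):
    import pickle
    with open(path, 'rb') as f:
        allFileIDandClassifierIDsL, y_testL, y_predLL = pickle.load(f)
    return allFileIDandClassifierIDsL, y_testL, y_predLL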
testFileIDandClassifierIDs = [(test_fileID, classifierID) for test_fileID in test_fileIDs]
# print('# testFileIDandClassifierIDs =', testFileIDandClassifierIDs)
y_test_byFile, y_pred_byFile = [], []
for testFileCnt, testFileIDandClassifierID in enumerate(testFileIDandClassifierIDs):
    epochNumByStage_testL, epochNumByStage_predL, avg_ep_testL, avg_ep_predL = [], [], [], []
    print('testFileIDandClassifierID = ' + str(testFileIDandClassifierID))
    testFileID = testFileIDandClassifierID[0]
    # print('testFileIDandClassifierID[0] =', testFileIDandClassifierID[0])
    predictionTargetDataFilePath = params.pickledDir + '/' + params.eegFilePrefix + '.' + testFileID + '.pkl'
    print('predictionTargetDataFilePath =', predictionTargetDataFilePath)
    with open(predictionTargetDataFilePath, 'rb') as dataFileHandler:
        (eeg, ch2, stageSeq, timeStamps) = pickle.load(dataFileHandler)
    totalEpochNum = len(stageSeq[lstm_length - 1:])
    # load the parameter set saved with this classifier and set the Markov order used at prediction time
    params_for_classifier = ParameterSetup(paramFileName='params.' + classifierID + '.json')
    params_for_classifier.markovOrderForPrediction = markovOrder
    (y_test, y_pred) = classifySequentially(params_for_classifier, paramID, params.pickledDir, testFileIDandClassifierID)
    y_test_byFile.append(y_test)
    y_pred_byFile.append(y_pred)
    # loopCnt += 1
    # if loopCnt > 2:
    #     break
testFileIDandClassifierIDs_byBlock.append(testFileIDandClassifierIDs)
y_test_byBlock.append(y_test_byFile)
y_pred_byBlock.append(y_pred_byFile)
# if loopCnt > 2:
#     break
testFileIDandClassifierIDs_byMethod.append(testFileIDandClassifierIDs_byBlock)
y_test_byMethod.append(y_test_byBlock)
y_pred_byMethod.append(y_pred_byBlock)
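# Illustration only (the helper name is hypothetical): once the enclosing loops
# finish, results are indexed as y_pred_byMethod[methodIdx][blockIdx][fileIdx],
# one label array per recording. Pooling everything for aggregate statistics
# could then look like this:
def _pool_predictions(y_pred_byMethod):
    import numpy as np
    return np.concatenate([y_pred
                           for y_pred_byBlock in y_pred_byMethod
                           for y_pred_byFile in y_pred_byBlock
                           for y_pred in y_pred_byFile])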
specificityMat = np.zeros((orderNum, labelNum), dtype=float)
accuracyMat = np.zeros((orderNum, labelNum), dtype=float)
precisions = np.zeros(orderNum, dtype=float)
for markovOrder in range(orderMin, orderMax + 1):
    print('markovOrder = ' + str(markovOrder))
    params.pastStageLookUpNum = markovOrder
    for fileID in fileIDs:
        print(' extracting features for fileID = ' + str(fileID))
        featureExtraction(params, fileID)
    for fileID in fileIDs:
        print(' training and testing for fileID = ' + str(fileID))
        trainClassifier(params, paramID, fileID)
        (y_test, y_pred) = classifySequentially(params, paramID, paramDir, fileID)
        (stageLabels, sensitivity, specificity, accuracy) = y2sensitivity(y_test, y_pred)
        # (stageLabels4confusionMat, confusionMat) = y2confusionMat(y_test, y_pred)
        # printConfusionMat(stageLabels4confusionMat, confusionMat)
        y_matching = (y_test == y_pred)
        correctNum = sum(y_matching)
        # print('y_test = ' + str(y_test[:50]))
        # print('y_pred = ' + str(y_pred[:50]))
        y_length = y_pred.shape[0]
        precision = correctNum / y_length  # overall fraction of correctly predicted epochs
        for labelID in range(labelNum):
            print(' stageLabel = ' + stageLabels[labelID] +
                  ', sensitivity = ' + "{0:.3f}".format(sensitivity[labelID]) +
                  ', specificity = ' + "{0:.3f}".format(specificity[labelID]) +
                  ', accuracy = ' + "{0:.3f}".format(accuracy[labelID]))
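        # Hypothetical bookkeeping (not shown in this fragment): the matrices
        # declared above suggest that per-order results are recorded with row
        # index (markovOrder - orderMin), e.g.
        #     specificityMat[markovOrder - orderMin, labelID] = specificity[labelID]
        #     accuracyMat[markovOrder - orderMin, labelID] = accuracy[labelID]
        #     precisions[markovOrder - orderMin] = precision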
def test_by_classifierID(params, datasetType, classifierID):
    paramDir = params.pickledDir
    testFileDir = params.pickledDir
    stageLabels = params.stageLabels4evaluation
    labelNum = len(stageLabels)
    resultFileDescription = ''
    paramID = 0
    markovOrder = 0
    fileTripletL = readTrainFileIDsUsedForTraining(params, classifierID)
    train_fileIDs = [fileID for _, _, fileID in fileTripletL]
    # print('# train_fileIDs =', train_fileIDs)
    params_test = params  # note: aliases params (no copy), so the assignment below mutates params as well
    if datasetType == 'test':
        params_test.pickledDir = testFileDir
    test_fileTripletL = getFilesNotUsedInTrain(params_test, train_fileIDs)
    testFileIDandClassifierIDs = [(test_fileID, classifierID) for _, _, test_fileID in test_fileTripletL]
    fileNum = len(testFileIDandClassifierIDs)
    print('# testFileIDandClassifierIDs =', testFileIDandClassifierIDs)
    # totalConfusionMat = np.zeros((labelNum, labelNum))
    # for paramID in range(len(classifierParams)):
    #     print('classifier parameter = ' + str(classifierParams[paramID]))
    sensitivityL = [[] for _ in range(labelNum)]
    specificityL = [[] for _ in range(labelNum)]
    accuracyL = [[] for _ in range(labelNum)]
    precisionL = [[] for _ in range(labelNum)]
    f1scoreL = [[] for _ in range(labelNum)]
    mccL = [[] for _ in range(labelNum)]
    mcMCCL = []
    mcAccuracyL = []
    confusionMatL = []
    for testFileIDandClassifierID in testFileIDandClassifierIDs:
        print('testFileIDandClassifierID = ' + str(testFileIDandClassifierID))
        params_for_classifier = ParameterSetup(paramFileName='params.' + classifierID + '.json')
        params_for_classifier.markovOrderForPrediction = markovOrder
        (y_test, y_pred) = classifySequentially(params_for_classifier, paramID, paramDir, testFileIDandClassifierID)
        print('y_test =', y_test)
        print('type(y_test) =', type(y_test))
        # merge the 'RW' label into 'W' before evaluation
        y_test = np.array(['W' if elem == 'RW' else elem for elem in y_test])
        print('after replace: y_test =', y_test)
        print('after replace: type(y_test) =', type(y_test))
        # ignore ?'s in the beginning produced by
        # i = 0
        # while y_pred[i] == '?':
        #     i += 1
        if params.classifierType == 'deep':
            i = params.torch_lstm_length - 1  # remove from all classifiers because the LSTM cannot predict the first (torch_lstm_length - 1) elements.
        else:
            i = 0
        print('for classifier ', testFileIDandClassifierID, ', first ', i, ' elements are removed.', sep='')
        y_test, y_pred = y_test[i:], y_pred[i:]
        (stageLabels, sensitivity, specificity, accuracy, precision, f1score) = y2sensitivity(y_test, y_pred)
        (stageLabels4confusionMat, confusionMat) = y2confusionMat(y_test, y_pred, params.stageLabels4evaluation)
        printConfusionMat(stageLabels4confusionMat, confusionMat)
        # totalConfusionMat = totalConfusionMat + confusionMat
        # print('y_test = ' + str(y_test[:50]))
        # print('y_pred = ' + str(y_pred[:50]))
        y_length = y_pred.shape[0]
        print('stageLabels =', stageLabels)
        print('labelNum = ' + str(labelNum))
        for labelID in range(labelNum):
            targetLabel = stageLabels[labelID]
            sensitivityL[labelID].append(sensitivity[labelID])
            specificityL[labelID].append(specificity[labelID])
            accuracyL[labelID].append(accuracy[labelID])
            precisionL[labelID].append(precision[labelID])
            f1scoreL[labelID].append(f1score[labelID])
            mcc = mathewsCorrelationCoefficient(stageLabels4confusionMat, confusionMat, targetLabel)
            mccL[labelID].append(mcc)
            print(' targetLabel = ' + targetLabel +
                  ', sensitivity = ' + "{0:.3f}".format(sensitivity[labelID]) +
                  ', specificity = ' + "{0:.3f}".format(specificity[labelID]) +
                  ', accuracy = ' + "{0:.3f}".format(accuracy[labelID]) +
                  ', precision = ' + "{0:.3f}".format(precision[labelID]))
            print(' mcc for ' + targetLabel + ' = ' + "{0:.5f}".format(mcc))
        mcMCCL.append(multiClassMCC(confusionMat))
        print(' multi-class mcc = ' + "{0:.5f}".format(mcMCCL[-1]))
        mcAccuracyL.append(sum(y_test == y_pred) / len(y_test))
        print(' multi-class accuracy = ' + "{0:.5f}".format(mcAccuracyL[-1]))
        confusionMatL.append(confusionMat)
        print('')
        writePredictionResults(testFileIDandClassifierID, params, y_test, y_pred, resultFileDescription)
    if datasetType == 'test':
        resultFilePath = paramDir + '/test_result.' + classifierID + '.test.pkl'
    else:
        resultFilePath = paramDir + '/test_result.' + classifierID + '.pkl'
    with open(resultFilePath, 'wb') as f:
        pickle.dump((testFileIDandClassifierIDs, sensitivityL, specificityL, accuracyL,
                     precisionL, f1scoreL, mccL, mcMCCL, mcAccuracyL, confusionMatL,
                     stageLabels, fileNum, labelNum), f)
    #-----
    # show the summary (average) of the result
    print('Summary for classifierID ' + classifierID + ':')
    printMetadata(params)
    saveStatistics(params.pickledDir, classifierID, testFileIDandClassifierIDs, sensitivityL,
                   specificityL, accuracyL, precisionL, f1scoreL, mccL, mcMCCL, mcAccuracyL,
                   confusionMatL, stageLabels, fileNum, labelNum, datasetType)
    # print('ch2TimeFrameNum = ' + str(params.ch2TimeFrameNum))
    # print('binWidth4freqHisto = ' + str(params.binWidth4freqHisto))
    sensitivityMeans, specificityMeans, accuracyMeans, precisionMean, f1scoreMean, mccMeans, mcMCCMean, mcAccuracyMean = meanStatistics(
        sensitivityL, specificityL, accuracyL, precisionL, f1scoreL, mccL, mcMCCL, mcAccuracyL, stageLabels, labelNum, fileNum)
    # sensitivity_by_classifier_L.append(sensitivityMeans)
    # specificity_by_classifier_L.append(specificityMeans)
    # accuracy_by_classifier_L.append(accuracyMeans)
    # precision_by_classifier_L.append(precisionMean)
    # measures_by_classifier_L.append([sensitivityMeans, specificityMeans, accuracyMeans, precisionMean, f1scoreMean, mccMeans, mcMCCMean, mcAccuracyMean])
    return [sensitivityMeans, specificityMeans, accuracyMeans, precisionMean, f1scoreMean, mccMeans, mcMCCMean, mcAccuracyMean]
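# A minimal usage sketch, assuming ParameterSetup() can be constructed with its
# defaults; the classifierID string below is hypothetical.
if __name__ == '__main__':
    params = ParameterSetup()
    (sensitivityMeans, specificityMeans, accuracyMeans, precisionMean,
     f1scoreMean, mccMeans, mcMCCMean, mcAccuracyMean) = test_by_classifierID(
         params, datasetType='test', classifierID='exampleClassifierID')
    print('mean multi-class MCC =', mcMCCMean)
    print('mean multi-class accuracy =', mcAccuracyMean)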