def __init__(self):
    params = ParameterSetup()
    pickledDir = params.pickledDir
    self.fileIDs_L = []
    try:
        outlierHandler = open(pickledDir + '/' + 'outliers.txt', 'r')
        for line in outlierHandler:
            elems = line.split(' ')
            parts = elems[2].split('-')
            fileID = parts[2] + '-' + parts[3] + '-' + parts[4]
            self.fileIDs_L.append(fileID)
    except EnvironmentError:
        pass
def __init__(self):
    params = ParameterSetup()
    pickledDir = params.pickledDir
    self.fileIDs_L = []
    try:
        sd_threshHandler = open(pickledDir + '/' + 'sd_thresh.json', 'r')
        d = json.load(sd_threshHandler)
        self.W_amount_mean = d["W_amount_mean"]
        self.W_amount_2sd = d["W_amount_2sd"]
        self.R_amount_mean = d["R_amount_mean"]
        self.R_amount_2sd = d["R_amount_2sd"]
        self.S_amount_mean = d["S_amount_mean"]
        self.S_amount_2sd = d["S_amount_2sd"]
        self.W_episode_num_mean = d["W_episode_num_mean"]
        self.W_episode_num_2sd = d["W_episode_num_2sd"]
        self.R_episode_num_mean = d["R_episode_num_mean"]
        self.R_episode_num_2sd = d["R_episode_num_2sd"]
        self.S_episode_num_mean = d["S_episode_num_mean"]
        self.S_episode_num_2sd = d["S_episode_num_2sd"]
        self.W_duration_mean = d["W_duration_mean"]
        self.W_duration_2sd = d["W_duration_2sd"]
        self.R_duration_mean = d["R_duration_mean"]
        self.R_duration_2sd = d["R_duration_2sd"]
        self.S_duration_mean = d["S_duration_mean"]
        self.S_duration_2sd = d["S_duration_2sd"]
    except EnvironmentError:
        # fall back to zeros when sd_thresh.json is unavailable
        self.W_amount_mean = 0
        self.W_amount_2sd = 0
        self.R_amount_mean = 0
        self.R_amount_2sd = 0
        self.S_amount_mean = 0
        self.S_amount_2sd = 0
        self.W_episode_num_mean = 0
        self.W_episode_num_2sd = 0
        self.R_episode_num_mean = 0
        self.R_episode_num_2sd = 0
        self.S_episode_num_mean = 0
        self.S_episode_num_2sd = 0
        self.W_duration_mean = 0
        self.W_duration_2sd = 0
        self.R_duration_mean = 0
        self.R_duration_2sd = 0
        self.S_duration_mean = 0
        self.S_duration_2sd = 0
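# For reference: a hedged sketch of the sd_thresh.json layout the loader above
# assumes -- per-stage (W/R/S) mean and 2-SD values for amount, episode number,
# and duration. The zeros below are illustrative placeholders, not real
# statistics:
#
# {
#     "W_amount_mean": 0.0, "W_amount_2sd": 0.0,
#     "R_amount_mean": 0.0, "R_amount_2sd": 0.0,
#     "S_amount_mean": 0.0, "S_amount_2sd": 0.0,
#     "W_episode_num_mean": 0.0, "W_episode_num_2sd": 0.0,
#     "R_episode_num_mean": 0.0, "R_episode_num_2sd": 0.0,
#     "S_episode_num_mean": 0.0, "S_episode_num_2sd": 0.0,
#     "W_duration_mean": 0.0, "W_duration_2sd": 0.0,
#     "R_duration_mean": 0.0, "R_duration_2sd": 0.0,
#     "S_duration_mean": 0.0, "S_duration_2sd": 0.0
# }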
def getFeatures(self, eegSegment, timeStampSegment=0, time_step=0,
                local_mu=0, local_sigma=0):
    params = ParameterSetup()
    widths = params.waveletWidths
    waveletTransformed = signal.cwt(eegSegment, signal.ricker, widths)
    inputTensor = np.array([waveletTransformed])
    featureDownSampler = FeatureDownSampler()
    waveletTransformedDownsampled = featureDownSampler.downSample(
        inputTensor, self.outputDim)[0]
    return waveletTransformedDownsampled
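# Hedged sketch of the transform above in isolation. The widths and segment
# length are illustrative stand-ins for params.waveletWidths and a 10 s window
# at 128 Hz; scipy.signal.cwt/ricker follow the (older) SciPy API that this
# code base uses.
def _demo_wavelet_features():
    import numpy as np
    from scipy import signal
    eegSegment = np.random.randn(1280)  # e.g. 10 s sampled at 128 Hz
    widths = np.arange(1, 31)           # assumed stand-in for waveletWidths
    waveletTransformed = signal.cwt(eegSegment, signal.ricker, widths)
    # shape: (len(widths), len(eegSegment)); FeatureDownSampler then
    # compresses the time axis to self.outputDim columns
    return waveletTransformed.shape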
def setStagePredictor(self, classifierID):
    paramFileName = 'params.' + str(classifierID) + '.json'
    finalClassifierDir = self.params.finalClassifierDir
    paramsForNetworkStructure = ParameterSetup(
        paramDir=finalClassifierDir, paramFileName=paramFileName)
    classifier = DeepClassifier(
        self.classLabels,
        classifierID=classifierID,
        paramsForDirectorySetup=self.params,
        paramsForNetworkStructure=paramsForNetworkStructure)
    model_path = finalClassifierDir + '/weights.' + str(classifierID) + '.pkl'
    print('model_path =', model_path)
    classifier.load_weights(model_path)
    self.stagePredictor = StagePredictor(paramsForNetworkStructure,
                                         self.extractor, classifier,
                                         finalClassifierDir, classifierID,
                                         self.params.markovOrderForPrediction)
def test_by_classifierID(params, datasetType, classifierID):
    paramDir = params.pickledDir
    testFileDir = params.pickledDir
    stageLabels = params.stageLabels4evaluation
    labelNum = len(stageLabels)
    resultFileDescription = ''
    paramID = 0
    markovOrder = 0
    fileTripletL = readTrainFileIDsUsedForTraining(params, classifierID)
    train_fileIDs = [fileID for _, _, fileID in fileTripletL]
    params_test = params    # note: aliases (does not copy) params
    if datasetType == 'test':
        params_test.pickledDir = testFileDir
    test_fileTripletL = getFilesNotUsedInTrain(params_test, train_fileIDs)
    testFileIDandClassifierIDs = [(test_fileID, classifierID)
                                  for _, _, test_fileID in test_fileTripletL]
    fileNum = len(testFileIDandClassifierIDs)
    print('# testFileIDandClassifierIDs =', testFileIDandClassifierIDs)
    sensitivityL = [[] for _ in range(labelNum)]
    specificityL = [[] for _ in range(labelNum)]
    accuracyL = [[] for _ in range(labelNum)]
    precisionL = [[] for _ in range(labelNum)]
    f1scoreL = [[] for _ in range(labelNum)]
    mccL = [[] for _ in range(labelNum)]
    mcMCCL = []
    mcAccuracyL = []
    confusionMatL = []
    for testFileIDandClassifierID in testFileIDandClassifierIDs:
        print('testFileIDandClassifierID = ' + str(testFileIDandClassifierID))
        params_for_classifier = ParameterSetup(
            paramFileName='params.' + classifierID + '.json')
        params_for_classifier.markovOrderForPrediction = markovOrder
        (y_test, y_pred) = classifySequentially(params_for_classifier, paramID,
                                                paramDir,
                                                testFileIDandClassifierID)
        # merge the RW label into W before evaluation
        y_test = np.array(['W' if elem == 'RW' else elem for elem in y_test])
        # ignore the leading elements, because an LSTM cannot predict the
        # first (torch_lstm_length - 1) windows
        if params.classifierType == 'deep':
            i = params.torch_lstm_length - 1
        else:
            i = 0
        print('for classifier ', testFileIDandClassifierID, ', first ', i,
              ' elements are removed.', sep='')
        y_test, y_pred = y_test[i:], y_pred[i:]
        (stageLabels, sensitivity, specificity, accuracy, precision,
         f1score) = y2sensitivity(y_test, y_pred)
        (stageLabels4confusionMat, confusionMat) = y2confusionMat(
            y_test, y_pred, params.stageLabels4evaluation)
        printConfusionMat(stageLabels4confusionMat, confusionMat)
        y_length = y_pred.shape[0]
        print('stageLabels =', stageLabels)
        print('labelNum = ' + str(labelNum))
        for labelID in range(labelNum):
            targetLabel = stageLabels[labelID]
            sensitivityL[labelID].append(sensitivity[labelID])
            specificityL[labelID].append(specificity[labelID])
            accuracyL[labelID].append(accuracy[labelID])
            precisionL[labelID].append(precision[labelID])
            f1scoreL[labelID].append(f1score[labelID])
            mcc = mathewsCorrelationCoefficient(stageLabels4confusionMat,
                                                confusionMat, targetLabel)
            mccL[labelID].append(mcc)
            print('  targetLabel = ' + targetLabel +
                  ', sensitivity = ' + "{0:.3f}".format(sensitivity[labelID]) +
                  ', specificity = ' + "{0:.3f}".format(specificity[labelID]) +
                  ', accuracy = ' + "{0:.3f}".format(accuracy[labelID]) +
                  ', precision = ' + "{0:.3f}".format(precision[labelID]))
            print('  mcc for ' + targetLabel + ' = ' + "{0:.5f}".format(mcc))
        mcMCCL.append(multiClassMCC(confusionMat))
        print('  multi-class mcc = ' + "{0:.5f}".format(mcMCCL[-1]))
        mcAccuracyL.append(sum(y_test == y_pred) / len(y_test))
        print('  multi-class accuracy = ' + "{0:.5f}".format(mcAccuracyL[-1]))
        confusionMatL.append(confusionMat)
        print('')
        writePredictionResults(testFileIDandClassifierID, params, y_test,
                               y_pred, resultFileDescription)
    if datasetType == 'test':
        f = open(paramDir + '/test_result.' + classifierID + '.test.pkl', 'wb')
    else:
        f = open(paramDir + '/test_result.' + classifierID + '.pkl', 'wb')
    pickle.dump((testFileIDandClassifierIDs, sensitivityL, specificityL,
                 accuracyL, precisionL, f1scoreL, mccL, mcMCCL, mcAccuracyL,
                 confusionMatL, stageLabels, fileNum, labelNum), f)
    f.close()
    #-----
    # show the summary (average) of the result
    print('Summary for classifierID ' + classifierID + ':')
    printMetadata(params)
    saveStatistics(params.pickledDir, classifierID, testFileIDandClassifierIDs,
                   sensitivityL, specificityL, accuracyL, precisionL, f1scoreL,
                   mccL, mcMCCL, mcAccuracyL, confusionMatL, stageLabels,
                   fileNum, labelNum, datasetType)
    (sensitivityMeans, specificityMeans, accuracyMeans, precisionMean,
     f1scoreMean, mccMeans, mcMCCMean, mcAccuracyMean) = meanStatistics(
        sensitivityL, specificityL, accuracyL, precisionL, f1scoreL, mccL,
        mcMCCL, mcAccuracyL, stageLabels, labelNum, fileNum)
    return [sensitivityMeans, specificityMeans, accuracyMeans, precisionMean,
            f1scoreMean, mccMeans, mcMCCMean, mcAccuracyMean]
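# Hedged sketch of the multi-class MCC computed from a confusion matrix
# (Gorodkin's generalization). The project's multiClassMCC may differ in
# detail; treat this as a reference formula, not the actual implementation.
def _demo_multi_class_mcc(confusionMat):
    import numpy as np
    C = np.asarray(confusionMat, dtype=float)
    t = C.sum(axis=1)   # true counts per class (row sums)
    p = C.sum(axis=0)   # predicted counts per class (column sums)
    c = np.trace(C)     # correctly classified samples
    s = C.sum()         # total samples
    denom = np.sqrt((s ** 2 - (p ** 2).sum()) * (s ** 2 - (t ** 2).sum()))
    return (c * s - (t * p).sum()) / denom if denom != 0 else 0.0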
def __init__(self):
    self.params = ParameterSetup()
import sys
import matplotlib.pyplot as plt
import matplotlib
from functools import reduce
from os.path import splitext
import pickle
import numpy as np
from parameterSetup import ParameterSetup
from evaluationCriteria import labels2ID, getEpisodeLengths
from sequentialPrediction import classifySequentially
from writePredictionResults import writePredictionResults
from fileManagement import readTrainFileIDsUsedForTraining, getFilesNotUsedInTrain
from fileManagement import getAllEEGFiles, fileIDsFromEEGFiles

params = ParameterSetup()
lstm_length = params.torch_lstm_length
paramID, markovOrderL = 0, [0]
print('lstm_length =', lstm_length)
fileIDs = fileIDsFromEEGFiles(getAllEEGFiles(params))
# fileIDs = ['D1798', 'D1803', 'D1811', 'D1818', 'D1831', 'D1799', 'D1804',
#            'D1814', 'D1819', 'D1833', 'D1800', 'D1805', 'D1815', 'D1820',
#            'D1801', 'D1806', 'D1816', 'D1826', 'D1802', 'D1810', 'D1817',
#            'D1827']
classifierIDs = ['Y4HOFA', '5XTKMY']
methodNames = ['UTSN', 'UTSN-L']
allFileIDandClassifierIDsL = []
y_predLL = []
for classifierID, methodName in zip(classifierIDs, methodNames):
    train_fileTripletL = readTrainFileIDsUsedForTraining(params, classifierID)
    train_fileIDs = [train_fileID for _, _, train_fileID in train_fileTripletL]
import numpy as np
import pickle
from parameterSetup import ParameterSetup
from fileManagement import getFileIDs
from evaluationCriteria import y2sensitivity, y2confusionMat, printConfusionMat
from sequentialPrediction import classifySequentially
from featureExtractor import featureExtraction
from classifierTrainer import trainClassifier
from writeResults import writePredictions

orderMin = 0
orderMax = 8
paramID = 0
params = ParameterSetup()
pickledDir = params.pickledDir
paramDir = params.pickledDir
classifierType = params.classifierType
classifierParams = params.classifierParams
eegFilePrefix = 'eegAndStage'
fileIDs = getFileIDs(pickledDir, eegFilePrefix)
print('classifier type = ' + str(classifierType))
print('useEMG = ' + str(params.useEMG))
print('emgTimeFrameNum = ' + str(params.emgTimeFrameNum))
print('binWidth4freqHisto = ' + str(params.binWidth4freqHisto))
stageLabels = params.stageLabels4evaluation
labelNum = len(stageLabels)
def __init__(self):
    self.params = ParameterSetup()
    self.eegDir = self.params.eegDir
    self.featureDir = self.params.featureDir
    self.classifierDir = self.params.classifierDir
    self.extractorType = self.params.extractorType
from ctypes import byref
import numpy as np
import socket
import datetime
import threading
import tqdm
import time
import math
import pickle
from parameterSetup import ParameterSetup

params = ParameterSetup()
pickledDir = params.pickledDir
classifierType = params.classifierType
classifierParams = params.classifierParams
samplingFreq = params.samplingFreq
windowSizeInSec = params.windowSizeInSec
# window size in sample points: at 128 Hz, 1280 sample points = 10 sec
wsizeInTimePoints = samplingFreq * windowSizeInSec
eegFilePrefix = 'eegAndStage'
fileID = 'DBL-NO-D0846'
# fileID = 'DBL-NO-D0858'
# fileID = 'DBL-NO-D0859'
dataFileHandler = open(
    pickledDir + '/' + eegFilePrefix + '.' + fileID + '.pkl', 'rb')
(eeg, emg, stageSeq) = pickle.load(dataFileHandler)
y_test = np.array(stageSeq)
# fileName = '../data/waves/2017-10-05-16-23-02.061887.csv'
fileName = '../data/waves/wave_res.csv'
def __init__(self, recordWaves, extractorType, classifierType, classifierID,
             inputFileID='', offsetWindowID=0, chamberID=-1, samplingFreq=0,
             epochTime=0):
    self.recordWaves = recordWaves
    self.inputFileID = inputFileID
    self.chamberID = chamberID
    # adds 1 because in Sleep Sign Recorder, chambers start from 1
    chamberLabel = '_chamber' + str(self.chamberID + 1)
    self.params = ParameterSetup()
    if samplingFreq == 0:
        self.samplingFreq = self.params.samplingFreq
    else:
        self.samplingFreq = samplingFreq
    # the number of sample points received at once
    if epochTime == 0:
        self.samplePointNum = self.params.windowSizeInSec * self.samplingFreq
    else:
        self.samplePointNum = epochTime * self.samplingFreq
    # frequency of updating the graph (if set to 1, redraws every second)
    self.graphUpdateFreqInHz = self.params.graphUpdateFreqInHz
    # the ratio below should be an integer
    assert self.samplingFreq / self.graphUpdateFreqInHz == np.floor(
        self.samplingFreq / self.graphUpdateFreqInHz)
    self.updateGraph_samplePointNum = int(self.samplingFreq /
                                          self.graphUpdateFreqInHz)
    self.hasGUI = True
    self.graphColors = ['b', 'g']
    self.ylim_max_eeg, self.ylim_max_ch2 = 2.0, 2.0
    self.graph_ylim = [[-self.ylim_max_eeg, self.ylim_max_eeg],
                       [-self.ylim_max_ch2, self.ylim_max_ch2]]
    self.lightPeriodStartTime = self.params.lightPeriodStartTime
    self.sampleID = 0
    self.segmentID = offsetWindowID

    # makes a classifier class
    label4EMG = self.params.label4withoutEMG
    self.showCh2 = self.params.showCh2
    self.useCh2ForReplace = self.params.useCh2ForReplace
    self.minimumCh2Intensity = 0
    self.maximumCh2Intensity = 0
    self.past_eegSegment, self.past_ch2Segment = np.array([]), np.array([])
    classifierFilePrefix = self.params.classifierFilePrefix
    factory = AlgorithmFactory(extractorType)
    print('generating extractor: ')
    self.extractor = factory.generateExtractor()
    self.classLabels = self.params.sampleClassLabels[:self.params.maximumStageNum]
    self.setStagePredictor(classifierID)
    presentTime = timeFormatting.presentTimeEscaped()
    logFileID = 'classifier.' + presentTime
    if self.chamberID != -1:
        logFileID += chamberLabel
    logFileName = logFileID + '.csv'
    self.logFile = open(self.params.logDir + '/' + logFileName, 'a')

    # connect to an output device
    self.connected2serialClient = False
    self.serialClient, self.connected2serialClient = connect_laser_device()
    self.eeg_till_now = np.zeros((0, ))

    '''
    # prepare for computing Kolmogorov-Smirnov test
    standardMice_L, files_L = readStandardMice(self.params)
    self.statisticalTester = StatisticalTester(standardMice_L)
    self.dSeries_L = []
    self.dMat = readdMat(self.params)
    self.dTensor = readdTensor(self.params)
    self.dTensorSegmentNum = self.dTensor.shape[1]
    self.segmentMax4computingKS = self.dTensorSegmentNum * 4
    self.segmentMax4computingKS = self.dTensorSegmentNum
    print('Computes KS (Kolmogorov-Smirnov) till the input reaches segment',
          self.segmentMax4computingKS)
    '''

    # opens a file for recording waves and prediction results
    if self.inputFileID == '':
        outputFileID = timeFormatting.presentTimeEscaped()
    else:
        outputFileID = self.inputFileID
    if self.chamberID != -1:
        outputFileID += chamberLabel
    waveFileName = outputFileID + '_wave.csv'
    self.ch2_mode = "Video"
    self.ch2_thresh_value = self.params.ch2_thresh_default
    self.eeg_normalize_for_prediction = 1
    self.ch2_normalize_for_prediction = 0
    self.eeg_graph_normalize = 0
    self.ch2_graph_normalize = 0
    self.currentCh2Intensity = 0
    if self.recordWaves:
        self.waveOutputFile = open(
            self.params.waveOutputDir + '/' + waveFileName, 'a')
        self.waveOutputFile_standardized = open(
            self.params.waveOutputDir + '/standardized_' + waveFileName, 'a')
    self.predictionState = 0
    self.one_record = np.zeros((self.samplePointNum, 2))
    self.raw_one_record = np.zeros((self.samplePointNum, 2))
    self.one_record_for_graph = np.zeros((self.samplePointNum, 2))
    self.windowStartTime = ''
    self.y_pred_L = []

    '''
    ksFileID = outputFileID
    if self.chamberID != -1:
        outputFileID += chamberLabel
    ksFileName = ksFileID + '_ks.csv'
    try:
        self.ksOutputFile = open(self.params.ksDir + '/' + ksFileName, 'a')
        outLine = 'segmentID, d, chi^2\n'
        self.writeKS2file(outLine)
    except EnvironmentError:
        pass
    '''

    # makes a file in params.predDir
    if self.inputFileID == '':
        self.predFileID = timeFormatting.presentTimeEscaped()
    else:
        self.predFileID = self.inputFileID
    if self.chamberID != -1:
        self.predFileID += chamberLabel
    print('writes prediction results to ' + self.params.predDir + '/' +
          self.predFileID + '_pred.txt')
    self.predFile = open(
        self.params.predDir + '/' + self.predFileID + '_pred.txt', 'w')
    self.predFileBeforeOverwrite = open(
        self.params.predDir + '/' + self.predFileID +
        '_pred_before_overwrite.txt', 'w')
    self.predFileWithTimeStamps = open(
        self.params.predDir + '/' + self.predFileID +
        '_pred_with_timestamps.txt', 'w')
    self.max_storage_for_standardization = (
        self.samplePointNum * self.params.standardization_max_storage_window_num)
    self.standardizer_eeg = standardizer(self.max_storage_for_standardization)
    self.standardizer_ch2 = standardizer(self.max_storage_for_standardization)
from __future__ import print_function
import sys
import json
from os import listdir
from parameterSetup import ParameterSetup
from classifierTrainer import trainClassifier

args = sys.argv
optionType = args[1] if len(args) > 2 else ''
optionVals = args[2:] if len(args) > 2 else [0]
print('optionType =', optionType, ', optionVals =', optionVals)
pathFilePath = open('path.json')
p = json.load(pathFilePath)
paramDir = p['pathPrefix'] + '/' + p['paramsDir']
outputDir = ''
for paramFileName in listdir(paramDir):
    if paramFileName.startswith('params.') and not paramFileName.endswith('~'):
        print('paramFileName =', paramFileName)
        params = ParameterSetup(paramDir, paramFileName, outputDir)
        trainClassifier(params, outputDir, optionType, optionVals)
import sys
from parameterSetup import ParameterSetup

if __name__ == '__main__':
    args = sys.argv
    classifierIDs = args[1:]
    res = []
    for classifierID in classifierIDs:
        paramFileName = 'params.' + classifierID + '.json'
        params = ParameterSetup(paramFileName=paramFileName)
        stageLabels = params.stageLabels4evaluation
        print('stageLabels =', stageLabels)
        res.append(classifierID + ', ' + str(params.useRawData) + ', ' +
                   str(params.useFreqHisto) + ', ' + str(params.useTime))
    for line in res:
        print(line)
def readAll(self, sys):
    #---------------
    # set up parameters

    # get params shared by programs
    params = ParameterSetup()
    sdFilter = SDFilter()

    # for signal processing
    windowSizeInSec = params.windowSizeInSec  # size of the time window for estimating the state
    samplingFreq = params.samplingFreq        # sampling frequency of the data

    # parameters for using history
    preContextSize = params.preContextSize

    # parameters for making a histogram
    wholeBand = params.wholeBand
    binWidth4freqHisto = params.binWidth4freqHisto  # bin width in the frequency domain for visualizing the spectrum as a histogram

    # for reading data
    classifierDir = params.classifierDir
    classifierName = params.classifierName
    # window size in sample points: at 128 Hz, 1280 sample points = 10 sec
    samplePointNum = samplingFreq * windowSizeInSec
    time_step = 1 / samplingFreq
    binNum4spectrum = round(wholeBand.getBandWidth() / binWidth4freqHisto)
    pickledDir = params.pickledDir

    #----------------
    # compute parameters
    wsizeInTimePoints = samplingFreq * windowSizeInSec

    #---------------
    # read files
    outFiles = listdir(pickledDir)
    self.dirName = sys.argv[1]
    print('self.dirName =', self.dirName)
    if len(sys.argv) > 2:
        pickledDir = self.dataDir + '/' + sys.argv[2]
    dir_stem, dir_extension = splitext(self.dirName)
    if dir_extension != '.rar':
        eegPath = self.dataDir + '/' + self.dirName + '/' + self.eegDir
        print('path =', eegPath)
        files = listdir(eegPath) if isdir(eegPath) else []
        print('files =', files)
        stagePath = self.dataDir + '/' + self.dirName + '/' + self.stageDir
        files4stage = listdir(stagePath) if isdir(stagePath) else []
        for fileFullName in files:
            fileID, file_extension = splitext(fileFullName)
            if file_extension == '.txt' or file_extension == '.csv':
                # skip files that already have a pickled counterpart
                pklExistsFlag = 0
                for outFileName in outFiles:
                    if outFileName == fileID + '.pkl':
                        pklExistsFlag = 1
                        break
                if pklExistsFlag == 0:
                    print('fileFullName = ' + fileFullName)
                    fileName4eeg = fileFullName
                    # find the stage file whose ID starts with the EEG file's ID
                    fileName4stage = ''
                    for fileFullName4stage in files4stage:
                        fileID4stage, _ = splitext(fileFullName4stage)
                        if fileID4stage.startswith(fileID):
                            fileName4stage = fileFullName4stage
                    dirFullName = self.dataDir + '/' + self.dirName
                    eegFilePath = dirFullName + '/' + self.eegDir + '/' + fileName4eeg
                    eeg, emg, timeStamps = self.readEEG(eegFilePath)
                    if fileName4stage == '':
                        print('file ' + fileName4eeg + ' does not have a '
                              'corresponding stage file, so labels all stages by ?.')
                        stageSeq = ['?' for _ in range(len(timeStamps))]
                    else:
                        print('self.dirName = ' + self.dirName +
                              ', fileName4eeg = ' + fileName4eeg +
                              ', fileName4stage = ' + fileName4stage)
                        #---------------
                        # read eeg and stages
                        stageFilePath = dirFullName + '/' + self.stageDir + '/' + fileName4stage
                        stageSeq = self.readStageSeq(stageFilePath)
                    #---------------
                    # write data
                    if sdFilter.isOutlier(eeg):
                        print('file ' + fileID +
                              ' is an outlier in terms of mean or std')
                    else:
                        saveData = (eeg, emg, stageSeq, timeStamps)
                        outpath = open(
                            pickledDir + '/eegAndStage.' + fileID + '.pkl', 'wb')
                        pickle.dump(saveData, outpath)
                        outpath.close()
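# Hedged sketch: loading back one of the pickles written above. The tuple
# layout (eeg, emg, stageSeq, timeStamps) matches the dump in readAll; the
# fileID below is a hypothetical placeholder.
def _demo_load_eeg_and_stage(pickledDir, fileID='EXAMPLE-ID'):
    import pickle
    with open(pickledDir + '/eegAndStage.' + fileID + '.pkl', 'rb') as f:
        eeg, emg, stageSeq, timeStamps = pickle.load(f)
    return eeg.shape, len(stageSeq)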
def __init__(self):
    self.extractorType = 'wavelet-downsampled'
    params = ParameterSetup()
    self.outputDim = params.downsample_outputDim
def __init__(self):
    self.extractorType = 'realFourier-downsampled'
    params = ParameterSetup()
    self.outputDim = params.downsample_outputDim
def __init__(self, extractorType):
    self.extractorType = extractorType
    params = ParameterSetup()
    self.outputDim = params.downsample_outputDim
def __init__(self, client, fileID, recordWaves, channelNum, offsetWindowID,
             sleepTime, sampRate=128, numEpoch=600000, eeg_std=None,
             ch2_std=None, channelOpt=2):
    """
    # Params
    - sampRate (int): sampling rate
    - numEpoch (int): number of epochs to predict
    - eeg_std (float?): fixed standard deviation for the EEG channel, if any
    - ch2_std (float?): fixed standard deviation for ch2, if any

    "float?" denotes an optional type (= float or None).
    """
    self.client = client
    self.recordWaves = recordWaves
    self.channelNum = channelNum
    self.offsetWindowID = offsetWindowID
    self.sleepTime = sleepTime
    self.sampRate = sampRate
    self.numEpoch = numEpoch
    # store the predetermined standard deviations, if given
    self.eeg_std = eeg_std
    self.ch2_std = ch2_std
    self.params = ParameterSetup()
    pickledDir = self.params.pickledDir
    classifierType = self.params.classifierType
    classifierParams = self.params.classifierParams
    samplingFreq = self.params.samplingFreq
    windowSizeInSec = self.params.windowSizeInSec
    self.segmentSizeInTimePoints = self.client.updateGraph_samplePointNum
    eegFilePrefix = 'eegAndStage'
    if fileID.startswith('m'):
        # pick a random pickled file when fileID starts with 'm'
        files_L = listdir(pickledDir)
        shuffle(files_L)
        for fileFullName in files_L:
            if fileFullName.startswith(eegFilePrefix):
                break
    else:
        fileFullName = eegFilePrefix + '.' + fileID + '.pkl'
    print('reading file ' + fileFullName)
    dataFileHandler = open(pickledDir + '/' + fileFullName, 'rb')
    (eeg, ch2, stageSeq, timeStamps) = pickle.load(dataFileHandler)
    self.eeg = eeg
    self.ch2 = ch2
    self.timeStamps = timeStamps
    self.stageSeq = stageSeq
    if self.offsetWindowID > 0:
        offsetSampleNum = self.offsetWindowID * self.segmentSizeInTimePoints
        self.eeg = self.eeg[offsetSampleNum:]
        self.ch2 = self.ch2[offsetSampleNum:]
        self.timeStamps = self.timeStamps[offsetSampleNum:]
        self.stageSeq = self.stageSeq[self.offsetWindowID:]
    self.sLen = len(stageSeq)
    self.eLen = min(eeg.shape[0], self.sLen * samplingFreq * windowSizeInSec)
    presentTime = timeFormatting.presentTimeEscaped()
    fileName = 'daq.' + presentTime + '.csv'
    self.logFile = open(self.params.logDir + '/' + fileName, 'a')
def getFeatures(self, eegSegment, timeStampSegment, time_step):
    #---------------
    # compute power spectrum and sort it
    params = ParameterSetup()
    wholeBand = params.wholeBand
    binWidth4freqHisto = params.binWidth4freqHisto
    binNum4spectrum = round(wholeBand.getBandWidth() / binWidth4freqHisto)
    powerSpect = np.abs(np.fft.fft(eegSegment))**2
    freqs = np.fft.fftfreq(len(powerSpect), d=time_step)
    idx = np.argsort(freqs)
    sortedFreqs = freqs[idx]
    sortedPowerSpect = powerSpect[idx]

    #---------------
    # bin spectrum
    binArray4spectrum = np.linspace(wholeBand.bottom, wholeBand.top,
                                    binNum4spectrum + 1)
    freqs4wholeBand = wholeBand.extractPowerSpectrum(sortedFreqs, sortedFreqs)
    binnedFreqs = np.digitize(freqs4wholeBand, binArray4spectrum, right=False)

    #----------------
    # make a feature vector that contains context windows
    extractedPowerSpect = wholeBand.extractPowerSpectrum(sortedFreqs,
                                                         sortedPowerSpect)

    #----------------
    # extract freqHisto by summing power within each frequency bin
    freqHisto = np.array([], dtype=float)
    for key, items in groupby(zip(binnedFreqs, extractedPowerSpect),
                              lambda i: i[0]):
        itemsA = np.array(list(items))
        powerSum = np.sum(np.array([x for x in itemsA[:, 1]]))
        freqHisto = np.r_[freqHisto, powerSum]

    #----------------
    # add the time elapsed since the light period started as a feature
    timeSinceLight = getTimeDiffInSeconds(self.lightPeriodStartTime,
                                          timeStampSegment[0])
    freqHistoWithTime = np.r_[freqHisto, timeSinceLight]
    return freqHistoWithTime
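# Hedged sketch of the binning step above in isolation: np.digitize assigns
# each frequency to a bin, and power is summed per bin via groupby. The band
# limits and bin width are illustrative stand-ins for params.wholeBand and
# params.binWidth4freqHisto.
def _demo_bin_power_spectrum():
    import numpy as np
    from itertools import groupby
    rng = np.random.default_rng(0)
    freqs = np.linspace(0, 16, 129)      # assumed whole band: 0-16 Hz
    power = rng.random(freqs.shape[0])   # stand-in for the sorted power spectrum
    binEdges = np.linspace(0, 16, 17)    # assumed 1 Hz bin width
    binnedFreqs = np.digitize(freqs, binEdges, right=False)
    freqHisto = np.array([sum(p for _, p in items)
                          for _, items in groupby(zip(binnedFreqs, power),
                                                  lambda i: i[0])])
    return freqHisto  # one summed power value per occupied bin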
import sys
from functools import reduce
from os.path import splitext
import pickle
import numpy as np
from parameterSetup import ParameterSetup
from evaluationCriteria import labels2ID, getEpisodeLengths
from sequentialPrediction import classifySequentially
from writePredictionResults import writePredictionResults
from fileManagement import readTrainFileIDsUsedForTraining, getFileIDsFromRemainingBlocks

# crossValidationID = 'ARS0Q'
# crossValidationID = '6S3BL'
args = sys.argv
crossValidationID = args[1]
params = ParameterSetup()
lstm_length = params.torch_lstm_length
paramID, markovOrder = 0, 0

# cross-validate, i.e. test using files not used for training
outputDir = '../data/pickled'
with open(outputDir + '/crossvalidation_metadata.' + crossValidationID +
          '.pkl', 'rb') as f:
    splitID, classifierIDsByMethod = pickle.load(f)
print('splitID =', splitID)
fileIDsByBlocks = []
with open(outputDir + '/blocks_of_records.' + splitID + '.csv') as f:
    for line in f:
        fileIDsByBlocks.append(line.rstrip().split(','))
def classifySequentially(params, paramID, paramDir, fileIDpair):
    pickledDir = params.pickledDir
    extractorType = params.extractorType
    classifierType = params.classifierType
    classifierParams = params.classifierParams
    samplingFreq = params.samplingFreq
    windowSizeInSec = params.windowSizeInSec
    # window size in sample points: at 128 Hz, 1280 sample points = 10 sec
    wsizeInSamplePointNum = windowSizeInSec * samplingFreq
    timeWindowStrideInSec = params.timeWindowStrideInSec
    timeWindowStrideInSamplePointNum = timeWindowStrideInSec * samplingFreq
    markovOrderForPrediction = params.markovOrderForPrediction
    strideNumInTimeWindow = np.ceil(windowSizeInSec / timeWindowStrideInSec)
    eegFilePrefix = params.eegFilePrefix
    classifierFilePrefix = params.classifierFilePrefix
    numOfConsecutiveWsThatProhibitsR = params.numOfConsecutiveWsThatProhibitsR
    predict_by_batch = params.predict_by_batch
    testFileID = fileIDpair[0]
    classifierID = fileIDpair[1]
    print('in sequentialPrediction, classifierID =', classifierID)
    predictionTargetDataFilePath = (pickledDir + '/' + eegFilePrefix + '.' +
                                    testFileID + '.pkl')
    print('predictionTargetDataFilePath =', predictionTargetDataFilePath)
    dataFileHandler = open(predictionTargetDataFilePath, 'rb')
    (eeg, ch2, stageSeq, timeStamps) = pickle.load(dataFileHandler)
    dataFileHandler.close()
    print('eeg.shape = ' + str(eeg.shape))
    print('len(stageSeq) = ' + str(len(stageSeq)))
    if params.useEMG:
        label4EMG = params.label4withEMG
    else:
        label4EMG = params.label4withoutEMG
    params.classifierName = (classifierFilePrefix + '.' + classifierType +
                             '.' + label4EMG + '.excludedFileID.' + testFileID +
                             '.classifierID.' + classifierID)
    factory = AlgorithmFactory(extractorType)
    extractor = factory.generateExtractor()
    classifierDir = params.classifierDir
    pwd = dirname(abspath(__file__))
    print('# classifierID =', classifierID)
    classLabels = params.sampleClassLabels[:params.maximumStageNum]
    paramFileName = 'params.' + classifierID + '.json'
    params_for_network_structure = ParameterSetup(paramDir=paramDir,
                                                  paramFileName=paramFileName)
    if params.classifierType == 'deep':
        classifier = DeepClassifier(
            classLabels,
            classifierID=classifierID,
            paramsForDirectorySetup=params,
            paramsForNetworkStructure=params_for_network_structure)
        model_path = pwd + '/' + paramDir + '/weights.' + classifierID + '.pkl'
        print('model_path =', model_path)
        classifier.load_weights(model_path)
    else:
        classifierFileName = (params.classifierDir + '/' +
                              params.classifierPrefix + '.' + classifierID +
                              '.pkl')
        classifierFileHandler = open(classifierFileName, 'rb')
        classifier = pickle.load(classifierFileHandler)
        classifierFileHandler.close()
    stagePredictor = StagePredictor(params_for_network_structure, extractor,
                                    classifier, classifierDir, classifierID,
                                    markovOrderForPrediction)
    sLen = len(stageSeq)
    samplePointNum = min(eeg.shape[0], sLen * wsizeInSamplePointNum)
    print('sLen =', sLen)
    print('eeg.shape[0] =', eeg.shape[0])
    print('sLen * wsizeInSamplePointNum =', sLen * wsizeInSamplePointNum)
    print('samplePointNum =', samplePointNum)
    print('timeWindowStrideInSamplePointNum =',
          timeWindowStrideInSamplePointNum)
    y_test_L = []
    y_pred_L = []
    wID = 0
    replacedR = False
    records_L = []
    timeStampSegments_L = []
    standardizer_eeg = standardizer(samplePointNum)
    standardizer_ch2 = standardizer(samplePointNum)
    for startSamplePoint in range(0, samplePointNum,
                                  timeWindowStrideInSamplePointNum):
        endSamplePoint = startSamplePoint + wsizeInSamplePointNum
        if endSamplePoint > samplePointNum:
            break
        timeStampSegment = timeStamps[startSamplePoint:endSamplePoint]
        eegSegment = eeg[startSamplePoint:endSamplePoint]
        standardized_eegSegment = standardizer_eeg.standardize(eegSegment)
        if params.useEMG:
            ch2Segment = ch2[startSamplePoint:endSamplePoint]
            standardized_ch2Segment = standardizer_ch2.standardize(ch2Segment)
            one_record = np.r_[standardized_eegSegment, standardized_ch2Segment]
        else:
            one_record = standardized_eegSegment
        # the stage estimate is one of ['w', 'n', 'r']
        if predict_by_batch:
            records_L.append(one_record)
            timeStampSegments_L.append(timeStampSegment)
        else:
            orig_prediction = stagePredictor.predict(
                one_record, timeStampSegment, params.stageLabels4evaluation,
                params.stageLabel2stageID)
            stagePrediction = params.reverseLabel(orig_prediction)
            # (a disabled block here previously replaced R with W after
            # numOfConsecutiveWsThatProhibitsR consecutive W predictions,
            # setting replacedR when it fired)
            #----
            # if the prediction is P, then use the previous one
            if stagePrediction == 'P':
                if len(y_pred_L) > 0:
                    stagePrediction = y_pred_L[len(y_pred_L) - 1]
                else:
                    stagePrediction = 'M'
            #-----
            # append to the lists of results
            y_pred_L.append(stagePrediction)
            if wID >= len(stageSeq):
                print('len(stageSeq) =', len(stageSeq), ', wID =', wID)
                print('startSamplePoint =', startSamplePoint,
                      ', samplePointNum =', samplePointNum,
                      ', timeWindowStrideInSamplePointNum =',
                      timeWindowStrideInSamplePointNum)
            trueLabel = stageSeq[wID]
            if replacedR:
                print(' -> for wID = ' + str(wID) + ', trueLabel = ' + trueLabel)
                replacedR = False
            y_test_L.append(trueLabel)
            wID += 1
    if predict_by_batch:
        # note: local_mu and local_sigma are not defined in this scope;
        # the batch path assumes they are supplied by the caller's context
        y_pred = np.array([
            params.reverseLabel(y_pred_orig)
            for y_pred_orig in stagePredictor.batch_predict(
                np.array(records_L), np.array(timeStampSegments_L),
                local_mu=local_mu, local_sigma=local_sigma,
                stageLabels4evaluation=params.stageLabels4evaluation,
                stageLabel2stageID=params.stageLabel2stageID)
        ])
        y_test = np.array(stageSeq)
    else:
        y_test = np.array(y_test_L)
        y_pred = np.array(y_pred_L)
    return (y_test, y_pred)
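# Hedged sketch of the incremental `standardizer` interface assumed above:
# the constructor takes a storage cap, and standardize() z-scores a segment
# using statistics over the samples seen so far. The real class may differ.
import numpy as np

class _DemoStandardizer:
    def __init__(self, max_storage):
        self.max_storage = max_storage
        self.storage = np.zeros((0,))

    def standardize(self, segment):
        # keep at most max_storage recent samples, then z-score the segment
        self.storage = np.r_[self.storage, segment][-self.max_storage:]
        mu, sigma = self.storage.mean(), self.storage.std()
        return (segment - mu) / sigma if sigma > 0 else segment - mu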
                np.array(records_L), np.array(timeStampSegments_L),
                local_mu=local_mu, local_sigma=local_sigma)
        ])
    '''
    else:
        y_pred = np.array(y_pred_L)
    '''
    return y_pred

#--------------------
# main
params = ParameterSetup()
extractorType = params.extractorType
classifierType = params.classifierType
classifierParams = params.classifierParams
samplingFreq = params.samplingFreq
windowSizeInSec = params.windowSizeInSec
# window size in sample points: at 128 Hz, 1280 sample points = 10 sec
wsizeInSamplePointNum = windowSizeInSec * samplingFreq
timeWindowStrideInSec = params.timeWindowStrideInSec
timeWindowStrideInSamplePointNum = timeWindowStrideInSec * samplingFreq
lookBackTimeWindowNum = params.lookBackTimeWindowNum
strideNumInTimeWindow = np.ceil(windowSizeInSec / timeWindowStrideInSec)
classifierFilePrefix = params.classifierFilePrefix
numOfConsecutiveWsThatProhibitsR = params.numOfConsecutiveWsThatProhibitsR
def supersample(x, y):
    params = ParameterSetup()
    classLabels = params.sampleClassLabels
    ratios = np.array(params.subsampleRatios)
    do_supersample = np.array(params.supersample)
    do_subsample = 1
    for ratio in ratios:
        if ratio == -1:
            do_subsample = 0
            break
    if do_supersample or do_subsample:
        minimumRatio = min(ratios)
        ratios = ratios / minimumRatio
        print('sampling ratios = ' + str(ratios))
        featureDim = x.shape[1]
        sorted_y = sorted(y)
        grouped = [[key, len(list(g))] for key, g in groupby(sorted_y)]
        for (key, elem_num) in grouped:
            print('  ' + key + ':' + str(elem_num))
        if do_supersample:
            # find the class with the largest number of elements
            max_class, max_elem_num = max(grouped, key=lambda kv: kv[1])
            print('max_elem_num = ' + str(max_elem_num) +
                  ' for max_class = ' + max_class)
            target_nums = [int(t) for t in np.floor(max_elem_num * ratios)]
        else:
            # find the class with the smallest number of elements
            min_class, min_elem_num = min(grouped, key=lambda kv: kv[1])
            print('min_elem_num = ' + str(min_elem_num) +
                  ' for min_class = ' + min_class)
            target_nums = [int(t) for t in np.floor(min_elem_num * ratios)]
        print('target_nums = ' + str(target_nums))
        classID = 0
        sampled_x = np.zeros((0, featureDim), dtype=float)
        sampled_y = np.zeros((0))
        for targetClass in classLabels:
            print('sampling for classID = ' + str(classID) +
                  ', class = ' + targetClass)
            isTarget = judgeIfTarget(y, targetClass)
            orderedIndices = np.arange(isTarget.shape[0])
            targetIDs = orderedIndices[isTarget == True]
            print('  target_nums[' + str(classID) + '] = ' +
                  str(target_nums[classID]))
            sampledIDs = targetIDs
            print('  sampledIDs.shape = ' + str(sampledIDs.shape))
            if do_supersample:
                # repeat the class's samples until the target count is reached
                while sampledIDs.shape[0] <= target_nums[classID]:
                    sampledIDs = np.r_[sampledIDs, targetIDs]
                    print('  sampledIDs.shape = ' + str(sampledIDs.shape))
            sampledIDs = sampledIDs[:target_nums[classID]]
            print('  finally, sampledIDs.shape is reduced to: ' +
                  str(sampledIDs.shape))
            print('  x[sampledIDs].shape = ' + str(x[sampledIDs].shape))
            sampled_x = np.r_[sampled_x, x[sampledIDs]]
            sampled_y = np.r_[sampled_y, y[sampledIDs]]
            print('sampled_x.shape = ' + str(sampled_x.shape))
            print('sampled_y.shape = ' + str(sampled_y.shape))
            classID = classID + 1
        return (sampled_x, sampled_y)
    else:
        print('no supersampling nor subsampling')
        return (x, y)
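# Hedged sketch of the balancing idea above: a class's indices are repeated
# until the target count is reached, then truncated. Illustrative only; the
# live code additionally handles per-class ratios and iterates over all classes.
def _demo_supersample_indices(y, targetClass, target_num):
    import numpy as np
    ids = np.where(np.asarray(y) == targetClass)[0]
    if ids.size == 0:
        return ids  # avoid an infinite loop when the class is absent
    sampledIDs = ids
    while sampledIDs.shape[0] <= target_num:
        sampledIDs = np.r_[sampledIDs, ids]
    return sampledIDs[:target_num]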
def __init__(self):
    self.extractorType = 'realFourier'
    params = ParameterSetup()
import sys
from parameterSetup import ParameterSetup
from tester import printMetadata, test_by_classifierID

args = sys.argv
if args[1] == 'test':
    datasetType = 'test'
    classifierIDs = args[2:]
elif args[1] == 'validation':
    datasetType = 'validation'
    classifierIDs = args[2:]
else:
    datasetType = 'validation'
    classifierIDs = args[1:]
params = ParameterSetup()
printMetadata(params)
pickledDir = params.pickledDir
measures_by_classifier_L = []
for classifierID in classifierIDs:
    measures_by_classifier_L.append(
        test_by_classifierID(params, datasetType, classifierID))
if datasetType == 'test':
    f = open(pickledDir + '/test_result.measures.' + classifierIDs[0] +
             '.etc.test.pkl', 'wb')
else:
    f = open(pickledDir + '/test_result.measures.' + classifierIDs[0] +
             '.etc.pkl', 'wb')
def start(self):
    channelOpt = 1
    params = ParameterSetup()
    self.recordWaves = params.writeWholeWaves
    self.extractorType = params.extractorType
    self.classifierType = params.classifierType
    self.postDir = params.postDir
    self.predDir = params.predDir
    self.finalClassifierDir = params.finalClassifierDir
    self.samplingFreq = params.samplingFreq
    postFiles = listdir(self.postDir)
    fileCnt = 0
    for inputFileName in postFiles:
        if not inputFileName.startswith('.'):
            print('inputFileName = ' + inputFileName)
            inputFileID = splitext(inputFileName)[0]
            print('inputFileID = ' + inputFileID)
            predFileFullPath = self.predDir + '/' + inputFileID + '_pred.txt'
            print('predFileFullPath = ' + predFileFullPath)
            if not isfile(predFileFullPath):
                fileCnt += 1
                print('  processing ' + inputFileID)
                try:
                    classifierID = selectClassifierID(self.finalClassifierDir,
                                                      self.classifierType)
                    if len(self.args) > 1 and \
                            self.args[1] == '--output_the_same_fileID':
                        self.client = ClassifierClient(
                            self.recordWaves, self.extractorType,
                            self.classifierType, classifierID,
                            inputFileID=inputFileID)
                    else:
                        self.client = ClassifierClient(
                            self.recordWaves, self.extractorType,
                            self.classifierType, classifierID)
                    self.client.predictionStateOn()
                    self.client.hasGUI = False
                except Exception as e:
                    print(str(e))
                    raise e
                try:
                    eegFilePath = self.postDir + '/' + inputFileName
                    self.server = EEGFileReaderServer(
                        self.client, eegFilePath,
                        samplingFreq=self.samplingFreq)
                except Exception as e:
                    print(str(e))
                    raise e
            else:
                print('  skipping ' + inputFileID + ' because ' +
                      predFileFullPath + ' exists.')