Example 1
def trainWithLSTM(params):
    inSeqLen = params['inSeqLen']
    outSeqLen = params['outSeqLen']
    numFeatures = params['numFeatures']
    lstm_layers = params['lstm_layers']
    datasetObj = params['datasetObj']
    allRecords = params['allRecords']
    csvRecordInfo = params['csvRecordInfo']
    epochSeconds = params['epochSeconds']
    slidingWindowSeconds = params['slidingWindowSeconds']
    priorSeconds = params['priorSeconds']
    postSeconds = params['postSeconds']
    trainingEpochs = params['trainingEpochs']
    trainingBatchSize = params['trainingBatchSize']
    validation_split = params['validation_split']
    modelOutputDir = params['modelOutputDir']
    savedModelFilePrefix = params['savedModelFilePrefix']
    dataFormat = params['dataFormat']  # assumed: format selector ("CSV" or "EDF") used in the check below

    lstmObj = eegLSTM("encoder_decoder_sequence")
    # lstmObj = eegLSTM("stacked_LSTM")
    lstmObj.createModel(inSeqLen, outSeqLen, numFeatures, lstm_layers)
    if (dataFormat == "CSV"):
        numSamples = lstmObj.prepareDataSubset_fromCSV(datasetObj, allRecords, 
            csvRecordInfo, (epochSeconds*1000), (slidingWindowSeconds*1000), 
            priorSeconds, postSeconds)
        # If the number of samples is too low, there is no point in training with this dataset
        if (numSamples <= 10):
            print ("numSamples ({}) is too low! Returning without creating a saved model!".format(numSamples))
            return
    else:
        print ("LSTM model for EDF format is not yet implemented in this version")
        exit(-1)
    lstmObj.fit(trainingEpochs, trainingBatchSize, validation_split)
    lstmObj.saveModel(modelOutputDir, savedModelFilePrefix)
    return
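
As a rough illustration of how trainWithLSTM() could be driven, here is a minimal sketch of the kind of params dictionary it unpacks. All concrete values, paths, and the datasetObj/allRecords/csvRecordInfo placeholders are hypothetical and would have to come from the project's own CSV preprocessing step; the lstm_layers value in particular is only an assumed shape for createModel().

# Hypothetical parameter dictionary for trainWithLSTM(); values are placeholders.
params = {
    'inSeqLen': 30,                  # input sequence length (time steps)
    'outSeqLen': 10,                 # output sequence length (time steps)
    'numFeatures': 19,               # EEG channels / features per time step
    'lstm_layers': [64, 64],         # assumed layer-size list passed to createModel()
    'datasetObj': None,              # dataset object from the CSV pipeline (placeholder)
    'allRecords': [],                # record IDs to train on (placeholder)
    'csvRecordInfo': {},             # per-record metadata (placeholder)
    'epochSeconds': 30,              # epoch window in seconds
    'slidingWindowSeconds': 5,       # sliding-window stride in seconds
    'priorSeconds': 600,             # seconds before seizure onset to include
    'postSeconds': 600,              # seconds after seizure end to include
    'trainingEpochs': 50,
    'trainingBatchSize': 32,
    'validation_split': 0.2,
    'modelOutputDir': '/tmp/models',
    'savedModelFilePrefix': 'chb01_LSTM',
    'dataFormat': 'CSV',             # required by the dataFormat check above
}
# trainWithLSTM(params)             # call once the placeholder objects are filled in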
Example 2
def testWithLSTM(modelFile, weightsFile, numFeaturesInTestFiles, allFiles):
    lstmObj = eegLSTM("encoder_decoder_sequence")
    lstmObj.loadModel(modelFile, weightsFile)
    print("Loaded LSTM model from disk")
    if (numFeaturesInTestFiles != lstmObj.numFeatures):
        print ("number of features in test files ", numFeaturesInTestFiles, 
            "!= number of features in loaded model ", lstmObj.numFeatures)
    for testFilePath in allFiles.values():
        print ("testFilePath = ", testFilePath)
        lstmObj.prepareDataset_fullfile(testFilePath)
        # dataset = np.loadtxt(testFile, delimiter=',')
        # X = dataset[:,:19]
        # y = dataset[:,19]
        lstmObj.evaluate()
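
A hypothetical invocation of testWithLSTM() might look like the sketch below. The file paths are placeholders, and the JSON + HDF5 pairing is only an assumption based on the usual Keras convention for a model/weights pair written by saveModel().

# Hypothetical call to testWithLSTM(); all paths are placeholders.
modelFile = "/tmp/models/chb01_LSTM.json"      # assumed model-architecture file
weightsFile = "/tmp/models/chb01_LSTM.h5"      # assumed weights file
testFiles = {
    'chb01_03': "/tmp/features/chb01_03.csv",
    'chb01_04': "/tmp/features/chb01_04.csv",
}
testWithLSTM(modelFile, weightsFile, numFeaturesInTestFiles=19, allFiles=testFiles)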
Example 3
    if (dataSubset == "fulldata"):
        print("Will be training on full data")
        priorSeconds = postSeconds = -1
    elif re.search(r"seizure\-(\d+), seizure\+(\d+)", dataSubset) is not None:
        m = re.search(r"seizure\-(\d+), seizure\+(\d+)", dataSubset)
        priorSeconds = int(m.group(1))
        postSeconds = int(m.group(2))
        print("data subset = [seizure-{}, seizure+{}]".format(priorSeconds,
              postSeconds))

    # Verify that all the records have same features
    features = tuhd.recordInfo[allRecords[0]]['channelLabels']
    featuresSet = set(features)
    for recordID in allRecords:
        tmpSet = set(tuhd.recordInfo[recordID]['channelLabels'])
        xorSet = featuresSet.symmetric_difference(tmpSet)
        if (len(xorSet) > 0):
            print("features are not common between", allRecords[0], "and",
                  recordID)
            exit(-1)
    print("features are common between all the records!")
    numFeatures = len(featuresSet)
    lstmObj = eegLSTM("encoder_decoder_sequence")
    # lstmObj = eegLSTM("stacked_LSTM")
    lstmObj.createModel(inSeqLen, outSeqLen, numFeatures, lstmLayers)
    lstmObj.prepareDataset_fromTUHedf(tuhd, allRecords, priorSeconds,
                                      postSeconds)
    lstmObj.fit(epochs, batchsize)
    lstmObj.saveModel(modelOutputDir, recordID + "LSTM")
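
The dataSubset parsing above can be exercised on its own; the sample string below is hypothetical but follows the "seizure-<N>, seizure+<N>" convention the regex expects.

# Standalone sketch of the dataSubset parsing used above.
import re

dataSubset = "seizure-600, seizure+300"
m = re.search(r"seizure\-(\d+), seizure\+(\d+)", dataSubset)
if m is not None:
    priorSeconds = int(m.group(1))    # 600 seconds before seizure onset
    postSeconds = int(m.group(2))     # 300 seconds after seizure end
    print(priorSeconds, postSeconds)  # -> 600 300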
Example 4
def testWithHybridModel(lstmModelFile, lstmWeightsFile, dnnModelFile, 
                    dnnWeightsFile, numFeaturesInTestFiles, allFiles,
                    timeStepsToPredict):
    lstmObj = eegLSTM("encoder_decoder_sequence")
    lstmObj.loadModel(lstmModelFile, lstmWeightsFile)
    print("Loaded LSTM model from disk")
    dnnObj = eegDNN("Classifier_3layers")
    dnnObj.loadModel(dnnModelFile, dnnWeightsFile)
    print("Loaded DNN model from disk")
    if (numFeaturesInTestFiles != lstmObj.numFeatures):
        print ("number of features in test files ", numFeaturesInTestFiles, 
            "!= number of features in loaded LSTM model ", lstmObj.numFeatures)
    if (numFeaturesInTestFiles != dnnObj.numFeatures):
        print ("number of features in test files ", numFeaturesInTestFiles, 
            "!= number of features in loaded DNN model ", dnnObj.numFeatures)

    for testFilePath in allFiles.values():
        print ("testFilePath = ", testFilePath)
        dataset = pd.read_csv(testFilePath)
        dataset = dataset.values # Convert to a numpy array from pandas dataframe
        numFeatures = lstmObj.numFeatures
        inSeqLen = lstmObj.inSeqLen
        outSeqLen = lstmObj.outSeqLen
        numRowsNeededForTest = max((inSeqLen + outSeqLen), (inSeqLen+timeStepsToPredict))
        numRows = dataset.shape[0]
        print ("inSeqLen={}, outSeqLen={}, numFeatures={}, numRows={}, numRowsNeededForTest={}".format(
            inSeqLen, outSeqLen, numFeatures, numRows, numRowsNeededForTest
        ))
        # lstmObj.prepareDataset_fullfile(testFilePath)
        while (numRows > numRowsNeededForTest):
            numRemainingRows = min (numRows, (inSeqLen+timeStepsToPredict))
            # print ("numRows={}, numFeatures={}, numRemainingRows={}"
            #         .format(numRows, numFeatures, numRemainingRows))

            predictedDataset = np.empty((1, (inSeqLen+timeStepsToPredict), numFeatures))
            predictedSeizureValues = np.empty((inSeqLen+timeStepsToPredict))
            inputRowStart = 0
            inputRowEnd = inputRowStart + inSeqLen
            outputRowStart = inputRowEnd
            outputRowEnd = outputRowStart + outSeqLen
            # print ("inputRowStart={}, inputRowEnd={}, outputRowStart={}, outputRowEnd={}"
            #         .format(inputRowStart, inputRowEnd, outputRowStart, outputRowEnd))
            predictedDataset[0, inputRowStart:inputRowEnd,:numFeatures] = dataset[inputRowStart:inputRowEnd, :numFeatures]
            predictedSeizureValues[inputRowStart:inputRowEnd] = dataset[inputRowStart:inputRowEnd, numFeatures]
            while (numRemainingRows >= numRowsNeededForTest):
                # Predict the next outSeqLen feature rows from the previous inSeqLen
                # rows with the LSTM, then classify each predicted row with the DNN.
                predictedDataset[:, outputRowStart:outputRowEnd, :] = \
                    lstmObj.getModel().predict(predictedDataset[:, inputRowStart:inputRowEnd, :])
                for i in range(outSeqLen):
                    predictedSeizureValues[outputRowStart+i] = dnnObj.getModel().predict(predictedDataset[:, outputRowStart+i, :])
                
                inputRowStart += outSeqLen
                inputRowEnd = inputRowStart + inSeqLen
                outputRowStart = inputRowEnd
                outputRowEnd = outputRowStart + outSeqLen
                numRemainingRows -= outSeqLen
                # print ("inputRowStart={}, inputRowEnd={}, outputRowStart={}, outputRowEnd={}"
                #         .format(inputRowStart, inputRowEnd, outputRowStart, outputRowEnd))

            # print ("predictedDataset = ", predictedDataset[0, inSeqLen:min (numRows, (inSeqLen+timeStepsToPredict)), :numFeatures])
            # print ("actual dataset = ", dataset[inSeqLen:min (numRows, (inSeqLen+timeStepsToPredict)), :numFeatures])
            calculateMetrics(predictedSeizureValues[inSeqLen:], dataset[inSeqLen:,numFeatures])
            dataset = np.delete(dataset, list(range(timeStepsToPredict)), axis=0)
            numRows = dataset.shape[0]

    return
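
The inner loop of testWithHybridModel() is mostly index bookkeeping: seed a buffer with inSeqLen real rows, predict the next outSeqLen rows, and slide both windows forward by outSeqLen. The sketch below isolates that bookkeeping with toy sizes; the copy-forward assignment stands in for the lstmObj.getModel().predict() call so the example stays self-contained, and all sizes are hypothetical.

# Standalone sketch of the rolling-window bookkeeping used above.
import numpy as np

inSeqLen, outSeqLen, timeStepsToPredict, numFeatures = 4, 2, 8, 3
dataset = np.random.rand(inSeqLen + timeStepsToPredict, numFeatures)

predicted = np.empty((1, inSeqLen + timeStepsToPredict, numFeatures))
predicted[0, :inSeqLen, :] = dataset[:inSeqLen, :]    # seed with real samples

inputRowStart = 0
while inputRowStart + inSeqLen + outSeqLen <= predicted.shape[1]:
    inputRowEnd = inputRowStart + inSeqLen
    outputRowStart, outputRowEnd = inputRowEnd, inputRowEnd + outSeqLen
    # A real run would call the LSTM's predict() on the inSeqLen input window;
    # here the last input row is copied forward to keep the sketch runnable.
    predicted[0, outputRowStart:outputRowEnd, :] = predicted[0, inputRowEnd - 1, :]
    inputRowStart += outSeqLen                        # slide the window forward

print(predicted.shape)   # (1, 12, 3)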