class NetworkDataUtil(object):
    '''
    Class to load and separate data for a neural network.

    Two CSV files are read, one per class: rows of the first file are
    labelled 0., rows of the second file 1. The label is appended as the
    last column, so every row has ``nInputs`` input values followed by one
    target value.
    '''

    def __init__(self, files=None):
        '''
        :param files: sequence holding the two file paths (class 0 first,
                      class 1 second). Defaults to an empty list.
        '''
        # A fresh list per instance; a literal [] default would be one
        # object shared by every instance created without arguments.
        self.files = [] if files is None else files
        self.fileUtil = CSVUtil()

    def makeSameLength(self, values0, values1):
        '''
        Truncate both sequences to a common length.

        NOTE(review): the common length is ``min(len) - 1``, i.e. one row
        fewer than strictly necessary. This looks like an off-by-one but is
        kept as is - confirm the intent before changing it.

        :return: tuple ``(values0, values1)`` of equally long slices
        '''
        length = min(len(values0), len(values1)) - 1
        return values0[:length], values1[:length]

    def get(self, separate=True, makeSameLength=True):
        '''
        Read both files and build the data set(s).

        :param separate: if True return ``(trainData, testData)``,
                         otherwise one data set built from all rows
        :param makeSameLength: if True and the files differ in row count,
                               truncate both via :meth:`makeSameLength`
        '''
        values0, values1 = self.readFiles(self.files)
        if makeSameLength and len(values0) != len(values1):
            values0, values1 = self.makeSameLength(values0, values1)

        if separate:
            return self.buildTestSet(values0, values1)
        return self.buildFullTestSet(values0, values1)

    def getNInput(self):
        '''
        Return the number of input columns.

        ``nInputs`` is only assigned by :meth:`buildTestSet` and
        :meth:`buildFullTestSet`, so one of them (or :meth:`get`) has to
        run first.
        '''
        return self.nInputs

    def readFiles(self, files):
        '''Read ``files[0]`` and ``files[1]`` as comma separated data.'''
        values0 = self.fileUtil.readData(files[0], ",")
        values1 = self.fileUtil.readData(files[1], ",")
        return values0, values1

    def buildFullTestSet(self, values0, values1):
        '''Label, merge and shuffle all rows into a single data set.'''
        values0 = self._addClass(values0, 0.)
        values1 = self._addClass(values1, 1.)
        rawData = self._postprocData(values0, values1)

        # last column is the class label, everything before it is input
        self.nInputs = len(rawData[0]) - 1
        return self.createData(self.nInputs, rawData)

    def buildTestSet(self, values0, values1):
        '''
        Label both classes, split each into train / test and merge them.

        :return: tuple ``(trainData, testData)``
        '''
        train0, test0 = self._preprocData(values0, 0.)
        train1, test1 = self._preprocData(values1, 1.)

        trainRawData = self._postprocData(train0, train1)
        testRawData = self._postprocData(test0, test1)

        # last column is the class label, everything before it is input
        self.nInputs = len(trainRawData[0]) - 1
        trainData = self.createData(self.nInputs, trainRawData)
        testData = self.createData(self.nInputs, testRawData)
        return trainData, testData

    def _preprocData(self, values, clazz):
        '''Shuffle ``values`` in place, append the label and split them.'''
        np.random.shuffle(values)
        labelled = self._addClass(values, clazz)
        return self._separateData(labelled)

    def _addClass(self, values, clazz):
        '''Return a copy of the 2D array ``values`` with one extra, last
        column that holds ``clazz`` in every row.'''
        rows, cols = values.shape[0], values.shape[1]
        clazzArray = np.full((rows, cols + 1), clazz)
        clazzArray[:, :-1] = values
        return clazzArray

    def _separateData(self, values):
        '''
        Split ``values`` into the first two thirds and the remaining third.

        :return: tuple ``(first_part, second_part)``
        '''
        # Floor division keeps the split point an int on Python 3 as well;
        # "/" would yield a float there, which is not a valid slice index.
        split = (2 * len(values)) // 3
        return values[:split], values[split:]

    def _postprocData(self, values0, values1):
        '''Concatenate both arrays row-wise and shuffle the result.'''
        values = np.concatenate((values0, values1), axis=0)
        np.random.shuffle(values)
        return values

    def createXORData(self):
        '''Build a data set holding the four rows of the XOR truth table.'''
        values = [[0, 0, 0], [0, 1, 1], [1, 0, 1], [1, 1, 0]]
        return self.createData(2, values)

    def createData(self, nInput, values):
        '''
        Fill a ``SupervisedDataSet`` from ``values``.

        :param nInput: number of leading columns used as input; the column
                       at index ``nInput`` is used as target
        :param values: iterable of rows
        '''
        ds = SupervisedDataSet(nInput, N_OUTPUT)
        for value in values:
            ds.addSample(value[:nInput], value[nInput])
        return ds
def __init__(self, files=None):
    '''
    Store the file list and create the CSV helper.

    :param files: sequence of file paths; defaults to an empty list
    '''
    # A fresh list per instance; a literal [] default would be one object
    # shared by every instance created without arguments.
    self.files = [] if files is None else files
    self.fileUtil = CSVUtil()
def saveCSV(self, filePath, data, header, delimiter=";"):
    '''
    Write ``data`` together with ``header`` to ``filePath`` using a new
    ``CSVUtil`` instance.

    NOTE(review): ``delimiter`` is accepted but never forwarded -
    ``writeFile`` is always called with "," as its fourth argument.
    Confirm which separator is intended before wiring the parameter
    through, since that would change the output of existing callers.
    '''
    writer = CSVUtil()
    writer.writeFile(filePath, data, header, ",")
def getECGDto(self, filePath):
    '''Return whatever ``CSVUtil.readECGFile`` yields for ``filePath``.'''
    csvUtil = CSVUtil()
    ecgDto = csvUtil.readECGFile(filePath)
    return ecgDto
def getDtoFromCsv(self, filePath):
    '''Return whatever ``CSVUtil.readEEGFile`` yields for ``filePath``.'''
    csvUtil = CSVUtil()
    eegDto = csvUtil.readEEGFile(filePath)
    return eegDto
def setUp(self):
    '''Create the ``CSVUtil`` instance and store it as ``self.reader``.'''
    csvUtil = CSVUtil()
    self.reader = csvUtil
class CSVUtilTest(BaseTest):
    '''Tests for reading and writing CSV files with ``CSVUtil``.'''

    def setUp(self):
        self.reader = CSVUtil()

    def test_readData(self):
        # smoke test: reading must not raise
        file_path = self.getData32CSV()
        self.reader.readData(file_path)

    def test_readHeader(self):
        # smoke test: reading must not raise
        file_path = self.getData32CSV()
        self.reader.readHeader(file_path)

    def testreadEEGFile(self):
        # smoke test: reading must not raise
        # (name kept as is - "test_readEEGFile" already exists below)
        file_path = self.getData32CSV()
        self.reader.readEEGFile(file_path)

    def test_writeFile(self):
        '''Values written by writeFile are read back within 0.001.'''
        filePath = self.PATH + "test.csv"
        header = ["Timestamp", "B", "C"]
        data = np.array([[1, 1.123456789, 2],
                         [2, -4.123456789, 6],
                         [3, 7.123456789, 99.123]])

        self.reader.writeFile(filePath, data, header)

        # a missing file is a failure, not a reason to skip the checks
        self.assertTrue(isfile(filePath))
        try:
            read = self.reader.readEEGFile(filePath)
            for (i, j), expected in np.ndenumerate(data):
                self.assertAlmostEqual(expected, read.data[i, j], delta=0.001)
        finally:
            # clean up even if an assertion above failed
            self.removeFile(filePath)

    def test_writeStructredFile(self):
        '''Every "value" list written by writeStructredFile is read back
        unchanged under its key.'''
        filePath = self.PATH + "test_structured.csv"
        data = {
            "A": {
                "value": [1, 2, 3],
                "quality": [-1, -1, -1]
            },
            "B": {
                "value": [4, 5, 6],
                "quality": [-2, -2, -2]
            },
            "C": {
                "value": [7, 8, 9],
                "quality": [-3, -3, -3]
            }
        }

        self.reader.writeStructredFile(filePath, data)

        # a missing file is a failure, not a reason to skip the checks
        self.assertTrue(isfile(filePath))
        try:
            read = self.reader.readEEGFile(filePath)
            # items() instead of iteritems(): works on Python 2 and 3
            for key, values in data.items():
                assert_array_equal(values["value"], read.getColumn(key))
        finally:
            # clean up even if an assertion above failed
            self.removeFile(filePath)

    @unittest.skip("There should be no empty values")
    def test_readEEGFile_NaNValues(self):
        eegData = self.reader.readEEGFile(self.PATH + "example_32_empty.csv")
        emptyCol = eegData.getColumn("Y")
        self.assertTrue(np.isnan(emptyCol).any())

        nonEmptyCol = eegData.getColumn("F3")
        self.assertFalse(np.isnan(nonEmptyCol).any())

    def test_readEEGFile_SeparatorFallback(self):
        '''The same column is read from the default and the comma file.'''
        eegData = self.reader.readEEGFile(self.getData32CSV())
        semicolonData = eegData.getColumn("F3")

        eegData = self.reader.readEEGFile(self.PATH + "example_32_comma.csv")
        commaData = eegData.getColumn("F3")

        self.assertTrue((semicolonData == commaData).all())

    @unittest.skip("delete Z-Column leads to memory error")
    def test_readEEGFile_newStyle(self):
        _ = self.reader.readEEGFile(self.PATH + "example_1024_new.csv")

    def test_readEEGFile(self):
        self.eegData = self.reader.readEEGFile(self.getData32CSV())
        self.assertTrue(self.eegData.hasEEGData)
        self.assertFalse(self.eegData.hasECGData)

    def test_readECGFile(self):
        self.ecgData = self.reader.readECGFile(self.PATH + "example_4096_ecg.csv")
        self.assertFalse(self.ecgData.hasEEGData)
        self.assertTrue(self.ecgData.hasECGData)

    def test_transformTimestamp_ecg(self):
        '''The first ECG timestamp is transformed to ~1480942739.'''
        header = ["Timestamp", "ECG"]
        data = np.array([["05/12/2016 13:58:59.407", "3798"],
                         ["05/12/2016 13:58:59.408", "3798"],
                         ["05/12/2016 13:58:59.409", "3798"],
                         ["05/12/2016 13:58:59.410", "3798"],
                         ["05/12/2016 13:58:59.411", "3798"]])

        ecgData = self.reader.transformTimestamp(header, data)
        # assertAlmostEqual: the "...Equals" alias is deprecated
        self.assertAlmostEqual(float(ecgData[0][0]), 1480942739., delta=1.)

    def test_transformTimestamp_eeg(self):
        '''The first EEG timestamp is transformed to ~1482131918.'''
        header = ["Timestamp", "F3", "X"]
        data = np.array([["2016-12-19 08:18:38.415000", "-3200", "0"],
                         ["2016-12-19 08:18:38.423000", "-3171", "0"],
                         ["2016-12-19 08:18:38.430000", "-3184", "0"],
                         ["2016-12-19 08:18:38.438000", "-3176", "0"],
                         ["2016-12-19 08:18:38.446000", "-3172", "0"]])

        eegData = self.reader.transformTimestamp(header, data)
        # assertAlmostEqual: the "...Equals" alias is deprecated
        self.assertAlmostEqual(float(eegData[0][0]), 1482131918., delta=1.)