def testShuffleSplit(self):
    """
    Exercise Sampling.shuffleSplit with and without an explicit split size.

    For every fold the two returned index arrays must together cover all
    example indices; with a split of 0.5 the first array must hold half of
    the examples.
    """
    exampleCount = 10
    foldCount = 5
    everyIndex = numpy.arange(exampleCount)

    # Third argument omitted.
    splits = Sampling.shuffleSplit(foldCount, exampleCount)
    for fold in range(foldCount):
        covered = numpy.union1d(splits[fold][0], splits[fold][1])
        self.assertTrue((covered == everyIndex).all())

    # Explicit split of 0.5: the first array of each pair has this many items.
    expectedTrainSize = exampleCount*0.5
    splits = Sampling.shuffleSplit(foldCount, exampleCount, 0.5)
    for fold in range(foldCount):
        firstPart = splits[fold][0]
        secondPart = splits[fold][1]
        covered = numpy.union1d(firstPart, secondPart)
        self.assertTrue((covered == everyIndex).all())
        self.assertTrue(firstPart.shape[0] == expectedTrainSize)

    # A split that does not divide the examples evenly; only the call itself
    # is exercised here, nothing is asserted about its result.
    splits = Sampling.shuffleSplit(foldCount, exampleCount, 0.55)
def processSimpleDataset(name, numRealisations, split, ext=".csv", delimiter=",", usecols=None, skiprows=1, converters=None):
    """
    Load a delimited numeric dataset, split it and hand it to preprocessSave.

    The file <data dir>/modelPenalisation/regression/<name><ext> is read with
    numpy.loadtxt. The last column is used as the target vector and all
    preceding columns as the example matrix. The results are passed to
    preprocessSave together with the output directory
    <data dir>/modelPenalisation/regression/<name>/.

    :param name: Base name of the dataset file (without extension); also used
        as the name of the output sub-directory.
    :param numRealisations: First argument given to Sampling.shuffleSplit.
    :param split: Third argument given to Sampling.shuffleSplit.
    :param ext: File extension appended to name.
    :param delimiter: Forwarded to numpy.loadtxt.
    :param usecols: Forwarded to numpy.loadtxt.
    :param skiprows: Forwarded to numpy.loadtxt.
    :param converters: Forwarded to numpy.loadtxt.
    """
    # Fixed seed, set before the splits are generated.
    numpy.random.seed(21)

    regressionDir = PathDefaults.getDataDir() + "modelPenalisation/regression/"
    sourcePath = regressionDir + name + ext
    print("Loading data from file " + sourcePath)

    targetDir = PathDefaults.getDataDir() + "modelPenalisation/regression/" + name + "/"

    table = numpy.loadtxt(
        sourcePath,
        delimiter=delimiter,
        skiprows=skiprows,
        usecols=usecols,
        converters=converters,
    )
    # Last column is the target, everything before it the examples.
    examples, targets = table[:, :-1], table[:, -1]

    splitIndices = Sampling.shuffleSplit(numRealisations, examples.shape[0], split)
    preprocessSave(examples, targets, targetDir, splitIndices)
def processParkinsonsDataset(name, numRealisations):
    """
    Load the Parkinsons dataset and save two regression problems from it.

    The file <data dir>/modelPenalisation/regression/<name>.data is read as
    comma-separated values, skipping the first row. Columns 5 and 6 are used
    as two separate target vectors; all remaining columns, in their original
    order, form the example matrix. The same splits are used for both
    targets, which are passed to preprocessSave with the output directories
    <name>-motor/ (column 5) and <name>-total/ (column 6).

    :param name: Base name of the dataset file (without the ".data"
        extension) and prefix of the two output directories.
    :param numRealisations: First argument given to Sampling.shuffleSplit.
    """
    # Fixed seed, set before the splits are generated.
    numpy.random.seed(21)

    regressionDir = PathDefaults.getDataDir() + "modelPenalisation/regression/"
    fileName = regressionDir + name + ".data"

    XY = numpy.loadtxt(fileName, delimiter=",", skiprows=1)

    # NOTE(review): in the public UCI Parkinsons Telemonitoring file the
    # motor/total UPDRS scores appear to be 0-based columns 4 and 5 - confirm
    # that 5 and 6 are the intended target columns for the file used here.
    targetColumns = (5, 6)
    # Build the feature column list in ascending order explicitly; a set
    # difference would leave the column order up to set iteration order.
    inds = [col for col in range(XY.shape[1]) if col not in targetColumns]
    X = XY[:, inds]
    y1 = XY[:, 5]
    y2 = XY[:, 6]

    # We don't keep whole collections of patients
    split = 0.5
    idx = Sampling.shuffleSplit(numRealisations, X.shape[0], split)

    outputDir = regressionDir + name + "-motor/"
    preprocessSave(X, y1, outputDir, idx)

    outputDir = regressionDir + name + "-total/"
    preprocessSave(X, y2, outputDir, idx)
def shuffleSplit90(repetitions, numExamples):
    """
    Take 90% of the examples to train, and the rest to test.

    Thin wrapper that calls Sampling.shuffleSplit with a split size of 0.9.

    :param repetitions: First argument given to Sampling.shuffleSplit.
    :param numExamples: Second argument given to Sampling.shuffleSplit.
    :return: Whatever Sampling.shuffleSplit returns for these arguments.
    """
    return Sampling.shuffleSplit(repetitions, numExamples, 0.9)