コード例 #1
0
ファイル: SamplingTest.py プロジェクト: malcolmreynolds/APGL
    def testShuffleSplit(self):
        """
        Check the index pairs returned by Sampling.shuffleSplit.

        For every fold the two index arrays must together cover exactly the
        indices 0..numExamples-1, and when a split proportion is given the
        first array of each pair must hold that proportion of the examples.
        """
        numExamples = 10
        folds = 5
        allInds = numpy.arange(numExamples)

        #Split using the default proportion
        indices = Sampling.shuffleSplit(folds, numExamples)

        for i in range(folds):
            coveredInds = numpy.union1d(indices[i][0], indices[i][1])
            #array_equal is False (rather than erroring) when the lengths differ
            self.assertTrue(numpy.array_equal(coveredInds, allInds))

        #Split with an explicit proportion of one half
        indices = Sampling.shuffleSplit(folds, numExamples, 0.5)
        trainSize = numExamples*0.5

        for i in range(folds):
            coveredInds = numpy.union1d(indices[i][0], indices[i][1])
            self.assertTrue(numpy.array_equal(coveredInds, allInds))
            self.assertEqual(indices[i][0].shape[0], trainSize)

        #numExamples*0.55 is not a whole number; the result is not inspected in
        #the code visible here, so this only checks that the call does not raise
        indices = Sampling.shuffleSplit(folds, numExamples, 0.55)
コード例 #2
0
def processSimpleDataset(name, numRealisations, split, ext=".csv", delimiter=",", usecols=None, skiprows=1, converters=None):
    """
    Load a delimited text dataset and hand it to preprocessSave.

    The file <data dir>/modelPenalisation/regression/<name><ext> is read with
    numpy.loadtxt. The last column is taken as the labels and all preceding
    columns as the examples. Shuffle-split indices are generated for the
    rows and everything is passed to preprocessSave together with the output
    directory <data dir>/modelPenalisation/regression/<name>/.

    :param name: base name of the dataset file (without extension)
    :param numRealisations: first argument given to Sampling.shuffleSplit
    :param split: third argument given to Sampling.shuffleSplit
    :param ext: extension of the dataset file
    :param delimiter: passed to numpy.loadtxt
    :param usecols: passed to numpy.loadtxt
    :param skiprows: passed to numpy.loadtxt
    :param converters: passed to numpy.loadtxt
    """
    #Fixed seed so that repeated runs see the same random state
    numpy.random.seed(21)
    regressionDir = PathDefaults.getDataDir() + "modelPenalisation/regression/"
    fileName = regressionDir + name + ext

    print("Loading data from file " + fileName)
    outputDir = PathDefaults.getDataDir() + "modelPenalisation/regression/" + name + "/"

    data = numpy.loadtxt(
        fileName,
        delimiter=delimiter,
        skiprows=skiprows,
        usecols=usecols,
        converters=converters,
    )
    #Last column holds the labels, the rest are the examples
    X, y = data[:, :-1], data[:, -1]
    numRows = X.shape[0]
    splitIdx = Sampling.shuffleSplit(numRealisations, numRows, split)
    preprocessSave(X, y, outputDir, splitIdx)
コード例 #3
0
def processParkinsonsDataset(name, numRealisations):
    """
    Load the comma-separated file <data dir>/modelPenalisation/regression/<name>.data
    and save two regression problems built from it.

    Columns 5 and 6 of the loaded matrix are used as two separate label
    vectors; all remaining columns, kept in their original order, form the
    examples. The same shuffle-split indices are used for both problems,
    which are passed to preprocessSave with output directories ending in
    <name>-motor/ (column 5) and <name>-total/ (column 6).

    :param name: base name of the dataset file (without the .data extension)
    :param numRealisations: first argument given to Sampling.shuffleSplit
    """
    #Fixed seed so that repeated runs see the same random state
    numpy.random.seed(21)
    dataDir = PathDefaults.getDataDir() + "modelPenalisation/regression/"
    fileName = dataDir + name + ".data"

    XY = numpy.loadtxt(fileName, delimiter=",", skiprows=1)
    #Build the feature columns in increasing order explicitly instead of
    #relying on the iteration order of a set, which is not guaranteed
    labelCols = (5, 6)
    inds = [j for j in range(XY.shape[1]) if j not in labelCols]
    X = XY[:, inds]

    y1 = XY[:, 5]
    y2 = XY[:, 6]
    #We don't keep whole collections of patients
    split = 0.5

    idx = Sampling.shuffleSplit(numRealisations, X.shape[0], split)

    outputDir = PathDefaults.getDataDir() + "modelPenalisation/regression/" + name + "-motor/"
    preprocessSave(X, y1, outputDir, idx)

    outputDir = PathDefaults.getDataDir() + "modelPenalisation/regression/" + name + "-total/"
    preprocessSave(X, y2, outputDir, idx)
コード例 #4
0
ファイル: BenchmarkExp.py プロジェクト: malcolmreynolds/APGL
def shuffleSplit90(repetitions, numExamples):
    """
    Take 90% of the examples to train, and the rest to test

    Thin wrapper which calls Sampling.shuffleSplit with the split
    proportion fixed at 0.9 and returns its result unchanged.
    """
    trainProportion = 0.9
    return Sampling.shuffleSplit(repetitions, numExamples, trainProportion)