Ejemplo n.º 1
0
 def __init__(self, numFolds, data, labels):
     """Store the fold count and pre-split the data and labels.

     numFolds -- number of sections requested from array_split
     data     -- samples, split into numFolds pieces
     labels   -- labels, split into numFolds pieces the same way
     """
     self._numFolds, self._curFold = numFolds, 0
     # One piece per fold, for data and labels alike.
     self._trainData = array_split(data, numFolds)
     self._trainLabels = array_split(labels, numFolds)
     # Test-side attributes start out unset.
     self._testData = self._testLabels = None
Ejemplo n.º 2
0
    def test_integer_split_2D_rows(self):
        """Split a two-row array along axis 0, by section count and by indices."""
        row = np.arange(10)
        a = np.array([row, row])

        # Three sections from two rows: the trailing piece is expected empty.
        by_count = array_split(a, 3, axis=0)
        expected = [np.array([row]), np.array([row]), np.zeros((0, 10))]
        compare_results(by_count, expected)
        assert_(a.dtype.type is by_count[-1].dtype.type)

        # Explicit indices [0, 1, 2]: the empty piece is expected first.
        by_index = array_split(a, [0, 1, 2], axis=0)
        expected = [np.zeros((0, 10)), np.array([row]), np.array([row])]
        compare_results(by_index, expected)
        assert_(a.dtype.type is by_index[-1].dtype.type)
Ejemplo n.º 3
0
def crossValidation(numFolds, data, labels, algorithm, accuracyList, learningCurveList, numLearningCurveIterations, learningCurveIndexMod):
    dataFolds = array_split(data, numFolds)
    labelFolds = array_split(labels, numFolds)
    for testIndex in range(numFolds):
        print testIndex,
        testData = dataFolds.pop(testIndex)
        testLabels = labelFolds.pop(testIndex)
        trainData = vstack(dataFolds)
        trainLabels = hstack(labelFolds)
        accuracyList.append(algorithm(trainData, trainLabels, testData, testLabels))
        learningCurve(algorithm, learningCurveList, trainData, trainLabels, testData, testLabels, numLearningCurveIterations, learningCurveIndexMod)
        dataFolds.insert(testIndex, testData)
        labelFolds.insert(testIndex, testLabels)
    print ''
Ejemplo n.º 4
0
    def test_integer_split_2D_rows(self):
        """Row-wise split of a (2, 10) array: integer sections, then manual indices."""
        a = np.array([np.arange(10), np.arange(10)])

        def single_row():
            # A fresh (1, 10) array holding 0..9.
            return np.array([np.arange(10)])

        def no_rows():
            # A fresh (0, 10) array of zeros.
            return np.zeros((0, 10))

        res = array_split(a, 3, axis=0)
        compare_results(res, [single_row(), single_row(), no_rows()])
        assert_(a.dtype.type is res[-1].dtype.type)

        # Same thing for manual splits:
        res = array_split(a, [0, 1, 2], axis=0)
        compare_results(res, [no_rows(), single_row(), single_row()])
        assert_(a.dtype.type is res[-1].dtype.type)
Ejemplo n.º 5
0
 def test_integer_split_2D_rows_greater_max_int32(self):
     a = np.broadcast_to([0], (1 << 32, 2))
     res = array_split(a, 4)
     chunk = np.broadcast_to([0], (1 << 30, 2))
     tgt = [chunk] * 4
     for i in range(len(tgt)):
         assert_equal(res[i].shape, tgt[i].shape)
Ejemplo n.º 6
0
    def test_two_dimensional_two_integer_remainder_split(self):
        """Split a 4x4 arange matrix with sections [3, 3] into nine pieces.

        NOTE(review): this array_split takes four positional arguments; the
        meaning of the trailing `0, True` is not visible here -- confirm
        against its definition.
        """
        matrix = np.arange(16).reshape((4, 4))
        desired = [
            [[0, 1], [4, 5]], [[2], [6]], [[3], [7]],
            [[8, 9]], [[10]], [[11]],
            [[12, 13]], [[14]], [[15]],
        ]

        res = array_split(matrix, [3, 3], 0, True)
        compare_results(res, desired)
Ejemplo n.º 7
0
 def test_integer_split_2D_cols(self):
     """Split a (2, 10) array into 3 sections along the last axis."""
     a = np.array([np.arange(10), np.arange(10)])
     res = array_split(a, 3, axis=-1)
     # Column ranges of the expected pieces: widths 4, 3 and 3.
     column_ranges = [(0, 4), (4, 7), (7, 10)]
     desired = [np.array([np.arange(lo, hi), np.arange(lo, hi)])
                for lo, hi in column_ranges]
     compare_results(res, desired)
Ejemplo n.º 8
0
 def test_index_split_simple(self):
     """Split arange(10) at indices [1, 5, 7] and expect four consecutive runs."""
     a = np.arange(10)
     indices = [1, 5, 7]
     res = array_split(a, indices, axis=-1)
     # Expected pieces run between consecutive edges 0, 1, 5, 7, 10.
     edges = [0] + indices + [10]
     desired = [np.arange(lo, hi) for lo, hi in zip(edges[:-1], edges[1:])]
     compare_results(res, desired)
Ejemplo n.º 9
0
 def test_index_split_high_bound(self):
     """Split arange(10) at indices that start at 0 and run past the end."""
     a = np.arange(10)
     indices = [0, 5, 7, 10, 12]
     res = array_split(a, indices, axis=-1)
     # Non-empty pieces in the middle, empty pieces at both ends.
     filled = [np.arange(lo, hi) for lo, hi in ((0, 5), (5, 7), (7, 10))]
     desired = [np.array([])] + filled + [np.array([]), np.array([])]
     compare_results(res, desired)
Ejemplo n.º 10
0
 def test_integer_split_2D_rows_greater_max_int32(self):
     a = np.broadcast_to([0], (1 << 32, 2))
     res = array_split(a, 4)
     chunk = np.broadcast_to([0], (1 << 30, 2))
     tgt = [chunk] * 4
     for i in range(len(tgt)):
         assert_equal(res[i].shape, tgt[i].shape)
Ejemplo n.º 11
0
 def test_index_split_simple(self):
     """Index-based split of arange(10) at [1, 5, 7]."""
     a = np.arange(10)
     indices = [1, 5, 7]
     res = array_split(a, indices, axis=-1)
     # (start, stop) of each expected piece.
     spans = ((0, 1), (1, 5), (5, 7), (7, 10))
     desired = []
     for start, stop in spans:
         desired.append(np.arange(start, stop))
     compare_results(res, desired)
Ejemplo n.º 12
0
 def test_integer_split_2D_cols(self):
     """Three-way split of a (2, 10) array along axis -1."""
     a = np.array([np.arange(10), np.arange(10)])
     res = array_split(a, 3, axis=-1)

     def two_rows(start, stop):
         # Two identical rows covering start..stop-1.
         return np.array([np.arange(start, stop), np.arange(start, stop)])

     desired = [two_rows(0, 4), two_rows(4, 7), two_rows(7, 10)]
     compare_results(res, desired)
Ejemplo n.º 13
0
 def test_index_split_high_bound(self):
     """Index-based split of arange(10) at [0, 5, 7, 10, 12]."""
     a = np.arange(10)
     indices = [0, 5, 7, 10, 12]
     res = array_split(a, indices, axis=-1)
     # One leading and two trailing pieces are expected to be empty.
     desired = [np.array([])]
     desired.extend([np.arange(0, 5), np.arange(5, 7), np.arange(7, 10)])
     desired.extend([np.array([]), np.array([])])
     compare_results(res, desired)
Ejemplo n.º 14
0
 def test_integer_split_2D_default(self):
     """Split a (2, 10) array into 3 without passing an axis.

     The expected pieces are row-wise, so this fails if the default
     axis ever changes.
     """
     row = np.arange(10)
     a = np.array([row, row])
     res = array_split(a, 3)
     tgt = [np.array([row]), np.array([row]), np.zeros((0, 10))]
     compare_results(res, tgt)
     # The trailing piece must keep the input's dtype.
     assert_(a.dtype.type is res[-1].dtype.type)
Ejemplo n.º 15
0
 def test_integer_split_2D_default(self):
     """Three-way split of a (2, 10) array using the default axis.

     Expectations are written for a row-wise split; a different default
     axis would make this test fail.
     """
     a = np.array([np.arange(10), np.arange(10)])
     res = array_split(a, 3)
     one_row_pieces = [np.array([np.arange(10)]) for _ in range(2)]
     tgt = one_row_pieces + [np.zeros((0, 10))]
     compare_results(res, tgt)
     last_piece = res[-1]
     assert_(a.dtype.type is last_piece.dtype.type)
Ejemplo n.º 16
0
 datasets = getDataSets(dataDir, ['ionosphere', 'iris', 'wine'])
 #datasets = getDataSets(dataDir, ['by_hand'])
 #datasets = getDataSets(dataDir, ['ionosphere'])
 
 
 
 tp = ThreadPool(4)
     
 for name, (data, labels) in datasets.iteritems():
     datasetOutDir = getDatasetOutDir(outDir, name)
     print "Computing on", name
 
     #do a split into overall test and overall train
     overallTestTrainRatio = 1.0 / 3.0
     overallTestTrainSplitIndex = array([int(overallTestTrainRatio * len(data))])
     overallTestData, overallTrainData = array_split(data, array([overallTestTrainSplitIndex]))
     overallTestLabels, overallTrainLabels = array_split(labels, overallTestTrainSplitIndex)        
             
     #test a whole bunch of generic kernels on the overall split data
     fileIdentifier = 'overall'
     #print "train:", overallTrainData
     #print "test:", overallTestData
            
     compareAlgorithmsOnSameKernels(tp, overallTrainData, overallTrainLabels, overallTestData, overallTestLabels, name, fileIdentifier)
     
     #now, try to find an optimal kernel for either svm or kfd
     #do it for each kernel type
     numOptimizationFolds = 3
     fileIdentifier = 'optimized'
     compareAlgorithmsOnOptimizedKernel(tp, overallTrainData, overallTrainLabels, overallTestData, overallTestLabels, numOptimizationFolds, datasetOutDir, name, fileIdentifier)
     
Ejemplo n.º 17
0
         #remove ignored columns and class column, in reverse sorted order
         #do it in reverse sorted order so the indexes stay correct
         for removeCol in sorted(ignoreColList + [classCol], reverse=True):
             if removeCol == classCol:
                 label = features.pop(removeCol)
             else:
                 features.pop(removeCol)
         datasetDict[label].append(features)
     origDataFile.close()
     
 
     #make it into a 2 class problem by lumping classes together
     #don't have rhyme or reason - don't want to favor one class or another, or make our data artificially clean
     numOrigClasses = len(datasetDict.keys())
     #split into 2, possibly unequal, groups of class labels
     newClassMap = array_split(datasetDict.keys(), 2)
     
     #reorganize the data
     dataWithNewLabelMap = defaultdict(list)
     
     for newClassLabel, oldClassLabelList in enumerate(newClassMap):
         for oldClassLabel in oldClassLabelList:
             for featureRow in datasetDict[oldClassLabel]:
                 dataWithNewLabelMap[newClassLabel].append(featureRow)
     
     #make the two datasets the same size
     dataWithNewLabelTupleList = []
     minClassSize = min([len(x) for x in dataWithNewLabelMap.values()])
     for newClassLabel, featureRowList in dataWithNewLabelMap.iteritems():
         for featureRow in featureRowList[:minClassSize]:
             dataWithNewLabelTupleList.append((featureRow, newClassLabel))
Ejemplo n.º 18
0
    def test_integer_split(self):
        """Split arange(10) into 1 through 11 sections and check every piece."""
        a = np.arange(10)

        # (section count, [(start, stop) of each expected piece]) in call order.
        cases = [
            (1, [(0, 10)]),
            (2, [(0, 5), (5, 10)]),
            (3, [(0, 4), (4, 7), (7, 10)]),
            (4, [(0, 3), (3, 6), (6, 8), (8, 10)]),
            (5, [(0, 2), (2, 4), (4, 6), (6, 8), (8, 10)]),
            (6, [(0, 2), (2, 4), (4, 6), (6, 8), (8, 9), (9, 10)]),
            (7, [(0, 2), (2, 4), (4, 6), (6, 7), (7, 8), (8, 9), (9, 10)]),
            (8, [(0, 2), (2, 4), (4, 5), (5, 6), (6, 7), (7, 8), (8, 9),
                 (9, 10)]),
            (9, [(0, 2), (2, 3), (3, 4), (4, 5), (5, 6), (6, 7), (7, 8),
                 (8, 9), (9, 10)]),
            (10, [(0, 1), (1, 2), (2, 3), (3, 4), (4, 5), (5, 6), (6, 7),
                  (7, 8), (8, 9), (9, 10)]),
        ]
        for sections, bounds in cases:
            res = array_split(a, sections)
            desired = [np.arange(lo, hi) for lo, hi in bounds]
            compare_results(res, desired)

        # More sections than elements: ten single-element pieces followed
        # by one empty array.
        res = array_split(a, 11)
        desired = [np.arange(i, i + 1) for i in range(10)] + [np.array([])]
        compare_results(res, desired)
Ejemplo n.º 19
0
    def test_integer_split(self):
        """Check array_split(arange(10), n) for every n from 1 to 11."""
        a = np.arange(10)

        # Cut points of the expected pieces; a list with k + 1 entries
        # describes the result of asking for k sections.
        cut_points = [
            [0, 10],
            [0, 5, 10],
            [0, 4, 7, 10],
            [0, 3, 6, 8, 10],
            [0, 2, 4, 6, 8, 10],
            [0, 2, 4, 6, 8, 9, 10],
            [0, 2, 4, 6, 7, 8, 9, 10],
            [0, 2, 4, 5, 6, 7, 8, 9, 10],
            [0, 2, 3, 4, 5, 6, 7, 8, 9, 10],
            [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
        ]
        for cuts in cut_points:
            sections = len(cuts) - 1
            res = array_split(a, sections)
            desired = [np.arange(start, stop)
                       for start, stop in zip(cuts[:-1], cuts[1:])]
            compare_results(res, desired)

        # Eleven sections from ten elements: the extra, final piece is an
        # empty array.
        res = array_split(a, 11)
        desired = []
        for start in range(10):
            desired.append(np.arange(start, start + 1))
        desired.append(np.array([]))
        compare_results(res, desired)