Example #1
0
 def test_unpartitioned_cv(self):
     """Cross-validating a dataset with a single chunk must raise ValueError."""
     ds = get_mv_pattern(10)
     # Put every sample into the same chunk, so there is nothing left to
     # split into a training and a testing portion.
     ds.sa.chunks[:] = 1
     crossval = CrossValidation(sample_clf_nl, NFoldPartitioner())
     # Calling the cross-validation on such a dataset is expected to fail.
     assert_raises(ValueError, crossval, ds)
Example #2
0
    def test_noise_classification(self):
        """Compare cross-validation errors for intact and permuted targets.

        With the targets left intact every reported error has to stay
        below 0.05.  After chaining an ``AttributePermutator`` on
        ``'targets'`` behind the partitioner, the reported errors must not
        all be identical and their mean must lie strictly between 0.42
        and 0.58.
        """
        # dataset with a very high SNR
        dataset = get_mv_pattern(10)

        # plain n-fold cross-validation on the intact dataset
        plain_cv = CrossValidation(sample_clf_nl, NFoldPartitioner())
        clean_errors = plain_cv(dataset)
        # classification has to be (nearly) perfect
        self.assertTrue((clean_errors.samples < 0.05).all())

        # cross-validation with permuted targets: the partitioner is
        # chained with a permutator acting on the 'targets' attribute
        permuting_generator = ChainNode(
            [NFoldPartitioner(), AttributePermutator('targets', count=10)],
            space='partitions')
        permuted_cv = CrossValidation(sample_clf_nl, permuting_generator)
        permuted_errors = permuted_cv(dataset)

        # the permuted runs must not all yield the same error
        n_distinct = len(np.unique(permuted_errors.samples))
        self.assertTrue(n_distinct > 1)

        # on average the error has to sit at chance level
        mean_error = np.array(permuted_errors).mean()
        self.assertTrue(0.42 < mean_error < 0.58)
Example #3
0
 def test_unpartitioned_cv(self):
     """Expect ValueError when the dataset consists of one chunk only."""
     dataset = get_mv_pattern(10)
     # Assign chunk label 1 to all samples: a single big chunk.
     dataset.sa.chunks[:] = 1
     partitioner = NFoldPartitioner()
     xval = CrossValidation(sample_clf_nl, partitioner)
     # Must fail, because the data cannot be divided into training and
     # testing parts.
     assert_raises(ValueError, xval, dataset)
Example #4
0
    def test_noise_classification(self):
        """Cross-validate once on intact and once on permuted targets.

        Intact targets: all errors must be below 0.05.
        Permuted targets: errors must differ from one another and
        average out to a value strictly inside (0.42, 0.58).
        """
        # high-SNR dataset
        ds = get_mv_pattern(10)

        # --- intact targets -------------------------------------------
        cv_intact = CrossValidation(sample_clf_nl, NFoldPartitioner())
        errors_intact = cv_intact(ds)
        all_below_threshold = (errors_intact.samples < 0.05).all()
        # must be perfect
        self.assertTrue(all_below_threshold)

        # --- permuted targets -----------------------------------------
        chain_members = [NFoldPartitioner(),
                         AttributePermutator('targets', count=10)]
        cv_permuted = CrossValidation(
            sample_clf_nl,
            ChainNode(chain_members, space='partitions'))
        errors_permuted = cv_permuted(ds)

        # results must not be the same
        distinct_errors = np.unique(errors_permuted.samples)
        self.assertTrue(len(distinct_errors) > 1)

        # must be at chance level
        chance_estimate = np.array(errors_permuted).mean()
        self.assertTrue(0.42 < chance_estimate < 0.58)
Example #5
0
    def test_simple_n_minus_one_cv(self):
        """Run n-fold cross-validation on the ``get_mv_pattern(3)`` dataset.

        The dataset layout is verified first (120 samples, 2 features,
        chunks labelled 1..6 with 20 samples each, unique origids), then
        every cross-validation error is required to lie in [0.0, 0.2).

        ``enable_ca`` requests "stats" and "training_stats"; this test does
        not inspect them.
        """
        data = get_mv_pattern(3)
        data.init_origids("samples")

        # assertEqual reports both values on failure, unlike assertTrue(a == b)
        self.assertEqual(data.nsamples, 120)
        self.assertEqual(data.nfeatures, 2)

        # within each chunk the targets are 5 x 0, 10 x 1, 5 x 0
        targets_per_chunk = [0] * 5 + [1] * 10 + [0] * 5
        self.assertTrue((data.sa.targets == targets_per_chunk * 6).all())

        # chunk labels 1..6, each repeated for 20 consecutive samples
        expected_chunks = [k for k in range(1, 7) for i in range(20)]
        self.assertTrue((data.sa.chunks == expected_chunks).all())

        assert_equal(len(np.unique(data.sa.origids)), data.nsamples)

        cv = CrossValidation(
            sample_clf_nl,
            NFoldPartitioner(),
            enable_ca=["stats", "training_stats"],
        )

        results = cv(data)
        # bounds are checked separately so a failure shows which one broke
        self.assertTrue((results.samples < 0.2).all())
        self.assertTrue((results.samples >= 0.0).all())
Example #6
0
    def test_simple_n_minus_one_cv(self):
        """Check dataset layout, then n-fold cross-validation error bounds.

        Expects ``get_mv_pattern(3)`` to provide 120 samples with 2
        features, six chunks (labels 1..6) of 20 samples each and unique
        origids.  All errors returned by the cross-validation must be
        >= 0.0 and < 0.2.

        'stats' and 'training_stats' are passed via ``enable_ca`` but are
        not examined by this test.
        """
        data = get_mv_pattern(3)
        data.init_origids('samples')

        # assertEqual shows the offending value when the check fails
        self.assertEqual(data.nsamples, 120)
        self.assertEqual(data.nfeatures, 2)

        # per-chunk target pattern, repeated for each of the six chunks
        chunk_targets = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
                         1, 1, 1, 1, 1, 0, 0, 0, 0, 0]
        self.assertTrue((data.sa.targets == chunk_targets * 6).all())
        self.assertTrue(
            (data.sa.chunks
             == [k for k in range(1, 7) for i in range(20)]).all())
        assert_equal(len(np.unique(data.sa.origids)), data.nsamples)

        cv = CrossValidation(sample_clf_nl,
                             NFoldPartitioner(),
                             enable_ca=['stats', 'training_stats'])

        results = cv(data)
        # one assertion per bound, so a failure names the violated limit
        self.assertTrue((results.samples < 0.2).all())
        self.assertTrue((results.samples >= 0.0).all())