Example #1
    def test_simple_n_minus_one_cv(self):
        data = get_mv_pattern(3)
        data.init_origids('samples')

        self.failUnless(data.nsamples == 120)
        self.failUnless(data.nfeatures == 2)
        self.failUnless(
            (data.sa.targets == \
                [0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0] * 6).all())
        self.failUnless(
            (data.sa.chunks == \
                [k for k in range(1, 7) for i in range(20)]).all())
        assert_equal(len(np.unique(data.sa.origids)), data.nsamples)

        transerror = TransferError(sample_clf_nl)
        cv = CrossValidatedTransferError(
                transerror,
                NFoldSplitter(cvtype=1),
                enable_ca=['confusion', 'training_confusion',
                               'samples_error'])

        results = cv(data)
        self.failUnless((results.samples < 0.2).all()
                        and (results.samples >= 0.0).all())

        # TODO: test accessibility of {training_,}confusion{,s} of
        # CrossValidatedTransferError

        self.failUnless(isinstance(cv.ca.samples_error, dict))
        self.failUnless(len(cv.ca.samples_error) == data.nsamples)
        # one value for each origid
        assert_array_equal(sorted(cv.ca.samples_error.keys()),
                           sorted(data.sa.origids))
        for k, v in cv.ca.samples_error.items():
            self.failUnless(len(v) == 1)
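Since the n-1 (leave-one-chunk-out) scheme tests every sample exactly once, each per-origid list in samples_error holds a single error value. A minimal follow-up sketch, reusing cv and data from the example above and assuming the stored values are plain numbers as the assertions imply:

    import numpy as np

    # Sketch only: collapse the per-origid error lists into one value per
    # sample and report the overall mean error.
    per_sample_error = dict((origid, np.mean(errs))
                            for origid, errs in cv.ca.samples_error.items())
    print("mean error over all samples: %.3f"
          % np.mean(list(per_sample_error.values())))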
Example #2
 def test_harvesting(self):
     # get a dataset with a very high SNR
     data = get_mv_pattern(10)
     # do crossval with default errorfx and 'mean' combiner
     transerror = TransferError(clfswh['linear'][0])
     cv = CrossValidatedTransferError(
             transerror,
             NFoldSplitter(cvtype=1),
             harvest_attribs=['transerror.clf.ca.training_time'])
     result = cv(data)
     ok_('transerror.clf.ca.training_time' in cv.ca.harvested)
     assert_equal(len(cv.ca.harvested['transerror.clf.ca.training_time']),
                  len(data.UC))
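The harvested attribute is only length-checked above. A small usage sketch, assuming each harvested entry is the classifier's per-fold training time in seconds (one entry per unique chunk, i.e. len(data.UC) folds):

    import numpy as np

    # Sketch only, reusing cv and data from the example above.
    times = cv.ca.harvested['transerror.clf.ca.training_time']
    print("mean training time per fold: %.4f s" % np.mean(times))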
Example #3
    def test_simple_n_minus_one_cv(self):
        data = get_mv_pattern(3)
        data.init_origids('samples')

        self.failUnless(data.nsamples == 120)
        self.failUnless(data.nfeatures == 2)
        self.failUnless(
            (data.sa.targets == \
                [0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0] * 6).all())
        self.failUnless(
            (data.sa.chunks == \
                [k for k in range(1, 7) for i in range(20)]).all())
        assert_equal(len(np.unique(data.sa.origids)), data.nsamples)

        cv = CrossValidation(sample_clf_nl, NFoldPartitioner(),
                enable_ca=['stats', 'training_stats'])
#                               'samples_error'])

        results = cv(data)
        self.failUnless((results.samples < 0.2).all()
                        and (results.samples >= 0.0).all())
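This rewrite enables the 'stats' and 'training_stats' conditional attributes but never inspects them. As a hedged sketch (the exact attribute layout may differ between PyMVPA releases), the pooled confusion statistics could be examined after the call:

    # Sketch only: print the confusion matrix collected across folds.
    print(cv.ca.stats)
    # Per-fold training statistics, if that attribute is populated:
    # print(cv.ca.training_stats)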
Example #4
    def test_noise_classification(self):
        # get a dataset with a very high SNR
        data = get_mv_pattern(10)

        # do crossval with default errorfx and 'mean' combiner
        transerror = TransferError(sample_clf_nl)
        cv = CrossValidatedTransferError(transerror, NFoldSplitter(cvtype=1))

        # must return a scalar value
        result = cv(data)
        # must be perfect
        self.failUnless((result.samples < 0.05).all())

        # do crossval with permuted regressors
        cv = CrossValidatedTransferError(
            transerror,
            NFoldSplitter(cvtype=1, permute=True, nrunspersplit=10))
        results = cv(data)

        # must be at chance level
        pmean = np.array(results).mean()
        self.failUnless(0.42 < pmean < 0.58)
Example #5
    def test_noise_classification(self):
        # get a dataset with a very high SNR
        data = get_mv_pattern(10)

        # do crossval with default errorfx and 'mean' combiner
        cv = CrossValidation(sample_clf_nl, NFoldPartitioner())

        # must return a scalar value
        result = cv(data)
        # must be perfect
        self.failUnless((result.samples < 0.05).all())

        # do crossval with permuted regressors
        cv = CrossValidation(
            sample_clf_nl,
            ChainNode([NFoldPartitioner(),
                       AttributePermutator('targets', count=10)],
                      space='partitions'))
        results = cv(data)

        # must be at chance level
        pmean = np.array(results).mean()
        self.failUnless(0.42 < pmean < 0.58)
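For these balanced binary targets the expected error under permuted labels is 0.5, so the bound above is simply 0.5 ± 0.08. An equivalent restatement, illustration only, using the pmean computed above:

    # Same check as the assertion above, phrased as distance from chance.
    chance, tolerance = 0.5, 0.08
    assert abs(pmean - chance) < tolerance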
Example #6
    def test_noise_classification(self):
        # get a dataset with a very high SNR
        data = get_mv_pattern(10)

        # do crossval with default errorfx and 'mean' combiner
        transerror = TransferError(sample_clf_nl)
        cv = CrossValidatedTransferError(transerror, NFoldSplitter(cvtype=1))

        # must return a scalar value
        result = cv(data)
        # must be perfect
        self.failUnless((result.samples < 0.05).all())

        # do crossval with permuted regressors
        cv = CrossValidatedTransferError(
            transerror,
            NFoldSplitter(cvtype=1, permute_attr='targets', nrunspersplit=10))
        results = cv(data)

        # must be at chance level
        pmean = np.array(results).mean()
        self.failUnless(0.42 < pmean < 0.58)