Ejemplo n.º 1
0
    def test_split_classifier(self):
        ds = self.data_bin_1
        clf = SplitClassifier(
            clf=SameSignClassifier(),
            enable_ca=['stats', 'training_stats', 'feature_ids'])
        clf.train(ds)  # train the beast
        error = clf.ca.stats.error
        tr_error = clf.ca.training_stats.error

        clf2 = clf.clone()
        cv = CrossValidation(clf2,
                             NFoldPartitioner(),
                             postproc=mean_sample(),
                             enable_ca=['stats', 'training_stats'])
        cverror = cv(ds)
        cverror = cverror.samples.squeeze()
        tr_cverror = cv.ca.training_stats.error

        self.assertEqual(
            error,
            cverror,
            msg="We should get the same error using split classifier as"
            " using CrossValidation. Got %s and %s" % (error, cverror))

        self.assertEqual(
            tr_error,
            tr_cverror,
            msg="We should get the same training error using split classifier as"
            " using CrossValidation. Got %s and %s" % (tr_error, tr_cverror))

        self.assertEqual(clf.ca.stats.percent_correct,
                         100,
                         msg="Dummy clf should train perfectly")
        # CV and SplitClassifier should get the same confusion matrices
        assert_array_equal(clf.ca.stats.matrix, cv.ca.stats.matrix)

        self.assertEqual(len(clf.ca.stats.sets),
                         len(ds.UC),
                         msg="Should have 1 confusion per each split")
        self.assertEqual(
            len(clf.clfs),
            len(ds.UC),
            msg="Should have number of classifiers equal # of epochs")
        self.assertEqual(clf.predict(ds.samples),
                         list(ds.targets),
                         msg="Should classify correctly")

        # feature_ids must be list of lists, and since it is not
        # feature-selecting classifier used - we expect all features
        # to be utilized
        #  NOT ANYMORE -- for BoostedClassifier we have now union of all
        #  used features across slave classifiers. That makes
        #  semantics clear. If you need to get deeper -- use upcoming
        #  harvesting facility ;-)
        # self.assertEqual(len(clf.feature_ids), len(ds.uniquechunks))
        # self.assertTrue(np.array([len(ids)==ds.nfeatures
        #                         for ids in clf.feature_ids]).all())

        # Just check if we get it at all ;-)
        summary = clf.summary()
Ejemplo n.º 2
0
    def test_split_classifier(self):
        ds = self.data_bin_1
        clf = SplitClassifier(clf=SameSignClassifier(),
                enable_ca=['stats', 'training_stats',
                               'feature_ids'])
        clf.train(ds)                   # train the beast
        error = clf.ca.stats.error
        tr_error = clf.ca.training_stats.error

        clf2 = clf.clone()
        cv = CrossValidation(clf2, NFoldPartitioner(), postproc=mean_sample(),
            enable_ca=['stats', 'training_stats'])
        cverror = cv(ds)
        cverror = cverror.samples.squeeze()
        tr_cverror = cv.ca.training_stats.error

        self.assertEqual(error, cverror,
                msg="We should get the same error using split classifier as"
                    " using CrossValidation. Got %s and %s"
                    % (error, cverror))

        self.assertEqual(tr_error, tr_cverror,
                msg="We should get the same training error using split classifier as"
                    " using CrossValidation. Got %s and %s"
                    % (tr_error, tr_cverror))

        self.assertEqual(clf.ca.stats.percent_correct,
                             100,
                             msg="Dummy clf should train perfectly")
        # CV and SplitClassifier should get the same confusion matrices
        assert_array_equal(clf.ca.stats.matrix,
                           cv.ca.stats.matrix)

        self.assertEqual(len(clf.ca.stats.sets),
                             len(ds.UC),
                             msg="Should have 1 confusion per each split")
        self.assertEqual(len(clf.clfs), len(ds.UC),
                             msg="Should have number of classifiers equal # of epochs")
        self.assertEqual(clf.predict(ds.samples), list(ds.targets),
                             msg="Should classify correctly")

        # feature_ids must be list of lists, and since it is not
        # feature-selecting classifier used - we expect all features
        # to be utilized
        #  NOT ANYMORE -- for BoostedClassifier we have now union of all
        #  used features across slave classifiers. That makes
        #  semantics clear. If you need to get deeper -- use upcoming
        #  harvesting facility ;-)
        # self.assertEqual(len(clf.feature_ids), len(ds.uniquechunks))
        # self.assertTrue(np.array([len(ids)==ds.nfeatures
        #                         for ids in clf.feature_ids]).all())

        # Just check if we get it at all ;-)
        summary = clf.summary()