Example #1
    def test_binary_decorator(self):
        ds = dataset_wizard(samples=[[0, 0], [0, 1], [1, 100], [-1, 0], [-1, -3], [0, -10]],
                            targets=['sp', 'sp', 'sp', 'dn', 'sn', 'dp'])
        testdata = [[0, 0], [10, 10], [-10, -1], [0.1, -0.1], [-0.2, 0.2]]
        # labels [s]ame/[d]ifferent (sign), and [p]ositive/[n]egative first element

        clf = SameSignClassifier()
        # let's create a classifier to discriminate only between
        # same/different, which is the primary task of SameSignClassifier
        bclf1 = BinaryClassifier(clf=clf,
                                 poslabels=['sp', 'sn'],
                                 neglabels=['dp', 'dn'])

        orig_labels = ds.targets[:]
        bclf1.train(ds)

        self.assertTrue(bclf1.predict(testdata) ==
                        [['sp', 'sn'], ['sp', 'sn'], ['sp', 'sn'],
                         ['dp', 'dn'], ['dp', 'dn']])

        self.assertTrue((ds.targets == orig_labels).all(),
                        msg="BinaryClassifier should not alter labels")
Example #2
def test_multiclass_without_combiner_sens(clf):
    ds = datasets['uni3small'].copy()
    # clone, since later we will compare sensitivities and need the
    # classifiers trained independently
    mclf = MulticlassClassifier(clf.clone(), combiner=None)

    # We have lots of sandwiching
    #    Multiclass.clfs -> [BinaryClassifier] -> clf
    # where BinaryClassifier's estimates are binarized.
    # Let's also check that we are getting sensitivities correctly.
    # With the addition of MulticlassClassifierSensitivityAnalyzer we managed
    # to break it and no tests picked it up, so here we verify that
    # sensitivities are computed and labeled correctly

    # verify that all kinds of results on two classes are identical to the
    # ones obtained by running without MulticlassClassifier
    # ds = ds[:, 0]  # uncomment to ease/speed up troubleshooting
    ds2 = ds.select(sadict=dict(targets=['L1', 'L2']))
    # we will train on only one chunk so we can get "realistic" (not just
    # overfit) predictions
    ds2_train = ds2.select(sadict=dict(chunks=ds.UC[:1]))

    # also consider a simpler BinaryClassifier to more easily pinpoint the
    # problem, and be explicit about which label(s) are positive and which
    # are negative
    bclf = BinaryClassifier(clf.clone(), poslabels=['L2'], neglabels=['L1'])

    predictions = []
    clfs = [clf, bclf, mclf]
    for c in clfs:
        c.ca.enable('all')
        c.train(ds2_train)
        predictions.append(c.predict(ds2))
    p1, bp1, mp1 = predictions

    assert_equal(p1, bp1)

    # ATM mclf.predict returns a dataset (with fa.targets presumably listing
    # the pairs of targets used) while p1 is just a list.
    def assert_list_equal_to_ds(l, ds):
        assert_equal(ds.shape, (len(l), 1))
        assert_array_equal(l, ds.samples[:, 0])

    assert_list_equal_to_ds(p1, mp1)

    # but if we look at sensitivities
    s1, bs1, ms1 = [c.get_sensitivity_analyzer()(ds2) for c in clfs]
    # Do ground checks for s1
    nonbogus_target = ds2.fa.nonbogus_targets[0]

    # if there was a feature with signal, we know what to expect!
    # (such assignments are randomized, so we might not have signal in the
    # single feature we chose to test with)
    if nonbogus_target and nonbogus_target in ds2.UT:
        # namely that, in the pair of labels, it would be the 2nd one if the
        # sensitivity is positive, or the 1st one if it is negative;
        # with the classifier we try (SVM) targets should be pairs of labels
        assert isinstance(s1.T[0], tuple)
        assert_equal(len(s1), 1)
        assert_equal(s1.T[0][int(s1.samples[0, 0] > 0)], nonbogus_target)
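        # e.g. if s1.T[0] == ('L1', 'L2'): a positive sensitivity selects
        # index 1 ('L2'), a negative one selects index 0 ('L1')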

    # And in either case we can check that we are getting identical results!
    # lrn_index is unique to ms1, and passing "ignore_sa" to
    # assert_datasets_equal still requires the keys to be present in both
    # datasets, so it does not help here
    ms1.sa.pop('lrn_index')

    assert_datasets_equal(s1, bs1)
    # and here we get a "problem"!
    assert_datasets_equal(s1, ms1)
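
The "sandwiching" noted in the comments is one-vs-one decomposition: for each
unordered pair of targets, MulticlassClassifier trains a clone of the base
classifier wrapped in a BinaryClassifier, and with combiner=None the per-pair
predictions are returned as-is (hence mp1 being a dataset rather than a plain
list). Below is a rough sketch of just the pairing logic, assuming the usual
one-vs-one scheme and BinaryClassifier from mvpa2.clfs.meta; the helper
make_pairwise_clfs is illustrative, not PyMVPA's actual internals:

from itertools import combinations
from mvpa2.clfs.meta import BinaryClassifier

# Illustrative one-vs-one decomposition; PyMVPA's internals differ in detail
def make_pairwise_clfs(base_clf, targets):
    clfs = {}
    for neg, pos in combinations(sorted(set(targets)), 2):
        # each wrapped classifier sees only samples labeled neg or pos,
        # binarized so that pos maps to the positive class and neg to the
        # negative one
        clfs[(neg, pos)] = BinaryClassifier(base_clf.clone(),
                                            poslabels=[pos], neglabels=[neg])
    return clfs

# e.g. targets ['L0', 'L1', 'L2'] yield classifiers for the pairs
# ('L0', 'L1'), ('L0', 'L2') and ('L1', 'L2')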