Esempio n. 1
0
 def test_select_1(self):
     # With k=1 the single retained attribute must be the one that the
     # Gini scorer ranks highest on the titanic data.
     gini = Gini()
     selector = SelectBestFeatures(method=gini, k=1)
     reduced = selector(self.titanic)

     # Pair each attribute with its score; max() on the pairs orders by
     # score first, so the second item of the winner is the best attribute.
     scored = [(gini(self.titanic, attr), attr)
               for attr in self.titanic.domain.attributes]
     _, best = max(scored)
     self.assertEqual(reduced.domain.attributes[0], best)
Esempio n. 2
0
 def test_classless(self):
     # Build a copy of zoo whose domain holds only the attributes (no class
     # variable), then check that each scorer rejects it with ValueError.
     attributes_only = Domain(self.zoo.domain.attributes)
     classless = Table(attributes_only, self.zoo[:, 0:-1])
     for scorer in (Gini(), InfoGain(), GainRatio()):
         self.assertRaises(ValueError, scorer, classless, 0)
Esempio n. 3
0
    def test_select_2(self):
        # Exercises SelectBestFeatures with float and zero values of k.
        gini = Gini()
        titanic = self.titanic

        # k=1.0 (a float): the first retained attribute must be the one
        # with the highest Gini score.
        reduced = SelectBestFeatures(method=gini, k=1.0)(titanic)
        scored = [(gini(titanic, attr), attr)
                  for attr in titanic.domain.attributes]
        _, best = max(scored)
        self.assertEqual(reduced.domain.attributes[0], best)

        # k=0 means neither a count nor a threshold: all attributes stay.
        reduced = SelectBestFeatures(method=gini, k=0)(titanic)
        self.assertEqual(len(reduced.domain.attributes),
                         len(titanic.domain.attributes))

        # Fractional k on titanic (three attributes, per the original
        # comments): 31%, 35% and 1% must each keep exactly one attribute.
        for fraction in (0.31, 0.35, 0.01):
            reduced = SelectBestFeatures(method=gini, k=fraction)(titanic)
            self.assertEqual(len(reduced.domain.attributes), 1)

        # The number of selected attributes is relative to the attributes of
        # the data passed in: k=1.0 on heart_disease keeps 13 attributes.
        reduced = SelectBestFeatures(method=gini, k=1.0)(self.heart_disease)
        self.assertEqual(len(reduced.domain.attributes), 13)
Esempio n. 4
0
 def test_select_1(self):
     # Load titanic, keep only the top-1 feature by Gini, and verify it is
     # the attribute with the highest Gini score.
     titanic = Table('titanic')
     gini = Gini()
     selector = SelectBestFeatures(method=gini, k=1)
     reduced = selector(titanic)

     # max() over (score, attribute) pairs orders by score first.
     scored = [(gini(titanic, attr), attr)
               for attr in titanic.domain.attributes]
     _, best = max(scored)
     self.assertEqual(reduced.domain.attributes[0], best)
Esempio n. 5
0
    def test_wrong_class_type(self):
        # These scorers must raise ValueError when applied to the housing
        # data.
        for scorer in (Gini(), InfoGain(), GainRatio()):
            self.assertRaises(ValueError, scorer, self.housing, 0)

        # Chi2 (column 0) and ANOVA (column 2) must raise as well; the scorer
        # is constructed inside the guarded block, as in the original test.
        for scorer_class, column in ((Chi2, 0), (ANOVA, 2)):
            with self.assertRaises(ValueError):
                scorer_class()(self.housing, column)

        # UnivariateLinearRegression is called outside any assertRaises
        # block, so this test fails if it raises on housing.
        UnivariateLinearRegression()(self.housing, 2)
Esempio n. 6
0
    def test_discrete_scores_on_continuous_features(self):
        # Gini and Chi2 are applied to iris through SelectBestFeatures and
        # directly; both must give the same attribute order and score shapes.
        columns = self.iris.columns
        expected_order = (
            columns.petal_length,
            columns.petal_width,
            columns.sepal_length,
            columns.sepal_width,
        )
        for method in (Gini(), Chi2()):
            selector = SelectBestFeatures(method=method)
            selected = selector(self.iris)
            self.assertSequenceEqual(selected.domain.attributes,
                                     expected_order)

            # Scoring the whole table yields one score per attribute.
            all_scores = method(selected)
            self.assertEqual(len(all_scores), 4)

            # Scoring a single attribute yields a plain float.
            single_score = method(selected, columns.petal_length)
            self.assertIsInstance(single_score, float)
Esempio n. 7
0
 def test_gini(self):
     # Gini scores for zoo attribute indices 0-4 must match the reference
     # values to five decimal places.
     expected = [0.23786, 0.20855, 0.26235, 0.29300, 0.11946]
     gini = Gini()
     observed = [gini(self.zoo, index) for index in range(5)]
     np.testing.assert_almost_equal(observed, expected, decimal=5)
Esempio n. 8
0
 def test_gini(self):
     # Gini scores for zoo attribute indices 0-4 must match the reference
     # values to five decimal places.
     expected = [0.11893, 0.10427, 0.13117, 0.14650, 0.05973]
     gini = Gini()
     observed = [gini(self.zoo, index) for index in range(5)]
     np.testing.assert_almost_equal(observed, expected, decimal=5)