예제 #1
0
파일: test_fss.py 프로젝트: mrihtar/orange3
 def test_select_1(self):
     # The first attribute kept by a k=1 Gini selection on the titanic
     # fixture must be the attribute with the highest Gini score.
     scorer = Gini()
     titanic = self.titanic
     selector = SelectBestFeatures(method=scorer, k=1)
     reduced = selector(titanic)
     # Pair each score with its attribute so max() yields the winning pair.
     scored = [(scorer(titanic, attr), attr)
               for attr in titanic.domain.attributes]
     _, expected = max(scored)
     self.assertEqual(reduced.domain.attributes[0], expected)
예제 #2
0
파일: test_fss.py 프로젝트: mrihtar/orange3
    def test_continuous_scores_on_discrete_features(self):
        # Calling UnivariateLinearRegression directly on the imputed data
        # must raise ValueError, whereas using it as the method of
        # SelectBestFeatures must succeed and keep the domain length.
        imputed = Impute(self.auro_mpg)
        self.assertRaises(ValueError, UnivariateLinearRegression, imputed)

        selector = SelectBestFeatures(method=UnivariateLinearRegression)
        selected = selector(imputed)
        self.assertEqual(len(selected.domain), len(imputed.domain))
예제 #3
0
파일: test_fss.py 프로젝트: chkothe/orange3
 def test_select_1(self):
     # Load titanic, keep a single attribute by Gini score, and check that
     # it is the attribute with the maximal score.
     titanic = Table('titanic')
     scorer = Gini()
     selector = SelectBestFeatures(method=scorer, k=1)
     reduced = selector(titanic)
     # max() over (score, attribute) pairs; the attribute is the 2nd item.
     scored = [(scorer(titanic, attr), attr)
               for attr in titanic.domain.attributes]
     _, expected = max(scored)
     self.assertEqual(reduced.domain.attributes[0], expected)
예제 #4
0
 def test_select_threshold(self):
     # Every attribute that survives a threshold-based selection on the
     # wine data must have an ANOVA score of at least that threshold.
     wine = Table('wine')
     scorer = ANOVA()
     threshold = 30
     selector = SelectBestFeatures(method=scorer, threshold=threshold)
     kept = selector(wine).domain.attributes
     # NOTE(review): the scorer is invoked as (feature, data) here; confirm
     # this argument order matches the scorer API of the Orange version
     # this test targets.
     passes = (scorer(attr, wine) >= threshold for attr in kept)
     self.assertTrue(all(passes))
예제 #5
0
파일: test_fss.py 프로젝트: mrihtar/orange3
 def test_defaults(self):
     # A selector built with only k=3 is applied to three fixtures; all
     # kept attributes must be continuous for imputed auto-mpg and wine,
     # and discrete for titanic.
     selector = SelectBestFeatures(k=3)
     cases = (
         (Impute(self.auro_mpg), "is_continuous"),
         (self.wine, "is_continuous"),
         (self.titanic, "is_discrete"),
     )
     for table, flag in cases:
         kept = selector(table).domain.attributes
         self.assertTrue(all(getattr(attr, flag) for attr in kept))
예제 #6
0
파일: test_fss.py 프로젝트: chkothe/orange3
 def test_defaults(self):
     # A selector built with only k=3 must keep ContinuousVariable
     # attributes for imputed auto-mpg and wine, and DiscreteVariable
     # attributes for titanic.
     selector = SelectBestFeatures(k=3)

     def kept_are(table, var_type):
         # True when every attribute kept by the selector is of var_type.
         attrs = selector(table).domain.attributes
         return all(isinstance(attr, var_type) for attr in attrs)

     self.assertTrue(kept_are(Impute(Table('auto-mpg')), ContinuousVariable))
     self.assertTrue(kept_are(Table('wine'), ContinuousVariable))
     self.assertTrue(kept_are(Table('titanic'), DiscreteVariable))
예제 #7
0
파일: test_fss.py 프로젝트: wibrt/orange3
 def test_defaults(self):
     # A selector built with only k=3 is run on three data sets and the
     # kind (continuous / discrete) of the kept attributes is checked.
     selector = SelectBestFeatures(k=3)

     mpg_attrs = selector(Impute(Table('auto-mpg'))).domain.attributes
     self.assertTrue(all(var.is_continuous for var in mpg_attrs))

     wine_attrs = selector(Table('wine')).domain.attributes
     self.assertTrue(all(var.is_continuous for var in wine_attrs))

     titanic_attrs = selector(Table('titanic')).domain.attributes
     self.assertTrue(all(var.is_discrete for var in titanic_attrs))
예제 #8
0
 def test_select_threshold(self):
     # Every attribute kept by a threshold-based ANOVA selection on the
     # heart_disease fixture must score at least the threshold.
     scorer = ANOVA()
     threshold = 30
     heart = self.heart_disease
     selector = SelectBestFeatures(method=scorer, threshold=threshold)
     kept = selector(heart).domain.attributes
     passes = (scorer(heart, attr) >= threshold for attr in kept)
     self.assertTrue(all(passes))
예제 #9
0
    def test_select_2(self):
        # Exercises SelectBestFeatures with float and zero values of k.
        scorer = Gini()
        titanic = self.titanic

        # k=1.0: the best-scoring attribute must come first in the output.
        selected = SelectBestFeatures(method=scorer, k=1.0)(titanic)
        scored = [(scorer(titanic, attr), attr)
                  for attr in titanic.domain.attributes]
        _, top = max(scored)
        self.assertEqual(selected.domain.attributes[0], top)

        # k=0 (and no threshold): all attributes are expected to be kept.
        selected = SelectBestFeatures(method=scorer, k=0)(titanic)
        self.assertEqual(len(selected.domain.attributes),
                         len(titanic.domain.attributes))

        # Fractional k of 31%, 35% and 1%: each is expected to keep exactly
        # one titanic attribute.
        for fraction in (0.31, 0.35, 0.01):
            selected = SelectBestFeatures(method=scorer, k=fraction)(titanic)
            self.assertEqual(len(selected.domain.attributes), 1)

        # The number of selected attributes is relative to the number of
        # attributes of the current input: k=1.0 on heart_disease is
        # expected to keep 13 attributes.
        heart = self.heart_disease
        selected = SelectBestFeatures(method=scorer, k=1.0)(heart)
        self.assertEqual(len(selected.domain.attributes), 13)
예제 #10
0
파일: test_fss.py 프로젝트: mrihtar/orange3
    def test_discrete_scores_on_continuous_features(self):
        # Gini and Chi2, passed as classes, are used both as the selection
        # method on iris and as direct scorers on the selected data.
        columns = self.iris.columns
        for scorer in (Gini, Chi2):
            selector = SelectBestFeatures(method=scorer)
            selected = selector(self.iris)
            expected_order = (
                columns.petal_length,
                columns.petal_width,
                columns.sepal_length,
                columns.sepal_width,
            )
            self.assertSequenceEqual(selected.domain.attributes,
                                     expected_order)

            # Scoring the whole table yields one score per attribute.
            all_scores = scorer(selected)
            self.assertEqual(len(all_scores), 4)

            # Scoring a single attribute yields a plain float.
            single_score = scorer(selected, columns.petal_length)
            self.assertIsInstance(single_score, float)
예제 #11
0
 def test_mixed_features(self):
     # After a k=2 selection with UnivariateLinearRegression on imputed
     # auto-mpg, exactly two continuous attributes must remain and the
     # number of discrete attributes must be unchanged.
     mpg = Table('auto-mpg')
     mpg.X = Imputer().fit_transform(mpg.X)
     selector = SelectBestFeatures(method=UnivariateLinearRegression(), k=2)
     reduced = selector(mpg)

     def count_of(attrs, var_type):
         # Number of attributes in attrs that are instances of var_type.
         return len([attr for attr in attrs if isinstance(attr, var_type)])

     self.assertEqual(
         count_of(reduced.domain.attributes, ContinuousVariable), 2)
     self.assertEqual(
         count_of(reduced.domain.attributes, DiscreteVariable),
         count_of(mpg.domain.attributes, DiscreteVariable))
예제 #12
0
    def test_discrete_scores_on_continuous_features(self):
        # Gini and Chi2 scorer instances are used both as the selection
        # method on iris and as direct scorers on the selected data.
        c = self.iris.columns
        for method in (Gini(), Chi2()):
            d1 = SelectBestFeatures(method=method)(self.iris)
            expected = \
                (c.petal_length, c.petal_width, c.sepal_length, c.sepal_width)
            self.assertSequenceEqual(d1.domain.attributes, expected)

            # Scoring the whole table yields one score per attribute.
            scores = method(d1)
            self.assertEqual(len(scores), 4)

            score = method(d1, c.petal_length)
            self.assertEqual(score.ndim, 0)  # a scalar
            # Compare against the abstract np.floating type: passing the
            # Python `float` type to issubdtype has meant different things
            # across NumPy versions (np.floating vs. np.float64).
            self.assertTrue(np.issubdtype(score.dtype, np.floating))
예제 #13
0
파일: test_fss.py 프로젝트: mrihtar/orange3
 def test_error_when_using_regression_score_on_classification_data(self):
     # Applying a selector built on UnivariateLinearRegression to the wine
     # fixture must raise ValueError.
     scorer = UnivariateLinearRegression()
     selector = SelectBestFeatures(method=scorer, k=3)
     self.assertRaises(ValueError, selector, self.wine)
예제 #14
0
 def test_error(self):
     # Applying a selector built on UnivariateLinearRegression to the wine
     # table must raise ValueError.
     wine = Table('wine')
     scorer = UnivariateLinearRegression()
     selector = SelectBestFeatures(method=scorer, k=3)
     self.assertRaises(ValueError, selector, wine)