def test_select_1(self):
    """With k=1 the first selected attribute is the top Gini-scored one."""
    scorer = Gini()
    selector = SelectBestFeatures(method=scorer, k=1)
    reduced = selector(self.titanic)

    # Pair each attribute with its score; the maximal pair carries the
    # attribute the selector is expected to keep.
    _, top_attr = max(
        (scorer(self.titanic, attr), attr)
        for attr in self.titanic.domain.attributes
    )
    self.assertEqual(reduced.domain.attributes[0], top_attr)
def test_classless(self):
    """Gini, InfoGain and GainRatio raise ValueError on a table whose
    domain is built from the zoo attributes only."""
    attrs_only = Domain(self.zoo.domain.attributes)
    classless = Table(attrs_only, self.zoo[:, 0:-1])

    for scorer in (Gini(), InfoGain(), GainRatio()):
        with self.assertRaises(ValueError):
            scorer(classless, 0)
def test_select_2(self):
    """Exercise SelectBestFeatures with float and zero values of ``k``."""
    scorer = Gini()

    # k=1.0 on titanic: the best-scored attribute must come first.
    picked = SelectBestFeatures(method=scorer, k=1.0)(self.titanic)
    _, top_attr = max(
        (scorer(self.titanic, attr), attr)
        for attr in self.titanic.domain.attributes
    )
    self.assertEqual(picked.domain.attributes[0], top_attr)

    # k=0 (no k and no threshold): every attribute is kept.
    picked = SelectBestFeatures(method=scorer, k=0)(self.titanic)
    self.assertEqual(
        len(picked.domain.attributes),
        len(self.titanic.domain.attributes),
    )

    # 31%, 35% and 1% of titanic's attributes (3, per the original notes)
    # are each expected to leave exactly one attribute.
    for fraction in (0.31, 0.35, 0.01):
        picked = SelectBestFeatures(method=scorer, k=fraction)(self.titanic)
        self.assertEqual(len(picked.domain.attributes), 1)

    # The number of selected attributes is relative to the attributes of
    # the table actually passed in: k=1.0 on heart_disease keeps 13.
    picked = SelectBestFeatures(method=scorer, k=1.0)(self.heart_disease)
    self.assertEqual(len(picked.domain.attributes), 13)
def test_select_1(self):
    """With k=1 the first attribute selected from a freshly loaded titanic
    table is the top Gini-scored one."""
    titanic = Table('titanic')
    scorer = Gini()
    reduced = SelectBestFeatures(method=scorer, k=1)(titanic)

    # The maximal (score, attribute) pair identifies the expected winner.
    _, top_attr = max(
        (scorer(titanic, attr), attr)
        for attr in titanic.domain.attributes
    )
    self.assertEqual(reduced.domain.attributes[0], top_attr)
def test_wrong_class_type(self):
    """Scorers that cannot handle the housing class raise ValueError.

    Presumably housing has a continuous target, which the test name calls
    the "wrong class type" for these scorers -- TODO confirm.
    """
    for scorer in (Gini(), InfoGain(), GainRatio()):
        with self.assertRaises(ValueError):
            scorer(self.housing, 0)

    with self.assertRaises(ValueError):
        chi2 = Chi2()
        chi2(self.housing, 0)

    with self.assertRaises(ValueError):
        anova = ANOVA()
        anova(self.housing, 2)

    # Runs outside any assertRaises context, so it must finish without
    # raising.
    # NOTE(review): placement outside the ``with`` is inferred from the
    # flattened source; inside it the call would be unreachable once
    # ANOVA raises.
    linear = UnivariateLinearRegression()
    linear(self.housing, 2)
def test_discrete_scores_on_continuous_features(self):
    """Gini and Chi2 can rank and score the iris features.

    For each scorer, SelectBestFeatures must return the four attributes in
    the expected order, scoring the whole table must give four scores, and
    scoring a single attribute must give a float.
    """
    cols = self.iris.columns
    expected_order = (
        cols.petal_length,
        cols.petal_width,
        cols.sepal_length,
        cols.sepal_width,
    )

    for scorer in (Gini(), Chi2()):
        selected = SelectBestFeatures(method=scorer)(self.iris)
        self.assertSequenceEqual(selected.domain.attributes, expected_order)

        # Scoring without naming a feature yields one score per attribute.
        all_scores = scorer(selected)
        self.assertEqual(len(all_scores), 4)

        # Scoring one named feature yields a plain float.
        single_score = scorer(selected, cols.petal_length)
        self.assertIsInstance(single_score, float)
def test_gini(self):
    """Gini scores of the first five zoo attributes match reference values."""
    expected = [0.23786, 0.20855, 0.26235, 0.29300, 0.11946]
    gini = Gini()
    actual = [gini(self.zoo, index) for index in range(5)]
    # Reference values are written to five decimals; compare at that precision.
    np.testing.assert_almost_equal(actual, expected, decimal=5)
def test_gini(self):
    """Gini scores of the first five zoo attributes match reference values."""
    expected = [0.11893, 0.10427, 0.13117, 0.14650, 0.05973]
    gini = Gini()
    actual = [gini(self.zoo, index) for index in range(5)]
    # Reference values are written to five decimals; compare at that precision.
    np.testing.assert_almost_equal(actual, expected, decimal=5)