def __call__(self, data):
    """Return `data` restricted to its best-scoring attributes.

    Every attribute in ``data.domain.attributes`` is scored with
    ``self.method``. When ``self.method`` is None a scorer is chosen from
    the data: ``GainRatio`` for a discrete class when at least half of the
    attributes are discrete, ``ANOVA`` for a discrete class otherwise, and
    ``UnivariateLinearRegression`` when the class is not discrete.

    The (score, attribute) pairs are sorted by score (descending when
    ``self.decreasing`` is true), truncated to the first ``self.k`` pairs
    if ``self.k`` is truthy, and then cut at the first pair whose score
    fails ``self.threshold`` if ``self.threshold`` is truthy.

    Args:
        data: table-like object whose ``domain`` exposes ``attributes``,
            ``class_vars``, ``metas`` and ``has_discrete_class``.

    Returns:
        The result of ``data.transform`` on a new domain holding the
        selected attributes together with the original class variables
        and metas.

    Raises:
        ZeroDivisionError: if ``self.method`` is None and the domain has
            no attributes (the discrete ratio divides by their count).
    """
    features = data.domain.attributes

    method = self.method
    if method is None:
        # Select a default method according to the provided data.
        discr_ratio = sum(a.is_discrete for a in features) / len(features)
        if data.domain.has_discrete_class:
            method = GainRatio() if discr_ratio >= 0.5 else ANOVA()
        else:
            method = UnivariateLinearRegression()

    try:
        scores = method(data)
    except ValueError:
        # The scorer refused the data as a whole; presumably the helper
        # scores only the attributes the method can handle -- see
        # score_only_nice_features for the exact fallback.
        scores = self.score_only_nice_features(data, method)

    best = sorted(zip(scores, features), key=itemgetter(0),
                  reverse=self.decreasing)
    if self.k:
        best = best[:self.k]
    # NOTE(review): the truthiness test means a threshold of 0 is treated
    # as "no threshold" -- confirm that this is intended.
    if self.threshold:
        if self.decreasing:
            pred = lambda x: x[0] >= self.threshold
        else:
            pred = lambda x: x[0] <= self.threshold
        # `best` is sorted, so everything after the first failure fails too.
        best = takewhile(pred, best)

    domain = Orange.data.Domain([f for _, f in best],
                                data.domain.class_vars, data.domain.metas)
    return data.transform(domain)
def test_classless(self):
    # Build a copy of the zoo data whose domain holds only the attributes
    # (no class variable); the last column of self.zoo is left out.
    attributes_only = Domain(self.zoo.domain.attributes)
    no_class_data = Table(attributes_only, self.zoo[:, 0:-1])

    # Each of these scorers must refuse such data with a ValueError.
    for score in (Gini(), InfoGain(), GainRatio()):
        self.assertRaises(ValueError, score, no_class_data, 0)
def test_wrong_class_type(self):
    # Scorers that are expected to reject self.housing with a ValueError.
    rejecting = [Gini(), InfoGain(), GainRatio()]
    for score in rejecting:
        with self.assertRaises(ValueError):
            score(self.housing, 0)

    # Same expectation for Chi2 and ANOVA; each scorer is constructed
    # inside the assertRaises context, exactly where it is called.
    for scorer_cls, column in ((Chi2, 0), (ANOVA, 2)):
        with self.assertRaises(ValueError):
            scorer_cls()(self.housing, column)

    # This call sits outside any assertRaises: it has to succeed.
    UnivariateLinearRegression()(self.housing, 2)
def __call__(self, data):
    """Return `data` restricted to its best-scoring attributes.

    Every attribute in ``data.domain.attributes`` is scored with
    ``self.method``. When ``self.method`` is None a scorer is chosen from
    the data: ``GainRatio`` for a discrete class when at least half of the
    attributes are discrete, ``ANOVA`` for a discrete class otherwise, and
    ``UnivariateLinearRegression`` when the class is not discrete.

    The (score, attribute) pairs are sorted by score (descending when
    ``self.decreasing`` is true), truncated to the first ``self.k`` pairs
    if ``self.k`` is truthy, and then cut at the first pair whose score
    fails ``self.threshold`` if ``self.threshold`` is truthy.

    Args:
        data: table-like object whose ``domain`` exposes ``attributes``,
            ``class_var``, ``class_vars``, ``metas`` and
            ``has_discrete_class``.

    Returns:
        The result of ``data.from_table(domain, data)`` for a new domain
        holding the selected attributes together with the original class
        variables and metas.

    Raises:
        ValueError: if ``data.domain.class_var`` is not an instance of the
            scoring method's ``class_type``.
        ZeroDivisionError: if ``self.method`` is None and the domain has
            no attributes (the discrete ratio divides by their count).
    """
    features = data.domain.attributes

    method = self.method
    if method is None:
        # Select a default method according to the provided data.
        discr_ratio = sum(a.is_discrete for a in features) / len(features)
        if data.domain.has_discrete_class:
            method = GainRatio() if discr_ratio >= 0.5 else ANOVA()
        else:
            method = UnivariateLinearRegression()

    if not isinstance(data.domain.class_var, method.class_type):
        # `method` may be a scorer class or a scorer instance.
        # isinstance(..., type) also recognises classes created by a
        # custom metaclass, so the message names the scorer itself.
        scorer_cls = method if isinstance(method, type) else type(method)
        raise ValueError(
            "Scoring method {} requires a class variable "
            "of type {}.".format(scorer_cls.__name__,
                                 method.class_type.__name__))

    try:
        scores = method(data)
    except ValueError:
        # The scorer refused the data as a whole; presumably the helper
        # scores only the attributes the method can handle -- see
        # score_only_nice_features for the exact fallback.
        scores = self.score_only_nice_features(data, method)

    best = sorted(zip(scores, features), key=itemgetter(0),
                  reverse=self.decreasing)
    if self.k:
        best = best[:self.k]
    # NOTE(review): the truthiness test means a threshold of 0 is treated
    # as "no threshold" -- confirm that this is intended.
    if self.threshold:
        if self.decreasing:
            pred = lambda x: x[0] >= self.threshold
        else:
            pred = lambda x: x[0] <= self.threshold
        # `best` is sorted, so everything after the first failure fails too.
        best = takewhile(pred, best)

    domain = Orange.data.Domain([f for _, f in best],
                                data.domain.class_vars, data.domain.metas)
    return data.from_table(domain, data)
def test_gain_ratio(self):
    # Reference values for GainRatio called on self.zoo with second
    # argument 0..4 (presumably the attribute index -- TODO confirm).
    expected = [0.80351, 1.00000, 0.84754, 1.00000, 0.59376]

    gain_ratio = GainRatio()
    computed = [gain_ratio(self.zoo, index) for index in range(5)]

    # Compare to five decimal places, matching the precision of `expected`.
    np.testing.assert_almost_equal(computed, expected, decimal=5)