Example #1
0
    def __call__(self, data):
        """Return `data` reduced to its best-scoring attributes.

        Attributes are scored with ``self.method``. When that is ``None`` a
        default scorer is picked from the data: ``UnivariateLinearRegression``
        if the class is not discrete, otherwise ``GainRatio`` when at least
        half of the attributes are discrete and ``ANOVA`` when fewer are.

        The (score, attribute) pairs are sorted by score (descending when
        ``self.decreasing`` is true), cut to the first ``self.k`` pairs and
        then to the leading run that satisfies ``self.threshold``.  A falsy
        ``k`` or ``threshold`` (``None``, ``0``) disables that filter.

        :param data: table to select attributes from; its ``domain`` must
            provide ``attributes``, ``class_vars`` and ``metas``.
        :return: ``data.transform(domain)`` for a domain that holds only the
            selected attributes plus the original class variables and metas.
        """
        method = self.method
        features = data.domain.attributes
        # select default method according to the provided data
        if method is None:
            if not data.domain.has_discrete_class:
                method = UnivariateLinearRegression()
            else:
                n_features = len(features)
                n_discrete = sum(a.is_discrete for a in features)
                # A domain without attributes counts as having no discrete
                # ones rather than raising ZeroDivisionError.
                discr_ratio = n_discrete / n_features if n_features else 0.0
                method = GainRatio() if discr_ratio >= 0.5 else ANOVA()

        try:
            scores = method(data)
        except ValueError:
            # The scorer refused the data as a whole; let the fallback helper
            # produce the scores instead.
            scores = self.score_only_nice_features(data, method)
        best = sorted(zip(scores, features),
                      key=itemgetter(0),
                      reverse=self.decreasing)
        if self.k:
            best = best[:self.k]
        if self.threshold:
            # `best` is sorted, so stopping at the first failing score keeps
            # exactly the pairs on the good side of the threshold.
            pred = ((lambda x: x[0] >= self.threshold) if self.decreasing else
                    (lambda x: x[0] <= self.threshold))
            best = takewhile(pred, best)

        domain = Orange.data.Domain([feature for _, feature in best],
                                    data.domain.class_vars, data.domain.metas)
        return data.transform(domain)
Example #2
0
 def test_classless(self):
     """Gini, InfoGain and GainRatio raise ValueError on class-less data."""
     # Domain built from the zoo attributes only; the table takes every
     # column of self.zoo except the last one.
     attributes_only = Domain(self.zoo.domain.attributes)
     no_class_table = Table(attributes_only, self.zoo[:, 0:-1])
     for score in (Gini(), InfoGain(), GainRatio()):
         with self.assertRaises(ValueError):
             score(no_class_table, 0)
Example #3
0
    def test_wrong_class_type(self):
        """Scorers reject self.housing with ValueError, except linear regression.

        Gini, InfoGain, GainRatio, Chi2 and ANOVA are each expected to raise
        ValueError; UnivariateLinearRegression must score the data without
        raising.
        """
        housing = self.housing
        for rejecting_scorer in (Gini(), InfoGain(), GainRatio()):
            with self.assertRaises(ValueError):
                rejecting_scorer(housing, 0)

        # These scorers are built inside the guarded block, so a ValueError
        # from either construction or scoring satisfies the assertion.
        with self.assertRaises(ValueError):
            Chi2()(housing, 0)
        with self.assertRaises(ValueError):
            ANOVA()(housing, 2)

        # No assertion wrapper here: the call passes as long as it does not raise.
        UnivariateLinearRegression()(housing, 2)
Example #4
0
    def __call__(self, data):
        """Return `data` reduced to its best-scoring attributes.

        Attributes are scored with ``self.method``. When that is ``None`` a
        default scorer is picked from the data: ``UnivariateLinearRegression``
        if the class is not discrete, otherwise ``GainRatio`` when at least
        half of the attributes are discrete and ``ANOVA`` when fewer are.

        The (score, attribute) pairs are sorted by score (descending when
        ``self.decreasing`` is true), cut to the first ``self.k`` pairs and
        then to the leading run that satisfies ``self.threshold``.  A falsy
        ``k`` or ``threshold`` (``None``, ``0``) disables that filter.

        :param data: table to select attributes from; its ``domain`` must
            provide ``attributes``, ``class_var``, ``class_vars`` and
            ``metas``.
        :return: ``data.from_table(domain, data)`` for a domain that holds
            only the selected attributes plus the original class variables
            and metas.
        :raises ValueError: if the class variable of `data` is not an
            instance of the scoring method's ``class_type``.
        """
        method = self.method
        features = data.domain.attributes
        # select default method according to the provided data
        if method is None:
            if not data.domain.has_discrete_class:
                method = UnivariateLinearRegression()
            else:
                n_features = len(features)
                n_discrete = sum(a.is_discrete for a in features)
                # A domain without attributes counts as having no discrete
                # ones rather than raising ZeroDivisionError.
                discr_ratio = n_discrete / n_features if n_features else 0.0
                method = GainRatio() if discr_ratio >= 0.5 else ANOVA()

        if not isinstance(data.domain.class_var, method.class_type):
            # `method` may be a scorer class or a scorer instance.
            # isinstance(..., type) also recognises classes that use a custom
            # metaclass, so the message names the scorer, not its metaclass.
            method_cls = method if isinstance(method, type) else type(method)
            raise ValueError(
                ("Scoring method {} requires a class variable " +
                 "of type {}.").format(method_cls.__name__,
                                       method.class_type.__name__))

        try:
            scores = method(data)
        except ValueError:
            # The scorer refused the data as a whole; let the fallback helper
            # produce the scores instead.
            scores = self.score_only_nice_features(data, method)
        best = sorted(zip(scores, features),
                      key=itemgetter(0),
                      reverse=self.decreasing)
        if self.k:
            best = best[:self.k]
        if self.threshold:
            # `best` is sorted, so stopping at the first failing score keeps
            # exactly the pairs on the good side of the threshold.
            pred = ((lambda x: x[0] >= self.threshold) if self.decreasing else
                    (lambda x: x[0] <= self.threshold))
            best = takewhile(pred, best)

        domain = Orange.data.Domain([feature for _, feature in best],
                                    data.domain.class_vars, data.domain.metas)
        return data.from_table(domain, data)
Example #5
0
 def test_gain_ratio(self):
     """GainRatio scores for indices 0-4 of self.zoo match reference values."""
     expected = [0.80351, 1.00000, 0.84754, 1.00000, 0.59376]
     gain_ratio = GainRatio()
     computed = [gain_ratio(self.zoo, index) for index in range(5)]
     # Compare to five decimal places, the precision of the reference values.
     np.testing.assert_almost_equal(computed, expected, decimal=5)