Esempio n. 1
0
    def __call__(self, data):
        """Return `data` reduced to its best-scoring attributes.

        All attributes of ``data.domain`` are scored with ``self.method``.
        When ``self.method`` is ``None`` a default scorer is chosen from the
        domain: ``GainRatio`` for a discrete class when at least half of the
        attributes are discrete, ``ANOVA`` for a discrete class otherwise,
        and ``UnivariateLinearRegression`` when the class is not discrete.

        If scoring the whole table raises ``ValueError``, the scores are
        obtained from ``self.score_only_nice_features`` instead.

        The (score, attribute) pairs are sorted by score (descending when
        ``self.decreasing`` is true), truncated to the first ``self.k`` pairs
        when ``self.k`` is truthy, and then cut at the first pair whose score
        fails the ``self.threshold`` comparison when ``self.threshold`` is
        truthy.

        Parameters
        ----------
        data
            Object exposing ``domain`` (with ``attributes``, ``class_vars``,
            ``metas`` and ``has_discrete_class``) and ``transform``.

        Returns
        -------
        The result of ``data.transform`` for a domain made of the selected
        attributes and the original class variables and metas.

        Raises
        ------
        ZeroDivisionError
            If no method is configured and the domain has no attributes.
        """
        method = self.method
        if method is None:
            # No scorer configured: pick one that suits the domain.
            attributes = data.domain.attributes
            discr_ratio = (sum(a.is_discrete for a in attributes) /
                           len(attributes))
            if data.domain.has_discrete_class:
                if discr_ratio >= 0.5:
                    method = GainRatio()
                else:
                    method = ANOVA()
            else:
                method = UnivariateLinearRegression()

        features = data.domain.attributes
        try:
            scores = method(data)
        except ValueError:
            # Scoring everything at once failed; let the helper decide
            # which attributes can be scored with this method.
            scores = self.score_only_nice_features(data, method)
        best = sorted(zip(scores, features),
                      key=itemgetter(0),
                      reverse=self.decreasing)
        if self.k:
            best = best[:self.k]
        # NOTE(review): a threshold of 0 is falsy and is therefore skipped
        # here -- confirm that this is the intended meaning of 0.
        if self.threshold:
            # `best` is sorted, so everything after the first failing score
            # fails too; takewhile stops at that point.
            pred = ((lambda x: x[0] >= self.threshold) if self.decreasing else
                    (lambda x: x[0] <= self.threshold))
            best = takewhile(pred, best)

        domain = Orange.data.Domain([f for s, f in best],
                                    data.domain.class_vars, data.domain.metas)
        return data.transform(domain)
Esempio n. 2
0
 def test_select_threshold(self):
     # Every attribute that survives threshold-based selection must itself
     # score at least the threshold under the same scorer.
     wine = Table('wine')
     scorer = ANOVA()
     threshold = 30
     selector = SelectBestFeatures(method=scorer, threshold=threshold)
     selected = selector(wine)
     all_above = all(scorer(wine, attr) >= threshold
                     for attr in selected.domain.attributes)
     self.assertTrue(all_above)
Esempio n. 3
0
 def test_select_threshold(self):
     # Every attribute that survives threshold-based selection must itself
     # score at least the threshold under the same scorer.
     scorer = ANOVA()
     threshold = 30
     selector = SelectBestFeatures(method=scorer, threshold=threshold)
     selected = selector(self.heart_disease)
     all_above = all(scorer(self.heart_disease, attr) >= threshold
                     for attr in selected.domain.attributes)
     self.assertTrue(all_above)
Esempio n. 4
0
    def test_wrong_class_type(self):
        # Gini, InfoGain and GainRatio must raise ValueError when asked to
        # score attribute 0 of the housing data.
        prebuilt = (Gini(), InfoGain(), GainRatio())
        for candidate in prebuilt:
            with self.assertRaises(ValueError):
                candidate(self.housing, 0)

        # Chi2 and ANOVA must raise as well; the scorers are constructed
        # inside the guarded call.
        self.assertRaises(ValueError, lambda: Chi2()(self.housing, 0))
        self.assertRaises(ValueError, lambda: ANOVA()(self.housing, 2))

        # UnivariateLinearRegression is expected to score without raising.
        UnivariateLinearRegression()(self.housing, 2)
Esempio n. 5
0
 def test_anova(self):
     # Random attribute matrix; the class is derived from columns 1 and 3,
     # with column 1 carrying the larger weight (-3 versus 1).
     n_rows, n_cols = 500, 5
     X = np.random.rand(n_rows, n_cols)
     signal = -3 * X[:, 1] + X[:, 3]
     y = 4 + signal // 2

     # Reuse the attributes inferred from the arrays, but replace the class
     # with a discrete variable whose values are the distinct entries of y.
     inferred = Domain.from_numpy(X, y)
     class_var = DiscreteVariable('c', values=np.unique(y))
     table = Table(Domain(inferred.attributes, class_var), X, y)

     anova = ANOVA()
     scores = []
     for column in range(n_cols):
         scores.append(anova(table, column))

     # Attribute 1 is expected to receive the highest score.
     top_column = np.argmax(scores)
     self.assertTrue(top_column == 1)
Esempio n. 6
0
    def __call__(self, data):
        """Return a table built from `data` with only its best attributes.

        All attributes of ``data.domain`` are scored with ``self.method``.
        When ``self.method`` is ``None`` a default scorer is chosen from the
        domain: ``GainRatio`` for a discrete class when at least half of the
        attributes are discrete, ``ANOVA`` for a discrete class otherwise,
        and ``UnivariateLinearRegression`` when the class is not discrete.

        If scoring the whole table raises ``ValueError``, the scores are
        obtained from ``self.score_only_nice_features`` instead.

        The (score, attribute) pairs are sorted by score (descending when
        ``self.decreasing`` is true), truncated to the first ``self.k`` pairs
        when ``self.k`` is truthy, and then cut at the first pair whose score
        fails the ``self.threshold`` comparison when ``self.threshold`` is
        truthy.

        Parameters
        ----------
        data
            Object exposing ``domain`` (with ``attributes``, ``class_var``,
            ``class_vars``, ``metas`` and ``has_discrete_class``) and
            ``from_table``.

        Returns
        -------
        The result of ``data.from_table`` for a domain made of the selected
        attributes and the original class variables and metas.

        Raises
        ------
        ValueError
            If ``data.domain.class_var`` is not an instance of the scoring
            method's ``class_type``.
        ZeroDivisionError
            If no method is configured and the domain has no attributes.
        """
        method = self.method
        if method is None:
            # No scorer configured: pick one that suits the domain.
            attributes = data.domain.attributes
            discr_ratio = (sum(a.is_discrete for a in attributes) /
                           len(attributes))
            if data.domain.has_discrete_class:
                if discr_ratio >= 0.5:
                    method = GainRatio()
                else:
                    method = ANOVA()
            else:
                method = UnivariateLinearRegression()

        if not isinstance(data.domain.class_var, method.class_type):
            # `method` may be a scorer class or a scorer instance. Using
            # isinstance (rather than comparing type(method) to `type`) also
            # recognises classes created by a custom metaclass, so the
            # message names the scorer and not its metaclass.
            scorer_cls = method if isinstance(method, type) else type(method)
            raise ValueError(
                "Scoring method {} requires a class variable "
                "of type {}.".format(scorer_cls.__name__,
                                     method.class_type.__name__))

        features = data.domain.attributes
        try:
            scores = method(data)
        except ValueError:
            # Scoring everything at once failed; let the helper decide
            # which attributes can be scored with this method.
            scores = self.score_only_nice_features(data, method)
        best = sorted(zip(scores, features),
                      key=itemgetter(0),
                      reverse=self.decreasing)
        if self.k:
            best = best[:self.k]
        # NOTE(review): a threshold of 0 is falsy and is therefore skipped
        # here -- confirm that this is the intended meaning of 0.
        if self.threshold:
            # `best` is sorted, so everything after the first failing score
            # fails too; takewhile stops at that point.
            pred = ((lambda x: x[0] >= self.threshold) if self.decreasing else
                    (lambda x: x[0] <= self.threshold))
            best = takewhile(pred, best)

        domain = Orange.data.Domain([f for s, f in best],
                                    data.domain.class_vars, data.domain.metas)
        return data.from_table(domain, data)