Ejemplo n.º 1
0
    def test_relieff(self):
        """ReliefF ranks the expected attributes and leaves its input intact."""
        breast_before = self.breast.copy()
        scores = ReliefF(random_state=42)(self.breast, None)
        # Only the set of the three best attributes is checked, so the names
        # are sorted alphabetically before comparing.
        top_three = sorted(
            self.breast.domain[idx].name for idx in scores.argsort()[-3:])
        expected = ['Bare_Nuclei', 'Clump thickness', 'Marginal_Adhesion']
        self.assertEqual(top_three, expected)

        # Scoring must not modify the table it was given.
        np.testing.assert_equal(breast_before.X, self.breast.X)
        np.testing.assert_equal(breast_before.Y, self.breast.Y)

        # Scoring the lenses data must not crash. Only one of the two best
        # attributes is pinned, leaving leeway for the random instance
        # selection inside ReliefF.
        scores = ReliefF(random_state=42)(self.lenses, None)
        top_two = [
            self.lenses.domain[idx].name for idx in scores.argsort()[-2:]]
        self.assertIn('tear_rate', top_two)

        # A missing target class value must not crash the scorer.
        breast_before.Y[0] = np.nan
        ReliefF()(breast_before, None)

        # Two runs with the same seed must produce identical weights.
        first_run = ReliefF(random_state=1)(self.breast, None)
        second_run = ReliefF(random_state=1)(self.breast, None)
        np.testing.assert_array_equal(first_run, second_run)
Ejemplo n.º 2
0
    def test_relieff(self):
        """Exercise ReliefF on the monk and adult tables with fixed seeds."""
        monk_snapshot = self.monk.copy()

        monk_weights = ReliefF(random_state=42)(self.monk, None)
        best_names = []
        for column in monk_weights.argsort()[-3:]:
            best_names.append(self.monk.domain[column].name)
        best_names.sort()
        self.assertEqual(best_names, ['a', 'b', 'e'])

        # The scorer must leave the table it was called on untouched.
        for part in ("X", "Y"):
            np.testing.assert_equal(
                getattr(monk_snapshot, part), getattr(self.monk, part))

        # The adult dataset must be scored without crashing. Membership in
        # the top two is enough: ReliefF picks its reference instances at
        # random, so the exact order is not pinned.
        adult_weights = ReliefF(random_state=42)(self.adult, None)
        best_two = [
            self.adult.domain[column].name
            for column in adult_weights.argsort()[-2:]
        ]
        self.assertIn('marital-status', best_two)

        # A NaN in the target must not make the scorer crash.
        monk_snapshot.Y[0] = np.nan
        ReliefF()(monk_snapshot, None)

        # Identical seeds must give identical weights.
        seeded_a = ReliefF(random_state=1)(self.monk, None)
        seeded_b = ReliefF(random_state=1)(self.monk, None)
        np.testing.assert_array_equal(seeded_a, seeded_b)
Ejemplo n.º 3
0
    def compute_attr_order(self):
        """
        Fill in `self.attr_ordering` and fix the position of the class.

        When `self.attr_ordering` is empty it is computed from
        `self.master.discrete_data`: without a class variable the domain's
        variables are sorted by name; with one, the attributes are sorted by
        descending ReliefF weight, with ties broken by name. A non-empty
        ordering is kept as it is.

        If the data has a class variable, it is then removed from the front
        of the ordering when `self._compute_class_dists()` is truthy, and
        inserted at the front (unless already there) when it is falsy.
        """
        data = self.master.discrete_data
        class_var = data.domain.class_var

        if not self.attr_ordering:
            if class_var is not None:
                weights = ReliefF(n_iterations=100, k_nearest=10)(data)
                ranked = sorted(
                    zip(weights, data.domain.attributes),
                    key=lambda pair: (-pair[0], pair[1].name))
                self.attr_ordering = [attr for _, attr in ranked]
            else:
                self.attr_ordering = sorted(
                    data.domain, key=attrgetter("name"))

        if class_var is None:
            return

        # Evaluate the class-distribution check before looking at the
        # ordering, as the two steps are not known to be independent.
        use_class_dists = self._compute_class_dists()
        class_is_first = self.attr_ordering[0] is class_var
        if use_class_dists and class_is_first:
            del self.attr_ordering[0]
        elif not use_class_dists and not class_is_first:
            self.attr_ordering.insert(0, class_var)
Ejemplo n.º 4
0
 def test_relieff(self):
     """Run the default ReliefF scorer on the monk and adult tables."""
     monk_copy = self.monk.copy()
     scorer = ReliefF()
     ranking = scorer(self.monk, None).argsort()
     top_names = sorted(
         self.monk.domain[position].name for position in ranking[-3:])
     self.assertEqual(top_names, ['a', 'b', 'e'])

     # The scored table must be identical to the copy taken beforehand.
     np.testing.assert_equal(monk_copy.X, self.monk.X)
     np.testing.assert_equal(monk_copy.Y, self.monk.Y)

     # The adult dataset must be scored without crashing. The check is
     # loose (membership in the top two) to leave leeway for ReliefF's
     # random instance selection.
     ranking = ReliefF()(self.adult, None).argsort()
     runner_ups = [
         self.adult.domain[position].name for position in ranking[-2:]
     ]
     self.assertIn('marital-status', runner_ups)

     # A missing target class value must not crash the scorer.
     monk_copy.Y[0] = np.nan
     ReliefF()(monk_copy, None)
    def _fit(self):
        """Score every attribute of the data and store the scores by name.

        The data is obtained from ``self.data.to_table()``. The scorer is
        chosen by ``self.params["eval_method"]``:

        * ``"relief"`` -- ReliefF with 100 iterations,
        * ``"fcbf"`` -- FCBF,
        * anything else -- the first element of
          ``RandomForestLearner().score_data(table)``.

        Each score is written to ``self.feature_importances`` under the name
        of the corresponding attribute in ``table.domain.attributes``.
        """
        # NOTE: this changes the process-wide warning filter and nothing here
        # restores it, so warnings stay silenced after the call.
        warnings.simplefilter(action='ignore')
        table = self.data.to_table()
        method = self.params["eval_method"]
        if method == "relief":
            # Scorers take their settings in the constructor and the data in
            # the call. Passing the table to the constructor would bind it to
            # the first positional setting instead of scoring it.
            scores = ReliefF(n_iterations=100)(table)
        elif method == "fcbf":
            scores = FCBF()(table)
        else:
            scores = RandomForestLearner().score_data(table)[0]

        for attr, score in zip(table.domain.attributes, scores):
            self.feature_importances[attr.name] = score
Ejemplo n.º 6
0
    def test_relieff(self):
        """Verify ReliefF output on the breast and lenses tables."""
        untouched = self.breast.copy()
        domain = self.breast.domain

        weights = ReliefF(random_state=42)(self.breast, None)
        best_indices = weights.argsort()[-3:][::-1]
        best = [domain[index].name for index in best_indices]
        self.assertEqual(
            sorted(best),
            ['Bare_Nuclei', 'Clump thickness', 'Marginal_Adhesion'],
        )

        # Neither the feature matrix nor the target may change during scoring.
        np.testing.assert_equal(untouched.X, self.breast.X)
        np.testing.assert_equal(untouched.Y, self.breast.Y)

        # Scoring the lenses data must not crash. 'tear_rate' only has to be
        # among the two best attributes, leaving leeway for ReliefF's random
        # instance selection.
        weights = ReliefF(random_state=42)(self.lenses, None)
        lenses_domain = self.lenses.domain
        best = [lenses_domain[index].name for index in weights.argsort()[-2:]]
        self.assertIn('tear_rate', best)

        # A missing target class value must not crash the scorer.
        untouched.Y[0] = np.nan
        ReliefF()(untouched, None)

        # The same random_state must reproduce the same weights.
        np.testing.assert_array_equal(
            ReliefF(random_state=1)(self.breast, None),
            ReliefF(random_state=1)(self.breast, None),
        )
Ejemplo n.º 7
0
 def score_heuristic(self):
     """Return the widget's attribute names ordered by ReliefF weight.

     A table is built from the transposed ``graph.scaled_data`` of the
     parent widget, with one continuous variable per column (named by its
     index) and the class variables and ``Y`` of the widget's data. ReliefF
     is run on it with 100 iterations and ``self.k`` neighbours. The weights
     are paired with the original attribute names and sorted in descending
     order.
     """
     widget = self.parent_widget
     scaled = widget.graph.scaled_data.T
     target = widget.data.Y
     columns = [
         ContinuousVariable(str(index)) for index in range(scaled.shape[1])
     ]
     domain = Orange.data.Domain(columns, widget.data.domain.class_vars)
     table = Orange.data.Table(domain, scaled, target)
     weights = ReliefF(n_iterations=100, k_nearest=self.k)(table)
     names = (var.name for var in widget.data.domain.attributes)
     # Sorting (weight, name) pairs in reverse puts the largest weight first;
     # equal weights fall back to reverse name order.
     ranked = sorted(zip(weights, names), reverse=True)
     return [name for _, name in ranked]
Ejemplo n.º 8
0
 def _compute_attr_order(self):
     """
     Rank attributes by ReliefF weight; used by VizRank to evaluate them.

     The candidates are the variables in the master's `model_selected` and
     `model_other`, excluding `self.attr_color`, which becomes the class
     variable of the transformed data. The transformed table is stored in
     `self.data`, and a boolean mask of non-NaN entries (the columns of X
     followed by Y as one column) in `self.valid_data`.

     Returns the attributes sorted by descending weight, ties broken by
     name; the same list is stored in `self.attr_ordering`.
     """
     master = self.master
     candidates = [
         var
         for var in chain(master.model_selected[:], master.model_other[:])
         if var is not self.attr_color
     ]
     domain = Domain(attributes=candidates, class_vars=self.attr_color)
     data = master.data.transform(domain)
     self.data = data

     x_known = ~np.isnan(data.X)
     # Y is reshaped to a single column so it can be stacked next to X.
     y_known = ~np.isnan(data.Y.reshape(len(data.Y), 1))
     self.valid_data = np.hstack((x_known, y_known))

     weights = ReliefF(n_iterations=100, k_nearest=self.minK)(data)
     ranked = sorted(
         zip(weights, candidates),
         key=lambda pair: (-pair[0], pair[1].name))
     ordering = [var for _, var in ranked]
     self.attr_ordering = ordering
     return ordering
Ejemplo n.º 9
0
 def test_relieff(self):
     """Check the default ReliefF scorer against known top attributes."""
     pristine = self.monk.copy()

     monk_scores = ReliefF()(self.monk, None)
     monk_best = [
         self.monk.domain[col].name for col in monk_scores.argsort()[-3:]
     ]
     monk_best.sort()
     self.assertEqual(monk_best, ['a', 'b', 'e'])

     # Scoring must leave the original table as it was.
     np.testing.assert_equal(pristine.X, self.monk.X)
     np.testing.assert_equal(pristine.Y, self.monk.Y)

     # The adult dataset must be scored without crashing, and its two
     # best attributes are pinned exactly.
     adult_scores = ReliefF()(self.adult, None)
     adult_best = [
         self.adult.domain[col].name for col in adult_scores.argsort()[-2:]
     ]
     adult_best.sort()
     self.assertEqual(adult_best, ['marital-status', 'relationship'])

     # A missing target class value must not crash the scorer.
     pristine.Y[0] = np.nan
     ReliefF()(pristine, None)