def test_relieff(self):
    """Exercise ReliefF: ranking, input immutability, edge cases, seeding."""
    snapshot = self.breast.copy()

    # The three highest-weighted attributes of ``self.breast`` must match
    # the reference set; names are sorted, so rank within the top three
    # is not checked.
    scores = ReliefF(random_state=42)(self.breast, None)
    top_three = sorted(
        self.breast.domain[idx].name for idx in scores.argsort()[-3:]
    )
    expected = ['Bare_Nuclei', 'Clump thickness', 'Marginal_Adhesion']
    self.assertEqual(top_three, expected)

    # Scoring must leave the input table untouched.
    np.testing.assert_equal(snapshot.X, self.breast.X)
    np.testing.assert_equal(snapshot.Y, self.breast.Y)

    # Scoring must not crash on the lenses dataset. Only membership in the
    # top two is asserted, which leaves some leeway for the randomness in
    # ReliefF's random instance selection.
    scores = ReliefF(random_state=42)(self.lenses, None)
    top_two = [
        self.lenses.domain[idx].name for idx in scores.argsort()[-2:]
    ]
    self.assertIn('tear_rate', top_two)

    # Scoring must not crash on missing target class values.
    snapshot.Y[0] = np.nan
    ReliefF()(snapshot, None)

    # Two scorers built with the same seed must produce identical weights.
    first_run = ReliefF(random_state=1)(self.breast, None)
    second_run = ReliefF(random_state=1)(self.breast, None)
    np.testing.assert_array_equal(first_run, second_run)
def test_relieff(self):
    """Exercise ReliefF: ranking, input immutability, edge cases, seeding."""
    monk_before = self.monk.copy()

    # Top three attributes of ``self.monk`` by weight, compared as a sorted
    # list of names (rank inside the top three is irrelevant).
    monk_weights = ReliefF(random_state=42)(self.monk, None)
    best_three = monk_weights.argsort()[-3:]
    best_names = sorted(self.monk.domain[col].name for col in best_three)
    self.assertEqual(best_names, ['a', 'b', 'e'])

    # Original data is unchanged.
    for before, after in ((monk_before.X, self.monk.X),
                          (monk_before.Y, self.monk.Y)):
        np.testing.assert_equal(before, after)

    # Ensure it doesn't crash on the adult dataset. Checking membership in
    # the top two leaves some leeway for the randomness in ReliefF's random
    # instance selection.
    adult_weights = ReliefF(random_state=42)(self.adult, None)
    best_two = adult_weights.argsort()[-2:]
    adult_names = [self.adult.domain[col].name for col in best_two]
    self.assertIn('marital-status', adult_names)

    # Ensure it doesn't crash on missing target class values.
    monk_before.Y[0] = np.nan
    ReliefF()(monk_before, None)

    # The same seed must reproduce the same weights.
    np.testing.assert_array_equal(
        ReliefF(random_state=1)(self.monk, None),
        ReliefF(random_state=1)(self.monk, None),
    )
def compute_attr_order(self):
    """
    Fill in `self.attr_ordering` and position the class variable.

    When `self.attr_ordering` is empty it is computed from
    `self.master.discrete_data`:

    - without a class variable, all domain variables sorted by name;
    - with a class variable, the attributes sorted by descending ReliefF
      weight, with ties broken by name.

    When `self.attr_ordering` is already non-empty, its order is kept.

    In both cases, if the data has a class variable, it is then added to or
    removed from the front of the ordering: removed when
    `self._compute_class_dists()` is truthy, inserted when it is falsy
    (presumably: when not coloring by class distribution).
    """
    table = self.master.discrete_data
    target = table.domain.class_var

    if not self.attr_ordering:
        if target is None:
            self.attr_ordering = sorted(table.domain,
                                        key=attrgetter("name"))
        else:
            scores = ReliefF(n_iterations=100, k_nearest=10)(table)
            ranked = sorted(
                zip(scores, table.domain.attributes),
                key=lambda pair: (-pair[0], pair[1].name),
            )
            self.attr_ordering = [var for _, var in ranked]

    # Without a class variable there is nothing to add or remove.
    if target is None:
        return

    if self._compute_class_dists():
        # The class must not lead the ordering in this mode.
        if self.attr_ordering[0] is target:
            del self.attr_ordering[0]
        return

    # Otherwise the class variable must come first.
    if self.attr_ordering[0] is not target:
        self.attr_ordering.insert(0, target)
def test_relieff(self):
    """Exercise ReliefF: ranking, input immutability and edge cases."""
    untouched = self.monk.copy()

    # The three highest-weighted attributes of ``self.monk``; names are
    # sorted before comparison, so their relative rank does not matter.
    weights = ReliefF()(self.monk, None)
    leaders = sorted(
        self.monk.domain[pos].name for pos in weights.argsort()[-3:]
    )
    self.assertEqual(leaders, ['a', 'b', 'e'])

    # Original data is unchanged.
    np.testing.assert_equal(untouched.X, self.monk.X)
    np.testing.assert_equal(untouched.Y, self.monk.Y)

    # Ensure it doesn't crash on the adult dataset. Only membership in the
    # top two is asserted: some leeway for the randomness in ReliefF's
    # random instance selection.
    weights = ReliefF()(self.adult, None)
    runners = [
        self.adult.domain[pos].name for pos in weights.argsort()[-2:]
    ]
    self.assertIn('marital-status', runners)

    # Ensure it doesn't crash on missing target class values.
    untouched.Y[0] = np.nan
    ReliefF()(untouched, None)
def _fit(self):
    """Score every attribute and record the scores by attribute name.

    The scoring method is selected by ``self.params["eval_method"]``:

    - ``"relief"``: ``ReliefF`` with 100 iterations;
    - ``"fcbf"``: ``FCBF``;
    - anything else: the first element of
      ``RandomForestLearner().score_data(table)``.

    Results are written into ``self.feature_importances``, keyed by the
    name of each attribute in the table's domain.
    """
    # Silence warnings only for the duration of the conversion and scoring.
    # ``catch_warnings`` restores the previous filters on exit, so this
    # method no longer disables warnings for the rest of the process.
    with warnings.catch_warnings():
        warnings.simplefilter(action='ignore')
        table = self.data.to_table()
        method = self.params["eval_method"]
        # Scorers are configured first and then called on the data,
        # rather than being handed the data in the constructor.
        if method == "relief":
            scores = ReliefF(n_iterations=100)(table)
        elif method == "fcbf":
            scores = FCBF()(table)
        else:
            scores = RandomForestLearner().score_data(table)[0]

    for attr, score in zip(table.domain.attributes, scores):
        self.feature_importances[attr.name] = score
def test_relieff(self):
    """Exercise ReliefF: ranking, input immutability, edge cases, seeding."""
    breast_before = self.breast.copy()
    seeded = ReliefF(random_state=42)

    # Names of the three highest-weighted attributes of ``self.breast``,
    # sorted so that only the set (not the rank) is compared.
    breast_weights = seeded(self.breast, None)
    winners = sorted(
        self.breast.domain[i].name for i in breast_weights.argsort()[-3:]
    )
    self.assertEqual(
        winners, ['Bare_Nuclei', 'Clump thickness', 'Marginal_Adhesion'])

    # Original data is unchanged.
    np.testing.assert_equal(breast_before.X, self.breast.X)
    np.testing.assert_equal(breast_before.Y, self.breast.Y)

    # Ensure it doesn't crash on the lenses dataset. A fresh seeded scorer
    # is used, as in the first check. Membership in the top two leaves some
    # leeway for the randomness in ReliefF's random instance selection.
    lenses_weights = ReliefF(random_state=42)(self.lenses, None)
    lenses_top = [
        self.lenses.domain[i].name for i in lenses_weights.argsort()[-2:]
    ]
    self.assertIn('tear_rate', lenses_top)

    # Ensure it doesn't crash on missing target class values.
    breast_before.Y[0] = np.nan
    ReliefF()(breast_before, None)

    # Identically seeded scorers must agree exactly.
    run_a = ReliefF(random_state=1)(self.breast, None)
    run_b = ReliefF(random_state=1)(self.breast, None)
    np.testing.assert_array_equal(run_a, run_b)
def score_heuristic(self):
    """Return the parent widget's attribute names ordered by ReliefF weight.

    A temporary table is built from the transposed scaled data of the
    parent widget's graph, with one synthetic continuous variable per
    column (named by its index) and the original class variables. ReliefF
    (100 iterations, ``self.k`` nearest neighbours) scores that table, and
    the weights are paired with the original attribute names.

    Returns:
        list: attribute names sorted by ``(weight, name)`` in descending
        order.
    """
    widget = self.parent_widget
    X = widget.graph.scaled_data.T
    Y = widget.data.Y

    # Placeholder variables: only the column positions matter for scoring.
    placeholders = [ContinuousVariable(str(i)) for i in range(X.shape[1])]
    domain = Orange.data.Domain(placeholders, widget.data.domain.class_vars)
    table = Orange.data.Table(domain, X, Y)

    weights = ReliefF(n_iterations=100, k_nearest=self.k)(table)

    names = (var.name for var in widget.data.domain.attributes)
    ranked = sorted(zip(weights, names), reverse=True)
    return [name for _, name in ranked]
def _compute_attr_order(self):
    """Rank candidate attributes by ReliefF; used by VizRank.

    Candidates are the master's selected and other variables, excluding
    ``self.attr_color``, which becomes the class variable of the
    transformed data. Side effects: sets ``self.data``,
    ``self.valid_data`` (a boolean not-NaN mask over X columns followed by
    one Y column) and ``self.attr_ordering``.

    Returns:
        list: the candidates sorted by descending ReliefF weight, with ties
        broken by name. This is the same list stored in
        ``self.attr_ordering``.
    """
    master = self.master
    candidates = [
        var
        for var in chain(master.model_selected[:], master.model_other[:])
        if var is not self.attr_color
    ]

    domain = Domain(attributes=candidates, class_vars=self.attr_color)
    data = self.master.data.transform(domain)
    self.data = data

    # Y is reshaped into a single column so it can be stacked next to X.
    x_present = ~np.isnan(data.X)
    y_present = ~np.isnan(data.Y.reshape(len(data.Y), 1))
    self.valid_data = np.hstack((x_present, y_present))

    weights = ReliefF(n_iterations=100, k_nearest=self.minK)(data)
    ranked = sorted(
        zip(weights, candidates),
        key=lambda pair: (-pair[0], pair[1].name),
    )
    ordering = [var for _, var in ranked]
    self.attr_ordering = ordering
    return ordering
def test_relieff(self):
    """Exercise ReliefF: ranking, input immutability and edge cases."""
    pristine = self.monk.copy()

    # The three highest-weighted attributes of ``self.monk``, compared as a
    # sorted list of names.
    monk_scores = ReliefF()(self.monk, None)
    monk_top = sorted(
        self.monk.domain[k].name for k in monk_scores.argsort()[-3:]
    )
    self.assertEqual(monk_top, ['a', 'b', 'e'])

    # Original data is unchanged.
    np.testing.assert_equal(pristine.X, self.monk.X)
    np.testing.assert_equal(pristine.Y, self.monk.Y)

    # Ensure it doesn't crash on the adult dataset, and that the two
    # highest-weighted attributes are exactly the expected pair.
    # NOTE(review): ReliefF is not seeded here; confirm the ranking is
    # stable enough for an exact match.
    adult_scores = ReliefF()(self.adult, None)
    adult_top = sorted(
        self.adult.domain[k].name for k in adult_scores.argsort()[-2:]
    )
    self.assertEqual(adult_top, ['marital-status', 'relationship'])

    # Ensure it doesn't crash on missing target class values.
    pristine.Y[0] = np.nan
    ReliefF()(pristine, None)