Beispiel #1
0
 def test_calculate_feature_importances_ensemble(self):
     """Check importances computed from a fitted random-forest pipeline.

     Builds a ``DictVectorizer`` + ``RandomForestClassifier`` pipeline,
     fits it on the fixture features and targets, and compares the sorted
     output of ``_calculate_feature_importances`` with the sorted
     ``self.exp_rf`` fixture.
     """
     steps = [
         ('dv', DictVectorizer()),
         ('est', RandomForestClassifier(n_estimators=10)),
     ]
     pipeline = Pipeline(steps)
     samples = _extract_features(self.features)
     labels = self.targets.values.ravel()
     pipeline.fit(samples, labels)
     importances = _calculate_feature_importances(pipeline)
     self.assertEqual(sorted(self.exp_rf), sorted(importances))
Beispiel #2
0
 def test_null_feature_importance(self):
     """Check ``_null_feature_importance`` against a fixed expectation.

     A 3x3 table with observations o1..o3 and samples s1..s3 is passed
     through ``_extract_features``; the result is expected to equal a
     one-column ``importance`` frame of ones indexed by ``feature``.
     """
     observation_ids = ['o1', 'o2', 'o3']
     sample_ids = ['s1', 's2', 's3']

     expected = pd.DataFrame([1, 1, 1],
                             index=observation_ids,
                             columns=['importance'])
     expected.index.name = 'feature'

     counts = np.array([[1., 2., 3.], [3., 2., 1.], [7., 6., 9.]])
     table = biom.Table(counts, observation_ids, sample_ids)
     feature_dicts = _extract_features(table)
     observed = _null_feature_importance(feature_dicts)
     pdt.assert_frame_equal(observed, expected)
 def test_extract_features(self):
     """Check that ``_extract_features`` mirrors the source table.

     Verifies three things about the dicts produced from
     ``self.table_ecam_fp``:

     * the feature names learned by a ``DictVectorizer`` fitted on the
       dicts match the table's observation ids;
     * there is one dict per id returned by ``table.ids()``;
     * walking the dicts alongside ``table.iter()``, zero counts are
       absent from the dict and non-zero counts are stored under the
       feature id.
     """
     table = self.table_ecam_fp
     dicts = _extract_features(table)
     dv = DictVectorizer()
     dv.fit(dicts)
     features = table.ids('observation')
     # get_feature_names() was deprecated in scikit-learn 1.0 and removed
     # in 1.2; prefer get_feature_names_out() and fall back only when
     # running against an older release that lacks it.
     if hasattr(dv, 'get_feature_names_out'):
         vectorized_names = dv.get_feature_names_out()
     else:
         vectorized_names = dv.get_feature_names()
     self.assertEqual(set(vectorized_names), set(features))
     self.assertEqual(len(dicts), len(table.ids()))
     # Each item from table.iter() is unpacked as a 3-tuple; only the
     # first element (the row of counts) is needed here.
     for dict_row, (table_row, _, _) in zip(dicts, table.iter()):
         for feature, count in zip(features, table_row):
             if count == 0:
                 # assertNotIn reports both operands on failure.
                 self.assertNotIn(feature, dict_row)
             else:
                 self.assertEqual(dict_row[feature], count)
Beispiel #4
0
 def test_calculate_feature_importances_svm(self):
     """Check importances computed from a fitted linear-SVM pipeline.

     Builds a ``DictVectorizer`` + ``LinearSVC`` pipeline, fits it on the
     fixture features and targets, and compares the sorted output of
     ``_calculate_feature_importances`` with the sorted ``self.exp_lsvm``
     fixture.
     """
     steps = [
         ('dv', DictVectorizer()),
         ('est', LinearSVC()),
     ]
     pipeline = Pipeline(steps)
     samples = _extract_features(self.features)
     labels = self.targets.values.ravel()
     pipeline.fit(samples, labels)
     importances = _calculate_feature_importances(pipeline)
     self.assertEqual(sorted(self.exp_lsvm), sorted(importances))
Beispiel #5
0
 def biom_to_features(self, table):
     """Return whatever ``_extract_features`` produces for ``table``."""
     features = _extract_features(table)
     return features