def get_top_features(clf, M=None, col_names=None, n=10, verbose=True):
    """Gets the top features for a fitted clf

    Parameters
    ----------
    clf : sklearn.base.BaseEstimator
        Fitted classifier with a feature_importances_ attribute
    M : numpy.ndarray or None
        Structured array corresponding to fitted clf. Used here to
        determine column names when col_names is None
    col_names : list of str or None
        List of column names corresponding to fitted clf. If None, the
        names come from M.dtype.names when is_sa(M) is true, and otherwise
        default to 'f0', 'f1', ... (one per feature importance)
    n : int
        Number of features to return
    verbose : boolean
        iff True, prints ranked features

    Returns
    -------
    numpy.ndarray
        structured array with top feature names and scores, ordered from
        the highest score to the lowest

    Raises
    ------
    ValueError
        If clf is not an instance of BaseEstimator

    """
    if not isinstance(clf, BaseEstimator):
        raise ValueError('clf must be an instance of sklearn.Base.BaseEstimator')

    scores = clf.feature_importances_
    if col_names is None:
        if is_sa(M):
            col_names = M.dtype.names
        else:
            # range rather than xrange: xrange is a NameError on Python 3,
            # while range behaves the same here on both Python 2 and 3.
            col_names = ['f{}'.format(i) for i in range(len(scores))]
    else:
        col_names = utils.check_col_names(col_names, n_cols=scores.shape[0])

    # argsort() is ascending; reversing it ranks the largest score first.
    ranked_name_and_score = [(col_names[x], scores[x])
                             for x in scores.argsort()[::-1]]
    ranked_name_and_score = convert_to_sa(
        ranked_name_and_score[:n],
        col_names=('feat_name', 'score'))
    if verbose:
        pprint_sa(ranked_name_and_score)
    return ranked_name_and_score
def get_top_features(clf, M=None, col_names=None, n=10, verbose=True):
    """Gets the top features for a fitted clf

    Parameters
    ----------
    clf : sklearn.base.BaseEstimator
        Fitted classifier with a feature_importances_ attribute
    M : numpy.ndarray or None
        Structured array corresponding to fitted clf. Used here to
        determine column names when col_names is None
    col_names : list of str or None
        List of column names corresponding to fitted clf. If None, the
        names come from M.dtype.names when is_sa(M) is true, and otherwise
        default to 'f0', 'f1', ... (one per feature importance)
    n : int
        Number of features to return
    verbose : boolean
        iff True, prints ranked features

    Returns
    -------
    numpy.ndarray
        structured array with top feature names and scores, ordered from
        the highest score to the lowest

    Raises
    ------
    ValueError
        If clf is not an instance of BaseEstimator

    """
    if not isinstance(clf, BaseEstimator):
        raise ValueError(
            'clf must be an instance of sklearn.Base.BaseEstimator')

    scores = clf.feature_importances_
    if col_names is None:
        if is_sa(M):
            col_names = M.dtype.names
        else:
            # range rather than xrange: xrange is a NameError on Python 3,
            # while range behaves the same here on both Python 2 and 3.
            col_names = ['f{}'.format(i) for i in range(len(scores))]
    else:
        col_names = utils.check_col_names(col_names, n_cols=scores.shape[0])

    # argsort() is ascending; reversing it ranks the largest score first.
    ranked_name_and_score = [(col_names[x], scores[x])
                             for x in scores.argsort()[::-1]]
    ranked_name_and_score = convert_to_sa(ranked_name_and_score[:n],
                                          col_names=('feat_name', 'score'))
    if verbose:
        pprint_sa(ranked_name_and_score)
    return ranked_name_and_score
def test_check_col_names(self):
    """Exercise utils.check_col_names on accepted and rejected inputs."""
    check = utils.check_col_names

    # A bare string is expected back as a one-element list.
    self.assertEqual(check('f0'), ['f0'])

    # A list of plain strings only has to be accepted without raising.
    check(['f0', 'f1', 'f2'])

    # A mix of unicode and plain string names is expected to compare equal
    # to the plain list of names.
    mixed_names = [u'f0', 'f1', u'f2']
    self.assertEqual(check(mixed_names), ['f0', 'f1', 'f2'])

    # A dict, and a list containing a non-string entry, must be rejected.
    for rejected in ({}, ['f0', 4, 'f2']):
        with self.assertRaises(ValueError):
            check(rejected)