Beispiel #1
0
def get_top_features(clf, M=None, col_names=None, n=10, verbose=True):
    """Gets the top features for a fitted clf

    Features are ranked by ``clf.feature_importances_`` in descending order
    and the first ``n`` of them are returned.

    Parameters
    ----------
    clf : sklearn.base.BaseEstimator
        Fitted classifier with a feature_importances_ attribute
    M : numpy.ndarray or None
        Structured array corresponding to fitted clf. Used here to determine
        column names. Only consulted when col_names is None.
    col_names : list of str or None
        List of column names corresponding to fitted clf. When None, names
        come from ``M.dtype.names`` if ``is_sa(M)`` holds, otherwise they are
        generated as 'f0', 'f1', ...
    n : int
        Number of features to return
    verbose : boolean
        iff True, prints ranked features

    Returns
    -------
    numpy.ndarray
        structured array with top feature names and scores

    Raises
    ------
    ValueError
        If clf is not an instance of sklearn.base.BaseEstimator

    """
    if not isinstance(clf, BaseEstimator):
        raise ValueError('clf must be an instance of sklearn.Base.BaseEstimator')

    scores = clf.feature_importances_
    if col_names is None:
        if is_sa(M):
            col_names = M.dtype.names
        else:
            # range rather than xrange: xrange does not exist on Python 3,
            # and range behaves the same here on Python 2.
            col_names = ['f{}'.format(i) for i in range(len(scores))]
    else:
        col_names = utils.check_col_names(col_names, n_cols=scores.shape[0])

    # Indices sorted by descending score, cut to n before the name lookup so
    # that no (name, score) pairs are built only to be thrown away.
    top_indices = scores.argsort()[::-1][:n]
    ranked_name_and_score = convert_to_sa(
            [(col_names[x], scores[x]) for x in top_indices],
            col_names=('feat_name', 'score'))
    if verbose:
        pprint_sa(ranked_name_and_score)
    return ranked_name_and_score
Beispiel #2
0
def get_top_features(clf, M=None, col_names=None, n=10, verbose=True):
    """Gets the top features for a fitted clf

    The ranking is taken from ``clf.feature_importances_``, highest score
    first, truncated to ``n`` entries.

    Parameters
    ----------
    clf : sklearn.base.BaseEstimator
        Fitted classifier with a feature_importances_ attribute
    M : numpy.ndarray or None
        Structured array corresponding to fitted clf. Used here to determine
        column names. Ignored when col_names is given.
    col_names : list of str or None
        List of column names corresponding to fitted clf. If None, the names
        are read from ``M.dtype.names`` when ``is_sa(M)`` is true; failing
        that, placeholder names 'f0', 'f1', ... are generated.
    n : int
        Number of features to return
    verbose : boolean
        iff True, prints ranked features

    Returns
    -------
    numpy.ndarray
        structured array with top feature names and scores

    Raises
    ------
    ValueError
        If clf is not an instance of sklearn.base.BaseEstimator

    """
    if not isinstance(clf, BaseEstimator):
        raise ValueError(
            'clf must be an instance of sklearn.Base.BaseEstimator')

    scores = clf.feature_importances_
    if col_names is None:
        if is_sa(M):
            col_names = M.dtype.names
        else:
            # xrange is Python 2 only; range works on both major versions.
            col_names = ['f{}'.format(i) for i in range(len(scores))]
    else:
        col_names = utils.check_col_names(col_names, n_cols=scores.shape[0])

    # Take the n best indices first, then pair each with its name and score;
    # the result matches ranking everything and slicing afterwards.
    best_first = scores.argsort()[::-1][:n]
    ranked_name_and_score = convert_to_sa(
        [(col_names[x], scores[x]) for x in best_first],
        col_names=('feat_name', 'score'))
    if verbose:
        pprint_sa(ranked_name_and_score)
    return ranked_name_and_score
Beispiel #3
0
 def test_check_col_names(self):
     """Check utils.check_col_names against valid and invalid inputs."""
     check = utils.check_col_names

     # A single name given as a bare string is expected back as a list.
     self.assertEqual(check('f0'), ['f0'])

     # A plain list of names only has to be accepted without raising.
     check(['f0', 'f1', 'f2'])

     # Mixed unicode/str names are expected to compare equal to plain names.
     mixed_names = [u'f0', 'f1', u'f2']
     self.assertEqual(check(mixed_names), ['f0', 'f1', 'f2'])

     # A dict and a list holding a non-string entry must both be rejected.
     for bad_input in ({}, ['f0', 4, 'f2']):
         self.assertRaises(ValueError, check, bad_input)