Exemple #1
0
def plot_feature_importances(X, outputTable, labels, numEstimators=50, numTopFeatures=7):
    """
    Plots feature importances using random forest, one model per output column.

    For every column of ``outputTable`` a fresh ``RandomForester`` is fitted
    on ``(X, column)``, its ``plot`` method is called, and the second element
    of ``top_indices(labels=labels)`` is printed. Nothing is returned.

    @param X: X data; ``X.shape[1]`` is passed to the model as ``num_features``
        (presumably a 2-D samples-by-features array -- TODO confirm)
    @param outputTable: a pandas data frame with columns being A,..,G
    @param labels: column labels, forwarded to ``plot`` and ``top_indices``
    @param numEstimators: n_estimators to use for RF
    @param numTopFeatures: number of features to show
    """

    for col in outputTable.columns:
        # Single-argument print(...) produces the same text under Python 2
        # (print statement) and Python 3 (print function).
        print('%s %s' % ('-' * 10, col))
        y = outputTable[col]

        # A new model per target column, so no fitted state is shared.
        rf = RandomForester(num_features=X.shape[1], n_estimators=numEstimators)
        rf.fit(X, y)
        rf.plot(num_features=numTopFeatures, labels=labels)

        # NOTE(review): element [1] of top_indices() is presumably the
        # labels of the top features -- confirm against RandomForester.
        print(rf.top_indices(labels=labels)[1])