def plot_feature_importances(X, outputTable, labels, numEstimators=50, numTopFeatures=7):
    """
    plots feature importances using random forest

    For every column of outputTable a new RandomForester is created and
    fitted on (X, that column); its plot() method is then called and
    element [1] of rf.top_indices(labels=labels) is printed.

    @param X: X data; X.shape[1] is passed to RandomForester as num_features
    @param outputTable: a pandas data frame with columns being A,..,G
    @param labels: column labels, forwarded to rf.plot and rf.top_indices
    @param numEstimators: n_estimators to use for RF
    @param numTopFeatures: number of features to show (passed to rf.plot)
    @return: None; all output goes through rf.plot() and print
    """
    for col in outputTable.columns:
        # Single-argument print(...) parses on both Python 2 and Python 3 and
        # emits the same "---------- <col>" header as the old print statement.
        print('{0} {1}'.format('-' * 10, col))
        y = outputTable[col]
        rf = RandomForester(num_features=X.shape[1], n_estimators=numEstimators)
        rf.fit(X, y)
        rf.plot(num_features=numTopFeatures, labels=labels)
        print(rf.top_indices(labels=labels)[1])