def plot_cumulative_features_importance(features, threshold=0.90, plot_size=(12, 8), feature_type='features', return_threshold=False):
    """
    Plot the cumulative importance curve and mark how many items are needed
    to exceed ``threshold``.

    :param features: table-like object indexable by column name; it must
        provide a ``'cumulative_coefficient_frequency'`` column (presumably a
        pandas DataFrame sorted by decreasing importance -- confirm with caller)
    :param threshold: cumulative value that must be strictly exceeded
    :param plot_size: ``(width, height)`` passed to ``plt.figure(figsize=...)``
    :param feature_type: noun used in the legend, axis labels and title
    :param return_threshold: when true, return the number of required items
    :return: the required count as ``int`` if ``return_threshold`` is true,
        otherwise ``None``
    :raises ValueError: if the cumulative column never exceeds ``threshold``
    """
    cumulative = np.asarray(features['cumulative_coefficient_frequency'])

    # Resolve the crossing point before any figure is created, so invalid
    # input fails with a clear message instead of leaving an empty figure.
    above_threshold = np.flatnonzero(cumulative > threshold)
    if above_threshold.size == 0:
        raise ValueError(
            'cumulative_coefficient_frequency never exceeds threshold={!r}; '
            'cannot determine the number of required {}.'.format(threshold, feature_type)
        )

    # Positions are 0-based, counts are 1-based.
    required_features = int(above_threshold[0]) + 1
    thr_percentage = 100 * threshold
    legend_label = '{} {} required for \n{:.0f}% cumulative importance.'.format(
        required_features, feature_type, thr_percentage)
    type_title = feature_type.capitalize()

    plt.figure(figsize=plot_size)

    # Plot against 1-based counts so the x axis really is "number of items"
    # and the threshold line meets the curve at the crossing point.
    item_counts = np.arange(1, len(cumulative) + 1)
    plt.plot(item_counts, cumulative, 'b-', label=legend_label)
    plt.xlabel('Number of {}'.format(type_title), fontsize=12, labelpad=20)
    plt.ylabel('Cumulative {} frequency'.format(type_title), fontsize=12, labelpad=20)
    plt.title('Cumulative {} Importance'.format(type_title), fontsize=12, pad=20)

    # Vertical marker at the required count.
    plt.vlines(required_features, ymin=0, ymax=1.05, linestyles='--', colors='red')

    plt.legend(loc='lower right', fontsize=10)
    plt.tight_layout()
    plt.show()

    if return_threshold:
        return required_features
def plot_n_top_features(features, model_label, n=10, x_label='feature', y_label='coefficient', plot_size=(12, 4)):
    """
    Draw a horizontal bar chart of the first ``n`` rows of ``features``.

    :param features: object exposing ``head(n)`` whose result has a pandas-style
        ``plot`` method (presumably a DataFrame already sorted by importance)
    :param model_label: text placed in front of ``Top {n} Features`` in the title
    :param n: number of leading rows to display
    :param x_label: column passed to ``plot`` as ``x`` (the bar labels)
    :param y_label: column passed to ``plot`` as ``y`` (the bar lengths)
    :param plot_size: ``(width, height)`` forwarded as ``figsize``
    :return: None; the figure is shown with ``plt.show()``
    """
    leading_rows = features.head(n)
    leading_rows.plot(x=x_label, y=y_label, kind='barh', figsize=plot_size)

    # Flip the vertical axis so the first row is drawn at the top.
    current_axes = plt.gca()
    current_axes.invert_yaxis()

    chart_title = f'{model_label} Top {n} Features'
    plt.title(chart_title, pad=20)
    plt.xlabel('Coefficients', labelpad=20)
    plt.ylabel('Features labels', labelpad=20)
    plt.show()
def plot_n_top_features(features, model_label, n=10, x_label='feature', y_label='coefficient', plot_size=(12, 4)):
    """
    Show the first ``n`` rows of ``features`` as a horizontal bar chart.

    :param features: a dataframe which contains features data; only its first
        ``n`` rows (``features.head(n)``) are plotted
    :param model_label: prefix of the chart title (``'<model_label> Top <n> Features'``)
    :param n: number of leading rows to plot
    :param x_label: name of the column used for the bar labels
    :param y_label: name of the column used for the bar lengths
    :param plot_size: figure size forwarded to the pandas ``plot`` call
    :return: None
    """
    subset = features.head(n)
    subset.plot(kind='barh', x=x_label, y=y_label, figsize=plot_size)

    # Work on the axes pandas just drew on; reversing the y axis puts the
    # first row at the top of the chart.
    ax = plt.gca()
    ax.invert_yaxis()
    ax.set_title(f'{model_label} Top {n} Features', pad=20)
    ax.set_xlabel('Coefficients', labelpad=20)
    ax.set_ylabel('Features labels', labelpad=20)

    plt.show()
def plot_cumulative_features_importance(features, threshold=0.90, plot_size=(12, 8), feature_type='features', return_threshold=False):
    """
    Plot the cumulative importance curve and mark how many items are needed
    to exceed ``threshold``.

    :param features: a dataframe which contains features data; it must provide
        a ``'cumulative_coefficient_frequency'`` column (presumably sorted by
        decreasing importance -- confirm with caller)
    :param threshold: cumulative value that must be strictly exceeded
    :param plot_size: ``(width, height)`` passed to ``plt.figure(figsize=...)``
    :param feature_type: noun used in the legend text and the x-axis label;
        the default keeps the original wording
    :param return_threshold: when true, return the number of required items
    :return: the required count as ``int`` if ``return_threshold`` is true,
        otherwise ``None``
    :raises ValueError: if the cumulative column never exceeds ``threshold``
    """
    cumulative = np.asarray(features['cumulative_coefficient_frequency'])

    # Find the crossing point before creating the figure, so invalid input
    # fails with a clear message instead of leaving an empty figure.
    above_threshold = np.flatnonzero(cumulative > threshold)
    if above_threshold.size == 0:
        raise ValueError(
            'cumulative_coefficient_frequency never exceeds threshold={!r}; '
            'cannot determine the number of required {}.'.format(threshold, feature_type)
        )

    # Positions are 0-based, counts are 1-based.
    required_features = int(above_threshold[0]) + 1
    thr_percentage = 100 * threshold
    legend_label = '{} {} required for {:.0f}% of cumulative importance.'.format(
        required_features, feature_type, thr_percentage)

    plt.figure(figsize=plot_size)

    # Plot against 1-based counts so the x axis matches its "Number of ..."
    # label and the threshold line meets the curve at the crossing point.
    item_counts = np.arange(1, len(cumulative) + 1)
    plt.plot(item_counts, cumulative, 'b-', label=legend_label)
    plt.xlabel('Number of {}'.format(feature_type.capitalize()), fontsize=12, labelpad=20)
    plt.ylabel('Cumulative Coefficient frequency', fontsize=12, labelpad=20)
    plt.title('Cumulative Feature Importance', fontsize=14, pad=20)

    # Vertical marker at the required count.
    plt.vlines(required_features, ymin=0, ymax=1.05, linestyles='--', colors='red')

    plt.legend(loc='lower right', fontsize=12)
    plt.tight_layout()
    plt.show()

    if return_threshold:
        return required_features