Example #1
0
def plot_cumulative_features_importance(features,
                                        threshold=0.90,
                                        plot_size=(12, 8),
                                        feature_type='features',
                                        return_threshold=False):
    """
    Plot the cumulative coefficient frequency and mark the threshold crossing.

    The curve is drawn against the number of features (1..len(features)) and
    a dashed red vertical line marks the smallest number of features whose
    cumulative frequency is strictly greater than ``threshold``.

    :param features: a dataframe-like object with a
        'cumulative_coefficient_frequency' column (presumably sorted so the
        column is non-decreasing -- TODO confirm against the caller)
    :param threshold: cumulative frequency that has to be strictly exceeded
    :param plot_size: figure size, forwarded to ``plt.figure(figsize=...)``
    :param feature_type: noun used in the legend, the axis labels and the title
    :param return_threshold: when true, return the number of required features
    :return: the number of features needed to exceed ``threshold`` when
        ``return_threshold`` is true, otherwise None
    :raises ValueError: if no cumulative value is greater than ``threshold``
    """
    cumulative = features['cumulative_coefficient_frequency']

    # Positions whose cumulative frequency is strictly above the threshold.
    # Checked before any figure is created so a bad threshold does not leave
    # an empty figure behind.
    above_threshold = np.where(cumulative > threshold)[0]
    if above_threshold.size == 0:
        raise ValueError(
            'No cumulative_coefficient_frequency value exceeds the '
            'threshold {}.'.format(threshold))

    # Number of features needed for threshold cumulative importance
    importance_idx = np.min(above_threshold)
    required_features = importance_idx + 1
    thr_percentage = 100 * threshold
    legend_label = '{} {} required for \n{:.0f}% cumulative importance.'.format(required_features,
                                                                                feature_type,
                                                                                thr_percentage)

    plt.figure(figsize=plot_size)
    # Cumulative importance plot. The x values are feature counts (1-based)
    # so that they share the same scale as the threshold line below.
    feature_counts = range(1, len(features) + 1)
    plt.plot(feature_counts, cumulative, 'b-', label=legend_label)
    plt.xlabel('Number of {}'.format(feature_type.capitalize()), fontsize=12, labelpad=20)
    plt.ylabel('Cumulative {} frequency'.format(feature_type.capitalize()), fontsize=12, labelpad=20)
    plt.title('Cumulative {} Importance'.format(feature_type.capitalize()), fontsize=12, pad=20)
    # Threshold vertical line plot
    plt.vlines(required_features, ymin=0, ymax=1.05, linestyles='--', colors='red')
    plt.legend(loc='lower right', fontsize=10)
    plt.tight_layout()
    plt.show()
    if return_threshold:
        return required_features
Example #2
0
def plot_n_top_features(features, model_label, n=10, x_label='feature', y_label='coefficient', plot_size=(12, 4)):
    """
    Draw a horizontal bar chart of the first ``n`` rows of ``features``.

    :param features: a dataframe-like object providing ``head`` and ``plot``;
        only its first ``n`` rows are drawn (presumably already sorted by
        importance -- TODO confirm against the caller)
    :param model_label: text placed at the start of the chart title
    :param n: number of leading rows to plot
    :param x_label: name of the column passed to ``plot`` as ``x``
    :param y_label: name of the column passed to ``plot`` as ``y``
    :param plot_size: figure size, forwarded to ``plot`` as ``figsize``
    :return: None; the chart is displayed with ``plt.show()``
    """
    top_rows = features.head(n)
    top_rows.plot(kind='barh', x=x_label, y=y_label, figsize=plot_size)

    # Flip the y axis so the first row ends up at the top of the chart.
    axes = plt.gca()
    axes.invert_yaxis()

    axes.set_title(f'{model_label} Top {n} Features', pad=20)
    axes.set_xlabel('Coefficients', labelpad=20)
    axes.set_ylabel('Features labels', labelpad=20)
    plt.show()
Example #3
0
def plot_n_top_features(features, model_label, n=10, x_label='feature', y_label='coefficient', plot_size=(12, 4)):
    """
    Show the first ``n`` rows of ``features`` as a horizontal bar chart.

    :param features: a dataframe which contains features data; only its first
        ``n`` rows are drawn (presumably already sorted by importance --
        TODO confirm against the caller)
    :param model_label: text placed at the start of the chart title
    :param n: number of leading rows to plot
    :param x_label: name of the column passed to ``plot`` as ``x``
    :param y_label: name of the column passed to ``plot`` as ``y``
    :param plot_size: figure size, forwarded to ``plot`` as ``figsize``
    :return: None; the chart is displayed with ``plt.show()``
    """
    leading_rows = features.head(n)
    leading_rows.plot(kind='barh', figsize=plot_size, x=x_label, y=y_label)

    chart = plt.gca()
    # Flip the y axis so the first row ends up at the top of the chart.
    chart.invert_yaxis()
    chart.set_title(f'{model_label} Top {n} Features', pad=20)
    chart.set_xlabel('Coefficients', labelpad=20)
    chart.set_ylabel('Features labels', labelpad=20)

    plt.show()
Example #4
0
def plot_cumulative_features_importance(features, threshold=0.90, plot_size=(12, 8)):
    """
    Plot the cumulative coefficient frequency and mark the threshold crossing.

    The curve is drawn against the number of features (1..len(features)) and
    a dashed red vertical line marks the smallest number of features whose
    cumulative frequency is strictly greater than ``threshold``.

    :param features: a dataframe which contains features data; it must provide
        a 'cumulative_coefficient_frequency' column (presumably sorted so the
        column is non-decreasing -- TODO confirm against the caller)
    :param threshold: cumulative frequency that has to be strictly exceeded
    :param plot_size: figure size, forwarded to ``plt.figure(figsize=...)``
    :return: None; the figure is displayed with ``plt.show()``
    :raises ValueError: if no cumulative value is greater than ``threshold``
    """
    cumulative = features['cumulative_coefficient_frequency']

    # Positions whose cumulative frequency is strictly above the threshold.
    # Checked before any figure is created so a bad threshold does not leave
    # an empty figure behind.
    above_threshold = np.where(cumulative > threshold)[0]
    if above_threshold.size == 0:
        raise ValueError(
            'No cumulative_coefficient_frequency value exceeds the '
            'threshold {}.'.format(threshold))

    # Number of features needed for threshold cumulative importance
    importance_idx = np.min(above_threshold)
    required_features = importance_idx + 1
    thr_percentage = 100 * threshold
    legend_label = '{} features required for {:.0f}% of cumulative importance.'.format(required_features,
                                                                                       thr_percentage)

    plt.figure(figsize=plot_size)
    # Cumulative importance plot. The x values are feature counts (1-based)
    # so that they share the same scale as the threshold line below.
    feature_counts = range(1, len(features) + 1)
    plt.plot(feature_counts, cumulative, 'b-', label=legend_label)
    plt.xlabel('Number of Features', fontsize=12, labelpad=20)
    plt.ylabel('Cumulative Coefficient frequency', fontsize=12, labelpad=20)
    plt.title('Cumulative Feature Importance', fontsize=14, pad=20)
    # Threshold vertical line plot
    plt.vlines(required_features, ymin=0, ymax=1.05, linestyles='--', colors='red')
    plt.legend(loc='lower right', fontsize=12)
    plt.tight_layout()
    plt.show()