Exemple #1
0
def roc_decision_rule(decision_rate, thr, decision_threshold):
    """
    Pick the first threshold whose associated rate exceeds a target value.

    cf :
    - https://openclassrooms.com/fr/courses/4297211-evaluez-les-performances-dun-modele-de-machine-learning/
      4308261-evaluez-un-algorithme-de-classification-qui-retourne-des-scores

    Parameters
    ----------
    decision_rate : 1-D array-like
        Rates compared against ``decision_threshold`` (presumably one axis of
        a ROC curve, aligned position by position with ``thr`` - confirm
        against the caller).
    thr : sequence
        Candidate thresholds; must be indexable by integer position.
    decision_threshold : scalar
        Value the rate has to exceed (strict ``>`` comparison).

    Returns
    -------
    tuple
        ``(threshold, idx)`` where ``idx`` is the first position at which
        ``decision_rate > decision_threshold`` and ``threshold`` is
        ``thr[idx]``.

    Raises
    ------
    ValueError
        If no value of ``decision_rate`` exceeds ``decision_threshold``.
    """
    # np.asarray lets plain Python sequences be compared element-wise.
    above_target = np.flatnonzero(
        np.asarray(decision_rate) > decision_threshold)
    if above_target.size == 0:
        # Fail with an explicit message instead of numpy's
        # "zero-size array to reduction operation" error.
        raise ValueError(
            "no value in decision_rate exceeds decision_threshold={!r}".format(
                decision_threshold))
    idx = above_target[0]
    threshold = thr[idx]
    return threshold, idx
Exemple #2
0
    def scree_plot(self, threshold=None, save_as_img=False):
        """
        Draw a scree plot: one bar per axis plus the cumulative inertia curve.

        Bar heights are ``self.evr * 100`` (presumably the explained variance
        ratio of each axis, expressed in percent - confirm against the class).

        Parameters
        ----------
        threshold : float, optional
            Share of the total inertia to reach. When given, a dashed red
            vertical line marks the first axis rank at which the normalised
            cumulative inertia exceeds this share, and a legend reports that
            rank.
        save_as_img : bool
            When true, the figure is written to ``scree.jpg`` before being
            shown.
        """
        inertia_pct = self.evr * 100
        ranks = np.arange(len(inertia_pct)) + 1
        plt.bar(ranks, inertia_pct)

        # Arguments of the cumulative curve; a label is only attached when a
        # threshold is requested.
        curve_kwargs = {"c": "red", "marker": 'o'}
        has_threshold = threshold is not None
        if has_threshold:
            # Cumulative inertia normalised by the total shown on the plot.
            cumulative_share = np.cumsum(inertia_pct / inertia_pct.sum())
            # Rank (1-based) of the first axis exceeding the threshold.
            n_features = np.min(np.where(cumulative_share > threshold)) + 1
            threshold_percentage = 100 * threshold
            plt.vlines(n_features,
                       ymin=0,
                       ymax=threshold_percentage,
                       linestyles='--',
                       colors='red')
            curve_kwargs["label"] = (
                '{} features required for {:.0f}% of inertia.'.format(
                    n_features, threshold_percentage))

        plt.plot(ranks, inertia_pct.cumsum(), **curve_kwargs)
        if has_threshold:
            plt.legend(loc='lower right', fontsize=12)

        plt.xlabel("Inertia axis rank", labelpad=20)
        plt.ylabel("Inertia (%)", labelpad=20)
        criteria_line = "\n(Kaiser criterion = {} : Elbow criterion = {})".format(
            self.kaiser_criterion(),
            elbow_criterion(total_inertia=self.evr))
        plt.title("Scree plot" + criteria_line, pad=20)

        if save_as_img:
            plt.tight_layout()
            plt.savefig('scree.jpg')
        plt.show(block=False)
Exemple #3
0
def elbow_criterion(total_inertia, threshold=0.25):
    """
    Find total components/clusters number based on Elbow criterion :
    (cf : https://en.wikipedia.org/wiki/Elbow_method_(clustering))

    The inertia values are accumulated, then ``percentage_change`` is applied
    to every pair of consecutive cumulative values (next value first, current
    value second) and the absolute value of each result is kept as the
    "variation" of that transition.

    Parameters
    ----------
    total_inertia : 1-D array-like
        Inertia (or explained variance) of each component/cluster, in rank
        order. At least two values are required.
    threshold : 'min' or number, default 0.25
        - ``'min'``: select the transition with the smallest variation.
        - number: quantile level passed to ``np.quantile``; the first
          transition whose variation is at or below that quantile of all
          variations is selected.

    Returns
    -------
    int
        1-based position of the selected transition.

    Raises
    ------
    ValueError
        If fewer than two inertia values are given, or if ``threshold`` is
        neither ``'min'`` nor a number.
    """
    var_cumsum = np.asarray(total_inertia).cumsum()
    if len(var_cumsum) < 2:
        raise ValueError(
            "elbow_criterion needs at least two inertia values, got {}".format(
                len(var_cumsum)))
    # Compute variations ratio from cumulated explained variance values
    variations = [
        abs(percentage_change(following, current))
        for current, following in zip(var_cumsum[:-1], var_cumsum[1:])
    ]
    # Get total components selected
    # Compare strings by value: identity (`is`) on a literal is unreliable.
    if isinstance(threshold, str):
        if threshold != 'min':
            raise ValueError(
                "threshold must be 'min' or a number, got {!r}".format(
                    threshold))
        return variations.index(min(variations)) + 1
    # isinstance also accepts ints and numpy scalars, which an exact
    # `type(...) is float` check rejects.
    if not isinstance(threshold, (int, float, np.integer, np.floating)):
        raise ValueError(
            "threshold must be 'min' or a number, got {!r}".format(threshold))
    variations = np.array(variations)
    cutoff = np.quantile(variations, q=threshold)
    n_selected = np.min(np.where(variations <= cutoff)) + 1
    return n_selected