# Imports assumed by the plotting helpers below (standard pandas / numpy /
# matplotlib / seaborn / scipy / scikit-learn dependencies used in this file).
import itertools

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib.collections import LineCollection
from scipy.cluster.hierarchy import dendrogram
from sklearn.metrics import auc, roc_curve


def plot_training_curve(self, xticks_step=5):
    evolution = pd.DataFrame({
        'Generation': self.log.select("gen"),
        'Max Accuracy': self.log.select("max"),
        'Average Accuracy': self.log.select("avg"),
        'Min Accuracy': self.log.select("min")
    })
    plt.title('Hyperparameter Optimisation')
    plt.plot(evolution['Generation'], evolution['Min Accuracy'],
             color='C1', label='Min')
    plt.plot(evolution['Generation'], evolution['Average Accuracy'],
             color='C2', label='Average')
    plt.plot(evolution['Generation'], evolution['Max Accuracy'],
             color='C3', label='Max')
    plt.legend(loc='lower right')
    plt.ylabel('Accuracy')
    plt.xlabel('Generation')
    plt.xticks(range(0, self.number_of_generations + 1, xticks_step))
    plt.show()
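# Minimal sketch of the context assumed above (hypothetical wiring): `self.log`
# behaves like a DEAP Logbook whose records carry "gen", "max", "avg" and "min"
# statistics. The records below are hand-written placeholders.
from deap import tools

log = tools.Logbook()
log.record(gen=0, max=0.71, avg=0.55, min=0.40)
log.record(gen=1, max=0.78, avg=0.63, min=0.49)
print(log.select("gen"), log.select("max"))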
def plot_confusion_matrix(cm, class_names):
    """
    Returns a matplotlib figure containing the plotted confusion matrix.

    Args:
        cm (array, shape = [n, n]): a confusion matrix of integer classes
        class_names (array, shape = [n]): String names of the integer classes
    """
    size = len(class_names)
    figure = plt.figure(figsize=(size, size))
    plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
    plt.title("Confusion matrix")
    plt.colorbar()
    tick_marks = np.arange(len(class_names))
    plt.xticks(tick_marks, class_names, rotation=45)
    plt.yticks(tick_marks, class_names)

    # Compute the labels from the normalized confusion matrix.
    labels = np.around(cm.astype('float') / cm.sum(axis=1)[:, np.newaxis],
                       decimals=2)

    # Use white text if squares are dark; otherwise black.
    threshold = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        color = "white" if cm[i, j] > threshold else "black"
        plt.text(j, i, labels[i, j], horizontalalignment="center", color=color)

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    return figure
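# Minimal usage sketch (assumed context): `cm` comes from
# sklearn.metrics.confusion_matrix; labels and predictions below are placeholders.
import numpy as np
from sklearn.metrics import confusion_matrix

y_true = [0, 1, 2, 2, 1, 0]
y_pred = [0, 2, 2, 2, 1, 0]
cm = confusion_matrix(y_true, y_pred)
fig = plot_confusion_matrix(cm, class_names=['cat', 'dog', 'bird'])
fig.savefig('confusion_matrix.jpg')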
def plot_dendrogram(self,
                    plot_size=(10, 25),
                    title_pad=20,
                    xlabel_pad=20,
                    orient='left',
                    leaf_text_size=16,
                    save_as_img=False,
                    filename='cah',
                    file_type='jpg'):
    plt.figure(figsize=plot_size)
    plt.title('Hierarchical Clustering Dendrogram', pad=title_pad)
    plt.xlabel('distance', labelpad=xlabel_pad)
    self.dendrogram_data = dendrogram(self.Z,
                                      labels=self.categories,
                                      orientation=orient,
                                      leaf_font_size=leaf_text_size)
    if save_as_img:
        plt.tight_layout()
        plt.savefig(f'{filename}.{file_type}')
    plt.show()
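# Standalone sketch of the same call (assumed context): the method expects
# `self.Z` to be a SciPy linkage matrix and `self.categories` to hold the leaf
# labels; the data below is synthetic.
import numpy as np
import matplotlib.pyplot as plt
from scipy.cluster.hierarchy import dendrogram, linkage

X = np.random.rand(6, 4)
Z = linkage(X, method='ward')
plt.figure(figsize=(10, 25))
dendrogram(Z, labels=[f'item_{i}' for i in range(6)],
           orientation='left', leaf_font_size=16)
plt.show()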
def plot_lift_curve(y_test,
                    y_predicted,
                    curve_color='blue',
                    font_size=14,
                    x_lim=(0, 1),
                    y_lim=(0, 1)):
    fpr, tpr, thr = roc_curve(y_test, y_predicted)
    # Cumulative share of the sample at each threshold
    percentage_sample = [(n + 1) / len(tpr) for n in range(len(tpr))]
    # Plot recall (TPR) against the percentage of sample, matching the axis labels
    plt.plot(percentage_sample, tpr, color=curve_color, lw=2)
    plt.xlim(x_lim)
    plt.ylim(y_lim)
    plt.xlabel('Percentage of sample', fontsize=font_size)
    plt.ylabel('Recall (TPR)', fontsize=font_size)
    plt.title('Lift curve')
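# Minimal usage sketch (assumed context): `roc_curve` is imported from
# sklearn.metrics at module level and `y_predicted` is a positive-class score,
# e.g. the output of predict_proba; model and data below are placeholders.
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

X, y = make_classification(n_samples=500, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
model = LogisticRegression(max_iter=1000).fit(X_train, y_train)
plot_lift_curve(y_test, model.predict_proba(X_test)[:, 1])
plt.show()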
def plot_3d_factorial_plan(self, x_comp=1, y_comp=2, z_comp=3, cat_colors=None):
    """Scatter the projected individuals on three principal components (3D)."""
    cat_colors = self.cat_colors if cat_colors is None else cat_colors
    # Results of the PCA stored as a data frame (one 'F{n}' column per component)
    result = self.components_table

    my_dpi = 96
    fig = plt.figure(figsize=(480 / my_dpi, 480 / my_dpi), dpi=my_dpi)
    ax = fig.add_subplot(111, projection='3d')

    axes_3d_comp = [x_comp, y_comp, z_comp]
    x_comp_label, y_comp_label, z_comp_label = [
        'F{}'.format(n) for n in axes_3d_comp
    ]

    # Components axes limits
    xmin, xmax = (min(result[x_comp_label]), max(result[x_comp_label]))
    ymin, ymax = (min(result[y_comp_label]), max(result[y_comp_label]))
    zmin, zmax = (min(result[z_comp_label]), max(result[z_comp_label]))
    ax.set_xlim(xmin, xmax)
    ax.set_ylim(ymin, ymax)
    ax.set_zlim(zmin, zmax)

    # Components axes coordinates
    xaxis = [(xmin, xmax), (0, 0), (0, 0)]
    yaxis = [(0, 0), (ymin, ymax), (0, 0)]
    zaxis = [(0, 0), (0, 0), (zmin, zmax)]

    # Plot components axes
    for a in [xaxis, yaxis, zaxis]:
        ax.plot(a[0], a[1], a[2], 'b')

    # Label the axes with the selected components
    ax.set_xlabel(x_comp_label)
    ax.set_ylabel(y_comp_label)
    ax.set_zlabel(z_comp_label)
    fig.tight_layout()

    # Plot the individuals
    ax.scatter(result[x_comp_label],
               result[y_comp_label],
               result[z_comp_label],
               c=cat_colors,
               cmap="Set2_r",
               s=60)
    plt.title("3D PCA")
    plt.show()
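# Standalone sketch of the inputs assumed above (hypothetical names): the method
# reads a DataFrame of projected coordinates with columns 'F1', 'F2', 'F3' and
# one colour code per individual.
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA

X = np.random.rand(50, 5)
components_table = pd.DataFrame(PCA(n_components=3).fit_transform(X),
                                columns=['F1', 'F2', 'F3'])
cat_colors = np.random.randint(0, 3, size=50)  # one colour code per point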
def plot_training_curve(history,
                        value_type='accuracy',
                        fine_tuning=False,
                        initial_epochs=None):
    """
    cf. https://machinelearningmastery.com/diagnose-overfitting-underfitting-lstm-models/
    """
    plt.plot(history.history[value_type], label='train')
    plt.plot(history.history[f'val_{value_type}'], label='validation')
    if fine_tuning:
        # Mark the epoch where fine-tuning starts with a vertical line
        plt.plot([initial_epochs - 1, initial_epochs - 1],
                 plt.ylim(),
                 label='Start Fine Tuning')
    plt.title('Training curve')
    plt.ylabel(value_type.capitalize())
    plt.xlabel('Epoch')
    plt.legend(loc='upper left')
    plt.show()
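# Minimal usage sketch (assumed context): `history` is the object returned by
# Keras `model.fit`; the model below is a throwaway placeholder.
import numpy as np
from tensorflow import keras

X = np.random.rand(100, 4)
y = np.random.randint(0, 2, size=100)
model = keras.Sequential([keras.layers.Dense(1, activation='sigmoid')])
model.compile(optimizer='adam', loss='binary_crossentropy',
              metrics=['accuracy'])
history = model.fit(X, y, validation_split=0.2, epochs=5, verbose=0)
plot_training_curve(history, value_type='accuracy')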
def scree_plot(self, threshold=None, save_as_img=False):
    """Scree plot: percentage of explained variance (inertia) per component."""
    scree = self.evr * 100
    plt.bar(np.arange(len(scree)) + 1, scree)

    if threshold is not None:
        scree_freq = scree / scree.sum()
        scree_cumsum = np.cumsum(scree_freq)

        # Number of features needed for threshold cumulative importance
        n_features = np.min(np.where(scree_cumsum > threshold)) + 1
        threshold_percentage = 100 * threshold
        threshold_legend = '{} features required for {:.0f}% of inertia.'.format(
            n_features, threshold_percentage)

        # Threshold vertical line plot
        plt.vlines(n_features,
                   ymin=0,
                   ymax=threshold_percentage,
                   linestyles='--',
                   colors='red')
        plt.plot(np.arange(len(scree)) + 1,
                 scree.cumsum(),
                 c="red",
                 marker='o',
                 label=threshold_legend)
        plt.legend(loc='lower right', fontsize=12)
    else:
        plt.plot(np.arange(len(scree)) + 1, scree.cumsum(), c="red", marker='o')

    plt.xlabel("Inertia axis rank", labelpad=20)
    plt.ylabel("Inertia (%)", labelpad=20)
    plt.title("Scree plot" +
              "\n(Kaiser criterion = {} : Elbow criterion = {})".format(
                  self.kaiser_criterion(),
                  elbow_criterion(total_inertia=self.evr)),
              pad=20)

    if save_as_img:
        plt.tight_layout()
        plt.savefig('scree.jpg')
    plt.show(block=False)
def scree_plot(self, pair_comp=False, save_as_img=False):
    # (% Explained Variance)
    scree = self.pca.explained_variance_ratio_ * 100
    plt.bar(np.arange(len(scree)) + 1, scree)
    plt.plot(np.arange(len(scree)) + 1, scree.cumsum(), c="red", marker='o')
    plt.xlabel("Inertia axis rank", labelpad=20)
    plt.ylabel("Inertia (%)", labelpad=20)
    plt.title("Scree plot" +
              "\n(Kaiser criterion = {} : Elbow criterion = {})".format(
                  self.kaiser_criterion(pair_comp),
                  self.elbow_criterion(pair_comp)),
              pad=20)
    if save_as_img:
        plt.tight_layout()
        plt.savefig('scree.jpg')
    plt.show(block=False)
def plot_roc_curve(y_test,
                   y_predicted,
                   curve_color='coral',
                   font_size=14,
                   x_lim=(0, 1),
                   y_lim=(0, 1)):
    """
    cf. https://scikit-learn.org/stable/modules/generated/sklearn.metrics.roc_curve.html
    """
    fpr, tpr, thr = roc_curve(y_test, y_predicted)
    auroc = auc(fpr, tpr)
    plt.plot(fpr, tpr, label=f'AUROC : {auroc:.2f}', color=curve_color, lw=2)

    # Random classifier line plot
    plt.plot([0, 1], [0, 1], linestyle='--')

    plt.xlim(x_lim)
    plt.ylim(y_lim)
    plt.xlabel('1 - specificity (FPR)', fontsize=font_size)
    plt.ylabel('Recall (TPR)', fontsize=font_size)
    plt.title('Receiver operating characteristic')
    plt.legend(loc="lower right")
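# Minimal usage sketch (assumed context): `roc_curve` and `auc` are imported
# from sklearn.metrics at module level and `y_predicted` holds positive-class
# probabilities; model and data below are placeholders.
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

X, y = make_classification(n_samples=500, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
clf = RandomForestClassifier(random_state=0).fit(X_train, y_train)
plot_roc_curve(y_test, clf.predict_proba(X_test)[:, 1])
plt.show()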
def correlation_matrix(df,
                       as_chart=True,
                       precision=2,
                       title=None,
                       rotate=90,
                       save_as_img=False,
                       size=(16, 12)):
    """Display the Pearson correlation matrix of `df` as an annotated heatmap,
    or return it as a styled table when `as_chart` is False."""
    corr = df.corr()
    if as_chart:
        colormap = plt.cm.RdBu
        plt.figure(figsize=size)
        if title is None:
            title = 'Pearson Correlation of Features'
        plt.title(title, y=1.05, size=15, pad=20)

        # Mask the upper triangle (the matrix is symmetric)
        mask = np.triu(np.ones_like(corr, dtype=bool))
        ax = sns.heatmap(corr,
                         linewidths=0.5,
                         vmax=1.0,
                         square=True,
                         cmap=colormap,
                         linecolor='white',
                         annot=True,
                         mask=mask,
                         cbar_kws={"shrink": .5},
                         fmt='.{}f'.format(precision))
        ax.set_xlim(0, df.shape[1] - 1)
        ax.set_ylim(df.shape[1], 1)
        plt.xticks(rotation=rotate)
        if save_as_img:
            plt.tight_layout()
            plt.savefig('corr_matrix.jpg')
        plt.show()
    else:
        return corr.style.background_gradient(
            cmap='coolwarm').set_precision(precision)
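# Minimal usage sketch (assumed context): `pd`, `np`, `sns` and `plt` are the
# usual pandas / numpy / seaborn / matplotlib module-level imports; the
# DataFrame below is a synthetic placeholder.
import numpy as np
import pandas as pd

df = pd.DataFrame(np.random.rand(100, 4), columns=list('ABCD'))
correlation_matrix(df, precision=2, rotate=45)    # annotated heatmap
styled = correlation_matrix(df, as_chart=False)   # styled table instead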
def plot_correlation_circle(self,
                            n_plan=None,
                            labels=None,
                            label_rotation=0,
                            lims=None,
                            save_as_img=False,
                            plot_size=(10, 8)):
    """Plot the correlation circle of the variables on each factorial plane."""
    factorial_plan_nb = self.default_factorial_plan_nb if n_plan is None else n_plan
    # Build a list of component-index tuples (example: [(0, 1), (2, 3), ...])
    axis_ranks = [(x, x + 1) for x in range(0, factorial_plan_nb, 2)]
    pcs = self.pca.components_

    for d1, d2 in axis_ranks:
        if d2 < self.n_comp:
            fig, ax = plt.subplots(figsize=plot_size)

            # Fix factorial plan limits
            if lims is not None:
                xmin, xmax, ymin, ymax = lims
            elif pcs.shape[1] < 30:
                xmin, xmax, ymin, ymax = -1, 1, -1, 1
            else:
                xmin, xmax, ymin, ymax = min(pcs[d1, :]), max(
                    pcs[d1, :]), min(pcs[d2, :]), max(pcs[d2, :])

            # Draw the variable arrows
            # (with more than 30 arrows, skip the arrowheads and draw plain lines)
            if pcs.shape[1] < 30:
                plt.quiver(np.zeros(pcs.shape[1]),
                           np.zeros(pcs.shape[1]),
                           pcs[d1, :],
                           pcs[d2, :],
                           angles='xy',
                           scale_units='xy',
                           scale=1,
                           color="grey")
                # (doc: https://matplotlib.org/api/_as_gen/matplotlib.pyplot.quiver.html)
            else:
                lines = [[[0, 0], [x, y]] for x, y in pcs[[d1, d2]].T]
                ax.add_collection(
                    LineCollection(lines, axes=ax, alpha=.1, color='black'))

            # Display variables labels
            if labels is not None:
                for i, (x, y) in enumerate(pcs[[d1, d2]].T):
                    if xmin <= x <= xmax and ymin <= y <= ymax:
                        plt.text(x,
                                 y,
                                 labels[i],
                                 fontsize=14,
                                 ha='center',
                                 va='center',
                                 rotation=label_rotation,
                                 color="blue",
                                 alpha=0.5)

            # Plot the unit circle
            circle = plt.Circle((0, 0), 1, facecolor='none', edgecolor='b')
            plt.gca().add_artist(circle)

            # Set the plot limits
            plt.xlim(xmin, xmax)
            plt.ylim(ymin, ymax)

            # Draw the horizontal and vertical reference lines
            plt.plot([-1, 1], [0, 0], color='grey', ls='--')
            plt.plot([0, 0], [-1, 1], color='grey', ls='--')

            # Axes labels with % explained variance
            plt.xlabel('F{} ({}%)'.format(d1 + 1, round(100 * self.evr[d1], 1)),
                       labelpad=20)
            plt.ylabel('F{} ({}%)'.format(d2 + 1, round(100 * self.evr[d2], 1)),
                       labelpad=20)
            plt.title("Correlation circle (F{} and F{})".format(d1 + 1, d2 + 1),
                      pad=20)

            if save_as_img:
                plt.tight_layout()
                plt.savefig('corr_circle_{}.jpg'.format(1 if d1 == 0 else d1))
            plt.show(block=False)
def plot_factorial_planes(self,
                          n_plan=None,
                          X_projected=None,
                          labels=None,
                          alpha=1,
                          illustrative_var=None,
                          illustrative_var_title=None,
                          save_as_img=False,
                          plot_size=(10, 8)):
    """
    :param n_plan: the total number of factorial planes to display
                   (default is the Kaiser criterion divided by 2)
    """
    X_projected = self.X_projected if X_projected is None else X_projected
    factorial_plan_nb = self.default_factorial_plan_nb if n_plan is None else n_plan
    axis_ranks = [(x, x + 1) for x in range(0, factorial_plan_nb, 2)]

    for d1, d2 in axis_ranks:
        if d2 < self.n_comp:
            fig = plt.figure(figsize=plot_size)

            # Display data points
            if illustrative_var is None:
                plt.scatter(X_projected[:, d1], X_projected[:, d2], alpha=alpha)
            else:
                illustrative_var = np.array(illustrative_var)
                for value in np.unique(illustrative_var):
                    selected = np.where(illustrative_var == value)
                    plt.scatter(X_projected[selected, d1],
                                X_projected[selected, d2],
                                alpha=alpha,
                                label=value)
                plt.legend(title=illustrative_var_title
                           if illustrative_var_title is not None else None)

            # Display data points labels
            if labels is not None:
                for i, (x, y) in enumerate(X_projected[:, [d1, d2]]):
                    plt.text(x, y, labels[i], fontsize=12, ha='center',
                             va='bottom')

            # Fix factorial plan limits
            boundary = np.max(np.abs(X_projected[:, [d1, d2]])) * 1.1
            plt.xlim([-boundary, boundary])
            plt.ylim([-boundary, boundary])

            # Display horizontal & vertical lines
            plt.plot([-100, 100], [0, 0], color='grey', ls='--')
            plt.plot([0, 0], [-100, 100], color='grey', ls='--')

            # Axes labels with % explained variance
            plt.xlabel('F{} ({}%)'.format(d1 + 1, round(100 * self.evr[d1], 1)),
                       labelpad=20)
            plt.ylabel('F{} ({}%)'.format(d2 + 1, round(100 * self.evr[d2], 1)),
                       labelpad=20)
            plt.title("Individuals projected on F{} and F{}".format(d1 + 1, d2 + 1),
                      pad=20)

            if save_as_img:
                plt.tight_layout()
                plt.savefig('factorial_plan_{}.jpg'.format(1 if d1 == 0 else d1))
            plt.show(block=False)
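# Minimal sketch of the state the PCA helper methods above assume (hypothetical
# attribute wiring, shown with scikit-learn): `self.pca` is a fitted PCA,
# `self.evr` its explained_variance_ratio_, `self.X_projected` the transformed
# data and `self.n_comp` the number of components kept.
import numpy as np
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

X = np.random.rand(200, 6)
X_scaled = StandardScaler().fit_transform(X)
pca = PCA(n_components=4).fit(X_scaled)

n_comp = 4
evr = pca.explained_variance_ratio_    # feeds the scree plot and axis labels
X_projected = pca.transform(X_scaled)  # feeds plot_factorial_planes
components = pca.components_           # feeds plot_correlation_circle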