def PlotCodeFrequency(codes, labels, save=False, path='', filename='img'):
    """
    Plot the frequency of each code on the dataset and also which ones are DI or DP.

    One stacked bar is drawn per distinct code. The lower segment is the
    percentage of all samples that have that code and label DP (1); the upper
    segment is the percentage that have that code and label DI (0).

    - codes (pandas Series, list, numpy array): codes of each sample.
    - labels (pandas Series, list, numpy array): labels of each sample
      (0 is counted as DI, 1 as DP).
    - save (bool): tells if the plot should be saved.
    - path (string): path where to save the figure.
    - filename (string): name of the figure image file to be saved.
    """
    DI = 0
    DP = 1

    df = pd.DataFrame()
    df['labels'] = labels
    df['codes'] = codes
    total = len(df)

    existing_codes = list(df['codes'].unique())
    existing_codes.sort()

    # The label masks do not depend on the code, so build them only once.
    is_di = df['labels'] == DI
    is_dp = df['labels'] == DP

    # Percentages are relative to the whole dataset, not to each code.
    freq_di = []
    freq_dp = []
    for code in existing_codes:
        has_code = df['codes'] == code
        freq_di.append((has_code & is_di).sum() * 100.0 / total)
        freq_dp.append((has_code & is_dp).sum() * 100.0 / total)

    ind = np.arange(len(existing_codes))
    width = 0.5

    fig, ax = plt.subplots(figsize=(11, 5))
    dp_bar = ax.bar(ind, freq_dp, width)
    di_bar = ax.bar(ind, freq_di, width, bottom=freq_dp)

    ax.set_ylabel('Porcentagem (%)')
    ax.set_xlabel('Códigos')
    ax.set_title('Frequência de cada código no dataset')
    ax.set_xticks(ind)
    ax.set_xticklabels(existing_codes)
    ax.set_yticks(np.arange(0, 25, 5))
    # Configure the Axes that holds the bars explicitly: a bare plt.axes()
    # call adds a new, empty Axes on current Matplotlib versions, which
    # would receive the locator/legend/grid and cover the bars.
    ax.yaxis.set_minor_locator(MultipleLocator(1))
    if existing_codes:
        # With no codes there are no bar patches to use as legend handles.
        ax.legend((di_bar[0], dp_bar[0]), ('DI', 'DP'))
    ax.grid(True, which='both', axis='y')
    plt.show()

    if save:
        util.CheckAndCreatePath(path)
        util.SaveFigure(fig, path, filename)
def PlotAccuracyOfEachEventCode(codes, labels, prediction, save=False, path='',
                                filename='img'):
    """
    Plots the model accuracy for each event code.

    For every distinct code, the accuracy is the percentage of samples with
    that code whose prediction equals its label. Samples whose code is
    missing (NaN/None) are ignored.

    - codes (pandas Series, list, numpy array): codes of each sample.
    - labels (pandas Series, list, numpy array): labels of each sample.
    - prediction (pandas Series, list, numpy array): predictions of each sample.
    - save (bool): tells if the plot should be saved.
    - path (string): path where to save the figure. e.g.: 'images/'
    - filename (string): name of the figure image file to be saved.
    """
    df = pd.DataFrame()
    df['labels'] = labels
    df['codes'] = codes
    df['prediction'] = prediction

    # Distinct codes in ascending order. dropna() (rather than np.isnan) also
    # works when the codes are not numeric.
    code_values = np.sort(df['codes'].dropna().unique())

    # Tick labels: show whole-number codes as ints (a column holding NaN is
    # stored as float). The uncast values are still used for matching rows, so
    # a fractional code is never truncated into a value that matches nothing.
    kind = code_values.dtype.kind
    is_whole_float = kind == 'f' and np.all(code_values == np.floor(code_values))
    if kind in ('b', 'i', 'u') or is_whole_float:
        tick_labels = code_values.astype(int)
    else:
        tick_labels = code_values

    # Whether each sample was predicted correctly does not depend on the code.
    is_correct = df['prediction'] == df['labels']

    accuracy = []
    for code in code_values:
        has_code = df['codes'] == code
        accuracy.append((has_code & is_correct).sum() * 100.0 / has_code.sum())

    # Draw the chart
    fig, ax = plt.subplots(figsize=(11, 5))
    xticks = list(range(len(code_values)))
    yticks = list(range(0, 101, 10))

    ax.bar(xticks, accuracy, align='center', width=0.3)
    ax.grid(True, axis='y')
    ax.set_xticks(xticks)
    ax.set_xticklabels(tick_labels)
    ax.set_yticks(yticks)
    ax.set_ylim([0, 100])
    ax.set_ylabel('Acurácia (%)')
    ax.set_xlabel('Codigo')
    ax.set_title('Acurácia do modelo para cada código')

    # show the figure, but do not block
    plt.show(block=False)

    if save:
        util.CheckAndCreatePath(path)
        util.SaveFigure(fig, path, filename)
def PlotFeatureImportanceXGBoost(model, save=False, path='', filename='img'):
    """
    Draw the feature-importance chart of an XGBoost model and show it.

    - model (XGBoost model): model passed to xgb.plot_importance.
    - save (bool): when true, the figure is also written out after being shown.
    - path (string): directory where the figure is saved.
    - filename (string): name of the image file to be saved.
    """
    figure, axis = plt.subplots(figsize=(6, 7))
    xgb.plot_importance(model, ax=axis)
    plt.show()

    if not save:
        return

    util.CheckAndCreatePath(path)
    util.SaveFigure(figure, path, filename)
def PlotConfusionMatrix(cm,
                        cm_confidence_interval,
                        classes,
                        normalize=False,
                        title='Matriz de Confusão',
                        cmap=plt.cm.Blues,
                        save=False,
                        path='',
                        imgname='matriz_de_confusão'):
    """Draw and show a confusion matrix as an annotated heat map.

    Parameters
    ----------
    cm : numpy.array
        Confusion matrix. Rows are drawn on the y axis ('Classe Verdadeira')
        and columns on the x axis ('Classe Prevista').
    cm_confidence_interval : numpy.array or None
        Values indexed like ``cm``. When ``normalize`` is true and this is not
        None, each cell is annotated as ``value ± interval``. It is ignored
        when ``normalize`` is false.
    classes : list
        Name of each label class, used as tick labels. Example: ['DI', 'DP'].
    normalize : bool, default False
        If true, every row of the matrix is divided by its sum. Rows whose
        sum is zero are left as zeros.
    title : string, default 'Matriz de Confusão'
        Title of the confusion matrix image.
    cmap : matplotlib.pyplot.cm, default matplotlib.pyplot.cm.Blues
        Colormap used for the matrix.
    save : bool, default False
        If true, the confusion matrix image is saved.
    path : str
        Directory where the confusion matrix image is saved.
    imgname : str, default 'matriz_de_confusão'
        Name of the confusion matrix image to be saved.
    """
    cm = np.asarray(cm)
    if normalize:
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        cm = cm.astype('float')
        # Skip the division for rows without samples so they stay 0 instead
        # of turning into NaN (which would also make the colour threshold NaN).
        cm = np.divide(cm, row_totals, out=np.zeros_like(cm), where=row_totals != 0)
        print("Matriz de Confusão Normalizada")
    else:
        print('Matriz de Confusão, sem normalização')

    show_interval = normalize and cm_confidence_interval is not None
    if show_interval:
        cm_confidence_interval = np.asarray(cm_confidence_interval)

    fig, ax = plt.subplots(figsize=(5, 5))
    im = ax.imshow(cm, aspect='equal', interpolation='nearest', cmap=cmap)
    # The original `ax.titlesize = 13` only set a stray attribute; the size
    # has to be given to the title itself to take effect.
    ax.set_title(title, fontsize=13)
    fig.colorbar(im, ax=ax, shrink=0.7)

    tick_marks = np.arange(len(classes))
    ax.set_xticks(tick_marks)
    ax.set_yticks(tick_marks)
    ax.set_xticklabels(classes)
    ax.set_yticklabels(classes)
    # Minor ticks sit on the cell borders.
    ax.set_xticks(np.arange(cm.shape[1] + 1) - .5, minor=True)
    ax.set_yticks(np.arange(cm.shape[0] + 1) - .5, minor=True)
    ax.tick_params(axis='x', rotation=45)
    ax.tick_params(axis='both', labelsize=12)
    ax.set_xlabel('Classe Prevista', size=12)
    ax.set_ylabel('Classe Verdadeira', size=12)

    # Cells above the midpoint of the value range get white text, the rest
    # black text.
    thresh = (cm.max() + cm.min()) / 2.
    for i, j in np.ndindex(*cm.shape):
        if not normalize:
            cell_text = f'{cm[i, j]:.0f}'
        elif show_interval:
            cell_text = f'{cm[i, j]:.2f} \u00B1 {cm_confidence_interval[i, j]:.4f}'
        else:
            cell_text = f'{cm[i, j]:.2f}'
        ax.text(j, i, cell_text,
                horizontalalignment="center",
                color="white" if cm[i, j] > thresh else "black",
                fontsize=12)

    fig.tight_layout()
    ax.grid(False)
    # show the figure, but do not block
    plt.show(block=False)

    if save:
        util.CheckAndCreatePath(path)
        util.SaveFigure(fig, path, imgname)