def PlotCodeFrequency(codes, labels, save=False, path='', filename='img'):
    """
    Plot the frequency of each code on the dataset, split into DI and DP.

    One stacked bar is drawn per distinct code (sorted ascending). The DP
    share is at the bottom of the bar and the DI share is stacked on top of
    it. Both shares are percentages of the total number of samples, so the
    heights of all bars together add up to the share of samples labelled
    DI (0) or DP (1).

    - codes (pandas Series, list, numpy array): codes of each sample.
    - labels (pandas Series, list, numpy array): labels of each sample
      (0 is counted as DI, 1 as DP).
    - save (bool): tells if the plot should be saved.
    - path (string): path where to save the figure.
    - filename (string): name of the figure image file to be saved.
    """
    DI = 0
    DP = 1

    df = pd.DataFrame()
    df['labels'] = labels
    df['codes'] = codes

    total = len(df)
    is_di = df['labels'] == DI
    is_dp = df['labels'] == DP

    existing_codes = sorted(df['codes'].unique())
    freq_di = []
    freq_dp = []
    for code in existing_codes:
        has_code = df['codes'] == code
        freq_di.append((has_code & is_di).sum() * 100.0 / total)
        freq_dp.append((has_code & is_dp).sum() * 100.0 / total)

    ind = np.arange(len(existing_codes))
    width = 0.5

    # Use one explicit Axes for every drawing call. Calling plt.axes() with
    # no arguments adds a *new* full-window Axes on current Matplotlib, which
    # would cover the bars and receive the legend and grid instead.
    fig, ax = plt.subplots(figsize=(11, 5))
    dp_bar = ax.bar(ind, freq_dp, width)
    di_bar = ax.bar(ind, freq_di, width, bottom=freq_dp)

    ax.set_ylabel('Porcentagem (%)')
    ax.set_xlabel('Códigos')
    ax.set_title('Frequência de cada código no dataset')
    ax.set_xticks(ind)
    ax.set_xticklabels(existing_codes)
    # Major ticks are fixed at 0, 5, ..., 20; minor ticks every 1 %.
    ax.set_yticks(np.arange(0, 25, 5))
    ax.yaxis.set_minor_locator(MultipleLocator(1))
    ax.legend((di_bar[0], dp_bar[0]), ('DI', 'DP'))
    ax.grid(True, which='both', axis='y')

    plt.show()

    if save:
        util.CheckAndCreatePath(path)
        util.SaveFigure(fig, path, filename)
Ejemplo n.º 2
0
def PlotAccuracyOfEachEventCode(codes,
                                labels,
                                prediction,
                                save=False,
                                path='',
                                filename='img'):
    """
    Draw a bar chart with the model accuracy (in %) for each event code.

    The accuracy of a code is the percentage of samples carrying that code
    whose prediction equals the label. NaN codes are discarded and the
    remaining distinct codes are sorted and cast to int before plotting.

    - codes (pandas Series, list, numpy array): codes of each sample.
    - labels (pandas Series, list, numpy array): labels of each sample.
    - prediction (pandas Series, list, numpy array): predictions of each sample.
    - save (bool): tells if the plot should be saved.
    - path (string): path where to save the figure. e.g.: 'images/'
    - filename (string): name of the figure image file to be saved.
    """
    frame = pd.DataFrame()
    frame['labels'] = labels
    frame['codes'] = codes
    frame['prediction'] = prediction

    # Distinct codes, ascending, without NaN, as integers.
    event_codes = np.sort(frame['codes'].unique())
    event_codes = event_codes[~np.isnan(event_codes)].astype(int)

    # The "prediction matches label" mask does not depend on the code.
    is_correct = frame['prediction'] == frame['labels']
    accuracy = []
    for code in event_codes:
        has_code = frame['codes'] == code
        hits = (is_correct & has_code).sum()
        samples = has_code.sum()
        accuracy.append((hits * 100.0) / samples)

    fig, ax = plt.subplots(figsize=(11, 5))
    positions = list(range(len(event_codes)))

    plt.bar(positions, accuracy, width=0.3, align='center', figure=fig)
    ax.tick_params(axis='y', gridOn=True)
    ax.set_xticks(positions)
    ax.set_xticklabels(event_codes)
    ax.set_yticks(list(range(0, 101, 10)))
    ax.set_ylim([0, 100])
    ax.set_ylabel('Acurácia (%)')
    ax.set_xlabel('Codigo')
    ax.set_title('Acurácia do modelo para cada código')

    # Show the figure without blocking the caller.
    plt.show(block=False)

    if not save:
        return
    util.CheckAndCreatePath(path)
    util.SaveFigure(fig, path, filename)
Ejemplo n.º 3
0
def PlotFeatureImportanceXGBoost(model, save=False, path='', filename='img'):
    """
    Show the feature-importance chart of a XGBoost model.

    The chart is produced by ``xgb.plot_importance`` on a 6x7 figure and
    displayed with ``plt.show()``. When ``save`` is truthy the figure is
    afterwards handed to ``util.CheckAndCreatePath`` / ``util.SaveFigure``.

    - model (XGBoost model): model passed to ``xgb.plot_importance``.
    - save (bool): tells if the plot should be saved.
    - path (string): path where to save the figure.
    - filename (string): name of the figure image file to be saved.
    """
    figure, axes = plt.subplots(figsize=(6, 7))
    xgb.plot_importance(model, ax=axes)
    plt.show()

    if not save:
        return
    util.CheckAndCreatePath(path)
    util.SaveFigure(figure, path, filename)
Ejemplo n.º 4
0
def PlotConfusionMatrix(cm,
                        cm_confidence_interval,
                        classes,
                        normalize=False,
                        title='Matriz de Confusão',
                        cmap=plt.cm.Blues,
                        save=False,
                        path='',
                        imgname='matriz_de_confusão'):
    """Generate and show the confusion matrix as an annotated heat map.

    Parameters
    ----------
    cm : numpy.array
        Confusion matrix (rows are true classes, columns are predicted
        classes, matching the axis labels drawn on the plot).
    cm_confidence_interval : array indexable as ``[i, j]``, or None
        Per-cell value printed after a "±" sign. Only used when
        ``normalize`` is True; pass None to omit it.
    classes : list
        Names of the label classes, used as tick labels. Example:
        ['DI', 'DP'].
    normalize : bool, default False
        If True, every row of the matrix is divided by its sum. Rows whose
        sum is zero are shown as zeros instead of NaN.
    title : string, default 'Matriz de Confusão'
        Title of the confusion matrix image.
    cmap : matplotlib colormap, default matplotlib.pyplot.cm.Blues
        Colormap used for the matrix.
    save : bool, default False
        If True, the confusion matrix image is saved.
    path : str
        Directory where the confusion matrix image is saved.
    imgname : str, default 'matriz_de_confusão'
        Name of the confusion matrix image to be saved.
    """
    cm = np.asarray(cm)
    if normalize:
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        # A class without samples has a zero row total; leave that row at
        # zero so NaN does not leak into the colour scale and threshold.
        cm = np.divide(cm.astype('float'),
                       row_totals,
                       out=np.zeros(cm.shape, dtype=float),
                       where=row_totals != 0)
        print("Matriz de Confusão Normalizada")
    else:
        print('Matriz de Confusão, sem normalização')

    # Text shown inside each cell. An object array is used so that the
    # strings are never truncated to a fixed-width string dtype.
    show_interval = normalize and cm_confidence_interval is not None
    cm_format = np.empty(cm.shape, dtype=object)
    for i in range(cm.shape[0]):
        for j in range(cm.shape[1]):
            if show_interval:
                cm_format[i, j] = (f'{cm[i, j]:.2f} \u00B1 '
                                   f'{cm_confidence_interval[i, j]:.4f}')
            elif normalize:
                cm_format[i, j] = f'{cm[i, j]:.2f}'
            else:
                cm_format[i, j] = f'{cm[i, j]:.0f}'

    fig, ax = plt.subplots(figsize=(5, 5))
    im = ax.imshow(cm, aspect='equal', interpolation='nearest', cmap=cmap)
    # The font size must go through set_title; assigning ``ax.titlesize``
    # only creates an unused attribute on the Axes.
    ax.set_title(title, fontsize=13)
    fig.colorbar(im, ax=ax, shrink=0.7)

    tick_marks = np.arange(len(classes))
    ax.set_xticks(tick_marks)
    ax.set_yticks(tick_marks)
    ax.set_xticklabels(classes)
    ax.set_yticklabels(classes)

    # Minor ticks sit on the cell borders.
    ax.set_xticks(np.arange(cm.shape[1] + 1) - .5, minor=True)
    ax.set_yticks(np.arange(cm.shape[0] + 1) - .5, minor=True)
    ax.tick_params(axis='x', rotation=45)
    ax.tick_params(axis='both', labelsize=12)
    ax.set_xlabel('Classe Prevista', size=12)
    ax.set_ylabel('Classe Verdadeira', size=12)

    # White text on cells above the midpoint of the value range, black
    # text otherwise.
    thresh = (cm.max() + cm.min()) / 2.
    for i in range(cm.shape[0]):
        for j in range(cm.shape[1]):
            ax.text(j,
                    i,
                    cm_format[i, j],
                    horizontalalignment="center",
                    color="white" if cm[i, j] > thresh else "black",
                    fontsize=12)

    fig.tight_layout()
    ax.grid(False)
    plt.show(block=False)

    if save:
        util.CheckAndCreatePath(path)
        util.SaveFigure(fig, path, imgname)