Esempio n. 1
0
def fig16_4():
    """Plot the final-CI success rate on the G2 datasets per dimension count.

    For every initialization method, reads
    ``<results_dir>/g2-<dims>-<std>/<method>.csv`` for each dimension count
    in ``dimensions_sizes`` and each std value in 10, 20, ..., 100. The
    success rate of one CSV is the percentage of its rows whose ``ci_final``
    column equals 0. The rates are averaged over the std values, which gives
    one point per dimension count and one line per method.

    Returns:
        A ``(fig, filename)`` tuple: the matplotlib figure and the string
        path ``<results_dir>/fig16_4.eps``. The figure is not saved here.

    Raises:
        ZeroDivisionError: if one of the result CSVs contains no rows.
    """
    results_dir = get_project_results_dir()
    init_methods = [
        'Rand-P', 'Rand-C', 'Maxmin', 'kmeans++', 'Bradley', 'Sorting',
        'Projection', 'Luxburg', 'Split'
    ]
    pu.figure_setup()

    fig = plt.figure(figsize=pu.get_fig_size(15, 7))
    ax = fig.add_subplot()

    ax.title.set_text('Dataset G2 (CI final)')
    ax.set_prop_cycle(color=plt.cm.Set1.colors)

    # Strings on purpose: they are interpolated into the directory names and
    # also used directly as the x values / x tick positions.
    dimensions_sizes = [
        '1', '2', '4', '8', '16', '32', '64', '128', '256', '512', '1024'
    ]
    std_sizes = list(range(10, 101, 10))

    for init_method in init_methods:
        mean_rates = []
        for dims in dimensions_sizes:
            rates = []
            for std in std_sizes:
                df = pd.read_csv(results_dir / f"g2-{dims}-{std}" /
                                 f"{init_method}.csv")
                # Percentage of rows that ended with ci_final == 0.
                zero_rows = int((df['ci_final'] == 0).sum())
                rates.append(zero_rows / len(df) * 100)
            mean_rates.append(np.mean(rates))
        label = "KMeansPP" if init_method == "kmeans++" else init_method
        ax.plot(dimensions_sizes, mean_rates, label=label)

    ax.set_xticks(dimensions_sizes)
    ax.set_xlabel('Dimensões')

    # Raw strings: '\%' is not a valid Python escape sequence, so a normal
    # literal triggers an invalid-escape warning. The backslash itself is
    # kept in the label text (presumably because pu.figure_setup() enables
    # LaTeX text rendering -- TODO confirm).
    ax.set_yticks([99, 99.20, 99.40, 99.60, 99.80, 100])
    ax.set_yticklabels(
        [r'99\%', r'99,2\%', r'99,4\%', r'99,6\%', r'99,8\%', r'100\%'])
    ax.set_ylabel(r'Taxa de sucesso (\%)')

    plt.legend(bbox_to_anchor=(1, 1), loc="upper left")
    plt.tight_layout()

    filename = results_dir.joinpath('fig16_4.eps')

    return fig, str(filename)
Esempio n. 2
0
def fig16_1():
    """Plot the initial-CI success rate on the DIM datasets per dimension count.

    For every initialization method and every dimension count in
    ``dimensions_sizes``, reads ``<results_dir>/dim<dims>/<method>.csv`` and
    computes the percentage of rows whose ``ci_initial`` column equals 0.
    Each method becomes one line of the plot.

    Returns:
        A ``(fig, filename)`` tuple: the matplotlib figure and the string
        path ``<results_dir>/fig16_1.eps``. The figure is not saved here.

    Raises:
        ZeroDivisionError: if one of the result CSVs contains no rows.
    """
    results_dir = get_project_results_dir()
    init_methods = [
        'Rand-P', 'Rand-C', 'Maxmin', 'kmeans++', 'Bradley', 'Sorting',
        'Projection', 'Luxburg', 'Split'
    ]
    pu.figure_setup()

    fig = plt.figure(figsize=pu.get_fig_size(15, 7))
    ax = fig.add_subplot()

    ax.title.set_text('Dataset DIM (CI inicial)')
    ax.set_prop_cycle(color=plt.cm.Set1.colors)

    # Strings on purpose: they are interpolated into the directory names and
    # also used directly as the x values / x tick positions.
    dimensions_sizes = ['32', '64', '128', '256', '512', '1024']

    for init_method in init_methods:
        rates = []
        for dims in dimensions_sizes:
            df = pd.read_csv(results_dir / f"dim{dims}" /
                             f"{init_method}.csv")
            # Percentage of rows that started with ci_initial == 0.
            zero_rows = int((df['ci_initial'] == 0).sum())
            rates.append(zero_rows / len(df) * 100)
        label = "KMeansPP" if init_method == "kmeans++" else init_method
        ax.plot(dimensions_sizes, rates, label=label)

    ax.set_xticks(dimensions_sizes)
    ax.set_xlabel('Dimensões')

    # Raw strings: '\%' is not a valid Python escape sequence, so a normal
    # literal triggers an invalid-escape warning. The backslash itself is
    # kept in the label text (presumably for LaTeX text rendering set up by
    # pu.figure_setup() -- TODO confirm).
    percent_ticks = [0, 20, 40, 60, 80, 100]
    ax.set_yticks(percent_ticks)
    ax.set_yticklabels([rf'{tick}\%' for tick in percent_ticks])
    ax.set_ylabel(r'Taxa de sucesso (\%)')

    plt.legend(bbox_to_anchor=(1, 1), loc="upper left")
    plt.tight_layout()

    filename = results_dir.joinpath('fig16_1.eps')

    return fig, str(filename)
Esempio n. 3
0
def plot_hq_mtx(parameters_dict):
    """Draw one confusion matrix per LVQ version and save the figure as EPS.

    The data is split with ``train_test_split`` (stratified on ``y``,
    ``random_state=1``). For each of the first three entries of
    ``measures_lst`` an ``LVQ(prototypes_number=35, version=<entry>)``
    generator is run on the training split, a ``Knn`` classifier with
    ``n_neighbors=k_lst[-1]`` is fitted on the generated set, and its
    confusion matrix on the test split is drawn in its own subplot.

    Args:
        parameters_dict: mapping that must provide the keys ``'X'``, ``'y'``,
            ``'k_lst'``, ``'measures_lst'`` (at least three entries),
            ``'dataset_name'`` and ``'target_names'``.

    Returns:
        None. The figure is written through ``pu.save_fig`` to
        ``<results_dir>/<dataset_name>_cf_mtx.eps``.
    """
    X = parameters_dict['X']
    y = parameters_dict['y']

    k_lst = parameters_dict['k_lst']
    measures_lst = parameters_dict['measures_lst']
    dataset_name = parameters_dict['dataset_name']
    target_names = parameters_dict['target_names']

    pu.figure_setup()

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, random_state=1, shuffle=True, stratify=y)

    fig = plt.figure(figsize=pu.get_fig_size(15, 4.4))
    fig.suptitle(f'Dataset: {dataset_name.upper()}')

    # Only the last entry of k_lst is used as the neighbour count.
    n_neighbors = k_lst[-1]

    # The figure is laid out as a fixed 1x3 grid, so exactly three versions
    # are drawn.
    for panel in range(3):
        ax = fig.add_subplot(1, 3, panel + 1)
        ax.set_axisbelow(True)

        version_name = measures_lst[panel]

        generator = LVQ(prototypes_number=35, version=version_name)
        # Presumably (prototypes, prototype_labels) -- only items 0 and 1 of
        # the returned object are used.
        generated = generator.generate(X_train, y_train)

        classifier = Knn(n_neighbors=n_neighbors).fit(generated[0],
                                                      generated[1])
        # NOTE(review): if this is sklearn.metrics.plot_confusion_matrix, it
        # was deprecated in scikit-learn 1.0 and removed in 1.2;
        # ConfusionMatrixDisplay.from_estimator is the replacement. Confirm
        # against the file's imports before upgrading scikit-learn.
        plot_confusion_matrix(classifier, X_test, y_test,
                              display_labels=target_names,
                              ax=ax,
                              cmap=plt.cm.Blues,
                              normalize=None)

        ax.set_title(version_name)

    plt.tight_layout()

    filename = get_project_results_dir().joinpath(dataset_name + '_cf_mtx.eps')

    pu.save_fig(fig, str(filename))
Esempio n. 4
0
def fig14():
    """Plot the final-CI success rate on the b2-sub datasets per cluster count.

    For every initialization method and every cluster count in
    10, 20, ..., 100, reads ``<results_dir>/b2-sub-<k>/<method>.csv`` and
    computes the percentage of rows whose ``ci_final`` column equals 0.
    Each method becomes one line of the plot.

    Returns:
        A ``(fig, filename)`` tuple: the matplotlib figure and the string
        path ``<results_dir>/fig14.eps``. The figure is not saved here.

    Raises:
        ZeroDivisionError: if one of the result CSVs contains no rows.
    """
    results_dir = get_project_results_dir()
    init_methods = [
        'Rand-P', 'Rand-C', 'Maxmin', 'kmeans++', 'Bradley', 'Sorting',
        'Projection', 'Luxburg', 'Split'
    ]
    pu.figure_setup()

    fig = plt.figure(figsize=pu.get_fig_size(15, 7))
    ax = fig.add_subplot()

    ax.set_prop_cycle(color=plt.cm.Set1.colors)

    cluster_sizes = list(range(10, 101, 10))
    for init_method in init_methods:
        rates = []
        for k in cluster_sizes:
            df = pd.read_csv(results_dir / f"b2-sub-{k}" /
                             f"{init_method}.csv")
            # Percentage of rows that ended with ci_final == 0.
            zero_rows = int((df['ci_final'] == 0).sum())
            rates.append(zero_rows / len(df) * 100)
        label = "KMeansPP" if init_method == "kmeans++" else init_method
        ax.plot(cluster_sizes, rates, label=label)

    ax.set_xticks(cluster_sizes)
    ax.set_xlabel('Clusters (k)')

    # Raw strings: '\%' is not a valid Python escape sequence, so a normal
    # literal triggers an invalid-escape warning. The backslash itself is
    # kept in the label text (presumably for LaTeX text rendering set up by
    # pu.figure_setup() -- TODO confirm).
    percent_ticks = [0, 20, 40, 60, 80, 100]
    ax.set_yticks(percent_ticks)
    ax.set_yticklabels([rf'{tick}\%' for tick in percent_ticks])
    ax.set_ylabel(r'Taxa de sucesso (\%)')

    plt.legend(bbox_to_anchor=(1, 1), loc="upper left")
    plt.tight_layout()

    filename = results_dir.joinpath('fig14.eps')

    return fig, str(filename)
Esempio n. 5
0
def fig15():
    """Plot the relative final CI (mean CI / k) on the b2-sub datasets.

    For every initialization method and every cluster count ``k`` in
    10, 20, ..., 100, reads ``<results_dir>/b2-sub-<k>/<method>.csv``, takes
    the mean of its ``ci_final`` column and divides it by ``k``. Each method
    becomes one line of the plot.

    Returns:
        A ``(fig, filename)`` tuple: the matplotlib figure and the string
        path ``<results_dir>/fig15.eps``. The figure is not saved here.
    """
    results_dir = get_project_results_dir()
    init_methods = [
        'Rand-P', 'Rand-C', 'Maxmin', 'kmeans++', 'Bradley', 'Sorting',
        'Projection', 'Luxburg', 'Split'
    ]
    pu.figure_setup()

    fig = plt.figure(figsize=pu.get_fig_size(15, 7))
    ax = fig.add_subplot()

    ax.set_prop_cycle(color=plt.cm.Set1.colors)

    cluster_sizes = list(range(10, 101, 10))
    for init_method in init_methods:
        relative_cis = []
        for k in cluster_sizes:
            df = pd.read_csv(results_dir / f"b2-sub-{k}" /
                             f"{init_method}.csv")
            # Normalise by k so that different cluster counts are comparable.
            relative_cis.append(df['ci_final'].mean() / k)
        label = "KMeansPP" if init_method == "kmeans++" else init_method
        ax.plot(cluster_sizes, relative_cis, label=label)

    ax.set_xticks(cluster_sizes)
    ax.set_xlabel('Clusters (k)')

    ax.set_ylabel('CI relativo (CI/k)')

    plt.legend(bbox_to_anchor=(1, 1), loc="upper left")
    plt.tight_layout()

    filename = results_dir.joinpath('fig15.eps')

    return fig, str(filename)
Esempio n. 6
0
def plot_hq_summary_p(parameters_dict):
    """Plot two per-prototype-count series side by side and save them as EPS.

    For each of the first three entries ``m`` of ``measures_lst`` the series
    stored under the key ``'<m>-p'`` is read from ``parameters_dict``: item
    ``[0]`` is drawn in the left panel (y label 'Tempo de Processamento (s)')
    and item ``[1]`` in the right panel (y label 'Acurácia'), both against
    ``p_lst``.

    Args:
        parameters_dict: mapping that must provide the keys ``'p_lst'``,
            ``'measures_lst'`` (at least three entries), ``'dataset_name'``
            and one ``'<measure>-p'`` entry per plotted measure.

    Returns:
        None. The figure is written through ``pu.save_fig`` to
        ``<results_dir>/<dataset_name>_summary_p.eps``.
    """
    p_lst = parameters_dict['p_lst']
    measures_lst = parameters_dict['measures_lst']
    dataset_name = parameters_dict['dataset_name']

    # One matplotlib format string per plotted measure.
    line_formats = ['ro--', 'g^--', 'bs--']

    pu.figure_setup()

    fig = plt.figure(figsize=pu.get_fig_size(15, 6))
    fig.suptitle(f'Dataset: {dataset_name.upper()}')

    # (index into parameters_dict['<measure>-p'], y-axis label) per panel.
    panels = [
        (0, 'Tempo de Processamento (s)'),
        (1, 'Acurácia'),
    ]

    for position, (series_index, y_label) in enumerate(panels, start=1):
        ax = fig.add_subplot(1, 2, position)

        ax.set_xlabel('Protótipos')
        ax.set_ylabel(y_label)

        ax.set_axisbelow(True)

        for measure_index, line_format in enumerate(line_formats):
            measure = measures_lst[measure_index]
            ax.plot(
                p_lst,
                parameters_dict[f'{measure}-p'][series_index],
                line_format,
                markersize=1.5,
                linewidth=0.5,
                label=measure)
        # The tick positions do not depend on the measure, so set them once
        # per panel instead of once per line.
        ax.set_xticks(p_lst)

        # plt.legend() targets the current axes, i.e. the subplot just added.
        plt.legend()
        plt.tight_layout()

    filename = get_project_results_dir().joinpath(dataset_name + '_summary_p.eps')

    pu.save_fig(fig, str(filename))
Esempio n. 7
0
def fig13():
    """Plot stacked initial/final success rates for low- and high-overlap G2.

    The left panel ('Baixa sobreposição') aggregates every entry matching
    ``g2*`` under the results directory whose stem is not listed in
    ``high_overlap_datasets``; the right panel ('Alta sobreposição')
    aggregates exactly the listed datasets. For each initialization method
    the success rate of one dataset is the percentage of rows of
    ``<dataset>/<method>.csv`` whose ``ci_initial`` (respectively
    ``ci_final``) equals 0. The rates are averaged over the datasets. The
    gray bar shows the mean initial rate, and the white bar stacked on top
    shows the difference between the mean final and mean initial rates.

    Returns:
        A ``(fig, filename)`` tuple: the matplotlib figure and the string
        path ``<results_dir>/fig13.eps``. The figure is not saved here.

    Raises:
        ZeroDivisionError: if one of the result CSVs contains no rows.
    """
    results_dir = get_project_results_dir()
    init_methods = [
        'Rand-P', 'Rand-C', 'Maxmin', 'kmeans++', 'Bradley', 'Sorting',
        'Projection', 'Luxburg', 'Split'
    ]
    # Bar labels: same order as init_methods, with 'kmeans++' shown as
    # 'KMeansPP'.
    init_methods_bar = [
        'KMeansPP' if method == 'kmeans++' else method
        for method in init_methods
    ]
    high_overlap_datasets = [
        'g2-2-40',
        'g2-2-50',
        'g2-2-60',
        'g2-2-70',
        'g2-2-80',
        'g2-2-90',
        'g2-2-100',
        'g2-4-50',
        'g2-4-60',
        'g2-4-70',
        'g2-4-80',
        'g2-4-90',
        'g2-4-100',
        'g2-8-70',
        'g2-8-80',
        'g2-8-90',
        'g2-8-100',
        'g2-16-90',
        'g2-16-100',
    ]
    high_overlap_names = set(high_overlap_datasets)

    percent_ticks = [0, 20, 40, 60, 80, 100]
    # Raw strings: '\%' is not a valid Python escape sequence, so a normal
    # literal triggers an invalid-escape warning. The backslash itself is
    # kept in the label text (presumably for LaTeX text rendering set up by
    # pu.figure_setup() -- TODO confirm).
    percent_labels = [rf'{tick}\%' for tick in percent_ticks]

    def zero_percentage(df, column):
        # Percentage of rows of df whose `column` value equals 0.
        zero_rows = int((df[column] == 0).sum())
        return zero_rows / len(df) * 100

    def mean_success(dataset_dirs):
        # Per method: (mean initial rate, mean final rate - mean initial rate).
        initial_means = []
        gains = []
        for init_method in init_methods:
            initial_rates = []
            final_rates = []
            for dataset_dir in dataset_dirs:
                df = pd.read_csv(dataset_dir / f"{init_method}.csv")
                initial_rates.append(zero_percentage(df, 'ci_initial'))
                final_rates.append(zero_percentage(df, 'ci_final'))
            initial_mean = np.mean(initial_rates)
            initial_means.append(initial_mean)
            gains.append(np.mean(final_rates) - initial_mean)
        return initial_means, gains

    def draw_panel(ax, title, dataset_dirs, y_label=None):
        # Draw the stacked initial/final bars for one group of datasets.
        ax.title.set_text(title)

        initial, gain = mean_success(dataset_dirs)
        ax.bar(init_methods_bar,
               initial,
               label='Inicial',
               color='gray',
               edgecolor='black')
        # Stacked on top of the initial bar, so the total height is the mean
        # final success rate.
        ax.bar(init_methods_bar,
               gain,
               bottom=initial,
               label='Final',
               color='white',
               edgecolor='black')

        if y_label is not None:
            ax.set_ylabel(y_label)

        ax.tick_params('x', labelrotation=70)

        ax.set_ylim([0, 110])
        ax.set_yticks(percent_ticks)
        ax.set_yticklabels(percent_labels)

        # Passed positionally: the keyword was named `b` before Matplotlib 3.5
        # and `visible` afterwards, and the old keyword was later removed.
        ax.grid(False, axis='x')

    pu.figure_setup()

    fig = plt.figure(figsize=pu.get_fig_size(15, 7))
    axs = fig.subplots(ncols=2)

    # Glob once instead of once per method; everything matching g2* that is
    # not listed as high overlap counts as low overlap.
    low_overlap_dirs = [
        dataset for dataset in results_dir.glob('g2*')
        if dataset.stem not in high_overlap_names
    ]
    high_overlap_dirs = [results_dir / name for name in high_overlap_datasets]

    draw_panel(axs[0], 'Baixa sobreposição', low_overlap_dirs,
               y_label=r'Taxa de sucesso (\%)')
    draw_panel(axs[1], 'Alta sobreposição', high_overlap_dirs)

    plt.legend()
    plt.tight_layout()

    filename = results_dir.joinpath('fig13.eps')

    return fig, str(filename)