def fig16_4():
    """Plot the final-CI success rate per dimensionality for the G2 datasets.

    For every initialization method, reads
    ``<results_dir>/g2-<dimension>-<std>/<method>.csv`` for each combination
    of dimension and std value, computes the percentage of rows whose
    ``ci_final`` equals 0, and averages those percentages over the std values
    of each dimension. One line per method is drawn.

    Returns:
        A ``(fig, filename)`` tuple: the Matplotlib figure and the path
        ``fig16_4.eps`` inside the results directory, as a string. The figure
        is not written to disk here.
    """
    results_dir = get_project_results_dir()
    init_methods = [
        'Rand-P', 'Rand-C', 'Maxmin', 'kmeans++', 'Bradley', 'Sorting',
        'Projection', 'Luxburg', 'Split',
    ]
    # Kept as strings: they are used both in directory names and as
    # categorical x positions.
    dimensions_sizes = [
        '1', '2', '4', '8', '16', '32', '64', '128', '256', '512', '1024',
    ]
    std_sizes = list(range(10, 101, 10))

    pu.figure_setup()
    fig = plt.figure(figsize=pu.get_fig_size(15, 7))
    ax = fig.add_subplot()
    ax.title.set_text('Dataset G2 (CI final)')
    ax.set_prop_cycle(color=plt.cm.Set1.colors)

    for init_method in init_methods:
        mean_rates = []
        for dimensions_size in dimensions_sizes:
            rates = []
            for std_size in std_sizes:
                df = pd.read_csv(
                    results_dir
                    / f"g2-{dimensions_size}-{std_size}"
                    / f"{init_method}.csv"
                )
                final_zeros = df['ci_final'].value_counts().get(0, 0)
                rates.append((final_zeros / df.shape[0]) * 100)
            mean_rates.append(np.mean(rates))

        label = "KMeansPP" if init_method == "kmeans++" else init_method
        ax.plot(dimensions_sizes, mean_rates, label=label)

    ax.set_xticks(dimensions_sizes)
    ax.set_xlabel('Dimensões')
    ax.set_yticks([99, 99.20, 99.40, 99.60, 99.80, 100])
    # Raw strings keep the literal backslash before '%' without relying on an
    # invalid escape sequence (presumably needed for LaTeX text rendering).
    ax.set_yticklabels(
        [r'99\%', r'99,2\%', r'99,4\%', r'99,6\%', r'99,8\%', r'100\%'])
    ax.set_ylabel(r'Taxa de sucesso (\%)')

    plt.legend(bbox_to_anchor=(1, 1), loc="upper left")
    plt.tight_layout()

    filename = results_dir.joinpath('fig16_4.eps')
    return fig, str(filename)
def fig16_1():
    """Plot the initial-CI success rate per dimensionality for the DIM datasets.

    For every initialization method and every dimension, reads
    ``<results_dir>/dim<dimension>/<method>.csv`` and computes the percentage
    of rows whose ``ci_initial`` equals 0. One line per method is drawn.

    Returns:
        A ``(fig, filename)`` tuple: the Matplotlib figure and the path
        ``fig16_1.eps`` inside the results directory, as a string. The figure
        is not written to disk here.
    """
    results_dir = get_project_results_dir()
    init_methods = [
        'Rand-P', 'Rand-C', 'Maxmin', 'kmeans++', 'Bradley', 'Sorting',
        'Projection', 'Luxburg', 'Split',
    ]
    # Kept as strings: they are used both in directory names and as
    # categorical x positions.
    dimensions_sizes = ['32', '64', '128', '256', '512', '1024']

    pu.figure_setup()
    fig = plt.figure(figsize=pu.get_fig_size(15, 7))
    ax = fig.add_subplot()
    ax.title.set_text('Dataset DIM (CI inicial)')
    ax.set_prop_cycle(color=plt.cm.Set1.colors)

    for init_method in init_methods:
        initial_percentages = []
        for dimensions_size in dimensions_sizes:
            df = pd.read_csv(
                results_dir / f"dim{dimensions_size}" / f"{init_method}.csv"
            )
            initial_zeros = df['ci_initial'].value_counts().get(0, 0)
            initial_percentages.append((initial_zeros / df.shape[0]) * 100)

        label = "KMeansPP" if init_method == "kmeans++" else init_method
        ax.plot(dimensions_sizes, initial_percentages, label=label)

    ax.set_xticks(dimensions_sizes)
    ax.set_xlabel('Dimensões')
    ax.set_yticks([0, 20, 40, 60, 80, 100])
    # Raw strings keep the literal backslash before '%' without relying on an
    # invalid escape sequence (presumably needed for LaTeX text rendering).
    ax.set_yticklabels(
        [r'0\%', r'20\%', r'40\%', r'60\%', r'80\%', r'100\%'])
    ax.set_ylabel(r'Taxa de sucesso (\%)')

    plt.legend(bbox_to_anchor=(1, 1), loc="upper left")
    plt.tight_layout()

    filename = results_dir.joinpath('fig16_1.eps')
    return fig, str(filename)
def plot_hq_mtx(parameters_dict):
    """Save a row of confusion matrices, one per LVQ version, for a dataset.

    The data is split with a stratified, shuffled ``train_test_split``
    (``random_state=1``). For each of the first three entries of
    ``measures_lst`` an ``LVQ`` prototype generator (35 prototypes) is run on
    the training split, a ``Knn`` classifier using the last value of
    ``k_lst`` is fitted on the generated set, and its confusion matrix on the
    test split is drawn in its own subplot. The figure is saved as
    ``<dataset_name>_cf_mtx.eps`` in the project results directory.

    Args:
        parameters_dict: mapping providing the keys ``'X'``, ``'y'``,
            ``'k_lst'``, ``'measures_lst'``, ``'dataset_name'`` and
            ``'target_names'``.
    """
    X = parameters_dict['X']
    y = parameters_dict['y']
    k_lst = parameters_dict['k_lst']
    measures_lst = parameters_dict['measures_lst']
    dataset_name = parameters_dict['dataset_name']
    target_names = parameters_dict['target_names']

    pu.figure_setup()

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, random_state=1, shuffle=True, stratify=y)

    fig = plt.figure(figsize=pu.get_fig_size(15, 4.4))
    fig.suptitle(f'Dataset: {dataset_name.upper()}')

    # The grid has three columns, so at most three versions are drawn.
    for position, curr_name in enumerate(measures_lst[:3], start=1):
        ax = fig.add_subplot(1, 3, position)
        ax.set_axisbelow(True)

        generator = LVQ(prototypes_number=35, version=curr_name)
        prototypes, prototype_labels = generator.generate(X_train, y_train)[:2]
        classifier = Knn(n_neighbors=k_lst[-1]).fit(
            prototypes, prototype_labels)

        # NOTE(review): if `plot_confusion_matrix` comes from scikit-learn, it
        # was deprecated in 1.0 and removed in 1.2 in favour of
        # `ConfusionMatrixDisplay.from_estimator` - confirm the pinned version.
        plot_confusion_matrix(
            classifier, X_test, y_test,
            display_labels=target_names,
            ax=ax,
            cmap=plt.cm.Blues,
            normalize=None,
        )
        ax.set_title(curr_name)

    plt.tight_layout()

    filename = get_project_results_dir().joinpath(
        f'{dataset_name}_cf_mtx.eps')
    pu.save_fig(fig, str(filename))
def fig14():
    """Plot the final-CI success rate against the number of clusters (b2-sub).

    For every initialization method and every cluster count in 10, 20, ...,
    100, reads ``<results_dir>/b2-sub-<k>/<method>.csv`` and computes the
    percentage of rows whose ``ci_final`` equals 0. One line per method is
    drawn.

    Returns:
        A ``(fig, filename)`` tuple: the Matplotlib figure and the path
        ``fig14.eps`` inside the results directory, as a string. The figure
        is not written to disk here.
    """
    results_dir = get_project_results_dir()
    init_methods = [
        'Rand-P', 'Rand-C', 'Maxmin', 'kmeans++', 'Bradley', 'Sorting',
        'Projection', 'Luxburg', 'Split',
    ]
    cluster_sizes = list(range(10, 101, 10))

    pu.figure_setup()
    fig = plt.figure(figsize=pu.get_fig_size(15, 7))
    ax = fig.add_subplot()
    ax.set_prop_cycle(color=plt.cm.Set1.colors)

    for init_method in init_methods:
        final_percentages = []
        for cluster_size in cluster_sizes:
            df = pd.read_csv(
                results_dir / f"b2-sub-{cluster_size}" / f"{init_method}.csv"
            )
            final_zeros = df['ci_final'].value_counts().get(0, 0)
            final_percentages.append((final_zeros / df.shape[0]) * 100)

        label = "KMeansPP" if init_method == "kmeans++" else init_method
        ax.plot(cluster_sizes, final_percentages, label=label)

    ax.set_xticks(cluster_sizes)
    ax.set_xlabel('Clusters (k)')
    ax.set_yticks([0, 20, 40, 60, 80, 100])
    # Raw strings keep the literal backslash before '%' without relying on an
    # invalid escape sequence (presumably needed for LaTeX text rendering).
    ax.set_yticklabels(
        [r'0\%', r'20\%', r'40\%', r'60\%', r'80\%', r'100\%'])
    ax.set_ylabel(r'Taxa de sucesso (\%)')

    plt.legend(bbox_to_anchor=(1, 1), loc="upper left")
    plt.tight_layout()

    filename = results_dir.joinpath('fig14.eps')
    return fig, str(filename)
def fig15():
    """Plot the relative final CI (mean CI / k) against the number of clusters.

    For every initialization method and every cluster count in 10, 20, ...,
    100, reads ``<results_dir>/b2-sub-<k>/<method>.csv`` and divides the mean
    of the ``ci_final`` column by the cluster count. One line per method is
    drawn.

    Returns:
        A ``(fig, filename)`` tuple: the Matplotlib figure and the path
        ``fig15.eps`` inside the results directory, as a string. The figure
        is not written to disk here.
    """
    results_dir = get_project_results_dir()
    init_methods = [
        'Rand-P', 'Rand-C', 'Maxmin', 'kmeans++', 'Bradley', 'Sorting',
        'Projection', 'Luxburg', 'Split',
    ]
    cluster_sizes = list(range(10, 101, 10))

    pu.figure_setup()
    fig = plt.figure(figsize=pu.get_fig_size(15, 7))
    ax = fig.add_subplot()
    ax.set_prop_cycle(color=plt.cm.Set1.colors)

    for init_method in init_methods:
        relative_cis = []
        for cluster_size in cluster_sizes:
            df = pd.read_csv(
                results_dir / f"b2-sub-{cluster_size}" / f"{init_method}.csv"
            )
            relative_cis.append(df['ci_final'].mean() / cluster_size)

        label = "KMeansPP" if init_method == "kmeans++" else init_method
        ax.plot(cluster_sizes, relative_cis, label=label)

    ax.set_xticks(cluster_sizes)
    ax.set_xlabel('Clusters (k)')
    ax.set_ylabel('CI relativo (CI/k)')

    plt.legend(bbox_to_anchor=(1, 1), loc="upper left")
    plt.tight_layout()

    filename = results_dir.joinpath('fig15.eps')
    return fig, str(filename)
def plot_hq_summary_p(parameters_dict):
    """Save a two-panel summary of results versus number of prototypes.

    The left panel plots ``parameters_dict['<measure>-p'][0]`` (labelled as
    processing time) and the right panel plots
    ``parameters_dict['<measure>-p'][1]`` (labelled as accuracy), both against
    ``p_lst``, with one line per measure. The figure is saved as
    ``<dataset_name>_summary_p.eps`` in the project results directory.

    Args:
        parameters_dict: mapping providing ``'p_lst'``, ``'measures_lst'``,
            ``'dataset_name'`` and, for each plotted measure ``m``, an
            ``'<m>-p'`` entry indexable with 0 and 1.
    """
    p_lst = parameters_dict['p_lst']
    measures_lst = parameters_dict['measures_lst']
    dataset_name = parameters_dict['dataset_name']

    # One line format per measure; only as many measures as there are formats
    # are plotted.
    line_formats = ['ro--', 'g^--', 'bs--']
    # (subplot position, y-axis label, index into the '<measure>-p' entry)
    panels = [
        (1, 'Tempo de Processamento (s)', 0),
        (2, 'Acurácia', 1),
    ]

    pu.figure_setup()
    fig = plt.figure(figsize=pu.get_fig_size(15, 6))
    fig.suptitle(f'Dataset: {dataset_name.upper()}')

    for position, y_label, series_index in panels:
        ax = fig.add_subplot(1, 2, position)
        ax.set_xlabel('Protótipos')
        ax.set_ylabel(y_label)
        ax.set_axisbelow(True)

        for line_format, measure in zip(line_formats, measures_lst):
            ax.plot(
                p_lst,
                parameters_dict[f'{measure}-p'][series_index],
                line_format,
                markersize=1.5,
                linewidth=0.5,
                label=measure,
            )

        ax.set_xticks(p_lst)
        plt.legend()
        plt.tight_layout()

    filename = get_project_results_dir().joinpath(
        f'{dataset_name}_summary_p.eps')
    pu.save_fig(fig, str(filename))
def _fig13_mean_rates(dataset_dirs, init_methods):
    """Return per-method mean initial success rates and mean final gains.

    For each method, reads ``<dataset_dir>/<method>.csv`` from every directory
    and computes the percentage of rows whose ``ci_initial`` / ``ci_final``
    equals 0. The first returned list holds the mean initial percentage per
    method; the second holds the mean final percentage minus the mean initial
    percentage, so it can be stacked on top of the first.
    """
    initial_means = []
    final_gains = []
    for init_method in init_methods:
        initial_rates = []
        final_rates = []
        for dataset_dir in dataset_dirs:
            df = pd.read_csv(dataset_dir / f"{init_method}.csv")
            n_rows = df.shape[0]
            initial_zeros = df['ci_initial'].value_counts().get(0, 0)
            final_zeros = df['ci_final'].value_counts().get(0, 0)
            initial_rates.append((initial_zeros / n_rows) * 100)
            final_rates.append((final_zeros / n_rows) * 100)

        initial_mean = np.mean(initial_rates)
        initial_means.append(initial_mean)
        final_gains.append(np.mean(final_rates) - initial_mean)
    return initial_means, final_gains


def _fig13_draw_panel(ax, title, labels, initial, gain, y_label=None):
    """Draw one stacked initial/final success-rate bar panel on ``ax``."""
    ax.title.set_text(title)
    ax.bar(labels, initial, label='Inicial', color='gray', edgecolor='black')
    ax.bar(labels, gain, bottom=initial, label='Final',
           color='white', edgecolor='black')
    if y_label is not None:
        ax.set_ylabel(y_label)
    ax.tick_params('x', labelrotation=70)
    ax.set_ylim([0, 110])
    ax.set_yticks([0, 20, 40, 60, 80, 100])
    # Raw strings keep the literal backslash before '%' without relying on an
    # invalid escape sequence (presumably needed for LaTeX text rendering).
    ax.set_yticklabels(
        [r'0\%', r'20\%', r'40\%', r'60\%', r'80\%', r'100\%'])
    # Passed positionally: the keyword was renamed from `b` to `visible` in
    # Matplotlib 3.5, so the positional form works on both old and new
    # versions.
    ax.grid(False, axis='x')


def fig13():
    """Plot initial/final success rates for low- and high-overlap G2 datasets.

    The left panel aggregates every ``g2*`` entry of the results directory
    that is not in the hard-coded high-overlap list; the right panel
    aggregates the high-overlap datasets. Each panel shows, per
    initialization method, the mean percentage of rows with ``ci_initial``
    equal to 0 (gray bar) and, stacked on top, the increase to the mean
    percentage of rows with ``ci_final`` equal to 0 (white bar).

    Returns:
        A ``(fig, filename)`` tuple: the Matplotlib figure and the path
        ``fig13.eps`` inside the results directory, as a string. The figure
        is not written to disk here.
    """
    results_dir = get_project_results_dir()
    # Names used to locate the CSV files.
    init_methods = [
        'Rand-P', 'Rand-C', 'Maxmin', 'kmeans++', 'Bradley', 'Sorting',
        'Projection', 'Luxburg', 'Split',
    ]
    # Names shown on the x axis (same order as ``init_methods``).
    init_methods_bar = [
        'Rand-P', 'Rand-C', 'Maxmin', 'KMeansPP', 'Bradley', 'Sorting',
        'Projection', 'Luxburg', 'Split',
    ]
    high_overlap_datasets = [
        'g2-2-40', 'g2-2-50', 'g2-2-60', 'g2-2-70', 'g2-2-80', 'g2-2-90',
        'g2-2-100',
        'g2-4-50', 'g2-4-60', 'g2-4-70', 'g2-4-80', 'g2-4-90', 'g2-4-100',
        'g2-8-70', 'g2-8-80', 'g2-8-90', 'g2-8-100',
        'g2-16-90', 'g2-16-100',
    ]
    high_overlap_names = set(high_overlap_datasets)

    pu.figure_setup()
    fig = plt.figure(figsize=pu.get_fig_size(15, 7))
    axs = fig.subplots(ncols=2)

    # The directory listing does not depend on the method, so glob once.
    low_overlap_dirs = [
        dataset for dataset in results_dir.glob('g2*')
        if dataset.stem not in high_overlap_names
    ]
    initial, gain = _fig13_mean_rates(low_overlap_dirs, init_methods)
    _fig13_draw_panel(
        axs[0], 'Baixa sobreposição', init_methods_bar, initial, gain,
        y_label=r'Taxa de sucesso (\%)')

    high_overlap_dirs = [results_dir / name for name in high_overlap_datasets]
    initial, gain = _fig13_mean_rates(high_overlap_dirs, init_methods)
    _fig13_draw_panel(
        axs[1], 'Alta sobreposição', init_methods_bar, initial, gain)

    plt.legend()
    plt.tight_layout()

    filename = results_dir.joinpath('fig13.eps')
    return fig, str(filename)