def save_best_results(network_name, checkpoint_names, set_of_thresholds, set_of_mrs, set_of_homogeneity_scores, set_of_completeness_scores, speaker_numbers): if len(set_of_mrs) == 1: write_result_pickle(network_name + "_best", checkpoint_names, set_of_thresholds, set_of_mrs, set_of_homogeneity_scores, set_of_completeness_scores, speaker_numbers) else: # Find best result (min MR) min_mrs = [] for mrs in set_of_mrs: min_mrs.append(np.min(mrs)) min_mr_over_all = min(min_mrs) best_checkpoint_name = [] set_of_best_mrs = [] set_of_best_homogeneity_scores = [] set_of_best_completeness_scores = [] set_of_best_thresholds = [] best_speaker_numbers = [] for index, min_mr in enumerate(min_mrs): if min_mr == min_mr_over_all: best_checkpoint_name.append(checkpoint_names[index]) set_of_best_mrs.append(set_of_mrs[index]) set_of_best_homogeneity_scores.append(set_of_homogeneity_scores[index]) set_of_best_completeness_scores.append(set_of_completeness_scores[index]) set_of_best_thresholds.append(set_of_thresholds[index]) best_speaker_numbers.append(speaker_numbers[index]) write_result_pickle(network_name + "_best", best_checkpoint_name, set_of_best_thresholds, set_of_best_mrs, set_of_best_homogeneity_scores, set_of_best_completeness_scores, best_speaker_numbers)
def plot_curves(plot_file_name, curve_names, mrs, homogeneity_scores, completeness_scores, number_of_embeddings): """ Plots all specified curves and saves the plot into a file. :param plot_file_name: String value of save file name :param curve_names: Set of names used in legend to describe this curve :param mrs: 2D Matrix, each row describes one dataset of misclassification rates for a curve :param homogeneity_scores: 2D Matrix, each row describes one dataset of homogeneity scores for a curve :param completeness_scores: 2D Matrix, each row describes one dataset of completeness scores for a curve :param number_of_embeddings: set of integers, each integer describes how many embeddings is in this curve """ logger = get_logger('analysis', logging.INFO) logger.info('Plot results') logger.info(plot_file_name) min_mrs = [] for mr in mrs: min_mrs.append(np.min(mr)) min_mrs, curve_names, mrs, homogeneity_scores, completeness_scores, number_of_embeddings = \ (list(t) for t in zip(*sorted(zip(min_mrs, curve_names, mrs, homogeneity_scores, completeness_scores, number_of_embeddings)))) # How many lines to plot number_of_lines = len(curve_names) # Get various colors needed to plot color_map = plt.get_cmap('gist_rainbow') colors = [color_map(i) for i in np.linspace(0, 1, number_of_lines)] # Define number of figures fig1 = plt.figure(1) fig1.set_size_inches(16, 8) # Define Plots mr_plot = plt.subplot2grid((2, 3), (0, 0), colspan=2) mr_plot.set_ylabel('MR') mr_plot.set_xlabel('number of clusters') plt.ylim([-0.02, 1.02]) completeness_scores_plot = add_cluster_subplot(fig1, 234, 'completeness_scores') homogeneity_scores_plot = add_cluster_subplot(fig1, 235, 'homogeneity_scores') # Define curves and their values curves = [[mr_plot, mrs], [homogeneity_scores_plot, homogeneity_scores], [completeness_scores_plot, completeness_scores]] # Plot all curves for index in range(number_of_lines): label = curve_names[index] + '\n min MR: ' + str(min_mrs[index]) color = colors[index] number_of_clusters = np.arange(number_of_embeddings[index], 0, -1) for plot, value in curves: plot.plot(number_of_clusters, value[index], color=color, label=label) # Add legend and save the plot fig1.legend() # fig1.show() fig1.savefig(get_result_png(plot_file_name)) fig1.savefig(get_result_png(plot_file_name + '.svg'), format='svg')
def _save_best_results(network_name, checkpoint_names, metric_sets, speaker_numbers): if len(metric_sets[0]) == 1: _write_result_pickle(network_name + "_best", checkpoint_names, metric_sets, speaker_numbers) else: # Find best result (according to the first metric in metrics) if (metric_min_values[0] == 1): best_results = [] for results in metric_sets[0]: best_results.append(np.min(results)) best_result_over_all = min(best_results) else: best_results = [] for results in metric_sets[0]: best_results.append(np.max(results)) best_result_over_all = max(best_results) best_checkpoint_name = [] set_of_best_metrics = [[] for _ in metric_sets] best_speaker_numbers = [] for index, best_result in enumerate(best_results): if best_result == best_result_over_all: best_checkpoint_name.append(checkpoint_names[index]) for m, metric_set in enumerate(metric_sets): set_of_best_metrics[m].append(metric_set[index]) best_speaker_numbers.append(speaker_numbers[index]) _write_result_pickle(network_name + "_best", best_checkpoint_name, set_of_best_metrics, best_speaker_numbers)
def plot_curves(plot_file_name, curve_names, mrs, homogeneity_scores, completeness_scores, number_of_embeddings): """ Plots all specified curves and saves the plot into a file. """ logger = get_logger('analysis', logging.INFO) logger.info('Plot results') # How many lines to plot number_of_lines = len(curve_names) # Get various colors needed to plot color_map = plt.get_cmap('gist_rainbow') colors = [color_map(i) for i in np.linspace(0, 1, number_of_lines)] # Define number of figures fig1 = plt.figure(1) fig1.set_size_inches(32, 24) # Define Plots mr_plot = plt.subplot2grid((2, 2), (0, 0), colspan=2) mr_plot.set_title('MR') mr_plot.set_xlabel('number of clusters') mr_plot.axis([0, 80, 0, 1]) completeness_scores_plot = add_cluster_subplot(fig1, 223, 'completeness_scores') homogeneity_scores_plot = add_cluster_subplot(fig1, 224, 'homogeneity_scores') # Define curves and their values curves = [[mr_plot, mrs], [homogeneity_scores_plot, homogeneity_scores], [completeness_scores_plot, completeness_scores]] # Plot all curves for index in range(number_of_lines): label = curve_names[index] color = colors[index] number_of_clusters = np.arange(number_of_embeddings[index], 1, -1) for plot, value in curves: plot.plot(number_of_clusters, value[index], color=color, label=label) min_mr = np.min(mrs[index]) mr_plot.annotate(str(min_mr), xy=(0, min_mr)) # Add legend and save the plot fig1.legend() # fig1.show() fig1.savefig(get_result_png(plot_file_name))
def plot_curves(plot_file_name, curve_names, mrs, homogeneity_scores, completeness_scores, number_of_embeddings,loaded_dict,loaded_dict2,loaded_dict3): """ Plots all specified curves and saves the plot into a file. :param plot_file_name: String value of save file name :param curve_names: Set of names used in legend to describe this curve :param mrs: 2D Matrix, each row describes one dataset of misclassification rates for a curve :param homogeneity_scores: 2D Matrix, each row describes one dataset of homogeneity scores for a curve :param completeness_scores: 2D Matrix, each row describes one dataset of completeness scores for a curve :param number_of_embeddings: set of integers, each integer describes how many embeddings is in this curve """ logger = get_logger('analysis', logging.INFO) logger.info('Plot results') logger.info(plot_file_name) min_mrs = [] for mr in mrs: min_mrs.append(np.min(mr)) num_clusters=[] hierach_MR = [] kmeans_mr = [] ds_mr = [] for x in loaded_dict: num_clusters.append(x) hierach_MR.append(loaded_dict[x]) # print(str(loaded_dict[x]) + " appended to " + str(x)) print("\n") print("Hierachial MR") print(hierach_MR) for x in loaded_dict2: kmeans_mr.append(loaded_dict2[x]) # print(str(loaded_dict[x]) + " appended to " + str(x)) print("\n") print("Kmeans MR") print(kmeans_mr) for x in loaded_dict3: ds_mr.append(loaded_dict3[x]) # print(str(loaded_dict[x]) + " appended to " + str(x)) print("\n") print("DS MR") print(ds_mr) ks = list(loaded_dict) ks = list(map(int, ks)) print("\n") print("Cluster Count") print(ks) print("Minimum Cluster : " + str(min(ks))) print("Maximum CLuster : " + str(max(ks))+ "\n") maxc = max(ks) minc = min(ks) # x = zip(*sorted(zip(min_mrs, curve_names, mrs, homogeneity_scores, completeness_scores, number_of_embeddings))) # # # print("\n") # print(tuple(x)) # print("\n") # # min_mrs, curve_names, mrs, homogeneity_scores, completeness_scores, number_of_embeddings = \ # (list(t) for t in x) # How many lines to plot number_of_lines = len(curve_names) # Get various colors needed to plot color_map = plt.get_cmap('gist_rainbow') colors = [color_map(i) for i in np.linspace(0, 1, number_of_lines)] # Define number of figures fig1 = plt.figure(1) fig1.set_size_inches(16, 8) # Define Plots mr_plot = plt.subplot2grid((2, 3), (0, 0), colspan=2) mr_plot.set_ylabel('MR') mr_plot.set_xlabel('Number of clusters') plt.grid(True) plt.axis([minc,maxc, -0.02,1.2]) # print(mrs) # # # for i in range(1,len(mrs),2): # print(mrs[i]) # print(mrs[i][0]) # kmeans_mr.append(mrs[i][0]) # print(str(mrs[i][0]) + " appended " completeness_scores_plot = add_cluster_subplot(fig1, 234, 'completeness_scores') homogeneity_scores_plot = add_cluster_subplot(fig1, 235, 'homogeneity_scores') value = [hierach_MR , kmeans_mr, ds_mr] # Define curves and their values curves = [[mr_plot, value]] algorithm = ["Agglomerative_Hierachial_Clustering", "K_Means_Clustering", "DominantSets_Clustering"] # Plot all curves for index in range(3): label = algorithm[index] color = colors[index] # number_of_clusters = np.arange(number_of_embeddings[index], 0, -1) for plot, value in curves: print(value[index]) plot.plot(ks,value[index], color=color, label=label) # Add legend and save the plot fig1.legend() # fig1.show() fig1.savefig(get_result_png(plot_file_name)) print("Plot File saved in " + get_result_png(plot_file_name) ) fig1.savefig(get_result_png(plot_file_name + '.svg'), format='svg')
def _plot_curves(plot_file_name, curve_names, metric_sets, number_of_embeddings): """ Plots all specified curves and saves the plot into a file. :param plot_file_name: String value of save file name :param curve_names: Set of names used in legend to describe this curve :param metric_sets: A list of 2D matrices, each row of a metrics 2D matrix describes one dataset for a curve :param number_of_embeddings: set of integers, each integer describes how many embeddings is in this curve """ logger = get_logger('analysis', logging.INFO) logger.info('Plot results') config = load_config(None, join(get_common(), 'config.cfg')) plot_width = config.getint('common', 'plot_width') fig_width = config.getint('common', 'fig_width') fig_height = config.getint('common', 'fig_height') #Slice results to only 1-80 clusters for i in range(0, len(metric_sets)): for j in range(0, len(metric_sets[i])): metric_sets[i][j] = metric_sets[i][j][-plot_width:] print(len(metric_sets[i][j])) best_results = [[] for _ in metric_names] for m, min_value in enumerate(metric_min_values): for results in metric_sets[m]: if (metric_min_values[m] == 0): best_results[m].append(np.max(results)) else: best_results[m].append(np.min(results)) ''' This code is used to sort the lines by min mr. Because we now use mutliple metrics and dont sort by a single metric, this code is not used anymore, but we keep it for now. min_mrs, curve_names, mrs, acps, aris, homogeneity_scores, completeness_scores, number_of_embeddings = \ (list(t) for t in zip(*sorted(zip(min_mrs, curve_names, mrs, acps, aris, homogeneity_scores, completeness_scores, number_of_embeddings)))) ''' # How many lines to plot number_of_lines = len(curve_names) # Get various colors needed to plot color_map = plt.get_cmap('gist_rainbow') colors = [color_map(i) for i in np.linspace(0, 1, number_of_lines)] #Set fontsize for all plots plt.rcParams.update({'font.size': 12}) # Define number of figures fig1 = plt.figure(figsize=(fig_width, fig_height)) # Define Plots plot_grid = (3, 2) plots = [None] * len(metric_names) plots[0] = _add_cluster_subplot(plot_grid, (0, 0), metric_names[0], 1) plots[1] = _add_cluster_subplot(plot_grid, (0, 1), metric_names[1], 1) plots[2] = _add_cluster_subplot(plot_grid, (1, 0), metric_names[2], 1) plots[3] = _add_cluster_subplot(plot_grid, (1, 1), metric_names[3], 1) #Set the horizontal space between subplots plt.subplots_adjust(hspace=0.3) # Define curves and their values curves = [[] for _ in metric_names] for m, metric_set in enumerate(metric_sets): curves[m] = [plots[m], metric_set] # Plot all curves for index in range(number_of_lines): label = curve_names[index] for m, metric_name in enumerate(metric_names): label = label + '\n {} {}: {}'.format( 'Max' if metric_min_values[m] == 0 else 'Min', metric_name, str(best_results[m][index])) color = colors[index] number_of_clusters = np.arange(plot_width, 0, -1) for plot, value in curves: plot.plot(number_of_clusters, value[index], color=color, label=label) # Add legend and save the plot fig1.legend(loc='upper center', bbox_to_anchor=(0.5, 0.33), ncol=4) #fig1.show() fig1.savefig(get_result_png(plot_file_name)) fig1.savefig(get_result_png(plot_file_name + '.svg'), format='svg')