def calculate_analysis_values(embeddings_linkage, true_clusters):
    """
    Score every flat clustering obtained by cutting a linkage at its own thresholds.

    Column 2 of ``embeddings_linkage`` is read as the list of thresholds. For
    each threshold the linkage is cut with ``fcluster`` using the
    ``'distance'`` criterion and the resulting labels are compared against
    ``true_clusters``.

    :param embeddings_linkage: The linkage we calculate the values for
        (indexed as a 2-D array).
    :param true_clusters: The validation clusters.
    :return: misclassification rates, homogeneity scores, completeness scores
        and the thresholds; the three score arrays have the same shape as the
        thresholds.
    """
    log = get_logger('analysis', logging.INFO)
    log.info('Calculate scores')

    thresholds = embeddings_linkage[:, 2]

    # Every score array starts out filled with 1 and is overwritten slot by
    # slot in the loop below.
    mrs, homogeneity_scores, completeness_scores = (
        np.ones(thresholds.shape) for _ in range(3)
    )

    # One flat clustering per threshold.
    for idx, cut in enumerate(thresholds):
        labels = fcluster(embeddings_linkage, cut, 'distance')

        mrs[idx] = misclassification_rate(true_clusters, labels)
        homogeneity_scores[idx] = homogeneity_score(true_clusters, labels)
        completeness_scores[idx] = completeness_score(true_clusters, labels)

    return mrs, homogeneity_scores, completeness_scores, thresholds
def calculate_analysis_values(predicted_clusters, true_cluster):
    """
    Score each predicted clustering against the validation clusters.

    :param predicted_clusters: The predicted clusterings of the network; they
        are iterated in order and each one is scored separately.
    :param true_cluster: The validation clusters.
    :return: misclassification rates, homogeneity scores and completeness
        scores. Each array has ``len(true_cluster)`` entries initialised to 1;
        entry ``i`` holds the score of the i-th predicted clustering.
    """
    log = get_logger('analysis', logging.INFO)
    log.info('Calculate scores')

    # NOTE(review): the outputs are sized by the number of samples, not by the
    # number of clusterings. More clusterings than samples would raise an
    # IndexError below - confirm that callers never pass that.
    size = len(true_cluster)
    mrs = np.ones(size)
    homogeneity_scores = np.ones(size)
    completeness_scores = np.ones(size)

    for idx, candidate in enumerate(predicted_clusters):
        mrs[idx] = misclassification_rate(true_cluster, candidate)
        homogeneity_scores[idx] = homogeneity_score(true_cluster, candidate)
        completeness_scores[idx] = completeness_score(true_cluster, candidate)

    return mrs, homogeneity_scores, completeness_scores
def calculate_analysis_values(predicted_clusters, true_cluster, cluster_count=None, mr_list=None,
                              mr_dict_index=None):
    """
    Score each predicted clustering and optionally record selected misclassification rates.

    :param predicted_clusters: The predicted clusterings of the network; they
        are iterated in order and each one is scored separately.
    :param true_cluster: The validation clusters.
    :param cluster_count: Optional cluster count. When it is ``None`` nothing
        is recorded in ``mr_list``.
    :param mr_list: Mapping that collects misclassification rates;
        ``mr_list[mr_dict_index]`` must support ``append``. It is only
        accessed when a rate is actually recorded.
    :param mr_dict_index: Key into ``mr_list`` that also selects the recording
        rule. For ``'DS'`` every rate is recorded. For ``'AHC'`` a rate is
        recorded when ``max(predicted_cluster) == cluster_count``. For
        ``'K-MEANS'``, ``'SP'`` and ``'K-MEDOIDS'`` it is recorded when
        ``max(predicted_cluster) == cluster_count - 1``. Any other value
        records nothing.
    :return: misclassification rates, homogeneity scores and completeness
        scores. Each array has ``len(true_cluster)`` entries initialised to 1;
        entry ``i`` holds the score of the i-th predicted clustering.
    """
    logger = get_logger('analysis', logging.INFO)
    logger.info('Calculate scores')

    # Offset added to cluster_count to get the largest label a clustering must
    # contain for its rate to be recorded.
    # Presumably AHC labels are 1-based and the others 0-based - confirm.
    max_label_offsets = {'AHC': 0, 'K-MEANS': -1, 'SP': -1, 'K-MEDOIDS': -1}

    # Initialize output
    mrs = np.ones(len(true_cluster))
    homogeneity_scores = np.ones(len(true_cluster))
    completeness_scores = np.ones(len(true_cluster))

    # Loop over all possible clustering
    for i, predicted_cluster in enumerate(predicted_clusters):
        mrs[i] = misclassification_rate(true_cluster, predicted_cluster)
        homogeneity_scores[i] = homogeneity_score(true_cluster, predicted_cluster)
        completeness_scores[i] = completeness_score(true_cluster, predicted_cluster)

        if cluster_count is None:
            continue

        # NOTE(review): with mr_dict_index=None nothing is recorded. A fallback
        # keyed by cluster_count existed here but could never execute; restore
        # it deliberately if callers need it.
        if mr_dict_index == 'DS':
            mr_list[mr_dict_index].append(mrs[i])
        elif mr_dict_index in max_label_offsets:
            expected_max_label = cluster_count + max_label_offsets[mr_dict_index]
            if max(predicted_cluster) == expected_max_label:
                mr_list[mr_dict_index].append(mrs[i])

    return mrs, homogeneity_scores, completeness_scores
def _calculate_analysis_values(predicted_clusters, true_cluster, times):
    """
    Evaluate every predicted clustering with the four configured metrics.

    One result row is allocated per entry of the module-level ``metric_names``.
    A row is filled with ones when the matching entry of ``metric_min_values``
    equals 1 and with zeros otherwise.

    :param predicted_clusters: The predicted clusterings of the network.
    :param true_cluster: The validation clusters.
    :param times: Passed through to ``diarization_error_rate``.
    :return: the results of all metrics as a 2D structure where i is the index
        of the metric and j is the index of a specific result. Rows 0-3 hold
        misclassification rate, average cluster purity, adjusted Rand index
        and diarization error rate.
    """
    log = get_logger('analysis', logging.INFO)
    log.info('Calculate scores')

    # Allocate one row per metric, initialised according to its minimum value.
    metric_results = [None] * len(metric_names)
    for slot, floor in enumerate(metric_min_values):
        make_row = np.ones if floor == 1 else np.zeros
        metric_results[slot] = make_row(len(true_cluster))

    for idx, candidate in enumerate(predicted_clusters):
        progress = 'Calculated Scores for {}/{} predicted clusters'.format(
            idx, len(predicted_clusters))
        log.info(progress)

        metric_results[0][idx] = misclassification_rate(true_cluster, candidate)
        metric_results[1][idx] = average_cluster_purity(true_cluster, candidate)
        metric_results[2][idx] = adjusted_rand_index(true_cluster, candidate)
        metric_results[3][idx] = diarization_error_rate(true_cluster, candidate, times)

    return metric_results
def calculate_analysis_values(predicted_clusters, true_cluster, alorithm, vector, j, cluster_count=None,
                              mr_list=None, mr_list2=None):
    """
    Score each predicted clustering and record misclassification rates per algorithm.

    Progress and intermediate values are written to stdout with ``print``.

    :param predicted_clusters: The predicted clusterings of the network.
    :param true_cluster: The validation clusters.
    :param alorithm: Name of the clustering algorithm. The misclassification
        rate is only computed for ``"K_Means_Clustering"`` and
        ``"Agglomerative_Hierachial_Clustering"``; for any other value the
        rate stays at its initial value of 1.
    :param vector: Second component of the key under which a rate is recorded.
    :param j: Third component of the key under which a rate is recorded.
    :param cluster_count: Optional cluster count and first key component.
        Nothing is recorded when it is ``None``.
    :param mr_list: Mapping that receives the hierarchical-clustering rates,
        recorded only when ``max(predicted_cluster) == cluster_count``.
    :param mr_list2: Mapping that receives the k-means rates. It is mutated in
        place and not returned.
    :return: misclassification rates, homogeneity scores, completeness scores
        and ``mr_list``.
    """
    log = get_logger('analysis', logging.INFO)
    log.info('Calculate scores')

    size = len(true_cluster)
    mrs = np.ones(size)
    homogeneity_scores = np.ones(size)
    completeness_scores = np.ones(size)

    for idx, candidate in enumerate(predicted_clusters):
        if alorithm == "K_Means_Clustering":
            print("True Clusters")
            print(true_cluster)
            print("\n")
            print("Predicted Clusters")
            # NOTE(review): this prints the whole collection of clusterings,
            # not just the current one - confirm that is intended.
            print(predicted_clusters)
            print("\n")

            mrs[idx] = misclassification_rate(true_cluster, candidate)
            rate = mrs[idx]
            print("...................MR value of K means................................\n")
            print(rate)
            print("\n")

            if cluster_count is not None:
                key = "_".join(map(str, (cluster_count, vector, j)))
                mr_list2[key] = rate
                print(str(rate) + " added to mr_list2\n")

        elif alorithm == "Agglomerative_Hierachial_Clustering":
            mrs[idx] = misclassification_rate(true_cluster, candidate)
            rate = mrs[idx]
            print(rate)

            # Record only clusterings whose largest label equals cluster_count.
            if cluster_count is not None and max(candidate) == cluster_count:
                key = "_".join(map(str, (cluster_count, vector, j)))
                mr_list[key] = rate
                print("...................MR value of Hirechial Clustering................................\n")
                print(rate)
                print("\n")
                print(str(rate) + " added to mr_list\n")

        homogeneity_scores[idx] = homogeneity_score(true_cluster, candidate)
        completeness_scores[idx] = completeness_score(true_cluster, candidate)

    return mrs, homogeneity_scores, completeness_scores, mr_list