コード例 #1
0
def calculate_analysis_values(embeddings_linkage, true_clusters):
    """
    Score every flat clustering obtainable from a linkage against reference labels.

    Each value in the third column of ``embeddings_linkage`` is used as a
    distance threshold for ``fcluster``; the resulting labels are compared
    with ``true_clusters``.

    :param embeddings_linkage: The linkage array; its column 2 supplies the thresholds.
    :param true_clusters: The validation clusters the predictions are compared with.
    :return: misclassification rates, homogeneity scores, completeness scores
        (each shaped like the thresholds) and the thresholds themselves.
    """
    logger = get_logger('analysis', logging.INFO)
    logger.info('Calculate scores')

    # The third linkage column holds one cut distance per candidate clustering.
    thresholds = embeddings_linkage[:, 2]
    result_shape = thresholds.shape

    # Every score array starts at 1 and is overwritten entry by entry below.
    mrs = np.ones(result_shape)
    homogeneity_scores = np.ones(result_shape)
    completeness_scores = np.ones(result_shape)

    for position, cut_distance in enumerate(thresholds):
        # Flat cluster labels obtained by cutting the linkage at this distance.
        labels = fcluster(embeddings_linkage, cut_distance, criterion='distance')

        mrs[position] = misclassification_rate(true_clusters, labels)
        homogeneity_scores[position] = homogeneity_score(true_clusters, labels)
        completeness_scores[position] = completeness_score(true_clusters, labels)

    return mrs, homogeneity_scores, completeness_scores, thresholds
コード例 #2
0
def calculate_analysis_values(predicted_clusters, true_cluster):
    """
    Score each predicted clustering against the reference labels.

    The result arrays have one slot per entry of ``true_cluster`` and are
    filled by the index of the predicted clustering; slots the loop never
    reaches keep their initial value of 1.

    :param predicted_clusters: Iterable of predicted label assignments, one per clustering.
    :param true_cluster: The validation clusters.
    :return: misclassification rates, homogeneity scores and completeness scores.
    """
    logger = get_logger('analysis', logging.INFO)
    logger.info('Calculate scores')

    # Three independent arrays of ones, sized by the number of reference labels.
    slot_count = len(true_cluster)
    mrs, homogeneity_scores, completeness_scores = (
        np.ones(slot_count) for _ in range(3)
    )

    for position, labels in enumerate(predicted_clusters):
        mrs[position] = misclassification_rate(true_cluster, labels)
        homogeneity_scores[position] = homogeneity_score(true_cluster, labels)
        completeness_scores[position] = completeness_score(true_cluster, labels)

    return mrs, homogeneity_scores, completeness_scores
def calculate_analysis_values(predicted_clusters, true_cluster, cluster_count=None, mr_list=None, mr_dict_index=None):
    """
    Score each predicted clustering and optionally record selected misclassification rates.

    The result arrays have one slot per entry of ``true_cluster`` and are
    filled by the index of the predicted clustering; slots the loop never
    reaches keep their initial value of 1.

    When ``cluster_count`` is given, the misclassification rate of a
    clustering is additionally appended to ``mr_list[mr_dict_index]`` if:

    * ``mr_dict_index`` is ``'DS'`` (every clustering is recorded), or
    * ``mr_dict_index`` is ``'AHC'`` and the largest predicted label equals
      ``cluster_count``, or
    * ``mr_dict_index`` is ``'K-MEANS'``, ``'SP'`` or ``'K-MEDOIDS'`` and the
      largest predicted label equals ``cluster_count - 1``.

    :param predicted_clusters: Iterable of predicted label assignments, one per clustering.
    :param true_cluster: The validation clusters.
    :param cluster_count: Target number of clusters; ``None`` disables recording.
    :param mr_list: Mapping from algorithm key to a list of recorded
        misclassification rates; mutated in place. Must contain
        ``mr_dict_index`` whenever a value is recorded.
    :param mr_dict_index: Algorithm key selecting both the matching rule and
        the list inside ``mr_list``.
    :return: misclassification rates, homogeneity scores and completeness scores.
    """
    logger = get_logger('analysis', logging.INFO)
    logger.info('Calculate scores')

    # Initialize output
    slot_count = len(true_cluster)
    mrs = np.ones(slot_count)
    homogeneity_scores = np.ones(slot_count)
    completeness_scores = np.ones(slot_count)

    # Loop over all possible clustering
    for i, predicted_cluster in enumerate(predicted_clusters):
        mrs[i] = misclassification_rate(true_cluster, predicted_cluster)
        homogeneity_scores[i] = homogeneity_score(true_cluster, predicted_cluster)
        completeness_scores[i] = completeness_score(true_cluster, predicted_cluster)

        if cluster_count is None:
            continue

        # NOTE(review): the differing targets presumably reflect 1-based labels
        # for 'AHC' versus 0-based labels for the other algorithms - confirm.
        if mr_dict_index == 'DS':
            record = True
        elif mr_dict_index == 'AHC':
            record = max(predicted_cluster) == cluster_count
        elif mr_dict_index in ('K-MEANS', 'SP', 'K-MEDOIDS'):
            record = max(predicted_cluster) == cluster_count - 1
        else:
            record = False

        if record:
            mr_list[mr_dict_index].append(mrs[i])

    return mrs, homogeneity_scores, completeness_scores
コード例 #4
0
def _calculate_analysis_values(predicted_clusters, true_cluster, times):
    """
    Evaluate every predicted clustering with all four metrics.

    Result rows are, in order: misclassification rate, average cluster
    purity, adjusted Rand index and diarization error rate.

    :param predicted_clusters: Sequence of predicted label assignments, one per clustering.
    :param true_cluster: The validation clusters.
    :param times: Passed through to ``diarization_error_rate`` unchanged.
    :return: the results of all metrics as a 2D structure where the first index
        selects the metric and the second index selects a specific result
    """
    logger = get_logger('analysis', logging.INFO)
    logger.info('Calculate scores')

    # One row per metric name; a row starts as ones when the corresponding
    # entry of metric_min_values equals 1 and as zeros otherwise.
    metric_results = [None] * len(metric_names)
    for row, min_value in enumerate(metric_min_values):
        make_row = np.ones if min_value == 1 else np.zeros
        metric_results[row] = make_row(len(true_cluster))

    for position, labels in enumerate(predicted_clusters):
        progress = 'Calculated Scores for {}/{} predicted clusters'.format(
            position, len(predicted_clusters))
        logger.info(progress)

        metric_results[0][position] = misclassification_rate(true_cluster, labels)
        metric_results[1][position] = average_cluster_purity(true_cluster, labels)
        metric_results[2][position] = adjusted_rand_index(true_cluster, labels)
        metric_results[3][position] = diarization_error_rate(true_cluster, labels, times)

    return metric_results
コード例 #5
0
def calculate_analysis_values(predicted_clusters, true_cluster, alorithm, vector, j, cluster_count=None, mr_list=None, mr_list2=None):
    """
    Score each predicted clustering and record misclassification rates per algorithm.

    The misclassification rate is only computed when ``alorithm`` is
    ``"K_Means_Clustering"`` or ``"Agglomerative_Hierachial_Clustering"``;
    for any other value the corresponding entries stay at their initial 1.
    Homogeneity and completeness are always computed. Intermediate values
    are printed to standard output.

    Recorded rates are stored under the key
    ``"<cluster_count>_<vector>_<j>"``: in ``mr_list2`` for k-means (every
    clustering), in ``mr_list`` for agglomerative clustering (only when the
    largest predicted label equals ``cluster_count``).

    :param predicted_clusters: Iterable of predicted label assignments, one per clustering.
    :param true_cluster: The validation clusters.
    :param alorithm: Name of the clustering algorithm that produced the predictions.
    :param vector: Value used as the middle part of the record key.
    :param j: Value used as the last part of the record key.
    :param cluster_count: Target number of clusters; ``None`` disables recording.
    :param mr_list: Mapping receiving agglomerative-clustering rates; mutated in place.
    :param mr_list2: Mapping receiving k-means rates; mutated in place.
    :return: misclassification rates, homogeneity scores, completeness scores and ``mr_list``.
    """
    logger = get_logger('analysis', logging.INFO)
    logger.info('Calculate scores')

    # All score arrays start at 1, sized by the number of reference labels.
    slot_count = len(true_cluster)
    mrs = np.ones(slot_count)
    homogeneity_scores = np.ones(slot_count)
    completeness_scores = np.ones(slot_count)

    for position, labels in enumerate(predicted_clusters):
        if alorithm == "K_Means_Clustering":
            print("True Clusters")
            print(true_cluster)
            print("\n")
            print("Predicted Clusters")
            print(predicted_clusters)
            print("\n")

            mrs[position] = misclassification_rate(true_cluster, labels)
            print("...................MR value of K means................................\n")
            print(mrs[position])
            print("\n")

            if cluster_count is not None:
                record_key = "_".join(map(str, (cluster_count, vector, j)))
                mr_list2[record_key] = mrs[position]
                print(str(mrs[position]) + " added to mr_list2\n")

        if alorithm == "Agglomerative_Hierachial_Clustering":
            mrs[position] = misclassification_rate(true_cluster, labels)
            print(mrs[position])

            # Only the clustering whose largest label equals the target count is recorded.
            if cluster_count is not None and max(labels) == cluster_count:
                record_key = "_".join(map(str, (cluster_count, vector, j)))
                mr_list[record_key] = mrs[position]
                print("...................MR value of Hirechial Clustering................................\n")
                print(mrs[position])
                print("\n")
                print(str(mrs[position]) + " added to mr_list\n")

        homogeneity_scores[position] = homogeneity_score(true_cluster, labels)
        completeness_scores[position] = completeness_score(true_cluster, labels)

    return mrs, homogeneity_scores, completeness_scores, mr_list