Python get_distances_of_elements_to 예제들, pyproct.clustering.evaluation.metrics.common.get_distances_of_elements_to Python 예제들

예제 #1

0

파일 보기

파일: overlapCalculator.py 프로젝트: gabocic/python

    def get_cluster_min_max_distances(cls, decomposed_cluster,
                                      distance_matrix):
        """
        Collects, for every element of a mixed cluster, its minimum and maximum
        distance to the elements that belong to the other classes of the cluster.

        @param decomposed_cluster: Mapping from a class id to the elements of the
        cluster that belong to that class.
        @param distance_matrix: The matrix handed over untouched to
        'get_distances_of_elements_to'.

        @return: A tuple (min_distances, max_distances) of numpy arrays holding one
        entry per element. Both arrays are empty when the cluster has fewer than
        two classes (pure cluster).
        """
        all_ids = list(decomposed_cluster.keys())
        min_distances = []
        max_distances = []

        # If the cluster is pure there is no "other" class to measure against, so
        # min and max make no sense and both results stay empty.
        if len(all_ids) > 1:
            for set_id in all_ids:
                # Gather the elements of every class except the current one.
                vs_ids = list(all_ids)
                vs_ids.remove(set_id)
                vs_elements = []
                for vs_id in vs_ids:
                    vs_elements.extend(decomposed_cluster[vs_id])
                for element in decomposed_cluster[set_id]:
                    # Query the distances once and reuse them for both extremes
                    # instead of asking the helper twice for the same values.
                    distances = get_distances_of_elements_to(
                        element, vs_elements, distance_matrix)
                    min_distances.append(numpy.min(distances))
                    max_distances.append(numpy.max(distances))
        return numpy.array(min_distances), numpy.array(max_distances)

예제 #2

0

파일 보기

파일: compactness.py 프로젝트: gabocic/python

 def cluster_variance(cls, cluster, matrix):
     """
     Returns the variance of the distances that go from the prototype of a
     cluster to each one of its elements.

     Precondition: 'cluster.prototype' must already be set, as it is only read
     (never calculated) here.

     @param cluster: The cluster whose 'prototype' and 'all_elements' are used.
     @param matrix: The matrix handed over to 'get_distances_of_elements_to'.
     @return: The value of numpy.var over those distances.
     """
     distances_to_prototype = get_distances_of_elements_to(
         cluster.prototype, cluster.all_elements, matrix)
     return numpy.var(distances_to_prototype)

예제 #3

0

파일 보기

파일: overlapCalculator.py 프로젝트: ztypaker/pyProCT

    def get_cluster_min_distances(cls, decomposed_cluster, distance_matrix):
        """
        For each element of a mixed cluster, finds the smallest distance to any
        element that belongs to a different class of the same cluster.
        Some distances are looked at twice (once from each side), which is fine.

        @param decomposed_cluster: Mapping from a class id to the elements of the
        cluster that belong to that class.
        @param distance_matrix: The matrix handed over to
        'get_distances_of_elements_to'.

        @return: A numpy array with one minimum distance per element.
        @raise ValueError: If the cluster has fewer than two classes (pure cluster).
        """
        class_ids = decomposed_cluster.keys()

        # A pure cluster has no foreign elements to compare with.
        if len(class_ids) <= 1:
            raise ValueError("Asking min max distances of a PURE cluster.")

        closest = []
        for current_id in class_ids:
            rival_ids = list(class_ids)
            rival_ids.remove(current_id)
            # Flatten the members of all the other classes into one list.
            rivals = [
                member for rival_id in rival_ids
                for member in decomposed_cluster[rival_id]
            ]
            closest.extend(
                numpy.min(
                    get_distances_of_elements_to(member, rivals,
                                                 distance_matrix))
                for member in decomposed_cluster[current_id])
        return numpy.array(closest)

예제 #4

0

파일 보기

파일: overlapCalculator.py 프로젝트: migonsu/pyProCT

    def get_cluster_min_max_distances(cls, decomposed_cluster, distance_matrix):
        """
        Collects, for every element of a mixed cluster, its minimum and maximum
        distance to the elements that belong to the other classes of the cluster.

        @param decomposed_cluster: Mapping from a class id to the elements of the
        cluster that belong to that class.
        @param distance_matrix: The matrix handed over untouched to
        'get_distances_of_elements_to'.

        @return: A tuple (min_distances, max_distances) of numpy arrays holding one
        entry per element.
        @raise ValueError: If the cluster has fewer than two classes (pure cluster).
        """
        all_ids = list(decomposed_cluster.keys())
        min_distances = []
        max_distances = []

        if len(all_ids) > 1: # if the cluster is pure, we do not calculate min or max (it makes no sense)
            for set_id in all_ids:
                # Gather the elements of every class except the current one.
                vs_ids = list(all_ids)
                vs_ids.remove(set_id)
                vs_elements = []
                for vs_id in vs_ids:
                    vs_elements.extend(decomposed_cluster[vs_id])
                for element in decomposed_cluster[set_id]:
                    # Query the distances once and reuse them for both extremes
                    # instead of asking the helper twice for the same values.
                    distances = get_distances_of_elements_to(element, vs_elements, distance_matrix)
                    min_distances.append(numpy.min(distances))
                    max_distances.append(numpy.max(distances))
            return numpy.array(min_distances), numpy.array(max_distances)
        else:
            raise ValueError("Asking min max distances of a PURE cluster.")

예제 #5

0

파일 보기

 def calculate_average_distance_from_prototype(cls, cluster, matrix):
     """
     Returns the average distance of the elements of a cluster with its medoid.
     The medoid itself is left out of the average.

     @param cluster: The cluster from which we want to calculate this distance.
     @param matrix: The condensed matrix containing all distances.
     @return: The calculated value, or 0. if the cluster has no elements other
     than its prototype.
     @raise ValueError: If the prototype is not one of the elements of the cluster
     (raised by list.remove).
     """
     proto = cluster.prototype
     # Work on a copy so the elements of the cluster are not modified.
     elements_copy = list(cluster.all_elements)
     elements_copy.remove(proto)
     distances = get_distances_of_elements_to(proto, elements_copy, matrix)
     # len() gives the right answer whether the helper returns a list or a numpy
     # array; comparing a numpy array with '[]' does not yield a plain boolean.
     if len(distances) == 0:
         return 0.
     return numpy.mean(distances)

예제 #6

0

파일 보기

파일: daviesBouldin.py 프로젝트: migonsu/pyProCT

 def calculate_average_distance_from_prototype(cls, cluster, matrix):
     """
     Returns the average distance of the elements of a cluster with its medoid.
     The medoid itself is left out of the average.

     @param cluster: The cluster from which we want to calculate this distance.
     @param matrix: The condensed matrix containing all distances.
     @return: The calculated value, or 0. if the cluster has no elements other
     than its prototype.
     @raise ValueError: If the prototype is not one of the elements of the cluster
     (raised by list.remove).
     """
     proto = cluster.prototype
     # Work on a copy so the elements of the cluster are not modified.
     elements_copy = list(cluster.all_elements)
     elements_copy.remove(proto)
     distances = get_distances_of_elements_to(proto, elements_copy, matrix)
     # len() gives the right answer whether the helper returns a list or a numpy
     # array; comparing a numpy array with '[]' does not yield a plain boolean.
     if len(distances) == 0:
         return 0.
     return numpy.mean(distances)

예제 #7

0

파일 보기

파일: clusterStats.py 프로젝트: ztypaker/pyProCT

def calculate_per_cluster_stats(best_clustering, matrix, parameters,
                                results_folder):
    """
    Writes a CSV file with per-cluster statistics and returns its path.

    The first line holds an empty cell followed by the id of every cluster.
    Then there is one line per cluster: its id together with its diameter
    (maximum intra-cluster distance) and radius (maximum distance from its
    prototype), followed by the distances from its prototype to the prototypes
    of the clusters that come after it. Clusters for which
    SingularClusterException is raised get a diameter and radius of 0.

    @param best_clustering: The clustering whose 'clusters' are described. Its
    medoids are refreshed through 'update_medoids' as a side effect.
    @param matrix: The distance matrix, indexed as matrix[i, j].
    @param parameters: Object whose get_value("file", default_value=...) gives
    the base name of the file (".csv" is appended).
    @param results_folder: Folder in which the file is created.

    @return: The path of the written file.
    """
    file_name = parameters.get_value(
        "file", default_value="per_cluster_stats") + ".csv"
    stats_file_path = os.path.join(results_folder, file_name)

    # The context manager closes the file even if something below raises.
    with open(stats_file_path, "w") as stats_file:
        header_line = ","
        for cluster in best_clustering.clusters:
            header_line += "%s," % cluster.id
        # Drop the trailing comma.
        header_line = header_line[:-1] + "\n"
        stats_file.write(header_line)

        # TODO: Once clusterings and clusters become immutable their medoids will
        # always be up to date, and this kind of operation will be unnecessary
        update_medoids(best_clustering, matrix)

        clusters = best_clustering.clusters
        for i, cluster_i in enumerate(clusters):
            try:
                intra_distances = get_intra_cluster_distances(
                    cluster_i, matrix)
                diameter = max(intra_distances)
                distances_from_proto = get_distances_of_elements_to(
                    cluster_i.prototype, cluster_i.all_elements, matrix)
                radius = max(distances_from_proto)
            except SingularClusterException:
                diameter = 0
                radius = 0

            # Built after the try block (not in a 'finally') so that an
            # unexpected exception propagates as is instead of being masked
            # by an unbound 'diameter' / 'radius'.
            line = "%s(d: %.2f r: %.2f)," % (cluster_i.id, diameter, radius)

            # Empty cells for the columns up to and including this cluster.
            line += "," * (i + 1)

            for j in range(i + 1, len(clusters)):
                cluster_j = clusters[j]
                line += "%.2f," % matrix[cluster_i.prototype,
                                         cluster_j.prototype]

            # Drop the trailing comma.
            line = line[:-1] + "\n"
            stats_file.write(line)
    return stats_file_path

예제 #8

0

파일 보기

파일: tools.py 프로젝트: ztypaker/pyProCT

def calculate_distance_stats(elements, matrix):
    """
    Gets the mean, the standard deviation and the maximum (radius) of the
    distances that go from the central element (medoid) of a set of elements
    to every element of that set.

    @param elements: The elements we are working with.
    @param matrix: The used condensed matrix.

    @return: A (mean, std deviation, radius) tuple.
    """
    medoid = Cluster(None, elements).calculate_medoid(matrix)

    # 'elements' is used whole, so the medoid is also measured against itself
    # (that distance is expected to be 0).
    to_medoid = get_distances_of_elements_to(medoid, elements, matrix)

    mean = numpy.mean(to_medoid)
    deviation = numpy.std(to_medoid)
    radius = numpy.max(to_medoid)
    return mean, deviation, radius

예제 #9

0

파일 보기

파일: clusterStats.py 프로젝트: migonsu/pyProCT

def calculate_per_cluster_stats(best_clustering, matrix, parameters, results_folder):
    """
    Writes a CSV file with per-cluster statistics and returns its path.

    The first line holds an empty cell followed by the id of every cluster.
    Then there is one line per cluster: its id together with its diameter
    (maximum intra-cluster distance) and radius (maximum distance from its
    prototype), followed by the distances from its prototype to the prototypes
    of the clusters that come after it. Clusters for which
    SingularClusterException is raised get a diameter and radius of 0.

    @param best_clustering: The clustering whose 'clusters' are described. Its
    medoids are refreshed through 'update_medoids' as a side effect.
    @param matrix: The distance matrix, indexed as matrix[i, j].
    @param parameters: Object whose get_value("file", default_value=...) gives
    the base name of the file (".csv" is appended).
    @param results_folder: Folder in which the file is created.

    @return: The path of the written file.
    """
    file_name = parameters.get_value("file", default_value = "per_cluster_stats") + ".csv"
    stats_file_path = os.path.join(results_folder,file_name)

    # The context manager closes the file even if something below raises.
    with open(stats_file_path,"w") as stats_file:
        header_line =","
        for cluster in best_clustering.clusters:
            header_line+="%s,"%cluster.id
        # Drop the trailing comma.
        header_line = header_line[:-1] +"\n"
        stats_file.write(header_line)

        # TODO: Once clusterings and clusters become immutable their medoids will
        # always be up to date, and this kind of operation will be unnecessary
        update_medoids(best_clustering, matrix)

        clusters = best_clustering.clusters
        for i, cluster_i in enumerate(clusters):
            try:
                intra_distances = get_intra_cluster_distances(cluster_i, matrix)
                diameter = max(intra_distances)
                distances_from_proto = get_distances_of_elements_to(cluster_i.prototype,
                                                                    cluster_i.all_elements,
                                                                    matrix)
                radius = max(distances_from_proto)
            except SingularClusterException:
                diameter = 0
                radius = 0

            # Built after the try block (not in a 'finally') so that an
            # unexpected exception propagates as is instead of being masked
            # by an unbound 'diameter' / 'radius'.
            line = "%s(d: %.2f r: %.2f),"%(cluster_i.id, diameter, radius)

            # Empty cells for the columns up to and including this cluster.
            line += "," * (i+1)

            for j in range(i+1, len(clusters)):
                cluster_j = clusters[j]
                line+="%.2f,"%matrix[ cluster_i.prototype, cluster_j.prototype]

            # Drop the trailing comma.
            line = line[:-1] + "\n"
            stats_file.write(line)
    return stats_file_path

예제 #10

0

파일 보기

    def evaluate(self, clustering, matrix):
        """
        Computes the ratio between the summed per-cluster variances and the
        variance of all the clustered elements taken as one single cluster,
        scaled by the number of clusters. The mean is approximated by the medoid.

        @param clustering: The clustering to evaluate. Its medoids are refreshed
        through 'update_medoids' as a side effect.
        @param matrix: The matrix used to get the medoids and the distances.

        @return: sum(cluster variances) / (number of clusters * global variance).
        """
        update_medoids(clustering, matrix)

        # One big cluster holding every clustered element, with its own medoid.
        everything = Cluster(None, clustering.get_all_clustered_elements())
        everything.prototype = everything.calculate_medoid(matrix)
        distances_to_global_medoid = get_distances_of_elements_to(
            everything.prototype, everything.all_elements, matrix)
        global_variance = numpy.var(distances_to_global_medoid)

        per_cluster_variances = []
        for one_cluster in clustering.clusters:
            per_cluster_variances.append(
                self.cluster_variance(one_cluster, matrix))
        sum_ci = numpy.sum(per_cluster_variances)

        number_of_clusters = len(clustering.clusters)
        return sum_ci / (number_of_clusters * global_variance)

예제 #11

0

파일 보기

파일: overlapCalculator.py 프로젝트: victor-gil-sepulveda/pyProCT

    def get_cluster_min_distances(cls, decomposed_cluster, distance_matrix):
        """
        For each element of a mixed cluster, finds the smallest distance to any
        element that belongs to a different class of the same cluster.
        Some distances are looked at twice (once from each side), which is fine.

        @param decomposed_cluster: Mapping from a class id to the elements of the
        cluster that belong to that class.
        @param distance_matrix: The matrix handed over to
        'get_distances_of_elements_to'.

        @return: A numpy array with one minimum distance per element.
        @raise ValueError: If the cluster has fewer than two classes (pure cluster).
        """
        class_ids = decomposed_cluster.keys()

        # A pure cluster has no foreign elements to compare with.
        if len(class_ids) <= 1:
            raise ValueError("Asking min max distances of a PURE cluster.")

        closest = []
        for current_id in class_ids:
            rival_ids = list(class_ids)
            rival_ids.remove(current_id)
            # Flatten the members of all the other classes into one list.
            rivals = [
                member for rival_id in rival_ids
                for member in decomposed_cluster[rival_id]
            ]
            closest.extend(
                numpy.min(get_distances_of_elements_to(member, rivals, distance_matrix))
                for member in decomposed_cluster[current_id])
        return numpy.array(closest)

예제 #12

0

파일 보기

 def test_get_distances_of_elements_to(self):
     """
     Checks the distances from element 3 to the rest of the elements of the
     'squared_CH_table1' fixture against their known values.
     """
     matrix = CondensedMatrix(list(squared_CH_table1))
     reference_element = 3
     other_elements = [0,1,2,4,5]
     expected = [11.0, 6.0, 6.0, 13.0, 15.0]
     obtained = get_distances_of_elements_to(reference_element, other_elements, matrix)
     numpy.testing.assert_equal(obtained, expected)

예제 #13

0

파일 보기

파일: compactness.py 프로젝트: migonsu/pyProCT

 def cluster_variance(cls, cluster, matrix):
     """
     Returns the variance of the distances that go from the prototype of a
     cluster to each one of its elements.

     Precondition: 'cluster.prototype' must already be set, as it is only read
     (never calculated) here.

     @param cluster: The cluster whose 'prototype' and 'all_elements' are used.
     @param matrix: The matrix handed over to 'get_distances_of_elements_to'.
     @return: The value of numpy.var over those distances.
     """
     distances_to_prototype = get_distances_of_elements_to(cluster.prototype, cluster.all_elements, matrix)
     return numpy.var(distances_to_prototype)