Python get_distances_of_elements_to Beispiele, pyproct.clustering.evaluation.metrics.common.get_distances_of_elements_to Python Beispiele

Beispiel #1

0

Datei anzeigen

Datei: overlapCalculator.py Projekt: gabocic/python

    def get_cluster_min_max_distances(cls, decomposed_cluster,
                                      distance_matrix):
        """
        For every element of a mixed cluster, calculates its minimum and its
        maximum distance to the elements that belong to the other sets present
        in the same cluster.

        @param decomposed_cluster: Mapping from set id to the elements of the
        cluster that belong to that set.
        @param distance_matrix: The matrix handed to 'get_distances_of_elements_to'
        in order to look up the distances.

        @return: A tuple (min_distances, max_distances) of numpy arrays with one
        entry per element. Both arrays are empty when the cluster is pure (it
        holds elements of one set at most), as min / max make no sense there.
        """
        all_ids = list(decomposed_cluster.keys())
        min_distances = []
        max_distances = []

        # If the cluster is pure there is nothing to compare against.
        if len(all_ids) > 1:
            for set_id in all_ids:
                other_ids = list(all_ids)
                other_ids.remove(set_id)
                vs_elements = []
                for other_id in other_ids:
                    vs_elements.extend(decomposed_cluster[other_id])
                for element in decomposed_cluster[set_id]:
                    # Look the distances up once and reuse them for both
                    # statistics instead of querying the matrix twice.
                    distances = get_distances_of_elements_to(
                        element, vs_elements, distance_matrix)
                    min_distances.append(numpy.min(distances))
                    max_distances.append(numpy.max(distances))
        return numpy.array(min_distances), numpy.array(max_distances)

Beispiel #2

0

Datei anzeigen

Datei: compactness.py Projekt: gabocic/python

 def cluster_variance(cls, cluster, matrix):
     """
     Returns the variance of the distances from the cluster prototype to all
     the elements of the cluster.

     Precondition (original note was truncated; presumably): the cluster
     medoid (prototype) has already been calculated.
     """
     distances_to_prototype = get_distances_of_elements_to(
         cluster.prototype, cluster.all_elements, matrix)
     return numpy.var(distances_to_prototype)

Beispiel #3

0

Datei anzeigen

Datei: overlapCalculator.py Projekt: ztypaker/pyProCT

    def get_cluster_min_distances(cls, decomposed_cluster, distance_matrix):
        """
        For each element of a mixed cluster, finds the minimum distance to the
        elements of every other class present in the cluster and returns those
        minima as a numpy array.
        Some distances will be counted twice. This is OK.

        Raises ValueError if the cluster is pure (one class at most), as the
        measure makes no sense in that case.
        """
        set_ids = list(decomposed_cluster.keys())

        # A pure cluster has no 'other' sets to measure against.
        if len(set_ids) <= 1:
            raise ValueError("Asking min max distances of a PURE cluster.")

        closest = []
        for position, current_id in enumerate(set_ids):
            # Every set but the current one.
            rival_ids = set_ids[:position] + set_ids[position + 1:]
            rivals = [member
                      for rival_id in rival_ids
                      for member in decomposed_cluster[rival_id]]
            closest.extend(
                numpy.min(
                    get_distances_of_elements_to(member, rivals,
                                                 distance_matrix))
                for member in decomposed_cluster[current_id])
        return numpy.array(closest)

Beispiel #4

0

Datei anzeigen

Datei: overlapCalculator.py Projekt: migonsu/pyProCT

    def get_cluster_min_max_distances(cls, decomposed_cluster, distance_matrix):
        """
        For every element of a mixed cluster, calculates its minimum and its
        maximum distance to the elements that belong to the other sets present
        in the same cluster.

        @param decomposed_cluster: Mapping from set id to the elements of the
        cluster that belong to that set.
        @param distance_matrix: The matrix handed to 'get_distances_of_elements_to'
        in order to look up the distances.

        @return: A tuple (min_distances, max_distances) of numpy arrays with one
        entry per element.

        @raise ValueError: If the cluster is pure (it holds elements of one set
        at most), as min / max make no sense in that case.
        """
        all_ids = list(decomposed_cluster.keys())

        if len(all_ids) <= 1:
            raise ValueError("Asking min max distances of a PURE cluster.")

        min_distances = []
        max_distances = []
        for set_id in all_ids:
            other_ids = list(all_ids)
            other_ids.remove(set_id)
            vs_elements = []
            for other_id in other_ids:
                vs_elements.extend(decomposed_cluster[other_id])
            for element in decomposed_cluster[set_id]:
                # Look the distances up once and reuse them for both
                # statistics instead of querying the matrix twice.
                distances = get_distances_of_elements_to(element, vs_elements, distance_matrix)
                min_distances.append(numpy.min(distances))
                max_distances.append(numpy.max(distances))
        return numpy.array(min_distances), numpy.array(max_distances)

Beispiel #5

0

Datei anzeigen

 def calculate_average_distance_from_prototype(cls, cluster, matrix):
     """
     Returns the average distance of the elements of a cluster with its medoid.
     The medoid itself is left out of the average.

     @param cluster: The cluster from which we want to calculate this distance.
     @param matrix: The condensed matrix containing all distances.
     @return: The calculated value (0. if the medoid is the only element).
     @raise ValueError: If the prototype is not one of the cluster elements
     (raised by 'list.remove').
     """
     proto = cluster.prototype
     elements_copy = list(cluster.all_elements)
     elements_copy.remove(proto)
     distances = get_distances_of_elements_to(proto, elements_copy, matrix)
     # A length check works for any sequence type (list, tuple, numpy array),
     # whereas comparing against '[]' only detects an empty list and lets
     # other empty sequences reach numpy.mean, which yields nan.
     if len(distances) == 0:
         return 0.
     return numpy.mean(distances)

Beispiel #6

0

Datei anzeigen

Datei: daviesBouldin.py Projekt: migonsu/pyProCT

 def calculate_average_distance_from_prototype(cls, cluster, matrix):
     """
     Returns the average distance of the elements of a cluster with its medoid.
     The medoid itself is left out of the average.

     @param cluster: The cluster from which we want to calculate this distance.
     @param matrix: The condensed matrix containing all distances.
     @return: The calculated value (0. if the medoid is the only element).
     @raise ValueError: If the prototype is not one of the cluster elements
     (raised by 'list.remove').
     """
     proto = cluster.prototype
     elements_copy = list(cluster.all_elements)
     elements_copy.remove(proto)
     distances = get_distances_of_elements_to(proto, elements_copy, matrix)
     # A length check works for any sequence type (list, tuple, numpy array),
     # whereas comparing against '[]' only detects an empty list and lets
     # other empty sequences reach numpy.mean, which yields nan.
     if len(distances) == 0:
         return 0.
     return numpy.mean(distances)

Beispiel #7

0

Datei anzeigen

Datei: clusterStats.py Projekt: ztypaker/pyProCT

def calculate_per_cluster_stats(best_clustering, matrix, parameters,
                                results_folder):
    """
    Writes a CSV file with per-cluster statistics and returns its path.

    The first row holds the cluster ids. Each following row starts with the
    id of a cluster together with its diameter (maximum intra-cluster distance)
    and radius (maximum distance to its prototype), followed by the distances
    from its prototype to the prototypes of the clusters that come after it
    (upper triangle only). Diameter and radius are 0 for clusters that make
    the helpers raise SingularClusterException.

    @param best_clustering: The clustering whose clusters are described.
    @param matrix: The distance matrix, indexed by pairs of elements.
    @param parameters: Object whose 'get_value' provides the "file" base name
    (defaults to "per_cluster_stats").
    @param results_folder: Folder in which the file is created.

    @return: The path of the written file.
    """
    file_name = parameters.get_value(
        "file", default_value="per_cluster_stats") + ".csv"
    stats_file_path = os.path.join(results_folder, file_name)

    # The context manager closes the file even if a helper raises midway.
    with open(stats_file_path, "w") as stats_file:
        header_line = "," + "".join(
            "%s," % cluster.id for cluster in best_clustering.clusters)
        stats_file.write(header_line[:-1] + "\n")

        # TODO: Once clusterings and clusters become immutable its medoids will be always updated,
        # then this kind of operations will be unnecessary
        update_medoids(best_clustering, matrix)
        #----------------------------------------

        clusters = best_clustering.clusters
        for i, cluster_i in enumerate(clusters):
            try:
                intra_distances = get_intra_cluster_distances(cluster_i, matrix)
                diameter = max(intra_distances)
                distances_from_proto = get_distances_of_elements_to(
                    cluster_i.prototype, cluster_i.all_elements, matrix)
                radius = max(distances_from_proto)
            except SingularClusterException:
                diameter = 0
                radius = 0

            # Built after the try block (not in a 'finally') so that an
            # unexpected exception propagates untouched instead of being
            # masked by undefined or stale diameter / radius values.
            line = "%s(d: %.2f r: %.2f)," % (cluster_i.id, diameter, radius)

            # Empty cells for the diagonal and the lower triangle.
            line += "," * (i + 1)

            for j in range(i + 1, len(clusters)):
                cluster_j = clusters[j]
                line += "%.2f," % matrix[cluster_i.prototype,
                                         cluster_j.prototype]

            stats_file.write(line[:-1] + "\n")
    return stats_file_path

Beispiel #8

0

Datei anzeigen

Datei: tools.py Projekt: ztypaker/pyProCT

def calculate_distance_stats(elements, matrix):
    """
    Computes statistics of the distances between a set of elements and its
    central element (the medoid of a cluster built from those elements).

    @param elements: The elements we are working with.
    @param matrix: The used condensed matrix.

    @return: A tuple with the mean, the standard deviation and the maximum
    (radius) of the distances of all the elements to their central element.
    """
    central_element = Cluster(None, elements).calculate_medoid(matrix)

    # 'elements' includes the medoid, so its 0 distance to itself is also
    # part of the statistics.
    to_center = get_distances_of_elements_to(central_element, elements, matrix)

    mean_distance = numpy.mean(to_center)
    dispersion = numpy.std(to_center)
    radius = numpy.max(to_center)
    return mean_distance, dispersion, radius

Beispiel #9

0

Datei anzeigen

Datei: clusterStats.py Projekt: migonsu/pyProCT

def calculate_per_cluster_stats(best_clustering, matrix, parameters, results_folder):
    """
    Writes a CSV file with per-cluster statistics and returns its path.

    The first row holds the cluster ids. Each following row starts with the
    id of a cluster together with its diameter (maximum intra-cluster distance)
    and radius (maximum distance to its prototype), followed by the distances
    from its prototype to the prototypes of the clusters that come after it
    (upper triangle only). Diameter and radius are 0 for clusters that make
    the helpers raise SingularClusterException.

    @param best_clustering: The clustering whose clusters are described.
    @param matrix: The distance matrix, indexed by pairs of elements.
    @param parameters: Object whose 'get_value' provides the "file" base name
    (defaults to "per_cluster_stats").
    @param results_folder: Folder in which the file is created.

    @return: The path of the written file.
    """
    file_name = parameters.get_value("file", default_value = "per_cluster_stats") + ".csv"
    stats_file_path = os.path.join(results_folder, file_name)

    # The context manager closes the file even if a helper raises midway.
    with open(stats_file_path, "w") as stats_file:
        header_line = "," + "".join("%s," % cluster.id for cluster in best_clustering.clusters)
        stats_file.write(header_line[:-1] + "\n")

        # TODO: Once clusterings and clusters become immutable its medoids will be always updated,
        # then this kind of operations will be unnecessary
        update_medoids(best_clustering, matrix)
        #----------------------------------------

        clusters = best_clustering.clusters
        for i, cluster_i in enumerate(clusters):
            try:
                intra_distances = get_intra_cluster_distances(cluster_i, matrix)
                diameter = max(intra_distances)
                distances_from_proto = get_distances_of_elements_to(cluster_i.prototype,
                                                                    cluster_i.all_elements,
                                                                    matrix)
                radius = max(distances_from_proto)
            except SingularClusterException:
                diameter = 0
                radius = 0

            # Built after the try block (not in a 'finally') so that an
            # unexpected exception propagates untouched instead of being
            # masked by undefined or stale diameter / radius values.
            line = "%s(d: %.2f r: %.2f)," % (cluster_i.id, diameter, radius)

            # Empty cells for the diagonal and the lower triangle.
            line += "," * (i + 1)

            for j in range(i + 1, len(clusters)):
                cluster_j = clusters[j]
                line += "%.2f," % matrix[cluster_i.prototype, cluster_j.prototype]

            stats_file.write(line[:-1] + "\n")
    return stats_file_path

Beispiel #10

0

Datei anzeigen

    def evaluate(self, clustering, matrix):
        """
        Returns the sum of the per-cluster variances divided by the number of
        clusters times the variance of the whole set of clustered elements.
        Mean is approximated to medoid.
        """
        update_medoids(clustering, matrix)

        # One big cluster holding every clustered element gives the global
        # reference variance.
        everything = Cluster(None, clustering.get_all_clustered_elements())
        everything.prototype = everything.calculate_medoid(matrix)
        distances_to_center = get_distances_of_elements_to(
            everything.prototype, everything.all_elements, matrix)
        global_variance = numpy.var(distances_to_center)

        per_cluster_variances = []
        for cluster in clustering.clusters:
            per_cluster_variances.append(self.cluster_variance(cluster, matrix))

        total_variance = numpy.sum(per_cluster_variances)
        return total_variance / (len(clustering.clusters) * global_variance)

Beispiel #11

0

Datei anzeigen

Datei: overlapCalculator.py Projekt: victor-gil-sepulveda/pyProCT

    def get_cluster_min_distances(cls, decomposed_cluster, distance_matrix):
        """
        Returns, as a numpy array, the minimum distance from each element of a
        mixed cluster to the elements of all the other classes in that cluster.
        Some distances will be counted twice. This is OK.

        A pure cluster (one class at most) triggers a ValueError, since the
        calculation makes no sense for it.
        """
        class_ids = list(decomposed_cluster.keys())

        if not len(class_ids) > 1:
            raise ValueError("Asking min max distances of a PURE cluster.")

        nearest = []
        for index, own_id in enumerate(class_ids):
            # Gather the elements of every class except the current one.
            foreign_elements = []
            for foreign_id in class_ids[:index] + class_ids[index + 1:]:
                foreign_elements.extend(decomposed_cluster[foreign_id])

            for own_element in decomposed_cluster[own_id]:
                to_foreign = get_distances_of_elements_to(own_element,
                                                          foreign_elements,
                                                          distance_matrix)
                nearest.append(numpy.min(to_foreign))
        return numpy.array(nearest)

Beispiel #12

0

Datei anzeigen

 def test_get_distances_of_elements_to(self):
     """
     Checks the distances from element 3 to the rest of the elements of the
     'squared_CH_table1' data set.
     """
     expected_distances = [11.0, 6.0, 6.0, 13.0, 15.0]
     other_elements = [0, 1, 2, 4, 5]
     matrix = CondensedMatrix(list(squared_CH_table1))
     obtained_distances = get_distances_of_elements_to(3, other_elements, matrix)
     numpy.testing.assert_equal(obtained_distances, expected_distances)

Beispiel #13

0

Datei anzeigen

Datei: compactness.py Projekt: migonsu/pyProCT

 def cluster_variance(cls, cluster, matrix):
     """
     Calculates the variance of the distances that separate the prototype of
     a cluster from all of its elements.

     Precondition (original note was truncated; presumably): the cluster
     medoid (prototype) has already been calculated.
     """
     prototype = cluster.prototype
     members = cluster.all_elements
     spread = get_distances_of_elements_to(prototype, members, matrix)
     return numpy.var(spread)