def validate(self):
    """Compute a validity score of the form ``(1/k * E1/Ek * Dk) ** 2``.

    The terms are built from the instance's ``data``, ``membership`` and
    ``centers`` attributes:

    * ``E1`` -- sum of squared memberships weighted by the distance of
      the data to the overall data mean.
    * ``Ek`` -- sum of squared memberships weighted by the distance of
      the data to the cluster centers.
    * ``Dk`` -- the largest distance between any two centers.
    * ``k`` -- the length of the first axis of ``membership``
      (presumably the number of clusters -- confirm against the
      membership layout used by the caller).

    Returns
    -------
    float
        The squared index value.
    """
    points = self.data
    memberships = self.membership
    prototypes = self.centers
    k = memberships.shape[0]

    # The squared memberships weight both scatter terms.
    sq_memberships = np.square(memberships)

    # Scatter around a single "cluster" located at the grand mean.
    grand_mean = np.mean(points, axis=0).reshape(1, -1)
    e1 = np.sum(sq_memberships * Euclidean()(points, grand_mean))

    # Scatter around the actual cluster centers.
    ek = np.sum(sq_memberships * Euclidean()(points, prototypes))

    # Largest separation between any pair of centers.
    dk = np.max(Euclidean()(prototypes))

    index = 1. / k * e1 / ek * dk
    return index ** 2
def validate(self):
    """Compute the ratio of membership-weighted spread to separation.

    The numerator is the sum of squared memberships multiplied by the
    squared data-to-center distances, divided by the number of rows in
    ``self.data``. The denominator is the smallest non-zero squared
    distance returned by ``self.intercluster_distances()``.

    Returns
    -------
    float
        The compactness / separation ratio.
    """
    sq_point_to_center = np.square(Euclidean()(self.data, self.centers))

    sq_center_gaps = np.square(self.intercluster_distances())
    # Zero entries (presumably each center's distance to itself) would
    # otherwise always win the minimum, so they are pushed to infinity.
    sq_center_gaps[sq_center_gaps == 0] = np.inf

    n_samples = self.data.shape[0]
    weighted_spread = np.sum(
        np.multiply(np.square(self.membership), sq_point_to_center))
    closest_gap = np.min(sq_center_gaps)

    return (1. / n_samples) * weighted_spread / closest_gap
def diameters(partition):
    """Return the largest pairwise distance found inside each cluster.

    Parameters
    ----------
    partition : list
        a list of arrays, each containing data belonging to a specific
        cluster.

    Returns
    -------
    numpy.ndarray
        One value per element of ``partition``: the maximum of the
        distances ``Euclidean`` computes for that cluster's data.
    """
    widest = []
    for cluster in partition:
        pairwise = Euclidean()(cluster)
        widest.append(np.max(pairwise))
    return np.array(widest)
def mean_pairwise_distances(partition):
    """Return the halved mean of each cluster's pairwise distances.

    Parameters
    ----------
    partition : list
        a list of arrays, each containing data belonging to a specific
        cluster.

    Returns
    -------
    numpy.ndarray
        One value per element of ``partition``: the mean over every
        distance ``Euclidean`` returns for that cluster, divided by 2.
    """
    halved_means = []
    for cluster in partition:
        pairwise = Euclidean()(cluster)
        # NOTE(review): the mean runs over everything Euclidean returns
        # and is then halved -- confirm this is the intended
        # normalisation for a "mean pairwise distance".
        halved_means.append(np.mean(pairwise) / 2)
    return np.array(halved_means)
def mean_distance_from_mean(partition):
    """Return each cluster's average distance to its own mean point.

    Parameters
    ----------
    partition : list
        a list of arrays, each containing data belonging to a specific
        cluster.

    Returns
    -------
    numpy.ndarray
        One value per element of ``partition``: the mean distance
        between that cluster's data and the cluster's mean along axis 0.
    """
    spreads = []
    for cluster in partition:
        measure = Euclidean()
        # The mean is reshaped to a single row so it is passed as one
        # reference point.
        centroid = np.mean(cluster, axis=0).reshape(1, -1)
        spreads.append(np.mean(measure(cluster, centroid)))
    return np.array(spreads)
def cluster_scatter(self):
    """Calculates the mean distance between data and cluster centers.

    Each entry of ``self.partition`` is paired with the corresponding
    entry of ``self.centers``. The distances between that cluster's data
    and its center are averaged along axis 1, and the per-cluster
    results are flattened into a single array.

    Returns
    -------
    numpy.ndarray
        The flattened per-cluster averages (presumably one value per
        cluster -- this depends on the orientation of the array that
        ``Euclidean`` returns; confirm).
    """
    per_cluster = []
    for members, center in zip(self.partition, self.centers):
        distances = Euclidean()(members, center.reshape(1, -1))
        per_cluster.append(np.mean(distances, axis=1))
    return np.array(per_cluster).flatten()
def intercluster_distances(self):
    """Return the euclidean distances among the cluster centers.

    Returns
    -------
    The result of applying ``Euclidean`` to ``self.centers`` alone.
    """
    measure = Euclidean()
    return measure(self.centers)