def life_time(Zpy, ns):
    """
    Life-time criterion for automatic selection of the number of clusters.

    [ported from a life-time implementation in MATLAB]

    The merge distances of the linkage are scanned for the largest gap
    between two consecutive merges.  The number of clusters is one plus
    the number of merges whose distance lies above the lower edge of that
    gap.  If the largest gap is smaller than twice the smallest non-zero
    gap, the data is considered to hold a single cluster.

    Input:
        Zpy (array): linkage matrix in SciPy layout; it is handed to
            ``hierarchy.to_mlab_linkage``, so it must be accepted there.

        ns (int): number of samples.

    Output:
        labels (array): one label per sample.  When more than one cluster
            is selected this is the result of ``hierarchy.fcluster`` with
            the ``'maxclust'`` criterion; otherwise it is
            ``np.arange(ns)``.

    NOTE(review): the fallback ``np.arange(ns)`` gives every sample a
    distinct label even though the branch is reached when the cluster
    count is 1 -- kept as in the original; confirm this is what callers
    expect.
    """
    Z = hierarchy.to_mlab_linkage(Zpy)
    heights = Z[:, 2]

    # Gaps between consecutive merge distances (the "life-times").
    dif = np.diff(heights)
    if dif.size == 0:
        # Fewer than two merges: there is no gap to analyse.  The original
        # code raised on np.argmax of an empty array here; fall back to the
        # same labels the single-cluster branch produces.
        return np.arange(ns, dtype="int")

    indice = np.argmax(dif)
    maximo = dif[indice]

    # Count merges strictly above the lower edge of the largest gap.  A
    # boolean mask replaces the undefined ``find`` helper and the invalid
    # ``indice == []`` emptiness check of the original.
    cont = int(np.count_nonzero(heights > heights[indice])) + 1

    # Single-cluster test: largest gap < 2 * smallest non-zero gap -> nc = 1.
    gaps = dif[dif != 0]
    if gaps.size:  # all-zero gaps would make np.min fail on an empty array
        minimo = np.min(gaps)
        # If maximo == minimo we have a perfect association matrix of
        # 0s and 1s, so the test is skipped.
        if minimo != maximo and maximo < 2 * minimo:
            cont = 1

    nc_stable = cont
    if nc_stable > 1:
        labels = hierarchy.fcluster(
            hierarchy.from_mlab_linkage(Z), nc_stable, 'maxclust'
        )
    else:
        # nc_stable == 1
        labels = np.arange(ns, dtype="int")
    return labels
def test_mlab_linkage_conversion_multiple_rows(self): # Tests from/to_mlab_linkage on linkage array with multiple rows. Zm = np.asarray([[3, 6, 138], [4, 5, 219], [1, 8, 255], [2, 9, 268], [7, 10, 295]]) Z = np.array( [[2., 5., 138., 2.], [3., 4., 219., 2.], [0., 7., 255., 3.], [1., 8., 268., 4.], [6., 9., 295., 6.]], dtype=np.double) assert_equal(from_mlab_linkage(Zm), Z) assert_equal(to_mlab_linkage(Z), Zm)
def test_mlab_linkage_conversion_multiple_rows(self): # Tests from/to_mlab_linkage on linkage array with multiple rows. Zm = np.asarray([[3, 6, 138], [4, 5, 219], [1, 8, 255], [2, 9, 268], [7, 10, 295]]) Z = np.array([[2., 5., 138., 2.], [3., 4., 219., 2.], [0., 7., 255., 3.], [1., 8., 268., 4.], [6., 9., 295., 6.]], dtype=np.double) assert_equal(from_mlab_linkage(Zm), Z) assert_equal(to_mlab_linkage(Z), Zm)
def test_mlab_linkage_conversion_multiple_rows(self): # Tests from/to_mlab_linkage on linkage array with multiple rows. Zm = np.asarray([[3, 6, 138], [4, 5, 219], [1, 8, 255], [2, 9, 268], [7, 10, 295]]) Z = np.array( [ [2.0, 5.0, 138.0, 2.0], [3.0, 4.0, 219.0, 2.0], [0.0, 7.0, 255.0, 3.0], [1.0, 8.0, 268.0, 4.0], [6.0, 9.0, 295.0, 6.0], ], dtype=np.double, ) assert_equal(from_mlab_linkage(Zm), Z) assert_equal(to_mlab_linkage(Z), Zm)
def test_mlab_linkage_conversion_single_row(self): # Tests from/to_mlab_linkage on linkage array with single row. Z = np.asarray([[0., 1., 3., 2.]]) Zm = [[1, 2, 3]] assert_equal(from_mlab_linkage(Zm), Z) assert_equal(to_mlab_linkage(Z), Zm)
def test_mlab_linkage_conversion_empty(self): # Tests from/to_mlab_linkage on empty linkage array. X = np.asarray([]) assert_equal(from_mlab_linkage([]), X) assert_equal(to_mlab_linkage([]), X)
def show_dendrogram(Z, **kwargs):
    """Draw the dendrogram of a MATLAB-layout linkage and display it.

    ``Z`` is converted with ``from_mlab_linkage`` before plotting; every
    keyword argument is forwarded unchanged to SciPy's ``dendrogram``.
    The function returns nothing.
    """
    # Imports stay local to the function, SciPy first, as in the original.
    import scipy.cluster.hierarchy as hierarchy
    import matplotlib.pyplot as pyplot

    scipy_linkage = hierarchy.from_mlab_linkage(Z)
    hierarchy.dendrogram(scipy_linkage, **kwargs)
    pyplot.show()
import scipy.cluster.hierarchy as hcluster
import numpy as np
import matplotlib.pyplot as plt

# Linkage in which each merge joins the previously formed cluster with one
# more observation; written in MATLAB layout and converted to SciPy layout.
chained_linkage = hcluster.from_mlab_linkage(
    np.array([
        [1, 2, 1],
        [9, 3, 2],
        [10, 4, 3],
        [11, 5, 4],
        [12, 6, 5],
        [13, 7, 6],
        [14, 8, 7],
    ])
)

# Linkage that merges observations pairwise, then the pairs, then the two
# remaining groups; also written in MATLAB layout and converted.
perfect_linkage = hcluster.from_mlab_linkage(
    np.array([
        [1, 2, 1],
        [3, 4, 1],
        [5, 6, 1],
        [7, 8, 1],
        [9, 10, 2],
        [11, 12, 2],
        [13, 14, 3],
    ])
)

# Render each dendrogram to its own PDF, closing all figures in between.
for _linkage, _pdf_name in (
    (perfect_linkage, 'perfect_linkage.pdf'),
    (chained_linkage, 'worst_case_linkage.pdf'),
):
    hcluster.dendrogram(_linkage, color_threshold=0)
    plt.savefig(_pdf_name)
    plt.close('all')
def check_linkage_q(self, method):
    """Compare linkage(Y, method) on the Q data set with stored results.

    The reference is read from ``eo`` under ``'linkage-Q-<method>'`` and
    converted with ``from_mlab_linkage`` before the comparison.
    """
    # Compute first, then load the reference, as in the original ordering.
    computed = linkage(eo['Q-X'], method)
    reference = from_mlab_linkage(eo['linkage-Q-%s' % method])
    assert_allclose(computed, reference, atol=1e-06)
def check_linkage_tdist(self, method):
    """Compare linkage(Y, method) on the tdist data set with stored results.

    The reference is read from ``eo`` under ``'linkage-<method>-tdist'``
    and converted with ``from_mlab_linkage`` before the comparison.
    """
    # Compute first, then load the reference, as in the original ordering.
    computed = linkage(_ytdist, method)
    reference = from_mlab_linkage(eo['linkage-%s-tdist' % method])
    assert_allclose(computed, reference, atol=1e-10)