def spearmanr(x, y):
    """Return the Spearman rank correlation between ``x`` and ``y``.

    Michiel de Hoon's library (available in BioPython or standalone as
    PyCluster) returns Spearman rsb which does include a tie correction.

    Args:
        x: Sequence of numbers (list, tuple or NumPy array).
        y: Sequence of numbers paired element-wise with ``x``.

    Returns:
        ``0`` when either input is ``None`` or empty; otherwise ``1 - d``,
        where ``d`` is the entry ``[1][0]`` of
        ``Bio.Cluster.distancematrix((x, y), dist="s")``.

    >>> x = [5.05, 6.75, 3.21, 2.66]
    >>> y = [1.65, 26.5, -5.93, 7.96]
    >>> z = [1.65, 2.64, 2.64, 6.95]
    >>> round(spearmanr(x, y), 4)
    0.4
    >>> round(spearmanr(x, z), 4)
    -0.6325
    """
    # Test emptiness by length: ``not x`` raises ValueError for NumPy arrays
    # holding more than one element.
    if x is None or y is None or len(x) == 0 or len(y) == 0:
        return 0

    # Imported only once it is needed, so the empty-input shortcut above
    # works even where Biopython is not installed.
    from Bio.Cluster import distancematrix

    # dist="s" requests the Spearman rank distance; row 1 / column 0 of the
    # lower-triangular result is the distance between x and y.
    return 1 - distancematrix((x, y), dist="s")[1][0]
def tree_from_distance_matrix(X):
    """Build a neighbor-joining tree from the pairwise row distances of ``X``.

    ``X`` is handed to ``Bio.Cluster.distancematrix``, so it is treated as a
    table with one item per row; the distances between rows are computed
    here and then joined into a tree.

    Args:
        X: A ``pandas.DataFrame`` (its index labels become the leaf names)
            or any array-like accepted by ``Bio.Cluster.distancematrix``
            (leaves are then named ``"0"``, ``"1"``, ...).

    Returns:
        The tree produced by ``DistanceTreeConstructor().nj``.
    """
    from Bio.Cluster import distancematrix
    from Bio.Phylo.TreeConstruction import DistanceMatrix, DistanceTreeConstructor

    # Read the labels only from a DataFrame; a plain array has no usable
    # ``.index``. DistanceMatrix accepts string names only, hence ``str``.
    if isinstance(X, pd.DataFrame):
        names = [str(label) for label in X.index]
        X = X.values
    else:
        names = [str(position) for position in range(len(X))]

    # distancematrix() returns row i with the i distances to the earlier
    # rows and no diagonal. DistanceMatrix wants row i to hold i + 1 values
    # ending with the diagonal, so the zero goes at the END of each row;
    # putting it at the front would shift every distance one column over.
    lower_triangle = [
        [float(value) for value in row] + [0.0]
        for row in distancematrix(X)
    ]

    dm = DistanceMatrix(names, lower_triangle)
    return DistanceTreeConstructor().nj(dm)
def test_distancematrix_kmedoids(self):
    """Check ``distancematrix`` with a mask and weights, then ``kmedoids`` on it."""
    # The implementation under test is chosen by TestCluster.module.
    if TestCluster.module == 'Bio.Cluster':
        from Bio.Cluster import distancematrix, kmedoids
    elif TestCluster.module == 'Pycluster':
        from Pycluster import distancematrix, kmedoids

    data = numpy.array([
        [2.2, 3.3, 4.4],
        [2.1, 1.4, 5.6],
        [7.8, 9.0, 1.2],
        [4.5, 2.3, 1.5],
        [4.2, 2.4, 1.9],
        [3.6, 3.1, 9.3],
        [2.3, 1.2, 3.9],
        [4.2, 9.6, 9.3],
        [1.7, 8.9, 1.1],
    ])
    mask = numpy.array([
        [1, 1, 1],
        [1, 1, 1],
        [0, 1, 1],
        [1, 1, 1],
        [1, 1, 1],
        [0, 1, 0],
        [1, 1, 1],
        [1, 0, 1],
        [1, 1, 1],
    ], int)
    weight = numpy.array([2.0, 1.0, 0.5])

    matrix = distancematrix(data, mask=mask, weight=weight)

    # Expected lower triangle: entry k of this list is row k + 1 of the
    # matrix (row 0 is not checked), one value per column 0 .. k.
    expected_rows = [
        [1.243],
        [25.073, 44.960],
        [4.510, 5.924, 29.957],
        [3.410, 4.761, 29.203, 0.077],
        [0.040, 2.890, 34.810, 0.640, 0.490],
        [1.301, 0.447, 42.990, 3.934, 3.046, 3.610],
        [8.002, 6.266, 65.610, 12.240, 10.952, 0.000, 8.720],
        [10.659, 19.056, 0.010, 16.949, 15.734, 33.640, 18.266, 18.448],
    ]
    for row, expected_row in enumerate(expected_rows, start=1):
        for col, expected in enumerate(expected_row):
            self.assertAlmostEqual(matrix[row][col], expected, places=3)

    clusterid, error, nfound = kmedoids(matrix, npass=1000)

    # Each item's cluster id, compared against the expected id (5 or 2).
    expected_ids = [5, 5, 2, 5, 5, 5, 5, 5, 2]
    for item, expected_id in enumerate(expected_ids):
        self.assertEqual(clusterid[item], expected_id)
    self.assertAlmostEqual(error, 7.680, places=3)
return majority  # NOTE(review): tail of a function that starts outside this excerpt


def majority_filter(array):
    """Return ``find_majority(array, index)`` for every index of ``array``, as a list."""
    # find_majority is defined elsewhere; presumably it picks the dominant
    # value around ``index`` -- confirm against its definition.
    filter_array = [find_majority(array, index) for index in range(len(array))]
    return filter_array


def plots_outlier(samples, outlier_x):
    """Plot the samples found at the 1-based positions ``outlier_x`` on ``ax``."""
    # Positions are 1-based, hence the ``x-1`` when indexing ``samples``.
    outlier_y = [samples[x-1] for x in outlier_x]
    # ``ax`` comes from a scope not shown here; 'or' is presumably a
    # matplotlib format string (red circle markers) -- confirm.
    ax.plot(outlier_x, outlier_y, 'or')


# NOTE(review): the statements below read ``samples`` and ``groups``, which are
# not defined in the visible text; they presumably belong to an enclosing
# function -- confirm the intended indentation.
Y = samples
# Pair every sample with its 1-based position.
samples = [(x, sample) for x, sample in enumerate(samples, start=1)]
# Pairwise distances between the (position, value) rows.
distance = distancematrix(samples)
clusterid, error, nfound = kmedoids(distance, nclusters=groups, npass=10)
# Replace each cluster id by the result of find_majority at that index.
clusterid = majority_filter(clusterid)
# Disabled k-means alternative kept as a bare string (its note reads
# "run k-means on the data"); it has no effect at runtime.
""" #對資料做kmeans input = numpy.array(samples) whitened = whiten(input) ___, labels, ___ = k_means(X = whitened, n_clusters = groups) """
# Create independent empty lists. The original note says "five", but the
# code allocates len(clusterid)+1 of them.
segments = [list() for i in range(len(clusterid)+1)]
def majority_filter(array):
    """Return ``find_majority(array, index)`` for every index of ``array``, as a list."""
    # find_majority is defined elsewhere; presumably it picks the dominant
    # value around ``index`` -- confirm against its definition.
    filter_array = [find_majority(array, index) for index in range(len(array))]
    return filter_array


def plots_outlier(samples, outlier_x):
    """Plot the samples found at the 1-based positions ``outlier_x`` on ``ax``."""
    # Positions are 1-based, hence the ``x - 1`` when indexing ``samples``.
    outlier_y = [samples[x - 1] for x in outlier_x]
    # ``ax`` comes from a scope not shown here; 'or' is presumably a
    # matplotlib format string (red circle markers) -- confirm.
    ax.plot(outlier_x, outlier_y, 'or')


# NOTE(review): the statements below read ``samples`` and ``groups``, which are
# not defined in the visible text; they presumably belong to an enclosing
# function -- confirm the intended indentation.
Y = samples
# Pair every sample with its 1-based position.
samples = [(x, sample) for x, sample in enumerate(samples, start=1)]
# Pairwise distances between the (position, value) rows.
distance = distancematrix(samples)
clusterid, error, nfound = kmedoids(distance, nclusters=groups, npass=10)
# Replace each cluster id by the result of find_majority at that index.
clusterid = majority_filter(clusterid)
# Disabled k-means alternative kept as a bare string (its note reads
# "run k-means on the data"); it has no effect at runtime.
""" #對資料做kmeans input = numpy.array(samples) whitened = whiten(input) ___, labels, ___ = k_means(X = whitened, n_clusters = groups) """
# Create independent empty lists. The original note says "five", but the
# code allocates len(clusterid) + 1 of them.
segments = [list() for i in range(len(clusterid) + 1)]
# Put the data of different clusters into different lists.
# NOTE(review): the loop body lies outside this excerpt.
for clusteri, sample in zip(clusterid, samples):
import numpy as np
from Bio.Cluster import distancematrix, treecluster

# Small made-up table (no real example data is available): four rows,
# four values per row.
data = np.array([
    [1, 2, 3, 4],
    [5, 6, 7, 8],
    [9, 10, 11, 12],
    [0, 1, 2, 3],
])

# 1) Build the tree straight from the raw array with the default settings.
tree = treecluster(data)
print(tree)

# 2) Other options can be supplied as well, here the distance measure "b"
#    (city-block in the Bio.Cluster documentation).
tree = treecluster(data, dist="b", distancematrix=None)
print(tree)

# 3) The distance matrix can also be computed up front and clustered on
#    its own.
distances = distancematrix(data)
tree = treecluster(data=None, distancematrix=distances)
print(tree)

# One of `data` and `distancematrix` has to be None. Passing both raises
# "ValueError: use either data or distancematrix; do not use both".
import numpy as np
from Bio.Cluster import distancematrix, kmedoids

# Small made-up table: four rows, four values per row.
data = np.array([
    [0, 1, 2, 3],
    [4, 5, 6, 7],
    [8, 9, 10, 11],
    [1, 2, 3, 4],
])

# Distance matrix with the library's default distance measure.
matrix = distancematrix(data)

# Distance matrix with the measure named explicitly.
# NOTE(review): 'e' looks like the library default, in which case this
# repeats `matrix` -- confirm before dropping either one.
distances = distancematrix(data, dist='e')

# kmedoids works on a precomputed distance matrix and returns the cluster
# assignment, the clustering error and how often that solution was found.
clusterid, error, nfound = kmedoids(distances)
print("clusterid:", clusterid, "error:", error, "nfound:", nfound)