Example #1
0
def spearmanr(x, y):
    """
    Return the tie-corrected Spearman rank correlation between x and y.

    Michiel de Hoon's library (available in BioPython or standalone as
    PyCluster) returns Spearman rsb which does include a tie correction.
    The library call yields a distance, which is converted back to a
    correlation as ``1 - distance``.

    If either input is empty (or None), 0 is returned and the library is
    never imported.

    >>> x = [5.05, 6.75, 3.21, 2.66]
    >>> y = [1.65, 26.5, -5.93, 7.96]
    >>> z = [1.65, 2.64, 2.64, 6.95]
    >>> round(spearmanr(x, y), 4)
    0.4
    >>> round(spearmanr(x, z), 4)
    -0.6325
    """

    def is_empty(values):
        # ``not values`` raises ValueError for numpy arrays holding more
        # than one element, so test the length instead; fall back to plain
        # truthiness only for objects that have no length (e.g. None).
        try:
            return len(values) == 0
        except TypeError:
            return not values

    if is_empty(x) or is_empty(y):
        return 0

    # Imported after the guard so the empty-input shortcut does not need
    # BioPython to be installed.
    from Bio.Cluster import distancematrix

    # dist="s" selects the Spearman distance; entry [1][0] is the distance
    # between the two rows (x, y) passed in.
    return 1 - distancematrix((x, y), dist="s")[1][0]
Example #2
0
def spearmanr(x, y):
    """
    Compute the Spearman rank correlation of x and y, with tie correction.

    Michiel de Hoon's library (available in BioPython or standalone as
    PyCluster) returns Spearman rsb which does include a tie correction.
    The value returned here is ``1 - d`` where ``d`` is the library's
    Spearman distance between the two inputs.

    Empty (or None) input on either side short-circuits to 0 before the
    library is imported.

    >>> x = [5.05, 6.75, 3.21, 2.66]
    >>> y = [1.65, 26.5, -5.93, 7.96]
    >>> z = [1.65, 2.64, 2.64, 6.95]
    >>> round(spearmanr(x, y), 4)
    0.4
    >>> round(spearmanr(x, z), 4)
    -0.6325
    """
    for values in (x, y):
        # A bare ``not values`` is ambiguous (ValueError) for numpy arrays
        # with several elements; use the length when there is one and only
        # fall back to truthiness for length-less objects such as None.
        try:
            empty = len(values) == 0
        except TypeError:
            empty = not values
        if empty:
            return 0

    # Deferred import: only needed once there is real data to correlate.
    from Bio.Cluster import distancematrix

    # Row 1, column 0 of the lower-triangular result is the x-y distance.
    spearman_distance = distancematrix((x, y), dist="s")[1][0]
    return 1 - spearman_distance
Example #3
0
def tree_from_distance_matrix(X):
    """Build a neighbor-joining tree from a table of observations.

    Pairwise distances between the rows of ``X`` are computed with
    ``Bio.Cluster.distancematrix`` (default settings) and passed to the
    ``Bio.Phylo`` neighbor-joining constructor.

    Args:
        X: a pandas DataFrame with one row per leaf; its index supplies the
            leaf names (converted to ``str``). Any other 2-D, row-oriented
            array-like is also accepted, in which case leaves are named by
            row position ("0", "1", ...).

    Returns:
        The tree returned by ``DistanceTreeConstructor().nj``.
    """
    from Bio.Cluster import distancematrix
    from Bio.Phylo.TreeConstruction import DistanceMatrix, DistanceTreeConstructor

    if isinstance(X, pd.DataFrame):
        names = [str(label) for label in X.index]
        X = X.values
    else:
        # Plain arrays / nested lists carry no labels of their own.
        names = [str(position) for position in range(len(X))]

    # distancematrix() yields row i with the i distances to rows 0..i-1 and
    # no diagonal entry. DistanceMatrix expects lower-triangular rows that
    # END with the diagonal element, so the zero self-distance is appended
    # last; putting it first would shift every distance by one column.
    lower_triangle = [
        [float(d) for d in row] + [0.0] for row in distancematrix(X)
    ]

    dm = DistanceMatrix(names, lower_triangle)
    return DistanceTreeConstructor().nj(dm)
Example #4
0
    def test_distancematrix_kmedoids(self):
        """Check distancematrix on masked, weighted data, then kmedoids on it."""
        if TestCluster.module == 'Bio.Cluster':
            from Bio.Cluster import distancematrix, kmedoids
        elif TestCluster.module == 'Pycluster':
            from Pycluster import distancematrix, kmedoids

        data = numpy.array([
            [2.2, 3.3, 4.4],
            [2.1, 1.4, 5.6],
            [7.8, 9.0, 1.2],
            [4.5, 2.3, 1.5],
            [4.2, 2.4, 1.9],
            [3.6, 3.1, 9.3],
            [2.3, 1.2, 3.9],
            [4.2, 9.6, 9.3],
            [1.7, 8.9, 1.1],
        ])
        # A 0 in the mask marks the matching data entry as missing.
        mask = numpy.array([
            [1, 1, 1],
            [1, 1, 1],
            [0, 1, 1],
            [1, 1, 1],
            [1, 1, 1],
            [0, 1, 0],
            [1, 1, 1],
            [1, 0, 1],
            [1, 1, 1],
        ], int)
        weight = numpy.array([2.0, 1.0, 0.5])
        matrix = distancematrix(data, mask=mask, weight=weight)

        # Expected lower triangle: the list at position i holds the
        # distances from row i to rows 0..i-1 (row 0 has none).
        expected_rows = [
            [],
            [1.243],
            [25.073, 44.960],
            [4.510, 5.924, 29.957],
            [3.410, 4.761, 29.203, 0.077],
            [0.040, 2.890, 34.810, 0.640, 0.490],
            [1.301, 0.447, 42.990, 3.934, 3.046, 3.610],
            [8.002, 6.266, 65.610, 12.240, 10.952, 0.000, 8.720],
            [10.659, 19.056, 0.010, 16.949, 15.734, 33.640, 18.266, 18.448],
        ]
        for i, expected_row in enumerate(expected_rows):
            for j, expected in enumerate(expected_row):
                self.assertAlmostEqual(matrix[i][j], expected, places=3)

        clusterid, error, nfound = kmedoids(matrix, npass=1000)

        # Each item is labelled with the index of its cluster's medoid.
        expected_clusterid = [5, 5, 2, 5, 5, 5, 5, 5, 2]
        for position, wanted in enumerate(expected_clusterid):
            self.assertEqual(clusterid[position], wanted)
        self.assertAlmostEqual(error, 7.680, places=3)
    return majority
    
def majority_filter(array):
    """Run find_majority at every index of array and collect the results.

    Returns a new list with one entry per position of ``array``; ``array``
    itself is only passed through to ``find_majority``.
    """
    filtered = []
    for position in range(len(array)):
        filtered.append(find_majority(array, position))
    return filtered
    
def plots_outlier(samples, outlier_x):
    """Plot the given outliers on the module-level ``ax`` with style 'or'.

    Each entry ``x`` of ``outlier_x`` is looked up as ``samples[x - 1]`` to
    obtain its y value. ``ax`` is presumably a matplotlib Axes defined
    elsewhere in the file -- confirm against the caller.
    """
    outlier_y = []
    for position in outlier_x:
        outlier_y.append(samples[position - 1])

    ax.plot(outlier_x, outlier_y, 'or')


# Keep a reference to the raw sample values before ``samples`` is rebound.
Y = samples
# Pair every sample with its 1-based position: [(1, s1), (2, s2), ...].
samples = [(x, sample) for x, sample in enumerate(samples, start=1)]

# Pairwise distances between the (position, value) pairs.
# NOTE(review): distancematrix / kmedoids are presumably from Bio.Cluster or
# Pycluster -- the import is not visible in this part of the file.
distance =  distancematrix(samples)



# k-medoids on the precomputed distances: ``groups`` clusters, 10 passes.
clusterid, error, nfound = kmedoids(distance, nclusters=groups, npass=10)

# Replace every label with the result of find_majority at that position.
clusterid = majority_filter(clusterid)

# The bare string below is a disabled k-means alternative; as a string
# expression it is never executed. Its first line reads "run k-means on
# the data".
"""
#對資料做kmeans    
input = numpy.array(samples)
whitened = whiten(input)    
___, labels, ___ = k_means(X = whitened, n_clusters = groups)
"""

segments = [list() for i in range(len(clusterid)+1)] # create new independent lists (the original note says five; the count here is len(clusterid)+1)
def majority_filter(array):
    """Return a list holding find_majority(array, i) for every index i."""
    positions = range(len(array))
    return list(map(lambda position: find_majority(array, position), positions))


def plots_outlier(samples, outlier_x):
    """Draw the outliers onto the module-level ``ax`` using format 'or'.

    The y value for each ``x`` in ``outlier_x`` is ``samples[x - 1]``.
    ``ax`` is defined outside this function (presumably a matplotlib Axes;
    verify where it is created).
    """
    outlier_y = list(map(lambda position: samples[position - 1], outlier_x))

    ax.plot(outlier_x, outlier_y, 'or')


# Keep the raw sample values reachable as ``Y``; ``samples`` is rebound next.
Y = samples
# Attach a 1-based position to each sample: [(1, s1), (2, s2), ...].
samples = [(x, sample) for x, sample in enumerate(samples, start=1)]

# Pairwise distances between the (position, value) pairs.
# NOTE(review): distancematrix / kmedoids presumably come from Bio.Cluster
# or Pycluster -- their import is outside this part of the file.
distance = distancematrix(samples)

# Cluster on the precomputed distances into ``groups`` clusters, 10 passes.
clusterid, error, nfound = kmedoids(distance, nclusters=groups, npass=10)

# Smooth the labels: each one becomes find_majority's result at its index.
clusterid = majority_filter(clusterid)
# The bare string below holds a disabled k-means variant; being a string
# expression it never runs. Its first line reads "run k-means on the data".
"""
#對資料做kmeans    
input = numpy.array(samples)
whitened = whiten(input)    
___, labels, ___ = k_means(X = whitened, n_clusters = groups)
"""

segments = [list() for i in range(len(clusterid) + 1)]  # create new independent lists (the original note says five; the count here is len(clusterid) + 1)

# Place the samples of each cluster in a separate row
for clusteri, sample in zip(clusterid, samples):
from Bio.Cluster import treecluster
import numpy as np
from Bio.Cluster import distancematrix
# A small 4x4 example array.
data = np.array([
    [1, 2, 3, 4],
    [5, 6, 7, 8],
    [9, 10, 11, 12],
    [0, 1, 2, 3],
])

# Build the tree straight from the data array with treecluster's defaults.
tree = treecluster(data)
print(tree)

# Options can be passed as well: here a different distance code ("b") and
# an explicit distancematrix=None.
tree = treecluster(data, dist="b", distancematrix=None)
print(tree)

# Alternatively, compute the distance matrix up front and cluster from it.
# One of data / distancematrix has to be None; supplying both fails with
# "ValueError: use either data or distancematrix; do not use both".
distances = distancematrix(data)
tree = treecluster(data=None, distancematrix=distances)
print(tree)
Example #8
0
from Bio.Cluster import kmedoids
from Bio.Cluster import distancematrix
import numpy as np
# A small 4x4 example array.
data = np.array([
    [0, 1, 2, 3],
    [4, 5, 6, 7],
    [8, 9, 10, 11],
    [1, 2, 3, 4],
])

# Distance matrix with the library's default settings ...
matrix = distancematrix(data)
# ... and once more with the distance code given explicitly as 'e'.
distances = distancematrix(data, dist='e')

# k-medoids runs on the precomputed distances, not on the raw data.
clusterid, error, nfound = kmedoids(distances)
print(
    "clusterid:", clusterid,
    "error:", error,
    "nfound:", nfound,
)