def dist_mat(self, metric='euclidean', transpose=True):
    '''
    Return a square MatrixDictionary of pairwise distances.

    The distances are those returned by distances.pdist for the matrix
    form of the data (documented here as distances between its rows).

    metric    -- passed through unchanged to distances.pdist.
    transpose -- if True (default), the distances are computed on
                 self.transpose(); if False, on self as-is.

    The row labels of the matrix that was compared are used for both the
    rows and the columns of the returned MatrixDictionary.
    '''
    import distances
    # Both branches used to transpose, so transpose=False had no effect.
    data = self.transpose() if transpose else self
    mat, row_labels, _ = data.to_matrix()
    D_mat = distances.pdist(mat, metric)
    D = MatrixDictionary()
    D.from_matrix(D_mat, row_labels, row_labels)
    return D
def dist_mat(self, metric='euclidean', transpose=True):
    '''
    Build a square MatrixDictionary holding pairwise distances.

    The data is converted with to_matrix() and handed to distances.pdist
    (documented here as the distance between rows of that matrix).

    metric    -- distance metric name, forwarded to distances.pdist.
    transpose -- True (default): operate on self.transpose().
                 False: operate on self without transposing.

    Both axes of the result are labelled with the row labels of the
    matrix that was actually compared.
    '''
    import distances
    if transpose:
        data = self.transpose()
    else:
        # Fixed: this branch previously transposed as well, which made
        # the transpose flag a no-op.
        data = self
    mat, row_labels, _ = data.to_matrix()
    D_mat = distances.pdist(mat, metric)
    D = MatrixDictionary()
    D.from_matrix(D_mat, row_labels, row_labels)
    return D
def plot_dist_mat(self, metric='euclidean', file=None, transpose=True, show_labels=False, **kwargs):
    '''
    Plot the pairwise distance matrix with heatmap_clust.

    metric      -- forwarded to distances.pdist.
    file        -- forwarded to heatmap_clust as its `file` argument.
    transpose   -- if True (default), distances are computed on the
                   transposed matrix and the column labels are used;
                   otherwise the matrix is used as-is with the row labels.
    show_labels -- if True, pass labels to heatmap_clust. Each label is
                   the part of the original label after its last '_'.
    **kwargs    -- forwarded unchanged to heatmap_clust.
    '''
    import distances
    from heatmap_clust import heatmap_clust

    matrix, row_labels, col_labels = self.to_matrix()
    if transpose:
        mat = matrix.transpose()
        raw_labels = col_labels
    else:
        mat = matrix
        raw_labels = row_labels

    D = distances.pdist(mat, metric)

    if show_labels:
        # Build a real list (not a lazy map object) so the labels can be
        # indexed and measured regardless of the Python version.
        labels = [s.split('_')[-1] for s in raw_labels]
        heatmap_clust(D, file=file, labels=labels, **kwargs)
    else:
        heatmap_clust(D, file=file, **kwargs)
def fuzzy_clustering(self, k, r=2, metric='euclidean', rows=True):
    '''
    Perform fuzzy c-means clustering on rows or cols.

    k      -- number of clusters.
    r      -- fuzziness exponent. Less fuzzy as r -> 1.
    metric -- forwarded to distances.pdist.
    rows   -- if True (default) cluster the rows of self; otherwise
              cluster the rows of self.transpose().

    Returns a tuple (membership, membership_hard, stats):
      membership      -- MatrixDictionary built from the first value
                         returned by c_means, with the clustered items'
                         labels as rows and 1..k as columns.
      membership_hard -- dict mapping each clustered item's label to the
                         corresponding entry of the second value returned
                         by c_means.
      stats           -- third value returned by c_means, passed through.
    '''
    from R_utilities import c_means
    import distances

    source = self if rows else self.transpose()
    matrix, row_labels, _ = source.to_matrix()
    D = distances.pdist(matrix, metric)

    # c_means is given the distance matrix D and called with diss=True.
    memb, memb_hard, stats = c_means(D, k, r, diss=True)

    membership = MatrixDictionary()
    # Materialise the cluster numbers so the column labels are a list on
    # every Python version.
    membership.from_matrix(memb, row_labels, list(range(1, k + 1)))

    membership_hard = dict(zip(row_labels, memb_hard))
    return membership, membership_hard, stats