def main(name_label_list, feature_file, image_name):
    """Cluster a feature matrix and export it as a dendrogram heatmap.

    Rows and columns of the feature matrix are reordered by hierarchical
    clustering (scipy's default linkage method) on cosine distance. Only
    the row dendrogram is drawn beside the heatmap.

    Args:
        name_label_list: Forwarded to ``parse_listfile``; every parsed
            entry must provide a name at index 0 and a label at index 1.
        feature_file: File readable by ``np.load``. Assumed to hold a 2-D
            array with one row per parsed entry -- TODO confirm.
        image_name: Output target handed to ``DendroHeatMap.export``.
    """
    print('Ploting dendroheatmap...')

    # Parse the list file once and split every entry into name and label.
    entries = list(parse_listfile(name_label_list, [1, 2]))
    names = [entry[0] for entry in entries]
    labels = [entry[1] for entry in entries]

    data = np.load(feature_file)

    # linkage() expects the condensed distance vector returned by pdist().
    # A square (squareform) matrix would be interpreted as raw observation
    # vectors and clustered on the wrong distances.
    row_Z = sch.linkage(ssd.pdist(data, 'cosine'))
    row_idxing = sch.leaves_list(row_Z)
    row_labels = ['%s.%s' % (labels[i], names[i]) for i in row_idxing]

    col_Z = sch.linkage(ssd.pdist(data.T, 'cosine'))
    col_idxing = sch.leaves_list(col_Z)

    # Reorder the matrix so that it lines up with the dendrogram leaves.
    data = data[:, col_idxing][row_idxing, :]

    # Only the row tree is drawn; pass top_dendrogram=col_Z as well to
    # render the column tree.
    heatmap = pdh.DendroHeatMap(heat_map_data=data, left_dendrogram=row_Z)
    heatmap.row_labels = row_labels
    heatmap.title = 'Feature dendroheatmap'
    heatmap.export(image_name)
def plot(self, s_imgfile, colormap=None, row_labels_size=0, col_labels_size=14, l_pdf=False, l_normalize_for_color=True, l_legend_pvalue=False):
    """Draw this object's data as a dendrogram heatmap and export it.

    The heatmap is built from ``self.data`` with ``self.S_description`` as
    row labels, ``self.S_col`` as column labels, and ``self.Zr`` /
    ``self.Zc`` as the left / top dendrograms.

    Args:
        s_imgfile: Export target, passed to ``DendroHeatMap.export``.
        colormap: Colormap to use. When ``None`` a 3-class sequential
            'Oranges' brewer palette is requested from the heatmap.
        row_labels_size: Forwarded to ``DendroHeatMap``.
        col_labels_size: Forwarded to ``DendroHeatMap``.
        l_pdf: Passed as the second argument of ``export`` (presumably
            selects PDF output -- confirm against the library).
        l_normalize_for_color: Forwarded to ``DendroHeatMap``.
        l_legend_pvalue: Forwarded to ``DendroHeatMap``.
    """
    import pydendroheatmap as pdh

    settings = dict(
        heat_map_data=self.data,
        row_labels=self.S_description,
        col_labels=self.S_col,
        left_dendrogram=self.Zr,
        top_dendrogram=self.Zc,
        row_labels_size=row_labels_size,
        col_labels_size=col_labels_size,
        l_normalize_for_color=l_normalize_for_color,
        l_legend_pvalue=l_legend_pvalue,
    )
    heatmap = pdh.DendroHeatMap(**settings)

    if colormap is not None:
        heatmap.colormap = colormap
    else:
        # Default palette when the caller does not supply one.
        heatmap.colormap = heatmap.color_brewer(
            brewer_name='Oranges',
            map_type='sequential',
            number=3,
            reverse=False,
        )

    heatmap.export(s_imgfile, l_pdf)
def run():
    """Cluster a 50x50 slice of the phage 4-mer count table and plot it.

    Reads the k-mer counts (CSV) and the sequence headers from the working
    directory, passes the counts through ``normalize_rows``, keeps the
    first 50 rows and columns, clusters both axes hierarchically and
    writes the heatmap to ``phage_heatmap.png`` before showing it.
    """
    counts = 'phage_kmer_count_k4_c0_s2255.csv'
    headers = 'phage_kmer_headers_k4_c0_s2255.txt'

    data = normalize_rows(np.loadtxt(counts, delimiter=','))

    # Use a context manager so the header file is closed deterministically.
    # The first line of the file is skipped.
    with open(headers, 'r') as header_file:
        header_lines = header_file.readlines()[1:]
    row_labels = np.array([header.split('|')[3] for header in header_lines])
    col_labels = np.array(kmers(4))

    # Restrict the plot to the leading N x N block.
    N = 50
    data = data[:N, :N]
    row_labels = row_labels[:N]
    col_labels = col_labels[:N]

    # linkage() expects the condensed distance vector from pdist(); a
    # square matrix would be treated as observation vectors instead.
    row_Z = sch.linkage(ssd.pdist(data))
    row_idxing = sch.leaves_list(row_Z)

    col_Z = sch.linkage(ssd.pdist(data.T))
    col_idxing = sch.leaves_list(col_Z)

    # Reorder data and labels so they line up with the dendrogram leaves.
    data = data[:, col_idxing][row_idxing, :]
    row_labels = list(row_labels[np.array(row_idxing)])
    col_labels = list(col_labels[np.array(col_idxing)])

    heatmap = pdh.DendroHeatMap(heat_map_data=data, left_dendrogram=row_Z, top_dendrogram=col_Z)
    heatmap.colormap = heatmap.redBlackBlue
    heatmap.row_labels = row_labels
    heatmap.col_labels = col_labels
    heatmap.title = 'Bacteirophage 4-mer hierarchical clustering'
    heatmap.export('phage_heatmap.png')
    heatmap.show()
## clustering genes (rows) diss_genes = diss_genes(M,-1) z_genes = create_linkage(diss_genes,'single') idx_genes = create_leaves_list(z_genes) ## sorting matrix and labels M = M[idx_genes,:] M = M[:,idx_cells] hclust_cells = [labels_cells[int(i)] for i in idx_cells] hclust_genes = [labels_genes[int(i)] for i in idx_genes] ## plotting heatmap #bash command: pip install pydendroheatmap import pydendroheatmap as pdh heatmap = pdh.DendroHeatMap(heat_map_data=M.toarray(), left_dendrogram=z_genes, top_dendrogram=z_cells) heatmap.colormap = heatmap.yellowBlackBlue heatmap.row_labels = hclust_genes heatmap.show() ## Matlab can plot large heatmaps better than any other program. ## I strongly advise to load the clustered matrix in Matlab ## and use imagesc() to visualize the results. Students can download ## Matlab for free from the access.caltech.edu website. save_matrix(M,'clustered.mtx')
def HC(domain, para):
    """Hierarchically cluster a diet/activity matrix and export a heatmap.

    The input matrix is chosen from ``para``:

    * when ``para`` is in ``Metric`` ('TF' or 'TFIDF'), the matching
      feature array is fetched from ``dataGen4DietAct`` and passed through
      ``utilise.normArray``;
    * when ``para`` is in ``Sim``, the matrix is built from
      ``utilise.SimilarityDict(domain, para)`` via
      ``visSimilarityMat.similarityDict2array``.

    Rows and columns are clustered with Ward linkage on Euclidean
    distances, the matrix is reordered to follow the dendrogram leaves,
    and the figure is written to
    ``VisClustering<domain>Pattern/Hierarchy_<para>_ward.png``.

    Args:
        domain: One of 'DietItem', 'ActItem', 'DietType', 'ActType'.
        para: A member of the module-level ``Metric`` or ``Sim`` collections.

    Raises:
        ValueError: If no input matrix can be selected for the given
            ``domain`` / ``para`` combination.
    """
    known_domains = ('DietItem', 'ActItem', 'DietType', 'ActType')

    # Names of the dataGen4DietAct builders, keyed by metric then domain.
    feature_builders = {
        'TF': {
            'DietItem': 'genDietItemTFArray',
            'ActItem': 'genActItemTFArray',
            'DietType': 'genDietTypeTFArray',
            'ActType': 'genActTypeTFArray',
        },
        'TFIDF': {
            'DietItem': 'DietItemTfidfArray',
            'ActItem': 'ActItemTfidfArray',
            'DietType': 'DietTypeTfidfArray',
            'ActType': 'ActTypeTfidfArray',
        },
    }
    # Names of the dataGen4DietAct builders that provide the column labels.
    label_dict_builders = {
        'DietItem': 'genDietItemDict',
        'ActItem': 'genActItemDict',
        'DietType': 'genDietTypeDict',
        'ActType': 'genActTypeDict',
    }

    X = None
    if para in Metric:
        builder_name = feature_builders.get(para, {}).get(domain)
        if builder_name is None:
            # Fail with a clear message instead of hitting an unbound
            # variable further down.
            raise ValueError(
                'unsupported combination: domain=%r, para=%r' % (domain, para))
        X = utilise.normArray(getattr(dataGen4DietAct, builder_name)())
    if para in Sim:
        # Unknown domains fall back to an empty similarity dict.
        if domain in known_domains:
            Similarity_dict = utilise.SimilarityDict(domain, para)
        else:
            Similarity_dict = {}
        X = visSimilarityMat.similarityDict2array(Similarity_dict, 0)
    if X is None:
        raise ValueError('para %r is in neither Metric nor Sim' % (para,))

    # method can be ward, complete, average
    method = 'ward'
    row_method = method
    row_metric = 'euclidean'
    column_method = method
    column_metric = 'euclidean'

    # linkage() expects the condensed distance vector produced by pdist();
    # a square distance matrix would be treated as observation vectors.
    # See the scipy.cluster.hierarchy.linkage documentation.
    Y1 = sch.linkage(ssd.pdist(X, metric=row_metric), method=row_method)
    row_idxing = sch.leaves_list(Y1)

    Y2 = sch.linkage(ssd.pdist(X.T, metric=column_metric), method=column_method)
    col_idxing = sch.leaves_list(Y2)

    # Reorder the matrix so it lines up with the dendrogram leaves.
    heatmap_array = X[:, col_idxing][row_idxing, :]
    top_dendrogram = Y2
    side_dendrogram = Y1

    # Rows are labelled by their original index. Columns use the item
    # names for metric inputs and plain indices for similarity inputs.
    row_labels = range(X.shape[0])
    if para in Sim:
        col_labels = range(X.shape[1])
    if para in Metric:
        col_labels = utilise.itemDict2list(
            getattr(dataGen4DietAct, label_dict_builders[domain])())

    col_idxing = list(col_idxing)
    row_idxing = list(row_idxing)
    print(col_idxing)

    new_row_labels = [str(row_labels[i]) for i in row_idxing]
    new_col_labels = [str(col_labels[j]) for j in col_idxing]

    heatmap = pdh.DendroHeatMap(heat_map_data=heatmap_array,
                                left_dendrogram=side_dendrogram,
                                top_dendrogram=top_dendrogram)
    heatmap.title = 'HC_' + domain + '_' + para + '_' + method
    heatmap.row_labels = new_row_labels
    heatmap.col_labels = new_col_labels
    # heatmap.show()
    heatmap.export('VisClustering' + domain + 'Pattern/Hierarchy_' + para +
                   '_' + method + '.png')
def run():
    """Print the source of the demo, then run it and show the heatmap.

    The demo builds a random 50x50 matrix with two structured blocks,
    clusters rows and columns hierarchically, reorders the matrix to
    follow the dendrogram leaves and displays the result. The printed
    listing mirrors the code that is executed below it.
    """
    print('Here is the source for this example: ')
    print("""
    import numpy as np
    import pyDendroHeatMap as pdh
    import scipy.cluster.hierarchy as sch
    import scipy.spatial.distance as ssd

    #make up some data
    data = np.random.normal(scale = 50,size=(50,50))
    data[0:25,0:25] += 75
    data[25:,25:] = np.random.poisson(lam=50,size=data[25:,25:].shape)

    #cluster the rows
    row_Z = sch.linkage(ssd.pdist(data))
    row_idxing = sch.leaves_list(row_Z)

    row_labels = 50 * ['blah']

    #cluster the columns
    col_Z = sch.linkage(ssd.pdist(data.T))
    col_idxing = sch.leaves_list(col_Z)

    #make the dendrogram
    col_labels = 50 * ['blah']

    data = data[:,col_idxing][row_idxing,:]

    heatmap = pdh.DendroHeatMap(heat_map_data=data, left_dendrogram=row_Z, top_dendrogram=col_Z)
    heatmap.row_labels = row_labels
    heatmap.col_labels = col_labels
    heatmap.title = 'An example heatmap'
    heatmap.show()
    """)

    # Make up some data: Gaussian noise with a shifted top-left block and
    # a Poisson-distributed bottom-right block.
    data = np.random.normal(scale=50, size=(50, 50))
    data[0:25, 0:25] += 75
    data[25:, 25:] = np.random.poisson(lam=50, size=data[25:, 25:].shape)

    # Cluster the rows. linkage() expects the condensed distance vector
    # from pdist(); a square matrix would be treated as observations.
    row_Z = sch.linkage(ssd.pdist(data))
    row_idxing = sch.leaves_list(row_Z)
    row_labels = 50 * ['blah']

    # Cluster the columns.
    col_Z = sch.linkage(ssd.pdist(data.T))
    col_idxing = sch.leaves_list(col_Z)
    col_labels = 50 * ['blah']

    # Reorder the matrix to follow the dendrogram leaves and plot it.
    data = data[:, col_idxing][row_idxing, :]
    heatmap = pdh.DendroHeatMap(heat_map_data=data, left_dendrogram=row_Z, top_dendrogram=col_Z)
    heatmap.row_labels = row_labels
    heatmap.col_labels = col_labels
    heatmap.title = 'An example heatmap'
    heatmap.show()