Ejemplo n.º 1
0
def main(name_label_list, feature_file, image_name):
    """Cluster a feature matrix and export it as a dendrogram heat map.

    Rows and columns are both ordered by hierarchical clustering on cosine
    distance; only the row (left) dendrogram is drawn.

    Args:
        name_label_list: Forwarded to ``parse_listfile`` together with the
            selector ``[1, 2]``. Each parsed record provides a name at
            index 0 and a label at index 1.
        feature_file: Path handed to ``np.load``; the loaded array is the
            heat-map data (one row per parsed record).
        image_name: Output path handed to ``DendroHeatMap.export``.
    """
    print('Ploting dendroheatmap...')

    # Parse the list file a single time so names and labels are guaranteed
    # to come from the same pass over the input.
    records = list(parse_listfile(name_label_list, [1, 2]))
    names = [record[0] for record in records]
    labels = [record[1] for record in records]

    # read in features
    data = np.load(feature_file)

    # Cluster the rows. linkage() expects the *condensed* distance vector
    # returned by pdist(); a square matrix would be interpreted as raw
    # observation vectors and clustered with Euclidean distance instead.
    row_Z = sch.linkage(ssd.pdist(data, 'cosine'))
    row_idxing = sch.leaves_list(row_Z)
    row_labels = ['%s.%s' % (labels[i], names[i]) for i in row_idxing]

    # Cluster the columns the same way.
    col_Z = sch.linkage(ssd.pdist(data.T, 'cosine'))
    col_idxing = sch.leaves_list(col_Z)

    # Reorder the data to match the leaf order of both clusterings. The
    # column linkage is only used for ordering; no top dendrogram is passed.
    data = data[:, col_idxing][row_idxing, :]
    heatmap = pdh.DendroHeatMap(heat_map_data=data, left_dendrogram=row_Z)
    heatmap.row_labels = row_labels
    heatmap.title = 'Feature dendroheatmap'
    heatmap.export(image_name)
Ejemplo n.º 2
0
 def plot(self, s_imgfile, colormap=None, row_labels_size=0, col_labels_size=14, l_pdf=False, l_normalize_for_color=True, l_legend_pvalue=False):
     """Render this object's clustered data as a dendrogram heat map file.

     The heat map is built from ``self.data`` with ``self.S_description`` as
     row labels, ``self.S_col`` as column labels, and ``self.Zr`` / ``self.Zc``
     as the left / top dendrograms.

     Args:
         s_imgfile: Output path, passed to ``DendroHeatMap.export``.
         colormap: Colormap to assign to the heat map. When ``None`` a
             3-class sequential 'Oranges' map from ``color_brewer`` is used.
         row_labels_size: Forwarded to ``DendroHeatMap``.
         col_labels_size: Forwarded to ``DendroHeatMap``.
         l_pdf: Second positional argument of ``DendroHeatMap.export``.
         l_normalize_for_color: Forwarded to ``DendroHeatMap``.
         l_legend_pvalue: Forwarded to ``DendroHeatMap``.
     """
     import pydendroheatmap as pdh

     # Collect the constructor arguments first so the call itself stays short.
     settings = dict(
         heat_map_data=self.data,
         row_labels=self.S_description,
         col_labels=self.S_col,
         left_dendrogram=self.Zr,
         top_dendrogram=self.Zc,
         row_labels_size=row_labels_size,
         col_labels_size=col_labels_size,
         l_normalize_for_color=l_normalize_for_color,
         l_legend_pvalue=l_legend_pvalue,
     )
     figure = pdh.DendroHeatMap(**settings)

     if colormap is not None:
         figure.colormap = colormap
     else:
         # Default palette when the caller did not supply one.
         figure.colormap = figure.color_brewer(brewer_name='Oranges', map_type='sequential', number=3, reverse=False)

     figure.export(s_imgfile, l_pdf)
Ejemplo n.º 3
0
def run():
    """Cluster phage 4-mer count profiles and export/show a dendrogram heat map.

    Reads the count matrix and header file named below from the current
    working directory, keeps the first ``N`` rows and columns, clusters both
    axes, writes ``phage_heatmap.png`` and then displays the figure.
    """
    counts = 'phage_kmer_count_k4_c0_s2255.csv'
    headers = 'phage_kmer_headers_k4_c0_s2255.txt'

    data = normalize_rows(np.loadtxt(counts, delimiter=','))
    # Close the header file deterministically instead of leaking the handle.
    # The first line is skipped; the label is the 4th '|'-separated field.
    with open(headers, 'r') as header_file:
        header_lines = header_file.readlines()[1:]
    row_labels = np.array([header.split('|')[3] for header in header_lines])
    col_labels = np.array(kmers(4))

    # Only the leading N x N corner of the matrix is plotted.
    N = 50

    data = data[:N, :N]
    row_labels = row_labels[:N]
    col_labels = col_labels[:N]

    # Cluster the rows. linkage() takes the condensed distance vector from
    # pdist() directly; a square matrix would be treated as raw observations.
    row_Z = sch.linkage(ssd.pdist(data))
    row_idxing = sch.leaves_list(row_Z)

    # Cluster the columns.
    col_Z = sch.linkage(ssd.pdist(data.T))
    col_idxing = sch.leaves_list(col_Z)

    # Reorder data and labels to match the dendrogram leaf order.
    data = data[:, col_idxing][row_idxing, :]
    row_labels = list(row_labels[row_idxing])
    col_labels = list(col_labels[col_idxing])

    heatmap = pdh.DendroHeatMap(heat_map_data=data, left_dendrogram=row_Z, top_dendrogram=col_Z)
    heatmap.colormap = heatmap.redBlackBlue
    heatmap.row_labels = row_labels
    heatmap.col_labels = col_labels
    heatmap.title = 'Bacteirophage 4-mer hierarchical clustering'
    heatmap.export('phage_heatmap.png')
    heatmap.show()
Ejemplo n.º 4
0
## Clustering genes (rows) of the matrix M.
## NOTE(review): M, idx_cells, z_cells, labels_cells and labels_genes are
## not defined in this section; they must already exist when it runs.
## NOTE(review): the next line rebinds the name `diss_genes` from the callable
## to its return value, so the function cannot be called again afterwards.
diss_genes = diss_genes(M,-1)
z_genes = create_linkage(diss_genes,'single')
idx_genes = create_leaves_list(z_genes)

## Sorting matrix and labels: reorder rows by the gene leaf order and
## columns by the cell leaf order, then reorder both label lists to match.
M = M[idx_genes,:]
M = M[:,idx_cells]
hclust_cells = [labels_cells[int(i)] for i in idx_cells]
hclust_genes = [labels_genes[int(i)] for i in idx_genes]

## Plotting heatmap
## Install the plotting package first (bash): pip install pydendroheatmap
import pydendroheatmap as pdh
## M.toarray() hands the heat map a dense copy of M.
heatmap = pdh.DendroHeatMap(heat_map_data=M.toarray(), left_dendrogram=z_genes, top_dendrogram=z_cells)
heatmap.colormap = heatmap.yellowBlackBlue
## Only row labels are set here; hclust_cells is not assigned to the heat map
## in this section.
heatmap.row_labels = hclust_genes
heatmap.show()

## Matlab can plot large heatmaps better than any other program.
## I strongly advise loading the clustered matrix in Matlab
## and using imagesc() to visualize the results. Students can download
## Matlab for free from the access.caltech.edu website.
## The reordered matrix is written out for that purpose.
save_matrix(M,'clustered.mtx')





def HC(domain, para):
    """Hierarchically cluster one diet/activity matrix and export a heat map.

    The matrix is either a normalised TF / TF-IDF feature array (when
    ``para`` is in the module-level ``Metric`` collection) or a similarity
    array (when ``para`` is in ``Sim``). Rows and columns are clustered with
    Ward linkage on Euclidean distances and the reordered matrix is written
    to ``VisClustering<domain>Pattern/Hierarchy_<para>_ward.png``.

    Args:
        domain: One of 'DietItem', 'ActItem', 'DietType' or 'ActType'.
        para: A member of ``Metric`` ('TF' and 'TFIDF' are the values
            handled here) or a member of ``Sim``.

    Raises:
        ValueError: If ``para`` is in neither ``Metric`` nor ``Sim``, or if
            no feature matrix is defined for the ``domain``/``para`` pair.
            (Previously these cases surfaced as an unbound-variable error.)
    """
    domains = ('DietItem', 'ActItem', 'DietType', 'ActType')
    # Names of the dataGen4DietAct functions that build each feature matrix;
    # resolved lazily with getattr so only the requested one is touched.
    feature_builders = {
        'TF': {
            'DietItem': 'genDietItemTFArray',
            'ActItem': 'genActItemTFArray',
            'DietType': 'genDietTypeTFArray',
            'ActType': 'genActTypeTFArray',
        },
        'TFIDF': {
            'DietItem': 'DietItemTfidfArray',
            'ActItem': 'ActItemTfidfArray',
            'DietType': 'DietTypeTfidfArray',
            'ActType': 'ActTypeTfidfArray',
        },
    }
    # Names of the dataGen4DietAct functions that supply the column labels.
    label_builders = {
        'DietItem': 'genDietItemDict',
        'ActItem': 'genActItemDict',
        'DietType': 'genDietTypeDict',
        'ActType': 'genActTypeDict',
    }

    use_metric = para in Metric
    use_sim = para in Sim
    if not (use_metric or use_sim):
        raise ValueError('unsupported para: %r' % (para,))

    if use_metric:
        builder_name = feature_builders.get(para, {}).get(domain)
        if builder_name is None:
            raise ValueError('no feature matrix for domain=%r, para=%r' %
                             (domain, para))
        X = utilise.normArray(getattr(dataGen4DietAct, builder_name)())

    if use_sim:
        # An unknown domain falls back to an empty similarity dict, as before.
        if domain in domains:
            similarity_dict = utilise.SimilarityDict(domain, para)
        else:
            similarity_dict = {}
        X = visSimilarityMat.similarityDict2array(similarity_dict, 0)

    # method can be ward, complete, average
    method = 'ward'
    metric = 'euclidean'

    # linkage() must receive the *condensed* distance vector produced by
    # pdist(). Passing the square matrix from squareform() would make SciPy
    # treat each row of the distance matrix as an observation vector.
    # http://docs.scipy.org/doc/scipy/reference/generated/scipy.cluster.hierarchy.linkage.html
    side_dendrogram = sch.linkage(ssd.pdist(X, metric), method=method)  # (m-1) x 4
    row_idxing = sch.leaves_list(side_dendrogram)

    top_dendrogram = sch.linkage(ssd.pdist(X.T, metric), method=method)  # (n-1) x 4
    col_idxing = sch.leaves_list(top_dendrogram)

    # Reorder the m x n matrix to follow the leaf order of both dendrograms.
    heatmap_array = X[:, col_idxing][row_idxing, :]

    row_labels = range(X.shape[0])
    if use_sim:
        col_labels = range(X.shape[1])
    if use_metric:
        # domain was validated above, so the lookup cannot fail here.
        label_source = getattr(dataGen4DietAct, label_builders[domain])
        col_labels = utilise.itemDict2list(label_source())

    col_idxing = list(col_idxing)
    row_idxing = list(row_idxing)
    # Single-argument print() behaves the same on Python 2 and Python 3.
    print(col_idxing)

    new_row_labels = [str(row_labels[i]) for i in row_idxing]
    new_col_labels = [str(col_labels[j]) for j in col_idxing]

    heatmap = pdh.DendroHeatMap(heat_map_data=heatmap_array,
                                left_dendrogram=side_dendrogram,
                                top_dendrogram=top_dendrogram)
    heatmap.title = 'HC_' + domain + '_' + para + '_' + method
    heatmap.row_labels = new_row_labels
    heatmap.col_labels = new_col_labels

    # heatmap.show()
    heatmap.export('VisClustering' + domain + 'Pattern/Hierarchy_' + para +
                   '_' + method + '.png')
Ejemplo n.º 6
0
def run():
    """Print the source of this example, then run it.

    The example builds a 50 x 50 matrix of synthetic data, clusters its rows
    and columns hierarchically, and displays the reordered matrix as a
    dendrogram heat map. The printed listing is kept in step with the code
    that is actually executed below.
    """
    print('Here is the source for this example: ')
    print("""
    import numpy as np
    import pyDendroHeatMap as pdh
    import scipy.cluster.hierarchy as sch
    import scipy.spatial.distance as ssd

    #make up some data
    data = np.random.normal(scale = 50,size=(50,50))
    data[0:25,0:25] += 75
    data[25:,25:] = np.random.poisson(lam=50,size=data[25:,25:].shape)
    #cluster the rows
    row_Z = sch.linkage(ssd.pdist(data))
    row_idxing = sch.leaves_list(row_Z)

    row_labels = 50 * ['blah']

    #cluster the columns
    col_Z = sch.linkage(ssd.pdist(data.T))
    col_idxing = sch.leaves_list(col_Z)
    #make the dendrogram

    col_labels = 50 * ['blah']

    data = data[:,col_idxing][row_idxing,:]

    heatmap = pdh.DendroHeatMap(heat_map_data=data, left_dendrogram=row_Z, top_dendrogram=col_Z)
    heatmap.row_labels = row_labels
    heatmap.col_labels = col_labels
    heatmap.title = 'An example heatmap'
    heatmap.show()
    """)

    # Make up some data: Gaussian noise, a shifted top-left quadrant and a
    # Poisson-distributed bottom-right quadrant.
    data = np.random.normal(scale=50, size=(50, 50))
    data[0:25, 0:25] += 75
    data[25:, 25:] = np.random.poisson(lam=50, size=data[25:, 25:].shape)

    # Cluster the rows. linkage() takes the condensed distance vector from
    # pdist() directly; a square matrix would be read as raw observations.
    row_Z = sch.linkage(ssd.pdist(data))
    row_idxing = sch.leaves_list(row_Z)

    row_labels = 50 * ['blah']

    # Cluster the columns.
    col_Z = sch.linkage(ssd.pdist(data.T))
    col_idxing = sch.leaves_list(col_Z)

    col_labels = 50 * ['blah']

    # Reorder the data to follow the leaf order of both dendrograms.
    data = data[:, col_idxing][row_idxing, :]

    heatmap = pdh.DendroHeatMap(heat_map_data=data,
                                left_dendrogram=row_Z,
                                top_dendrogram=col_Z)
    heatmap.row_labels = row_labels
    heatmap.col_labels = col_labels
    heatmap.title = 'An example heatmap'
    heatmap.show()