# make_rt_heatmap.py
import sys, re, subprocess, json
from math import *
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from scipy.spatial.distance import pdist,squareform
from scipy.cluster.hierarchy import linkage, dendrogram
from scipy.cluster import hierarchy
def load_prob_dict(json_path):
    """Read the nested dictionary stored as JSON at *json_path*.

    The file is closed as soon as parsing finishes.
    """
    with open(json_path, 'r') as handle:
        return json.load(handle)


def build_prob_frame(prob_dict):
    """Turn a dict-of-dicts into a square, name-sorted DataFrame.

    The sorted outer keys are used for BOTH the row and the column labels,
    so every inner dict must contain an entry for every outer key (a
    missing entry raises KeyError). Inner keys that are not also outer keys
    are ignored.

    Raises:
        ValueError: if *prob_dict* is empty.
    """
    names = sorted(prob_dict)
    if not names:
        raise ValueError("input dictionary is empty; nothing to plot")
    matrix = np.array(
        [[prob_dict[row][col] for col in names] for row in names]
    )
    return pd.DataFrame(matrix, index=names, columns=names)


def row_linkage(prob_df):
    """Return the complete-linkage clustering of the rows of *prob_df*.

    Rows are compared with Euclidean distance.

    Raises:
        ValueError: if there are fewer than two rows to cluster.
    """
    if len(prob_df) < 2:
        raise ValueError("need at least two rows to build a dendrogram")
    return linkage(pdist(prob_df.values, metric='euclidean'),
                   method='complete')


def reorder_rows(prob_df, leaves):
    """Return *prob_df* with rows arranged to line up with the dendrogram.

    *leaves* is the positional leaf order reported by ``dendrogram``. A
    horizontally oriented dendrogram lists leaves bottom-to-top, while
    ``matshow`` draws row 0 at the top, hence the reversal. Columns keep
    their original order.
    """
    # .iloc is the positional replacement for the removed DataFrame.ix.
    return prob_df.iloc[list(leaves)[::-1]]


def plot_clustered_heatmap(prob_df):
    """Draw a row dendrogram next to the row-clustered heatmap and show it."""
    row_clusters = row_linkage(prob_df)

    # Single-colour tree: one palette entry plus an infinite threshold puts
    # every link in the same colour group.
    hierarchy.set_link_color_palette(['black'])

    fig = plt.figure(figsize=(8, 8))

    # Dendrogram panel (x-pos, y-pos, width, height in figure fractions).
    dendro_ax = fig.add_axes([0.09, 0.1, 0.2, 0.6])
    # NOTE(review): the visual meaning of 'right' vs 'left' has differed
    # between SciPy releases; if the tree's root ends up next to the
    # heatmap instead of the leaves, switch to orientation='left'.
    row_dendro = dendrogram(row_clusters, orientation='right',
                            color_threshold=np.inf, ax=dendro_ax)
    print(row_dendro['leaves'])

    dendro_ax.set_xticks([])
    dendro_ax.set_yticks([])
    for spine in dendro_ax.spines.values():
        spine.set_visible(False)

    clustered = reorder_rows(prob_df, row_dendro['leaves'])

    # Heatmap panel. Plot the *clustered* frame so that each image row
    # matches both its tick label and its dendrogram leaf.
    heat_ax = fig.add_axes([0.26, 0.1, 0.6, 0.6])
    image = heat_ax.matshow(clustered.values, interpolation='nearest')
    fig.colorbar(image)

    # Pin one tick per cell; otherwise the default locator skips rows and
    # the labels no longer correspond to the data.
    heat_ax.set_xticks(np.arange(len(clustered.columns)))
    heat_ax.set_xticklabels(list(clustered.columns), rotation=90, fontsize=5)
    heat_ax.yaxis.tick_right()
    heat_ax.set_yticks(np.arange(len(clustered.index)))
    heat_ax.set_yticklabels(list(clustered.index), fontsize=5)

    plt.show()


def main():
    """Command-line entry point: ``make_rt_heatmap.py <matrix.json>``."""
    if len(sys.argv) < 2:
        sys.exit("usage: make_rt_heatmap.py <matrix.json>")

    # Write without a newline so "Done" lands on the same line.
    sys.stdout.write("Loading json file ... ")
    sys.stdout.flush()
    prob_dict = load_prob_dict(sys.argv[1])
    print("Done")

    plot_clustered_heatmap(build_prob_frame(prob_dict))


if __name__ == "__main__":
    main()
#http://nbviewer.ipython.org/github/rasbt/pattern_classification/blob/master/clustering/hierarchical/clust_complete_linkage.ipynb
#Heatmaps in R:
#http://sebastianraschka.com/Articles/heatmaps_in_r.html