def compare_entropy(G):
    """Plot per-motif scores against motif GC content.

    Loads the HOCOMOCO PSSMs from a hard-coded path, gathers one
    ``(G[exp][TF][0], G[exp][TF][3], GC)`` triple per (experiment, TF)
    pair, and draws three panels plus a colorbar:

    * bottom-left: log10 of element 0 vs. element 3, colored by GC content
    * top-left:    histogram of the GC content of the plotted points
    * top-right:   GC content vs. element 3

    Judging by the axis labels, element 0 is presumably a KS-test value
    and element 3 a true-positive rate -- confirm against whatever
    builds ``G``.

    Parameters
    ----------
    G : mapping
        ``G[exp][TF]`` must be indexable at positions 0 and 3.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.  If no point passes
        the filters, only the counts are printed and nothing is drawn.

    Notes
    -----
    Each experiment is kept with probability ~0.5 (unseeded
    ``np.random.uniform`` draw), so the output differs between calls.
    """
    FILE = "/Users/joazofeifa/Lab/TF_predictions/HOCOMOCOv9_AD_MEME.txt"
    D = at.load_PSSMs(FILE, test=False)

    xy = []
    for exp in G:
        # Random ~50% subsample of experiments.
        if np.random.uniform(0, 1) >= 0.5:
            continue
        for TF in G[exp]:
            GC = get_GC_motif(D, TF)
            record = G[exp][TF]
            # Skip motifs without a GC value and records whose first
            # element is falsy (0 / None); log10 below needs a non-zero value.
            if GC and record[0]:
                xy.append((record[0], record[3], GC))

    # Apply the element-3 threshold once instead of once per derived list.
    kept = [(u, v, gc) for u, v, gc in xy if v > 0.005]
    x = [math.log10(u) for u, _, _ in kept]
    y = [v for _, v, _ in kept]
    c = [gc for _, _, gc in kept]
    # Same text as the former `print len(c), len(y)`, valid on Python 2 and 3.
    print("%d %d" % (len(c), len(y)))

    if not c:
        # min()/max() below would raise ValueError on an empty selection;
        # there is nothing to draw, so stop before creating a figure.
        return

    F = plt.figure()
    ax = F.add_axes([0.1, 0.1, 0.35, 0.35])     # bottom-left scatter
    axc = F.add_axes([0.5, 0.1, 0.05, 0.35])    # colorbar strip
    ax3 = F.add_axes([0.6, 0.55, 0.35, 0.35])   # top-right scatter
    ax2 = F.add_axes([0.1, 0.55, 0.35, 0.35])   # top-left histogram

    # Map GC content onto the 'Blues' colormap, scaled to the observed range.
    cmap = plt.get_cmap('Blues')
    norm = mpl.colors.Normalize(vmin=min(c), vmax=max(c))
    m = cm.ScalarMappable(norm=norm, cmap=cmap)
    cc = [m.to_rgba(C) for C in c]
    cb1 = mpl.colorbar.ColorbarBase(axc, cmap=cmap, norm=norm,
                                    orientation='vertical')
    cb1.set_label('GC Content')

    # 'none' is the documented value for "no marker outline".
    ax.scatter(x, y, color=cc, edgecolor='none')
    ax.set_xlabel("KS-Test")
    ax.set_ylabel("True Positive Rate")
    ax.grid()

    ax2.hist(c, bins=25)
    ax2.set_xlabel("GC Content for All HOCOMOCO Motifs")
    ax2.set_ylabel("Frequency")

    # NOTE(review): a gaussian_kde density over (c, y) used to be computed
    # here but was never passed to the scatter call, so it has been removed.
    # If density coloring was intended, compute it and pass it as `c=`.
    ax3.scatter(c, y, s=5, edgecolor='none')
    ax3.set_xlabel("GC Content")
    ax3.set_ylabel("True Positive Rate")
    ax3.grid()

    plt.show()
# NOTE(review): the three statements below are the tail of a function whose
# header lies above this chunk; they are reproduced unchanged. Their
# indentation is assumed to be that function's body level -- confirm against
# the preceding code.
    nx.draw_networkx_edges(G, pos=pos, alpha=0.7,width=3,ax=ax,edge_color=colors)
    nx.draw_networkx_labels(G,pos,LABELS,font_size=7, ax=ax)
    plt.show()


# Script entry point. The boolean flags below select which optional stages
# run; with the values set here only the get_counts / threshold_as_network
# step executes.
# NOTE(review): the original line breaks were lost, so the nesting shown here
# is a reconstruction -- in particular the final `if SCIPY:` is assumed to sit
# inside the first one, because KL, H and M are only bound there. Confirm.
if __name__ == "__main__":
    make_distance = False
    # Directory prefix used for every input file read below and passed as
    # OUT= to the helper functions.
    OUT = "/Users/joazofeifa/Lab/EMG/TF_predictions/files/"
    SCIPY = False
    CUSTOM = False
    # Always runs: load the threshold assignments and draw the sixth entry
    # (index 5) of the second returned value as a network. `ts` is not used
    # again in this block.
    ts, xs = get_counts(OUT+"thresholds_nearest_assignments.tsv", show_thresh=False)
    threshold_as_network(xs[5])
    if make_distance:
        # Load the PSSMs and hand them to perform_linkage together with OUT.
        FILE = "/Users/joazofeifa/Lab/TF_predictions/HOCOMOCOv9_AD_MEME.txt"
        D = at.load_PSSMs(FILE,test=False)
        perform_linkage(D, OUT=OUT)
    if CUSTOM:
        # FILE is assigned but not referenced again in this branch.
        FILE = "/Users/joazofeifa/Lab/TF_predictions/HOCOMOCOv9_AD_MEME.txt"
        KL,M = load_matrix(OUT+"kl_distance_matrix.csv")
        try_different_thresholds(KL, M, OUT=OUT, res=50)
    if SCIPY:
        # M is bound by both loads; the value from the hamming matrix load
        # is the one passed on to linkage().
        KL,M = load_matrix(OUT+"kl_distance_matrix.csv")
        H,M = load_matrix(OUT+"hamming_distance_matrix.csv")
        SCIPY = True
        if SCIPY:
            P = linkage(KL,H,M, SHOW=True)
            extract_clusters(P, threshold=None)