def gdraw0(graphs, plotname='default_name', measure='cosine'):
    '''
    Scatter-plot the similarity of every network in `graphs` to the
    network stored under the key 'kg'.

    Each graph's adjacency matrix (nx.adj_matrix) is divided by
    sqrt(sum(a**2)) and compared with the normalised 'kg' matrix through
    nfu.cosine_adj; the result is rounded to 8 decimals.  Points are laid
    out on x by the sorted rank of their key and coloured by substrings
    found in the key.  The figure is saved twice below
    cfg.dataPath('figs/meAWG/'): once without tick labels and once with
    tick labels plus a key/colour annotation.

    Args:
        graphs: dict mapping a network name to a graph; must contain 'kg'.
        plotname: text inserted into the output file names.
        measure: similarity measure; only 'cosine' is implemented.

    Raises:
        KeyError: if `graphs` has no 'kg' entry.
        ValueError: if `measure` is not 'cosine'.
    '''
    if 'kg' not in graphs:
        raise KeyError('kg')
    if measure != 'cosine':
        raise ValueError(
            "unsupported measure {0!r}: gdraw0 only implements 'cosine'"
            .format(measure))

    def _color_for(name):
        # First matching rule wins; mirrors the original and/or chain.
        if 'flt' in name:
            return 'orange' if name.count('thr') > 1 else 'red'
        if 'thr' in name:
            return 'yellow'
        if 'fg' in name:
            return 'green'
        if 'su' in name:
            return 'blue'
        return 'black'

    # Freeze the key order once so matrices, colours and labels line up.
    names = list(graphs.keys())

    # NOTE(review): adjacency matrices are built without a shared nodelist,
    # so this presumably relies on all graphs having the same nodes in the
    # same order -- confirm against callers.
    nrms = []
    for name in names:
        adj = array(nx.adj_matrix(graphs[name]))
        nrms.append(adj / sqrt(sum(adj ** 2)))

    reference = nrms[names.index('kg')]
    sims = array([round(nfu.cosine_adj(m, reference), 8) for m in nrms])

    srto = argsort(names)
    # argsort of an argsort gives the rank of each key in sorted order.
    xvals = argsort(srto)
    cols = [_color_for(name) for name in names]

    f = myplots.fignum(3, (.25 * len(sims), 10))
    f.clear()
    ax = f.add_subplot(111)

    # NOTE(review): `sims` is a numpy array, so `sims + [0.]` adds zero
    # element-wise; it does not append 0 to the data used for the limits.
    myplots.padded_limits(ax, xvals, sims + [0.], margin=[.02, .02])
    ax.scatter(xvals, sims, 100, color=cols)
    ax.set_ylabel('red fly similarity ({0})'.format(measure))
    ax.set_xlabel('networks')
    ax.set_xticklabels([])
    ax.set_xticks([])

    # Guide lines at 0, median, mean, the second-largest similarity and 1.
    mark_ys = [0, median(sims), mean(sims)]
    if len(sims) > 1:
        # Only defined when there are at least two networks.
        mark_ys.append(sort(sims)[::-1][1])
    mark_ys.append(1)
    ax.hlines(mark_ys, *ax.get_xlim(), linestyle=':', alpha=.2)

    f.savefig(cfg.dataPath(
        'figs/meAWG/filter_{0}_meth_{1}_nolabels.pdf'
        .format(plotname, measure)))

    # Second pass: same figure with tick labels and a name/colour listing.
    ax.set_xticks(range(len(srto)))
    ax.annotate('\n'.join(' '.join(z) for z in zip(names, cols)),
                [0, 1], xycoords='axes fraction', va='top')
    ax.set_xticklabels([names[i] for i in srto],
                       rotation=45, size='xx-small', ha='right')
    f.savefig(cfg.dataPath(
        'figs/meAWG/filter_{0}_meth_{1}_labels.pdf'
        .format(plotname, measure)))
def gdraw(bgraph, cgraphs, plotname='default_name', measure='cosine'):
    '''
    Scatter-plot the similarity of each network in `cgraphs` to `bgraph`.

    All adjacency matrices are built with nx.adj_matrix using the node
    order of `bgraph`.  With a = comparison matrix and b = base matrix the
    measures are:

        cosine       nfu.cosine_adj(a / sqrt(sum(a**2)), b / sqrt(sum(b**2)))
        jaccard      nfu.dotprod(a, b) / (sum(a) + sum(b))
        specificity  nfu.dotprod(a, b) / sum(a)
        sensitivity  nfu.dotprod(a, b) / sum(b)

    (the cosine value and each dot product are rounded to 8 decimals).
    Points are laid out on x by the sorted rank of their key and coloured
    by substrings found in the key.  The figure is saved twice below
    cfg.dataPath('figs/meAWG/'): once without tick labels and once with
    tick labels plus a legend naming the first key seen for each colour.

    Args:
        bgraph: base graph every other graph is compared against.
        cgraphs: dict mapping a network name to a comparison graph.
        plotname: text inserted into the output file names.
        measure: 'cosine', 'jaccard', 'specificity' or 'sensitivity'.

    Raises:
        ValueError: if `measure` is not one of the names above.
    '''
    def _color_for(name):
        # First matching rule wins; mirrors the original and/or chain.
        if 'flt' in name:
            return 'orange' if name.count('thr') > 1 else 'red'
        if 'thr' in name:
            return 'yellow'
        if 'fg' in name:
            return 'green'
        if 'su' in name:
            return 'blue'
        return 'black'

    # Freeze the key order once so matrices, colours and labels line up.
    names = list(cgraphs.keys())
    nodelist = bgraph.nodes()
    adjs = [array(nx.adj_matrix(cgraphs[name], nodelist=nodelist))
            for name in names]
    badj = array(nx.adj_matrix(bgraph, nodelist=nodelist))

    if measure == 'cosine':
        bnrm = badj / sqrt(sum(badj ** 2))
        nrms = [a / sqrt(sum(a ** 2)) for a in adjs]
        sims = array([round(nfu.cosine_adj(a1, bnrm), 8) for a1 in nrms])
    elif measure == 'jaccard':
        # NOTE(review): the denominator is sum(a) + sum(b); the textbook
        # Jaccard index would subtract the overlap -- confirm the intent.
        sims = array([round(nfu.dotprod(a1, badj), 8) /
                      (sum(a1) + sum(badj)) for a1 in adjs])
    elif measure == 'specificity':
        sims = array([round(nfu.dotprod(a1, badj), 8) / sum(a1)
                      for a1 in adjs])
    elif measure == 'sensitivity':
        sims = array([round(nfu.dotprod(a1, badj), 8) / sum(badj)
                      for a1 in adjs])
    else:
        raise ValueError(
            "unsupported measure {0!r}: expected 'cosine', 'jaccard', "
            "'specificity' or 'sensitivity'".format(measure))

    srto = argsort(names)
    # argsort of an argsort gives the rank of each key in sorted order.
    xvals = argsort(srto)
    cols = [_color_for(name) for name in names]

    f = myplots.fignum(3, (.25 * len(sims), 10))
    f.clear()
    ax = f.add_subplot(111)

    # NOTE(review): `sims` is a numpy array, so `sims + [0.]` adds zero
    # element-wise; it does not append 0 to the data used for the limits.
    myplots.padded_limits(ax, xvals, sims + [0.], margin=[.02, .02])
    ax.scatter(xvals, sims, 100, color=cols)
    ax.set_ylabel('red fly similarity ({0})'.format(measure))
    ax.set_xlabel('networks')
    ax.set_xticklabels([])
    ax.set_xticks([])

    # Guide lines at 0, median, mean, the second-largest similarity and 1.
    mark_ys = [0, median(sims), mean(sims)]
    if len(sims) > 1:
        # Only defined when there are at least two networks.
        mark_ys.append(sort(sims)[::-1][1])
    mark_ys.append(1)
    ax.hlines(mark_ys, *ax.get_xlim(), linestyle=':', alpha=.2)

    f.savefig(cfg.dataPath(
        'figs/meAWG/filter_{0}_meth_{1}_nolabels.pdf'
        .format(plotname, measure)))

    # Second pass: tick labels plus one legend line per distinct colour,
    # using the first network name encountered with that colour.
    ax.set_xticks(range(len(srto)))
    seen_cols = set()
    annotes = []
    for name, col in zip(names, cols):
        if col not in seen_cols:
            seen_cols.add(col)
            annotes.append(' '.join((name, col)))
    ax.annotate('\n'.join(annotes), [1, 1], xycoords='axes fraction',
                va='top', ha='right')
    ax.set_xticklabels([names[i] for i in srto],
                       rotation=90, va='bottom', size='xx-small')
    f.savefig(cfg.dataPath(
        'figs/meAWG/filter_{0}_meth_{1}_labels.pdf'
        .format(plotname, measure)))
def show_clusters(mods, genes, tfs, switch_axes=False, axes='space'):
    '''
    Draw cluster ellipses for two projections, shaded by how strongly a
    module is represented in each cluster, and save the figure.

    NOTE(review): this function reads `tsrt`, `time_val`, `spatial_idxs`,
    `rows`, `get_coords` and `cluster_elps`, none of which are defined
    here -- they are presumably module-level names; confirm before use.
    The parameters `genes` and `tfs` are not used.

    Args:
        mods: dict mapping a module key to a sequence of elements, each
            indexable by 'tissue'.
        genes: unused.
        tfs: unused.
        switch_axes: when true, panel j is scaled to coords[0] versus
            coords[j + 1]; otherwise both panels use coords[0] versus
            coords[1].
        axes: forwarded to get_coords and used in the title / file name.

    Raises:
        Exception: always, after the figure has been saved (see the note
            at the end of the body).
    '''
    # Modules ordered from largest to smallest.
    mod_srt = sorted(mods.items(), key=lambda x: len(x[1]))[::-1]

    f = myplots.fignum(3, (14, 8))

    # For each of the first n_tis entries of tsrt keep every fifth nucleus
    # id recorded at time_val, remembering which entry it came from.
    n_tis = 60
    ids, tis = [], []
    for i, t in enumerate(tsrt[:n_tis]):
        nucs = [x[0] for x in t[1]['cts'] if x[1] == time_val][::5]
        ids.extend(nucs)
        tis.extend([i] * len(nucs))

    # GET COORDS
    coords = array(list(get_coords(axes=axes,
                                   time_val=time_val,
                                   spatial_idxs=spatial_idxs,
                                   rows=rows,
                                   ids=ids)))
    print(shape(coords))
    print('{0} {1}'.format(np.min(coords), np.max(coords)))

    # GET ELPSES
    all_elps = cluster_elps(coords, tis)

    # For each of the three largest modules, count its members per cluster
    # (matched on 'tissue') and scale the counts by the largest count.
    module_scores = {}
    for m in mod_srt[:3]:
        mcounts = []
        for i, _ in enumerate(all_elps[0]):
            tkey = tsrt[i][0]
            mcounts.append(
                len([elt for elt in m[1] if elt['tissue'] == tkey]))
        module_scores[m[0]] = array(mcounts, float) / max(mcounts)

    # PLOT EM
    all_axes = [f.add_subplot(ax_str)
                for ax_str in ['221', '222', '223', '224']]
    ax0, ax1 = all_axes[0], all_axes[1]

    # NOTE(review): panel j uses the j-th entry of module_scores.values();
    # on interpreters with unordered dicts that choice is arbitrary.
    score_rows = list(module_scores.values())
    for j, ax in enumerate([ax0, ax1]):
        if switch_axes:
            myplots.padded_limits(ax, coords[0, :], coords[j + 1, :])
        else:
            myplots.padded_limits(ax, coords[0, :], coords[1, :])

        for i, e in enumerate(all_elps[j]):
            mscore = score_rows[j][i]
            e.width = e.width / 3
            # NOTE(review): height is derived from the already-shrunk
            # width (as in the original), not from the ellipse's height.
            e.height = e.width / 3
            e.set_zorder(mscore)
            # Solid red patch whose opacity encodes the module score.
            e.set_facecolor(squeeze([1, 0, 0]))
            e.set_alpha(mscore)
            e.set_edgecolor('none')
            ax.add_patch(e)

    for annote_axis, plane in zip([ax0, ax1], ['XY', 'XZ']):
        title = ('PCA projection in gene space of'
                 ' blastoderm nuclei for time = {0}. '
                 'Colors represent clusters used in '
                 'model building\n{1} Axes, {2} Plane'
                 ).format(time_val, axes, plane)
        annote_axis.set_title('\n'.join(tw.wrap(title, 50)))
        annote_axis.set_xticks([])
        annote_axis.set_yticks([])

    f.savefig(myplots.figpath(
        'first_filtering_time{0}_Axis={1}.pdf'.format(time_val, axes),
        delete=True))

    # NOTE(review): the original ends with this unconditional raise, which
    # looks like a deliberate stop left in after saving; kept so callers
    # see the same behaviour -- confirm whether it can be removed.
    raise Exception()
def show_mcmc(graphs, nc_base=12, weighted=True,
              module_type='doubles', measure='cosine'):
    '''
    Scatter-plot the similarity of every 'mcmc' network in `graphs` to the
    base network stored under 'network_flt<nc_base>'.

    Adjacency matrices are built with nx.adj_matrix using the node order
    of the first graph in `graphs`.  Only entries whose key contains
    'mcmc' are plotted.  Points are laid out on x by the sorted rank of
    their key and coloured by substrings found in the key.  The figure is
    saved twice through the module-level `figtemplate`: once without tick
    labels and once with them.

    Args:
        graphs: dict mapping a network name to a graph.
        nc_base: number used to build the base network's key.
        weighted: only affects the file name ('unweighted' is inserted
            when false).
        module_type: printed and inserted into the file name.
        measure: 'cosine' (nfu.cosine_adj on matrices divided by
            sqrt(sum(a**2)), rounded to 8 decimals) or 'dotprod'
            (nfu.dotprod on the raw matrices).

    Raises:
        ValueError: if the base network key is missing or `measure` is
            not supported.
    '''
    def _color_for(name):
        # First matching rule wins; mirrors the original and/or chain.
        if 'flt' in name:
            return 'orange' if name.count('thr') > 1 else 'red'
        if 'thr' in name:
            return 'yellow'
        if 'fg' in name:
            return 'green'
        if 'su' in name:
            return 'blue'
        return 'black'

    # Freeze the key order once so matrices, colours and labels line up.
    names = list(graphs.keys())
    nodelist = graphs[names[0]].nodes()
    print('using module type: {0}'.format(module_type))

    adjs = [array(nx.adj_matrix(graphs[name], nodelist=nodelist))
            for name in names]

    idxs_allowed = [i for i, k in enumerate(names) if 'mcmc' in k]
    belt = names.index('network_flt{0}'.format(nc_base))

    if measure == 'cosine':
        nrms = [a / sqrt(sum(a ** 2)) for a in adjs]
        sims = array([round(nfu.cosine_adj(nrms[i], nrms[belt]), 8)
                      for i in idxs_allowed])
    elif measure == 'dotprod':
        # The original iterated over the undefined name `adfs` here and
        # therefore always failed with NameError.
        sims = array([nfu.dotprod(adjs[i], adjs[belt])
                      for i in idxs_allowed])
    else:
        raise ValueError(
            "unsupported measure {0!r}: expected 'cosine' or 'dotprod'"
            .format(measure))

    keys_allowed = [names[i] for i in idxs_allowed]
    srto = argsort(keys_allowed)
    # argsort of an argsort gives the rank of each key in sorted order.
    xvals = argsort(srto)
    cols = [_color_for(k) for k in keys_allowed]

    f = myplots.fignum(3, (.25 * len(sims), 10))
    f.clear()
    ax = f.add_subplot(111)

    # NOTE(review): `sims` is a numpy array, so `sims + [0.]` adds zero
    # element-wise; it does not append 0 to the data used for the limits.
    myplots.padded_limits(ax, xvals, sims + [0.], margin=[.02, .02])
    ax.scatter(xvals, sims, 100, color=cols)
    ax.set_ylabel('red fly similarity ({0})'.format(measure))
    ax.set_xlabel('networks')
    ax.set_xticklabels([])
    ax.set_xticks([])

    # Guide lines at 0, median, mean, the second-largest similarity and 1.
    mark_ys = [0, median(sims), mean(sims)]
    if len(sims) > 1:
        # Only defined when there are at least two plotted networks.
        mark_ys.append(sort(sims)[::-1][1])
    mark_ys.append(1)
    ax.hlines(mark_ys, *ax.get_xlim(), linestyle=':', alpha=.2)
    # Faint vertical guide at every tenth x position.
    ax.vlines(list(range(0, len(xvals), 10)), *ax.get_ylim(),
              linestyle=':', alpha=.1)

    weight_tag = '' if weighted else 'unweighted'
    figtitle = 'mcmc_net_comparisons_{0}_{1}net_comps{2}_sim_{3}_nolabels'.\
        format(module_type, nc_base, weight_tag, measure)
    f.savefig(figtemplate.format(figtitle))

    # Second pass: same figure with the network names as tick labels.
    ax.set_xticks(range(len(srto)))
    ax.set_xticklabels([keys_allowed[i] for i in srto],
                       rotation=90, size='xx-small')
    figtitle = 'mcmc_net_comparisons_{0}_{1}net_comps{2}_sim_{3}_labels'.\
        format(module_type, nc_base, weight_tag, measure)
    f.savefig(figtemplate.format(figtitle))
def view0(modules, data_src='bdtnp', net_src='fRN', max_rank=4,
          module_type='doubles'):
    '''
    A routine to view the sign of interaction coefficients for a given
    transcription factor split per-cluster and per-module size.

    Designed to be run on the output of view_output.modules()

    One figure is drawn and saved (through the module-level `figtemplate`)
    per transcription factor found in the keys of `modules`.  For each TF
    the `max_rank` doublet and triplet modules with the most genes become
    edges: a doublet links the TF to its partner (solid), a triplet links
    the TF's two partners to each other (dotted).  Edge colour comes from
    the median coefficient of the module: red when negative, blue
    otherwise, scaled by |median| relative to the standard deviation of
    all coefficients.  An inset shows the TF's own coefficients as a
    sorted colour grid.

    Args:
        modules: dict mapping a tuple of TF names to a dict with at least
            'coefs' and 'genes' entries.  A singleton key (tf,) is
            required for every TF.
        data_src: unused.
        net_src: unused.
        max_rank: number of top doublet and top triplet modules drawn.
        module_type: 'doubles', 'triples' or 'all'; edges of the other
            kind are drawn with alpha .1 instead of 1.  Also inserted
            into the file name.

    Raises:
        KeyError: if a TF has no singleton module (tf,) in `modules`.
    '''
    tfs = sorted(set(it.chain.from_iterable(modules.keys())))

    # Layout graph: TFs are linked whenever they share a module key.
    tf_net = nx.Graph()
    tf_net.add_nodes_from(tfs)
    tf_net.add_edges_from(
        (e0, e1)
        for term in modules.keys()
        for e0 in term
        for e1 in term
        if e0 != e1)
    pos = nx.graphviz_layout(tf_net)
    fig = myplots.fignum(1, (8, 8))
    tfnodes = tf_net.nodes()

    all_coefs = list(it.chain.from_iterable(
        m['coefs'] for m in modules.values()))
    cstd = std(all_coefs)

    def colorfun(coefs):
        # Red for a negative median, blue otherwise; intensity saturates
        # once |median| reaches half of the global standard deviation.
        coef = median(coefs)
        arr = array([1., 0., 0.]) if coef < 0 else array([0., 0., 1.])
        return arr * min([1, abs(coef / cstd * 2)])

    def terms_with(tf, size):
        # Modules that contain `tf` and have `size` distinct members.
        return [(k, v) for k, v in modules.items()
                if tf in k and len(set(k)) == size]

    def top_ranked(terms):
        # The max_rank terms with the most genes, largest first.
        ordered = sorted(terms, key=lambda kv: len(kv[1]['genes']))
        return ordered[::-1][:max_rank]

    # NOTE(review): the original also computed per-edge line widths and a
    # `ckw` style dict but always passed `ckw = {}` to gd.draw, so those
    # values were never used; that dead computation (including a
    # max() over a possibly empty list) has been dropped.
    for tf in tfs:
        fig.clf()
        ax = fig.add_subplot(111)

        own_coefs = modules[(tf,)]['coefs']

        ecols, estyles, ealphas = {}, {}, {}
        edges = []

        for tt, v in top_ranked(terms_with(tf, 3)):
            # A triplet is drawn as an edge between the two partners.
            edge = tuple([t for t in set(tt) if not t == tf])
            ecols[edge] = colorfun(v['coefs'])
            ealphas[edge] = 1 if module_type in ['triples', 'all'] else .1
            estyles[edge] = 'dotted'
            edges.append(edge)

        for td, v in top_ranked(terms_with(tf, 2)):
            # A doublet is drawn as an edge from the TF to its partner.
            partners = [t for t in set(td) if not t == tf]
            edge = tuple([tf] + partners)
            ecols[edge] = colorfun(v['coefs'])
            ealphas[edge] = 1 if module_type in ['doubles', 'all'] else .1
            estyles[edge] = 'solid'
            edges.append(edge)

        tf_graph = nx.DiGraph()
        tf_graph.add_nodes_from(tfnodes)
        tf_graph.add_edges_from(edges)

        circlepoints = dict(
            (k, dict(facecolor='white',
                     alpha=round(ealphas[k], 2),
                     edgecolor=ecols[k],
                     linestyle=estyles[k],
                     linewidth=3))
            for k in tf_graph.edges())

        ax.set_title('Top modules for TF: {0}'.format(tf))
        myplots.padded_limits(ax, *zip(*pos.values()))

        # The focal TF is a large marker coloured by its own coefficients;
        # every other node is a small black dot.
        nodes = tf_graph.nodes()
        tf_color = colorfun(own_coefs)
        gd.draw(tf_graph, pos, edges,
                scatter_nodes=tf_graph.nodes(),
                skw={'s': [200 if n == tf else 2 for n in nodes],
                     'facecolor': [tf_color if n == tf else 'black'
                                   for n in nodes],
                     'edgecolor': 'black',
                     'linewidth': 2,
                     'alpha': 1},
                ckw={})
        gd.overlay(tf_graph, pos, edges, circlepoints=circlepoints)

        # Inset: the TF's own coefficients, sorted, on a square-ish grid.
        ax2 = fig.add_axes([.05, .05, .2, .2])
        ax2.set_xticks([])
        ax2.set_yticks([])
        n_coefs = len(own_coefs)
        side = ceil(sqrt(n_coefs))
        xs = mod(arange(n_coefs), side)
        ys = floor(arange(n_coefs) / side)
        cs = [colorfun([c / 2]) for c in sorted(own_coefs)]
        ax2.scatter(xs, ys, s=100, color=cs)

        fig.savefig(figtemplate.format(
            'tf_{0}_net_rank{1}_{2}'.format(tf, max_rank, module_type)))