def show_brain_flows(g, ngs, fgs, communities, draw_flows=False):
    '''
    Draw graph ``g`` with every node placed inside the brain region
    assigned to its community, optionally overlaying flow graphs.

    g            graph whose ``nodes()`` / ``edges()`` return lists (the
                 edge list is sliced with ``[::10]``).
    ngs          not used by this function.
    fgs          flow graphs whose edges carry a 'weight' attribute; only
                 read when ``draw_flows`` is true.
    communities  sequence of node collections; community ``i`` is placed
                 in ``voxels[i]`` as returned by ``aba.brain_regions``.
    draw_flows   when true, also draw every graph in ``fgs`` on the same
                 layout. This code used to sit behind an unconditional
                 ``return``; the default keeps that behaviour.

    Returns None.
    '''
    # Only the voxel lists are used; centers and volumes are discarded.
    _, _, voxels = aba.brain_regions(len(communities), return_voxels=True)

    p0 = {}
    n_communities = {}
    for i, c in enumerate(communities):
        nv = len(voxels[i])
        for n in c:
            # Random voxel of the region (first two coordinates) plus one
            # scalar jitter that is added to both coordinates.
            p0[n] = (voxels[i][int(floor(random.rand() * nv))][:2]
                     + random.rand() * .4)
            n_communities[n] = i

    # NOTE(review): the color table is sized by the number of nodes but
    # indexed by community number -- len(communities) may be intended.
    comm_ct = mycolors.getct(len(n_communities))
    nodelist = g.nodes()
    # Edges are colored by the community of their source node.
    ckw = dict((k, dict(facecolor='gray',
                        alpha=1,
                        linewidth=.5,
                        arrowstyle='-|>',
                        edgecolor='black',
                        color=comm_ct[n_communities[k[0]]],
                        shrinkA=0,
                        shrinkB=0))
               for k in g.edges())
    skw = dict(facecolor='none',
               edgecolor=[comm_ct[n_communities[n]] for n in nodelist],
               s=1)
    # Only every tenth edge is drawn.
    gd.draw(g, p0, g.edges()[::10],
            scatter_nodes=nodelist,
            ckw=ckw,
            skw=skw,
            ckalpha=.8,
            cktype='simple')
    if not draw_flows:
        return

    colors = mycolors.getct(len(fgs))
    for i, fg in enumerate(fgs):
        edges = fg.edges()
        # Line width scales with the edge weight.
        ckw = dict((e, dict(color=colors[i],
                            linewidth=fg[e[0]][e[1]]['weight'] / 3))
                   for e in edges)
        gd.draw(fg, p0, edges,
                ckw=ckw,
                scatter_nodes=[],
                cktype='simple',
                ckalpha=.25)
def show_brain_flows(g, ngs, fgs, communities, draw_flows=False):
    '''
    Draw graph ``g`` with every node placed inside the brain region
    assigned to its community, optionally overlaying flow graphs.

    g            graph whose ``nodes()`` / ``edges()`` return lists (the
                 edge list is sliced with ``[::10]``).
    ngs          not used by this function.
    fgs          flow graphs whose edges carry a 'weight' attribute; only
                 read when ``draw_flows`` is true.
    communities  sequence of node collections; community ``i`` is placed
                 in ``voxels[i]`` as returned by ``aba.brain_regions``.
    draw_flows   when true, also draw every graph in ``fgs`` on the same
                 layout. This code used to sit behind an unconditional
                 ``return``; the default keeps that behaviour.

    Returns None.
    '''
    # Only the voxel lists are used; centers and volumes are discarded.
    _, _, voxels = aba.brain_regions(len(communities), return_voxels=True)

    p0 = {}
    n_communities = {}
    for i, c in enumerate(communities):
        nv = len(voxels[i])
        for n in c:
            # Random voxel of the region (first two coordinates) plus one
            # scalar jitter that is added to both coordinates.
            p0[n] = (voxels[i][int(floor(random.rand() * nv))][:2]
                     + random.rand() * .4)
            n_communities[n] = i

    # NOTE(review): the color table is sized by the number of nodes but
    # indexed by community number -- len(communities) may be intended.
    comm_ct = mycolors.getct(len(n_communities))
    nodelist = g.nodes()
    # Edges are colored by the community of their source node.
    ckw = dict((k, dict(facecolor='gray',
                        alpha=1,
                        linewidth=.5,
                        arrowstyle='-|>',
                        edgecolor='black',
                        color=comm_ct[n_communities[k[0]]],
                        shrinkA=0,
                        shrinkB=0))
               for k in g.edges())
    skw = dict(facecolor='none',
               edgecolor=[comm_ct[n_communities[n]] for n in nodelist],
               s=1)
    # Only every tenth edge is drawn.
    gd.draw(g, p0, g.edges()[::10],
            scatter_nodes=nodelist,
            ckw=ckw,
            skw=skw,
            ckalpha=.8,
            cktype='simple')
    if not draw_flows:
        return

    colors = mycolors.getct(len(fgs))
    for i, fg in enumerate(fgs):
        edges = fg.edges()
        # Line width scales with the edge weight.
        ckw = dict((e, dict(color=colors[i],
                            linewidth=fg[e[0]][e[1]]['weight'] / 3))
                   for e in edges)
        gd.draw(fg, p0, edges,
                ckw=ckw,
                scatter_nodes=[],
                cktype='simple',
                ckalpha=.25)
def snp_counts(indy_arr, indy_info):
    '''
    Scatter subsampled, normalized region values (x) against SNP index (y)
    and save the figure as 'regional_snp_counts_first.pdf'.

    indy_arr   2-D array; every fifth column starting at column 4 is
               treated as one SNP, and at most 50 of them are plotted.
    indy_info  passed through to ``indy_regions``.

    Returns None.
    '''
    regions = indy_regions(indy_arr, indy_info)

    f = myplots.fignum(4, (8, 8))
    ax = f.add_subplot(111)

    # The x coordinates do not depend on the SNP: every 100th region
    # value, divided by the largest of the first 20 region values.
    rsub = array(regions[::100], float) / max(regions[:20])

    xs = []
    ys = []
    rs = []
    for i, snp in enumerate(indy_arr.T[4::5][:50]):
        ys.extend([i] * len(rsub))
        # NOTE(review): snp[:2:100] selects element 0 only, and extend()
        # adds a single array per SNP, so ``rs`` does not line up with
        # ``xs``/``ys``. ``snp[::100]`` without the list wrapper may have
        # been intended -- left unchanged pending confirmation.
        rs.extend([10 + 30 * (1 - snp[:2:100])])
        xs.extend(rsub)
        print(i)

    ax.scatter(xs, ys, rs)
    f.savefig(myplots.figpath('regional_snp_counts_first.pdf'))
    return
def show_flows(g, ngs, fgs):
    '''
    Draw graph ``g`` in gray and overlay each flow graph of ``fgs`` in
    its own color, with line widths proportional to edge weight.

    ``ngs`` is not used. Returns None.
    '''
    layout = gd.getpos(g)

    # Every edge of the base graph gets the same gray arrow style.
    base_style = {}
    for edge in g.edges():
        base_style[edge] = dict(facecolor='gray',
                                alpha=1,
                                linewidth=.5,
                                arrowstyle='-|>',
                                edgecolor='black',
                                color='gray',
                                shrinkA=0,
                                shrinkB=0)
    node_style = dict(facecolor='none', edgecolor='black', s=20)
    gd.draw(g, layout, g.edges(),
            ckw=base_style, skw=node_style, cktype='simple')

    palette = mycolors.getct(len(fgs))
    for idx, flow in enumerate(fgs):
        flow_edges = flow.edges()
        edge_weights = [flow[e[0]][e[1]]['weight'] for e in flow_edges]
        flow_style = {}
        for edge, weight in zip(flow_edges, edge_weights):
            flow_style[edge] = dict(color=palette[idx],
                                    linewidth=weight / 3)
        # No nodes are re-drawn for the overlays.
        gd.draw(flow, layout, flow_edges,
                ckw=flow_style, scatter_nodes=[], cktype='simple')
def make_degree_plots_0():
    '''
    Plot degree statistics of the synapse array and save the figure.

    Left panel: histogram and KDE of log10(1 + degree) for every
    connection type. Right panel: jittered scatter of the 'S' column
    against the 'R' column (log10 scale), titled with their correlation
    coefficient and the node with the largest combined value.

    NOTE(review): ``edge_set`` is used for the output file name but is
    not defined in this function; it must exist at module level.
    '''
    cxns = get_synapse_array()
    imaps = get_array_imaps()
    ctypes = imaps['ctypes']
    ctypes_imap = imaps['ctypes_imap']
    nnames = imaps['nnames']
    nnames_imap = imaps['nnames_imap']

    f2 = myplots.fignum(2, (12, 6))
    ax1 = f2.add_subplot(121)
    ax2 = f2.add_subplot(122)

    # Sum over axis 1; the result is indexed as [node, connection type].
    var_degs = np.sum(cxns, 1)
    maxval = log10(np.max(var_degs) + 1)
    # All connection types share the same bin edges.
    xax = linspace(0, maxval, 10)

    ct = mycolors.getct(len(ctypes))
    for z in range(len(ctypes)):
        vals = log10(1 + var_degs[:, z])
        _count, kde = make_kde(vals)
        h = histogram(vals, xax)
        ax1.hist(vals, xax, color=ct[z], zorder=10, alpha=.25)
        # Scale the density by the number of binned samples so that it
        # overlays the histogram counts.
        ax1.plot(xax, kde(xax) * sum(h[0]),
                 label=ctypes[z], color=ct[z], zorder=5)

    # Braces are required: '$log_10$' would subscript only the '1'.
    ax1.set_xlabel('$log_{10}$ of edge degrees of various types')
    ax1.legend()

    logxy = [log10(1 + var_degs[:, ctypes_imap['S']]),
             log10(1 + var_degs[:, ctypes_imap['R']])]
    max_inode = np.argmax(logxy[0] + logxy[1])
    max_nodename = [k for k, v in nnames_imap.items()
                    if v == max_inode][0]

    # Small uniform jitter so that coincident points stay visible.
    ax2.scatter(logxy[0] + .15 * random.rand(len(nnames)),
                logxy[1] + .15 * random.rand(len(nnames)),
                color='red',
                alpha=.3)
    ax2.set_xlabel('Sending Degree')
    ax2.set_ylabel('Receiving Degree')

    r2 = corrcoef(logxy[0], logxy[1])[1, 0]
    myplots.maketitle(
        ax2,
        ('correlation coeff: {0:2.2},\n' +
         'max {1} has {2} $e_{{out}}$, {3} $e_{{in}}$')
        .format(r2,
                max_nodename,
                var_degs[max_inode, ctypes_imap['S']],
                var_degs[max_inode, ctypes_imap['R']]))
    myplots.maketitle(ax1, 'histogram and KDE of\nvarious edge degrees')

    f2.savefig(myplots.figpath('degree_histograms_{0}'.format(edge_set)))
def make_edge_comparisons(cgraphs, bgraphs):
    '''
    For every benchmark graph, bar-plot how similar each candidate
    graph's edge set is to it and save one figure per benchmark.

    cgraphs  dict name -> candidate graph (``edges()`` must be hashable
             tuples).
    bgraphs  dict name -> benchmark graph.

    Three metrics are plotted, one per subplot:
      jaccard  |B & C| / |B | C|
      spec     |B & C| / |C|
      sens     |B & C| / |B|
    A metric whose denominator is zero is reported as 0.0 instead of
    raising ZeroDivisionError.

    Figures are written to ``figtemplate.format('edges_vs_<bname>')``.
    '''
    def _ratio(num, den):
        # Empty edge sets give a zero denominator.
        if not den:
            return 0.
        return float(num) / den

    cgsets = dict((k, set(v.edges())) for k, v in cgraphs.items())
    # Candidates are always visited in name order so that bar heights and
    # bar colors stay aligned.
    names = sorted(cgraphs.keys())

    for bname, bg in bgraphs.items():
        f = myplots.fignum(3, (8, 8))
        f.clear()
        axes = [f.add_subplot(311),
                f.add_subplot(312),
                f.add_subplot(313)]
        ccolors = dict(zip(cgraphs.keys(), mycolors.getct(len(cgraphs))))
        bgset = set(bg.edges())

        yvals = {'jaccard': [], 'spec': [], 'sens': []}
        for cname in names:
            cgset = cgsets[cname]
            n_shared = len(bgset.intersection(cgset))
            yvals['jaccard'].append(
                _ratio(n_shared, len(bgset.union(cgset))))
            yvals['spec'].append(_ratio(n_shared, len(cgset)))
            yvals['sens'].append(_ratio(n_shared, len(bgset)))

        for j, (metric_name, heights) in enumerate(yvals.items()):
            print(heights)
            ax = axes[j]
            # Rank of each bar: bars are laid out in ascending height.
            xvals = argsort(argsort(heights))
            ax.bar(xvals, heights, color=[ccolors[n] for n in names])
            ax.set_title('edge similarity vs {0}, metric: {1}'.
                         format(bname, metric_name))

        # NOTE(review): the collapsed source does not show whether the
        # legend was added once per figure or once per subplot; it is
        # added once per figure here.
        myplots.color_legend(f, ccolors.values(), ccolors.keys())
        f.savefig(figtemplate.format('edges_vs_{0}'.format(bname)))
def plot_ronn(**kwargs):
    '''
    Build a matrix of summed time-of-day seconds from ``last_5`` and show
    it as a stacked seismic plot.

    Row i / column j holds the sum over the entries of bucket j of
    history i of ``hour*3600 + minute*60 + second`` of ``created_at``;
    empty buckets stay 0. Only the last 20 columns are plotted, and each
    row is shifted so that its minimum is 0.
    '''
    l5 = last_5(**mem.sr(kwargs))
    histories = l5.values()

    grid = zeros((len(l5), len(histories[0])))
    for row, history in enumerate(histories):
        for col, bucket in enumerate(history):
            if len(bucket) != 0:
                stamps = [entry.created_at for entry in bucket]
                grid[row, col] = np.sum(
                    [(((ts.hour) * 60 + ts.minute) * 60) + ts.second
                     for ts in stamps])

    recent = grid[:, -20:]
    # Subtract the per-row minimum.
    recent = recent - np.min(recent, 1)[:, newaxis]

    palette = mycolors.getct(len(recent))
    seismic.seismic(recent, stacked=True, colors=palette)
def show_binned_data_1d(descriptors, datum, cmaps):
    '''
    Grab metadata and parsed grids from binned_data_1d and plot them.

    descriptors  sequence; ``descriptors[i][0]`` is the title of panel i
                 and ``len(descriptors)`` is the number of panel rows.
    datum        sequence of arrays, one per panel; each is summed over
                 axis 0 and its last axis is indexed by task.
    cmaps        indexable by task number, used as line labels.

    Draws one panel per element of ``datum`` on figure 1, with one line
    per task showing log(2 + every 20th summed value). Returns None; an
    empty ``datum`` leaves the figure without panels.
    '''
    f = myplots.fignum(1, (6, 8))
    if len(datum) == 0:
        return

    n_rows = len(descriptors)
    # The task count is read off the last axis of the first grid.
    ntasks = shape(datum[0])[-1]
    task_colors = mycolors.getct(ntasks)

    for i, e in enumerate(datum):
        # The three-integer form also works for ten or more rows, which
        # the '{rows}1{index}' string form cannot express.
        ax = f.add_subplot(n_rows, 1, i + 1)
        ax.set_title(descriptors[i][0])
        sums = np.sum(e, 0)
        # A separate index name keeps the panel index ``i`` intact.
        for t, task in enumerate(sums.T):
            ax.plot(log(2 + task[::20]),
                    color=task_colors[t],
                    label=cmaps[t])
def show_brain():
    '''
    Scatter the voxels of the ten largest structures on figure 3.

    Structures come from ``structure_voxels()`` and are ranked by their
    number of voxels; every tenth voxel of each is drawn using its first
    two coordinates, one color per structure.
    '''
    import matplotlib.pyplot as plt

    n = 10
    f = plt.figure(3)
    plt.clf()
    ax = f.add_subplot(111)

    coords = structure_voxels()
    # Ascending sort by voxel count, reversed, truncated to the top n.
    ranked = sorted(coords.iteritems(), key=lambda item: len(item[1]))
    largest = ranked[::-1][:n]
    struct_keys = [item[0] for item in largest]
    coords = dict((key, coords[key]) for key in struct_keys)

    f = plt.figure(3)
    ct = dict(zip(coords.keys(), mycolors.getct(len(coords))))
    for key, voxels in coords.iteritems():
        xv, yv = voxels[::10, [0, 1]].T
        ax.scatter(xv, yv, s=50, c=ct[key], edgecolor='none')
    return
def show_flows(g, ngs, fgs):
    '''
    Draw graph ``g`` in gray and overlay each flow graph of ``fgs`` in
    its own color, with line widths proportional to edge weight.

    ``ngs`` is not used. Returns None.
    '''
    layout = gd.getpos(g)

    # Every edge of the base graph gets the same gray arrow style.
    base_style = {}
    for edge in g.edges():
        base_style[edge] = dict(facecolor='gray',
                                alpha=1,
                                linewidth=.5,
                                arrowstyle='-|>',
                                edgecolor='black',
                                color='gray',
                                shrinkA=0,
                                shrinkB=0)
    node_style = dict(facecolor='none', edgecolor='black', s=20)
    gd.draw(g, layout, g.edges(),
            ckw=base_style, skw=node_style, cktype='simple')

    palette = mycolors.getct(len(fgs))
    for idx, flow in enumerate(fgs):
        flow_edges = flow.edges()
        edge_weights = [flow[e[0]][e[1]]['weight'] for e in flow_edges]
        flow_style = {}
        for edge, weight in zip(flow_edges, edge_weights):
            flow_style[edge] = dict(color=palette[idx],
                                    linewidth=weight / 3)
        # No nodes are re-drawn for the overlays.
        gd.draw(flow, layout, flow_edges,
                ckw=flow_style, scatter_nodes=[], cktype='simple')
def show_binned_data_1d(descriptors, datum, cmaps):
    '''
    Grab metadata and parsed grids from binned_data_1d and plot them.

    descriptors  sequence; ``descriptors[i][0]`` is the title of panel i
                 and ``len(descriptors)`` is the number of panel rows.
    datum        sequence of arrays, one per panel; each is summed over
                 axis 0 and its last axis is indexed by task.
    cmaps        indexable by task number, used as line labels.

    Draws one panel per element of ``datum`` on figure 1, with one line
    per task showing log(2 + every 20th summed value). Returns None; an
    empty ``datum`` leaves the figure without panels.
    '''
    f = myplots.fignum(1, (6, 8))
    if len(datum) == 0:
        return

    n_rows = len(descriptors)
    # The task count is read off the last axis of the first grid.
    ntasks = shape(datum[0])[-1]
    task_colors = mycolors.getct(ntasks)

    for i, e in enumerate(datum):
        # The three-integer form also works for ten or more rows, which
        # the '{rows}1{index}' string form cannot express.
        ax = f.add_subplot(n_rows, 1, i + 1)
        ax.set_title(descriptors[i][0])
        sums = np.sum(e, 0)
        # A separate index name keeps the panel index ``i`` intact.
        for t, task in enumerate(sums.T):
            ax.plot(log(2 + task[::20]),
                    color=task_colors[t],
                    label=cmaps[t])
def show_fixations(all_fixations,cmaps):
    # Plot, per (mutation rate, condition) pair, the mean relative change
    # in fixation time between consecutive ranked tasks as a heat map and
    # save it as 'graph_evolution_acceleration.pdf'.
    #
    # all_fixations  dict keyed by mutation rate (the values listed in
    #                mut_vals below); each value is iterated as a sequence
    #                of per-condition dicts keyed by task.
    # cmaps          indexable by task id, used for the x tick labels.
    #
    # NOTE(review): the block structure was reconstructed from a
    # whitespace-collapsed source; nesting marked below should be
    # confirmed against the original file.
    fstats = fixation_stats(all_fixations)
    ranked_tasks = fstats['ranked_ages']
    names = ['s_star','torus','torus_repl']
    #argsort(ages, key = lambda)
    mut_vals = [0.0005, 0.001, 0.002, 0.003, 0.004]
    # xs / ys / cs are filled below but only consumed by the
    # commented-out scatter call.
    xs = []
    ys = []
    cs = []
    name_colors = mycolors.getct(3)
    task_colors = mycolors.getct(9)
    # The trailing 8 is the number of task-to-task transitions stored per
    # condition (deltas[:-1] below must have exactly this length).
    all_deltas = zeros((len(mut_vals), len(all_fixations.values()[0]), 8))
    all_fracs = zeros((len(mut_vals), len(all_fixations.values()[0])))
    all_counts = zeros((len(mut_vals), len(all_fixations.values()[0])))
    mut_spool = []
    for i, m in enumerate(mut_vals):
        fix_map = all_fixations[m]
        for j, e in enumerate(fix_map):
            name_c= name_colors[j]
            # One row per ranked task, one column per entry of that
            # task's '1' list (second item of each entry).
            task_10ptime = array([ [item[1] for item in e[rt[0]]['1']] for rt in ranked_tasks])
            # Columns in which every task has a strictly positive value.
            idxs_allowed = nonzero(greater(np.min(task_10ptime, 0),0))[0]
            frac_allowed =float( len(idxs_allowed))/ shape(task_10ptime)[1]
            '''roll deltas for all sxsful completions'''
            if len(idxs_allowed )!= 0:
                nrml = task_10ptime[:,idxs_allowed]
                #nrml = 1
                # NOTE(review): roll() is called without an axis here,
                # while the tick labels below roll ranked_tasks along
                # axis 0 -- confirm which shift is intended.
                deltas = np.mean((roll(task_10ptime[:,idxs_allowed],-1) \
                                  - task_10ptime[:,idxs_allowed]) / \
                                 nrml ,1)
                all_deltas[i,j,:] = deltas[:-1]
            # NOTE(review): assumed to run for every condition, not only
            # when idxs_allowed is non-empty -- confirm.
            all_counts[i,j] = len(idxs_allowed)
            all_fracs[i,j] = frac_allowed
            mut_spool.append({'tuple':(i,j), 'mut':m, 'name': j})
            for k, e2 in enumerate(e.iteritems()):
                t,v = e2
                task_c = task_colors[k]
                # Entries of the '5' list whose first item is -1 are
                # skipped.
                p10_times = [item[1] for item in v['5'] if item[0] != -1]
                n= len(p10_times)
                # x position: mutation-rate index plus uniform jitter.
                these_x = (zeros(n) + i ) + random.uniform(size = n)/3
                xs.extend(these_x)
                ys.extend(p10_times)
                cs.extend([task_c] * n)
    # Figure 2 only receives a title and axis labels; its scatter call is
    # commented out.
    f = myplots.fignum(2,(6,6))
    ax = f.add_subplot(111)
    ax.set_title('fixation times for all tasks')
    ax.set_xlabel('mutation rate')
    ax.set_ylabel('fixation time')
    #ax.scatter(xs, ys, 20, color = cs,alpha = .4)
    f2 = myplots.fignum(3, (8,8))
    ax = f2.add_axes([.3,.3,.6,.6])
    ax.set_title('fixation time (fold change over previous tasks)')
    ax.set_xlabel('task')
    ax.set_ylabel('condition')
    # Label pairs (task, next task); the wrap-around pair is dropped.
    xlabels = [(cmaps[e[0][0]],cmaps[e[1][0]]) for e in zip(ranked_tasks, roll(ranked_tasks,-1,0))][0:-1]
    ax.set_xticks(range(len(xlabels)))
    ax.set_xticklabels(['{0} -> {1}'.format(*xl) for xl in xlabels], rotation = -90, va = 'top', ha = 'left')
    rows = []
    labels = []
    # Rows are ordered by condition index.
    for ms in sorted(mut_spool, key = lambda x:x['name']):
        tup = ms['tuple']
        rows.append(all_deltas[tup[0],tup[1],:])
        ct = all_counts[tup]
        frac =all_fracs[tup]
        mut = ms['mut']
        # frac is passed to format() but the template has no {3} field.
        labels.append('{0}: mut rate={1};n={2}'.\
                      format(names[ms['name']],mut,int(ct),frac) )
    im = ax.imshow(rows,interpolation = 'nearest')
    f2.colorbar(im)
    ax.set_yticks(range(len(mut_spool)))
    ax.set_yticklabels(labels)
    f2.savefig(myplots.figpath('graph_evolution_acceleration.pdf'))
def motif_name_vs_induction(mgroup = None, mtuple = None, hit = True, induction_type = 'ratio'):
    '''
    For every frequent motif, fit induction against motif score and plot
    the fit (top panel) and the motif positions (bottom panel).

    mgroup          motif group name; when given, the mutants of all its
                    tuples (``motif_grps(mgroup, hit=hit)``) are used.
    mtuple          motif tuple used when ``mgroup`` is None.
    hit             forwarded to ``motif_grps``.
    induction_type  'ratio' (column 0 / column 1 of the mutant values),
                    'on' (column 0) or 'off' (column 1).

    Raises ValueError for an unknown ``induction_type``.
    The figure is saved to ``figtemplate.format(<title>)``; missing
    directories are created.
    '''
    import collections

    if mgroup is not None:
        mtuples = motif_grps(mgroup, hit=hit)
        mdict = get_motif_dicts()
        muts_allowed = set(it.chain(*[mdict[k] for k in mtuples]))
    else:
        muts_allowed = set(get_motif_dicts()[mtuple])
    # Freeze one iteration order so seqs, rndvals and keys stay aligned.
    muts_allowed = list(muts_allowed)

    motifs = get_motifs()
    seqs, rndvals, keys = get_mutants()

    #USE ONLY FILTERED SEQS!
    seqs = [seqs[i] for i in muts_allowed]
    rndvals = array([rndvals[i] for i in muts_allowed])
    keys = [keys[i] for i in muts_allowed]
    keys_allowed = set(keys)

    if induction_type == 'ratio':
        mut_inductions = rndvals[:, 0] / rndvals[:, 1]
    elif induction_type == 'on':
        mut_inductions = rndvals[:, 0]
    elif induction_type == 'off':
        mut_inductions = rndvals[:, 1]
    else:
        raise ValueError(
            'unknown induction_type: {0!r}'.format(induction_type))
    inductions = dict(zip(keys, mut_inductions))

    if mgroup is None:
        figtitle = 'motifs/ind_type={1}/mname_v_induction_tuple={0}'.\
            format(mtuple, induction_type)
    else:
        figtitle = 'motifs/ind_type={1}/mname_v_induction_group={0}'.\
            format(mgroup, induction_type)

    f = myplots.fignum(3, (8, 8))

    # Count motif occurrences in a single pass over the allowed
    # sequences.
    occurrences = collections.defaultdict(int)
    for k, seq_motifs in motifs.items():
        if k in keys_allowed:
            for elt in seq_motifs:
                occurrences[elt['motif']] += 1
    # Keep only motifs that occur more than 200 times.
    unq_keys = [m for m, n_occ in occurrences.items() if n_occ > 200]

    # motif -> one record per allowed sequence with the summed score and
    # the start/end positions of that motif in the sequence.
    m_total_scores = {}
    for mkey in unq_keys:
        per_seq = []
        for skey, seq_motifs in motifs.items():
            if skey not in keys_allowed:
                continue
            hits = [elt for elt in seq_motifs if elt['motif'] == mkey]
            per_seq.append({'seq': skey,
                            'score': sum([elt['score'] for elt in hits]),
                            'starts': [elt['start'] for elt in hits],
                            'stops': [elt['end'] for elt in hits]})
        m_total_scores[mkey] = per_seq

    ax = f.add_subplot(211)
    ax2 = f.add_subplot(212)
    colors = mycolors.getct(len(unq_keys))
    thr = .15
    for count, (mname, scores) in enumerate(m_total_scores.items()):
        strong = [elt for elt in scores if elt['score'] > thr]
        # A linear fit needs a few points to be meaningful.
        if len(strong) < 3:
            continue
        inds = [log(inductions[elt['seq']]) for elt in strong]
        these_scores = [elt['score'] for elt in strong]

        xax = linspace(min(these_scores), max(these_scores), 5)
        pfit = polyfit(these_scores, inds, 1)
        ax.plot(xax, polyval(pfit, xax), color=colors[count], linewidth=3)

        # Motif positions of the first 100 sequences; red when induction
        # falls with score, blue otherwise.
        track_color = 'red' if pfit[0] < 0 else 'blue'
        ofs = 0
        for seqelt in scores[:100]:
            for start, stop in zip(seqelt['starts'], seqelt['stops']):
                ofs += .25
                ax2.plot([start, stop], [ofs, ofs + .2],
                         alpha=.5, color=track_color)

    fpath = figtemplate.format(figtitle)
    figdir = os.path.dirname(fpath)
    if figdir and not os.path.isdir(figdir):
        os.makedirs(figdir)
    f.savefig(fpath)
def map_pca(indy_arr, indy_info, pca = None, metagroup = 'all', **kwargs):
    '''
    output of indy_arr must be set to linear.

    Project individuals onto the first two principal components, scatter
    them colored by region, overlay one ellipse per region and save the
    figure as 'pca_grouping=<metagroup>.pdf'.

    indy_arr   2-D array with one row per individual.
    indy_info  dict; its values, ordered by key, must line up with the
               rows of ``indy_arr`` and provide 'region_symb' and
               'region_desc'.
    pca        optional projection matrix; its first two columns are
               used. Computed with ``cc`` / ``cc_svd`` when None.
    metagroup  'all' or a key of ``region_groups(indy_info)`` selecting a
               subset of individuals.
    kwargs     accepted and ignored.
    '''
    if metagroup == 'all':
        inds_allowed = arange(len(indy_arr))
    else:
        metagroups = region_groups(indy_info)
        inds_allowed = metagroups[metagroup]['indy_idxs']

    #fetch some global variables
    regions = array(indy_regions(indy_arr, indy_info))
    iis = [indy_info[k] for k in sorted(indy_info)]

    #and then extract the metaregion
    indy_arr = indy_arr[inds_allowed]
    regions = regions[inds_allowed]
    iis = [iis[ia] for ia in inds_allowed]

    #compute pca if none is given
    # ``is None`` is required: ``== None`` on an array is an elementwise
    # comparison whose truth value is ambiguous.
    if pca is None:
        corr = cc(indy_arr)
        _indy_pcs, gene_pcs, _s = cc_svd(corr, indy_arr)
        pca = gene_pcs

    #and from pca, coordinates
    xys = dot(indy_arr, pca[:, :2]).T

    #then plot
    # Region values index the color table directly.
    ct = array(mycolors.getct(max(regions) + 1))
    f = myplots.fignum(1, (14, 8))
    ax = f.add_subplot(111)
    ax.set_title('top two principle component projection of individual genotypes')

    by_region = sorted(enumerate(regions), key=lambda x: x[1])
    for k, g in it.groupby(by_region, key=lambda x: x[1]):
        idxs = [e[0] for e in g]
        # The first member of the group supplies the legend text.
        i0 = iis[idxs[0]]
        ax.scatter(xys[0, idxs], xys[1, idxs],
                   color=ct[k],
                   label='{0} - {1}'.format(i0['region_symb'],
                                            i0['region_desc']))

    #plot regionwide ellipses
    # Regions are renumbered 0..n-1 for the ellipse fit; c_rmap maps the
    # cluster number back to the region value to pick its color.
    c_imap = dict((v, k) for k, v in enumerate(set(regions)))
    c_rmap = dict((v, k) for k, v in c_imap.items())
    clusters = array([c_imap[r] for r in regions])
    csort = argsort(clusters)
    elps, infos = elpsfit.cluster_ellipses(xys[:, csort], clusters[csort])
    for i, e in enumerate(elps):
        e.set_alpha(.75)
        e.set_facecolor(ct[c_rmap[i]])
        e.set_edgecolor('none')
        # Width and height are swapped and doubled.
        maj = e.width
        minor = e.height
        e.height = maj * 2
        e.width = minor * 2
        ax.add_patch(e)

    ax.legend()
    f.savefig(myplots.figpath('pca_grouping={0}.pdf'.format(metagroup)))
    return
def motif_dist_v_cooperativity(mgroup = None, mtuple = None, hit = True, induction_type = 'ratio', midpoint = None):
    '''
    Plot the 90th percentile of log induction as a function of the
    distance between motif centers and ``midpoint``.

    mgroup          motif group name; when given, the mutants of all its
                    tuples (``motif_grps(mgroup, hit=hit)``) are used.
    mtuple          motif tuple used when ``mgroup`` is None.
    hit             forwarded to ``motif_grps``; also tags the file name.
    induction_type  'ratio' (column 0 / column 1 of the mutant values),
                    'on' (column 0) or 'off' (column 1).
    midpoint        reference position; required.

    Raises ValueError when ``midpoint`` is None or ``induction_type`` is
    unknown. The figure is saved to ``figtemplate.format(<title>)``;
    missing directories are created.
    '''
    import collections

    if midpoint is None:
        raise ValueError('midpoint must be given')

    if mgroup is not None:
        mtuples = motif_grps(mgroup, hit=hit)
        mdict = get_motif_dicts()
        muts_allowed = set(it.chain(*[mdict[k] for k in mtuples]))
    else:
        muts_allowed = set(get_motif_dicts()[mtuple])
    # Freeze one iteration order so seqs, rndvals and keys stay aligned.
    muts_allowed = list(muts_allowed)

    motifs = get_motifs()
    seqs, rndvals, keys = get_mutants()

    #USE ONLY FILTERED SEQS!
    seqs = [seqs[i] for i in muts_allowed]
    rndvals = array([rndvals[i] for i in muts_allowed])
    keys = [keys[i] for i in muts_allowed]
    keys_allowed = set(keys)

    if induction_type == 'ratio':
        mut_inductions = rndvals[:, 0] / rndvals[:, 1]
    elif induction_type == 'on':
        mut_inductions = rndvals[:, 0]
    elif induction_type == 'off':
        mut_inductions = rndvals[:, 1]
    else:
        raise ValueError(
            'unknown induction_type: {0!r}'.format(induction_type))
    inductions = dict(zip(keys, mut_inductions))

    if mgroup is None:
        figtitle = 'motifs/ind_type={1}/motif_dist_v_cooperativity_tuple={0}'.\
            format(mtuple, induction_type)
    else:
        figtitle = 'motifs/ind_type={1}/motif_dist_v_cooperativity_group={0}{2}'.\
            format(mgroup, induction_type, '_hit' if hit else '')

    thr = .25
    # Occurrence count of every motif scoring above thr in the allowed
    # sequences.
    kcounts = collections.defaultdict(int)
    for k, seq_motifs in motifs.items():
        if k in keys_allowed:
            for elt in seq_motifs:
                if elt['score'] > thr:
                    kcounts[elt['motif']] += 1

    # Keep motifs seen more than 5 and fewer than 500 times, most
    # frequent first.
    unq_keys = [m for m in kcounts if 5 < kcounts[m] < 500]
    unq_keys = sorted(unq_keys, key=lambda m: kcounts[m])[::-1]

    f = myplots.fignum(3, (8, 8))
    ax = f.add_subplot(111)

    # (distance of the motif center from midpoint, log induction) for
    # every retained motif hit; at most 1001 motifs are processed.
    all_vals = []
    for mkey in unq_keys[:1001]:
        for skey in keys_allowed:
            for elt in motifs[skey]:
                if elt['motif'] == mkey and elt['score'] > thr:
                    all_vals.append(
                        (midpoint - mean([elt['start'], elt['end']]),
                         log(inductions[skey])))

    if all_vals:
        # groupby only merges adjacent items, so the values must be
        # sorted by distance first.
        vsrt = sorted(all_vals, key=lambda x: x[0])
        # Distances may be fractional; they are truncated to an integer
        # bin index. NOTE(review): negative distances index from the end
        # of ``means`` -- confirm that motifs never lie past midpoint.
        means = zeros(int(vsrt[-1][0]) + 1)
        for k, g in it.groupby(vsrt, key=lambda x: x[0]):
            means[int(k)] = percentile([elt[1] for elt in g], 90)
        # Bins that were never filled (or are exactly 0) are not drawn.
        elts = nonzero(means)[0]
        ax.plot(elts, [means[e] for e in elts])

    ax.set_xlabel('distance from strongest promoters')
    ax.set_ylabel('induction')
    ax.annotate(figtitle, [0, 0],
                xycoords='figure fraction',
                va='bottom', ha='left')

    fpath = figtemplate.format(figtitle)
    figdir = os.path.dirname(fpath)
    if figdir and not os.path.isdir(figdir):
        os.makedirs(figdir)
    f.savefig(fpath)
def run_sig(genes, show_disc = False, weighted = True):
    '''
    Build the TF -> target network implied by a list of modules, plot it
    next to the reference network and return thresholded / filtered
    variants of both graphs.

    genes      sequence of ``(tfs, targets, x)`` triples where ``tfs`` is
               the module (a tuple of 2 or 3 TF names) and ``targets`` is
               a sequence whose items start with ``(target, count)``.
    show_disc  when true, only draw the disconnected per-module graph
               ('mcmc_disc') and return None.
    weighted   when false, every module edge gets weight 1 instead of its
               summed count.

    Returns a dict name -> graph containing 'mcmc', 'network' and their
    '_thr<p>%', '_flt<n>' and '_flt<n>_thr0' variants.
    '''
    genes = [(g[0], [[gelt[0], gelt[1]] for gelt in g[1]], g[2])
             for g in genes]
    modules = [m[0] for m in genes]
    if len(modules[0]) == 2:
        module_type = 'doubles'
    else:
        module_type = 'triples'

    tgs, tfs = nio.getNet()

    # d*: one private copy of each node per (module, target) pair.
    # c*: nodes shared between modules, edges weighted by count.
    dnodes = []
    dedges = []
    cedges = []
    cnodes = []
    for m in genes:
        for tginfo in m[1]:
            tg = tginfo[0]
            tg_mcount = tginfo[1]
            dtgnode = '{0}_{1}_mod{2}'.format(tg, tg, m[0])
            ctgnode = '{0}'.format(tg)
            dnodes.append(dtgnode)
            cnodes.append(ctgnode)
            for tf in m[0]:
                dtfnode = '{0}_{1}_mod{2}'.format(tf, tg, m[0])
                ctfnode = '{0}'.format(tf)
                dnodes.append(dtfnode)
                cnodes.append(ctfnode)
                dedges.append((dtfnode, dtgnode, tg_mcount))
                cedges.append((ctfnode, ctgnode, tg_mcount))
    nodes_allowed = set(cnodes)

    if show_disc:
        dgraph = nx.Graph()
        dgraph.add_nodes_from(list(set(dnodes)))
        dgraph.add_weighted_edges_from(list(set(dedges)))
        f = myplots.fignum(4, (8, 8))
        ax = f.add_subplot(111)
        pos = nx.graphviz_layout(dgraph, prog="neato")
        # color nodes the same in each connected subgraph
        for sub in nx.connected_component_subgraphs(dgraph):
            c = [random.random()] * nx.number_of_nodes(sub)  # random color...
            nx.draw(sub,
                    pos,
                    node_size=40,
                    node_color=c,
                    vmin=0.0,
                    vmax=1.0,
                    with_labels=False)
        figtitle = 'mcmc_disc'
        f.savefig(figtemplate.format(figtitle))
        return

    cgraph = nx.DiGraph()
    cgraph.add_nodes_from(cnodes)
    # Merge parallel edges by summing their counts.
    edge_key = lambda x: (x[0], x[1])
    cedges = [(k[0], k[1], sum([gelt[2] for gelt in grp]))
              for k, grp in it.groupby(sorted(cedges, key=edge_key),
                                       key=edge_key)]
    if not weighted:
        # Edges are tuples, so the list is rebuilt instead of assigning
        # into each edge.
        cedges = [(ce[0], ce[1], 1) for ce in cedges]
    cgraph.add_weighted_edges_from(list(set(cedges)))

    # Reference network restricted to the nodes seen in the modules.
    sfRN = [(tf, tg, float(wt))
            for tg, elt in tgs.items() if tg in nodes_allowed
            for tf, wt in zip(elt['tfs'], elt['weights'])
            if tf in nodes_allowed]
    fg = nx.DiGraph()
    fg.add_nodes_from(cnodes)
    fg.add_weighted_edges_from(sfRN)

    # NOTE(review): ``cnodes`` contains repeats, so this table has more
    # entries than the graphs have nodes -- confirm this is intended.
    colors = mycolors.getct(len(cnodes))

    f = myplots.fignum(5, (8, 8))
    ax = f.add_subplot(111)
    # Both graphs are drawn on the layout of the reference network.
    pos = nx.graphviz_layout(fg, prog="neato")
    nx.draw(cgraph,
            pos,
            node_size=100,
            node_color=colors,
            vmin=0.0,
            vmax=1.0,
            with_labels=False,
            alpha=1.)
    ax.set_title('connectivity of MCMC for network {0}'.format(module_type))
    figtitle = 'mcmc_network_{0}{1}'.\
        format(module_type, '' if weighted else 'unweighted')
    f.savefig(figtemplate.format(figtitle))

    f = myplots.fignum(5, (8, 8))
    ax = f.add_subplot(111)
    nx.draw(fg,
            pos,
            node_size=100,
            node_color=colors,
            vmin=0.0,
            vmax=1.0,
            with_labels=False)
    ax.set_title('connectivity of reference for network {0}'.format(module_type))
    figtitle = 'mcmc_ref_network_{0}{1}'.\
        format(module_type, '' if weighted else 'unweighted')
    f.savefig(figtemplate.format(figtitle))

    graphs = {'mcmc': cgraph, 'network': fg}

    # Percentile-thresholded copies of the two base graphs. The items are
    # snapshotted because the dict grows inside the loop.
    for k, g in list(graphs.items()):
        for prc in [1, 50, 95]:
            thr = percentile(
                [e[2]['weight'] for e in nx.to_edgelist(g)], prc)
            graphs['{0}_thr{1}%'.format(k, prc)] = nfu.thr_graph(g, thr)

    # Filtered copies of every graph collected so far.
    tot_edges = len(nx.to_edgelist(fg))
    for k, v in list(graphs.items()):
        for n_c in [2, 4, 6, 8, 12, 20]:
            # NOTE(review): the key does not include max_edges, so only
            # the result for the last value survives.
            for max_edges in array([.5, 1., 2.]) * tot_edges:
                gfilt = nfu.filter_graph(v, n_c=n_c)
                gfilt = nfu.top_edges(gfilt, max_edges=max_edges)
                gthr = nfu.thr_graph(gfilt, 1e-8)
                graphs['{0}_flt{1}'.format(k, n_c)] = gfilt
                graphs['{0}_flt{1}_thr0'.format(k, n_c)] = gthr
    return graphs
def run(num = 2):
    '''
    For one target gene, plot its expression together with the three
    transcription factors whose expression correlates with it most
    strongly (by absolute correlation coefficient).

    num  position, in ``nio.getBDTNP()`` iteration order, of the first
         gene to consider; the first gene at or after that position that
         is also a network target is used.

    Prints the shape of the TF expression matrix and, for each plotted
    TF, the correlation of the sorted traces and the stored correlation.
    Returns None; nothing is drawn when no gene qualifies.

    NOTE(review): block structure was reconstructed from a
    whitespace-collapsed source; this reading stops after the first
    qualifying gene, which is what makes the ``argsort(gexpr)`` ordering
    below consistent with every stored trace.
    '''
    import scipy.signal as ss

    trgs, tfs = nio.getNet()
    bdgenes = nio.getBDTNP()
    bdset = set(bdgenes.keys())
    # TFs for which expression data is available; same for every gene.
    fsub = set(tfs.keys()).intersection(bdset)

    xs, ys, colors, corrs, lcorrs = [[] for i in range(5)]
    gexpr = None
    count = 0
    for k in bdgenes:
        count += 1
        if count < num:
            continue
        if k not in trgs:
            continue
        # Every 50th row, column 4 of the value table.
        gexpr = bdgenes[k]['vals'][::50, 4].flatten()
        fexpr = [bdgenes[fname]['vals'][::50, 4].flatten()
                 for fname in fsub]
        print(shape(fexpr))
        if len(fexpr) < 3:
            continue
        ct = mycolors.getct(len(fexpr))
        for idx, tf_expr in enumerate(fexpr):
            c = corrcoef(tf_expr, gexpr)[0, 1]
            if not isfinite(c):
                c = 0
            lc = corrcoef(log(tf_expr), log(gexpr))[0, 1]
            if not isfinite(lc):
                lc = 0
            corrs.append(c)
            lcorrs.append(lc)
            ys.append(gexpr)
            xs.append(tf_expr)
            colors.append([ct[idx]] * len(tf_expr))
        break

    if not xs:
        # No gene qualified, so there is nothing to plot.
        return

    # Strongest absolute correlation first.
    cbest = argsort(-1 * abs(array(corrs)))
    f = plt.figure(1)
    f.clear()
    ax = f.add_subplot(111)
    # Traces are drawn in order of increasing target expression.
    inds = argsort(gexpr)
    for idx in cbest[:3]:
        xv = ss.medfilt(xs[idx][inds], 1)
        yv = ys[idx][inds]
        print(corrcoef(xv, yv)[0, 1])
        print(corrs[idx])
        ax.plot(xv, linewidth=10, color=colors[idx][0])
        ax.plot(yv, linewidth=10)
def show_clusters(mods, genes, tfs, switch_axes = False, axes = 'space'):
    # Draw tissue-cluster ellipses on two panels, shaded by how often the
    # largest modules mention each tissue, save the figure and then raise.
    #
    # mods         dict; entries are ranked by the length of their value,
    #              and each value is iterated for items with a 'tissue'
    #              key.
    # genes, tfs   not used in this function.
    # switch_axes  when true, panel j plots coords row 0 against row j+1;
    #              otherwise both panels use rows 0 and 1 for the limits.
    # axes         forwarded to get_coords and used in title / file name.
    #
    # NOTE(review): tsrt, time_val, spatial_idxs, rows, na, tw and mpl are
    # not defined in this function and must exist at module level.
    # NOTE(review): block structure was reconstructed from a
    # whitespace-collapsed source.
    mod_srt = sorted(mods.iteritems(), key =lambda x: len(x[1]))[::-1]
    mod_ofinterest = mod_srt[0]
    f = myplots.fignum(3, (14,8))
    ids,tis, cs = [[] for i in range(3)]
    n_tis = 60; ct = mycolors.getct(n_tis)
    # For the first n_tis entries of tsrt collect every 5th id whose
    # second field equals time_val, tagged with the entry index.
    for i,t in enumerate(tsrt[:n_tis]):
        nucs = [x[0] for x in t[1]['cts'] if x[1] == time_val][::5]
        ids.extend(nucs)
        tis.extend([i] * len(nucs))
        cs.extend([ct[i] for j in range( len(nucs))])
    #GET COORDS
    coords = array(list(get_coords(axes = axes, time_val = time_val, spatial_idxs = spatial_idxs, rows = rows, ids = ids)))
    print shape(coords)
    print np.min(coords), np.max(coords)
    #GET ELPSES
    all_elps = cluster_elps(coords, tis)
    module_scores = {}
    mods_of_interest = mod_srt[:3]
    # Per module: number of module items per tissue, scaled so that the
    # most frequent tissue scores 1.
    for m in mods_of_interest:
        mcounts = []
        for i, e in enumerate(all_elps[0]):
            t = tsrt[i]
            tkey = t[0]
            mcounts.append( len([e for elt in m[1] if elt['tissue'] == tkey]))
        mvals = array(mcounts, float) / max(mcounts)
        module_scores[m[0]] = mvals
    #PLOT EM
    # Four axes are created; only the first two are drawn on.
    all_axes = [f.add_subplot(ax_str) for ax_str in ['221', '222', '223', '224']]
    ax0 = all_axes[0]
    ax1 = all_axes[1]
    cnv = mpl.colors.ColorConverter()
    for j, ax in enumerate([ ax0, ax1]):
        if switch_axes:
            myplots.padded_limits(ax, coords[0,:], coords[j+1,:])
        else:
            myplots.padded_limits(ax, coords[0,:], coords[1,:])
        elps = all_elps[j]
        for i, e in enumerate(elps):
            yellow =squeeze(\
                mpl.colors.rgb_to_hsv( array(cnv.to_rgb('yellow'))[na, na, :]))
            red = array([0.,1,1])
            # Panel j is shaded with the j-th entry of module_scores.
            mscore = module_scores.values()[j][i]
            # hsv is computed but not used below.
            hsv = yellow * (1 - mscore) + red * mscore
            e.set_alpha(.75)
            e.width = e.width/3
            # NOTE(review): height is set from the already reduced width;
            # e.height/3 may have been intended.
            e.height = e.width/3
            e.set_zorder(mscore)
            color = squeeze([1,0,0])
            alpha = mscore
            e.set_facecolor(color)
            # Overrides the .75 alpha set above.
            e.set_alpha(alpha)
            e.set_edgecolor('none')
            ax.add_patch(e)
    for annote_axis , plane in zip(*[[ax0, ax1],['XY','XZ']]):
        # Each .format() call applies only to the string literal directly
        # before it.
        annote_axis.set_title('\n'.join(tw.wrap('''PCA projection in gene space of'''+\
            ' blastoderm nuclei for time = {0}.'.format(time_val)+\
            'Colors represent clusters used in'+\
            'model building\n{1} Axes, {2} Plane'.\
            format(time_val,axes, plane),50)))
        annote_axis.set_xticks([])
        annote_axis.set_yticks([])
    f.savefig(myplots.figpath('first_filtering_time{0}_Axis={1}.pdf'.\
        format(time_val,axes), delete = True))
    # The function always ends by raising after the figure is saved.
    raise Exception()
cols = [str(i) for i in range(max(grid_int)+1)] else: raise Exception('Grid type not implemented') #grid = params['grid'] x = shape(grid)[0] y = shape(grid)[1] n = x * y if x != y: print "Grid not square, skipping" return False ct = mycolors.getct(len(cols)) nc = len(ct) xs,ys,rs,cs = [[] for i in range(4)] c_lambda = lambda x: x>=0 and ct[x] or [0,0,0] arr = zeros((x,y,3)) for i in range(nc): arr[nonzero(equal(grid, i))] = ct[i] fig.clear() sxs = False scatter_GEO = True if scatter_GEO: sxs = scatter_array(arr,params) if not sxs: plt.imshow(arr,interpolation = 'nearest')
def plot_ts(fig,params, do_grid_resources = True):
    '''
    Plot the most recent values of every column of a printed data file
    as a seismic plot with a color legend.

    fig                figure to draw on; it is cleared first.
    params             dict with 'fname' (file handed to
                       ``au.parse_printed``), 'name' (one of 'tasks',
                       'fitness', 'all', 'cdata', 'res') and optionally
                       'window' (number of trailing samples, default 20).
    do_grid_resources  when true and ``name == 'res'``, also call
                       ``grid_resources`` with the column names.

    Raises Exception for an unhandled ``name``. Returns None.

    NOTE(review): ``fold_ts`` is not defined in this function and must
    exist at module level. When folding is active only the first half of
    the columns is plotted.
    '''
    fname = params['fname']
    window = params.get('window', 20)
    cols, data = au.parse_printed(fname)

    # Column name -> float array of that column over all parsed rows.
    data_dict = {}
    for i, c in enumerate(cols):
        data_dict[c] = array([row[i] for row in data], float)

    # The 'Update' column is not plotted.
    if 'Update' in cols:
        cols.remove('Update')

    name = params['name']
    if name not in ['tasks', 'fitness', 'all', 'cdata', 'res']:
        raise Exception('unhandled graph type: '+name)

    nplots = len(cols)
    fig.clear()
    ct = mycolors.getct(len(cols))

    do_fold = bool(fold_ts and nplots > 9)
    if do_fold:
        all_cols = [cols[0:nplots // 2], cols[nplots // 2:]]
    else:
        all_cols = [cols]

    # Only the first group of columns is drawn.
    j = 0
    per_group = nplots // len(all_cols)
    colors = ct[(per_group * j):(per_group * (j + 1))]

    yvals = zeros((len(all_cols[j]), window))
    for i, c in enumerate(all_cols[j]):
        n = min(window, len(data_dict[c]))
        if n == 0:
            # A [-0:] slice would select the whole row, so an empty
            # series is skipped and its row stays zero.
            continue
        # Right-align short series; leading samples stay zero.
        yvals[i, -n:] = data_dict[c][-n:]

    seismic(fig, yvals, colors, cols)
    up.color_legend(fig, colors, cols, pos=3)

    if cols and len(data_dict[cols[0]]) > 0:
        print(' '.join(['Current level: ',
                        str(cols[0]),
                        str(data_dict[cols[0]][-1])]))
    plt.draw()

    if name == 'res' and do_grid_resources:
        grid_resources(cols)
    return
def run():
    # Measure how close the atoms of each RVD residue come to the DNA
    # atoms and plot those values per RVD, colored by a two-letter tag,
    # saving the figure under the name 'tal_rvd_distances'.
    #
    # NOTE(review): figt, mp, rvd_residues, rvds, seq and parse_all are
    # not defined in this function and must exist at module level.
    # NOTE(review): block structure was reconstructed from a
    # whitespace-collapsed source.
    chains = parse_all()
    c0 = chains[0]
    ksrt =sorted(c0.keys())
    # Per-key summed charge and mean atom coordinate of chain 0; only
    # referenced by the commented-out plot_charges call at the end.
    charges = array([ sum([float(e['charge']) for e in c0[k]['pqr']]) for k in ksrt])
    coords = array([ mean([e.get_coord() for e in c0[k]['pdb']], 0 ) for k in ksrt])
    # Exactly two further chains are expected here.
    c1,c2 = chains[1:]
    strand_charges = []
    strand_coords = []
    for c in [c1, c2]:
        ksrt =sorted(c.keys())
        strand_charges.append(array([ sum([float(e['charge']) for e in c[k]['pqr']]) for k in ksrt]))
        strand_coords.append(array([ mean([e.get_coord() for e in c[k]['pdb']], 0 ) for k in ksrt]))
    k1 = sorted([nt for nt in c1])
    k2 = sorted([nt for nt in c2])
    # Flat list of every atom coordinate of chains 1 and 2.
    s1_atoms = list(it.chain(*[ [e.get_coord() for e in c1[k]['pdb'] ] for k in k1]))
    s2_atoms = list(it.chain(*[ [e.get_coord() for e in c2[k]['pdb'] ] for k in k2]))
    dna_atoms = []
    dna_atoms.extend(s1_atoms)
    dna_atoms.extend(s2_atoms)
    dna_atoms = array(dna_atoms)
    #nearest neighbor params:
    kres =0
    katoms_dna = 3
    kres_atoms = 3
    rvd_res = rvd_residues(c0,kres)
    xs = []
    ys = []
    cs = []
    ss = []
    ecs = []
    rdists = []
    rvd_groups = [];
    for i, rvd in enumerate(rvd_res):
        for r in rvd:
            atoms = array([e.get_coord() for e in r['pdb']])
            # |a|^2 + |b|^2 - 2 a.b for every (residue atom, DNA atom)
            # pair, i.e. the squared distance; no square root is taken
            # anywhere below.
            dists = sum(square(atoms),1)[:,newaxis] + \
                sum(square(dna_atoms),1)[newaxis,:] - \
                2 *sum(atoms[:,newaxis,:] * dna_atoms[newaxis,:,:],2)
            atom_srt_dists = np.sort(dists, 1)
            # Per residue atom: mean over its katoms_dna smallest values.
            atom_knn_avgdist = np.mean(atom_srt_dists[:,:katoms_dna],1)
            res_srt_dists = np.sort(atom_knn_avgdist)
            # The kres_atoms smallest per-atom means of this residue.
            res_k_avgdist = res_srt_dists[:kres_atoms]
            xs.append(mean(atoms[:,0]))
            ys.append(mean(atoms[:,1]))
            #colors = array([1,0,0]) * 1/atom_knn_avgdist[:,newaxis]
            cs.append(1/res_k_avgdist)
            rdists.append(res_k_avgdist)
            ss.append(50)
            ecs.append('none')
            rvd_groups.append(i)
    # The helix scatter is switched off by this hard-coded flag.
    show_helix = False
    if show_helix:
        cs = array(cs)
        cs /= np.max(cs)
        f = mp.fignum(1, (12,12))
        ax = f.add_subplot(111)
        ax.scatter(xs,ys,c = cs, s= ss, edgecolor = ecs)
        f.savefig(mp.figpath(figt.format('tal_rvd_neighborhoods')))
    # (RVD index, list of per-residue value arrays); rvd_groups is
    # appended in increasing order, so groupby sees each index once.
    rvd_dists = [(k,[e[1] for e in list(g)]) for k,g in it.groupby(zip(rvd_groups,rdists), lambda x: x[0])]
    rs = rvds()
    # Two-character slice of seq() at every position returned by rvds().
    tags = [ seq()[r:r+2] for r in rs ]
    nt = len(set(tags))
    tag_idx_in_ct = dict([(e,i) for i,e in enumerate(set(tags))])
    rvd_ct_map = dict([(i,tag_idx_in_ct[e]) for i,e in enumerate(tags)])
    ct = mycolors.getct(nt)
    f = mp.fignum(3, (12,12))
    ax= f.add_subplot(111)
    ax.set_xlabel('linear distance')
    ax.set_ylabel('nearest neighbor distance to DNA')
    labels_set = set([])
    for k, g in rvd_dists:
        # Only the first line of each tag gets a legend label.
        if tags[k] in labels_set:
            ax.plot(g, color = ct[rvd_ct_map[k]])
        else:
            labels_set.add(tags[k])
            print 'labelling'
            ax.plot(g, color = ct[rvd_ct_map[k]],label = tags[k])
    ax.legend()
    f.savefig(mp.figpath(figt.format('tal_rvd_distances')))
    #plot_charges(coords, charges, strand_coords)
    return
def show_fixations(all_fixations, cmaps):
    # Plot, per (mutation rate, condition) pair, the mean relative change
    # in fixation time between consecutive ranked tasks as a heat map and
    # save it as 'graph_evolution_acceleration.pdf'.
    #
    # all_fixations  dict keyed by mutation rate (the values listed in
    #                mut_vals below); each value is iterated as a sequence
    #                of per-condition dicts keyed by task.
    # cmaps          indexable by task id, used for the x tick labels.
    #
    # NOTE(review): the block structure was reconstructed from a
    # whitespace-collapsed source; nesting marked below should be
    # confirmed against the original file.
    fstats = fixation_stats(all_fixations)
    ranked_tasks = fstats['ranked_ages']
    names = ['s_star', 'torus', 'torus_repl']
    #argsort(ages, key = lambda)
    mut_vals = [0.0005, 0.001, 0.002, 0.003, 0.004]
    # xs / ys / cs are filled below but only consumed by the
    # commented-out scatter call.
    xs = []
    ys = []
    cs = []
    name_colors = mycolors.getct(3)
    task_colors = mycolors.getct(9)
    # The trailing 8 is the number of task-to-task transitions stored per
    # condition (deltas[:-1] below must have exactly this length).
    all_deltas = zeros((len(mut_vals), len(all_fixations.values()[0]), 8))
    all_fracs = zeros((len(mut_vals), len(all_fixations.values()[0])))
    all_counts = zeros((len(mut_vals), len(all_fixations.values()[0])))
    mut_spool = []
    for i, m in enumerate(mut_vals):
        fix_map = all_fixations[m]
        for j, e in enumerate(fix_map):
            name_c = name_colors[j]
            # One row per ranked task, one column per entry of that
            # task's '1' list (second item of each entry).
            task_10ptime = array([[item[1] for item in e[rt[0]]['1']] for rt in ranked_tasks])
            # Columns in which every task has a strictly positive value.
            idxs_allowed = nonzero(greater(np.min(task_10ptime, 0), 0))[0]
            frac_allowed = float(len(idxs_allowed)) / shape(task_10ptime)[1]
            '''roll deltas for all sxsful completions'''
            if len(idxs_allowed) != 0:
                nrml = task_10ptime[:, idxs_allowed]
                #nrml = 1
                # NOTE(review): roll() is called without an axis here,
                # while the tick labels below roll ranked_tasks along
                # axis 0 -- confirm which shift is intended.
                deltas = np.mean((roll(task_10ptime[:,idxs_allowed],-1) \
                                  - task_10ptime[:,idxs_allowed]) / \
                                 nrml ,1)
                all_deltas[i, j, :] = deltas[:-1]
            # NOTE(review): assumed to run for every condition, not only
            # when idxs_allowed is non-empty -- confirm.
            all_counts[i, j] = len(idxs_allowed)
            all_fracs[i, j] = frac_allowed
            mut_spool.append({'tuple': (i, j), 'mut': m, 'name': j})
            for k, e2 in enumerate(e.iteritems()):
                t, v = e2
                task_c = task_colors[k]
                # Entries of the '5' list whose first item is -1 are
                # skipped.
                p10_times = [item[1] for item in v['5'] if item[0] != -1]
                n = len(p10_times)
                # x position: mutation-rate index plus uniform jitter.
                these_x = (zeros(n) + i) + random.uniform(size=n) / 3
                xs.extend(these_x)
                ys.extend(p10_times)
                cs.extend([task_c] * n)
    # Figure 2 only receives a title and axis labels; its scatter call is
    # commented out.
    f = myplots.fignum(2, (6, 6))
    ax = f.add_subplot(111)
    ax.set_title('fixation times for all tasks')
    ax.set_xlabel('mutation rate')
    ax.set_ylabel('fixation time')
    #ax.scatter(xs, ys, 20, color = cs,alpha = .4)
    f2 = myplots.fignum(3, (8, 8))
    ax = f2.add_axes([.3, .3, .6, .6])
    ax.set_title('fixation time (fold change over previous tasks)')
    ax.set_xlabel('task')
    ax.set_ylabel('condition')
    # Label pairs (task, next task); the wrap-around pair is dropped.
    xlabels = [(cmaps[e[0][0]], cmaps[e[1][0]]) for e in zip(ranked_tasks, roll(ranked_tasks, -1, 0))][0:-1]
    ax.set_xticks(range(len(xlabels)))
    ax.set_xticklabels(['{0} -> {1}'.format(*xl) for xl in xlabels], rotation=-90, va='top', ha='left')
    rows = []
    labels = []
    # Rows are ordered by condition index.
    for ms in sorted(mut_spool, key=lambda x: x['name']):
        tup = ms['tuple']
        rows.append(all_deltas[tup[0], tup[1], :])
        ct = all_counts[tup]
        frac = all_fracs[tup]
        mut = ms['mut']
        # frac is passed to format() but the template has no {3} field.
        labels.append('{0}: mut rate={1};n={2}'.\
                      format(names[ms['name']],mut,int(ct),frac) )
    im = ax.imshow(rows, interpolation='nearest')
    f2.colorbar(im)
    ax.set_yticks(range(len(mut_spool)))
    ax.set_yticklabels(labels)
    f2.savefig(myplots.figpath('graph_evolution_acceleration.pdf'))