def analyze():
    """Plot Cluc/Gluc enrichment as a heat map and against mismatch count.

    Reads the 'gluc' and 'cluc' entries of ``get_data()`` and saves two
    figures through ``myplots.figpath``:

    * 'corr_matrix.pdf' -- image of ``cluc / gluc``.
    * 'enrichment_vs_mm.pdf' -- scatter of the flattened ratio (last six
      entries dropped) against a hard-coded mismatch count, with a
      first-order polynomial fit drawn on top.

    Returns None.
    """
    data = get_data()  # fetch once; both arrays come from the same result
    gl = data['gluc']
    cl = data['cluc']

    f = myplots.fignum(1)
    ax = f.add_subplot(111)
    ax.imshow(cl / gl, aspect='auto', interpolation='nearest')
    ax.set_title('Enrichment of Cluc over control Gluc')
    f.savefig(myplots.figpath('corr_matrix.pdf'))

    f.clear()
    ax = f.add_subplot(121)
    # The last six flattened entries are excluded from the scatter and fit.
    enrichment = cl.flatten()[:-6] / gl.flatten()[:-6]
    # Each listed mismatch count is repeated for three consecutive entries.
    n_mm = array([[e, e, e]
                  for e in [0, 2, 2, 2, 3, 3, 3, 0, 2, 2, 2, 3, 3, 3]],
                 float).flatten()
    ax.set_title('cluc enrichment vs mm count')
    ax.set_xlabel('mismatch count')
    ax.set_ylabel('fold enrichment ocluc')
    ax.scatter(n_mm, enrichment)

    pf1 = polyfit(n_mm, enrichment, 1)
    fit_x = [0, 2, 3]
    # Pass x explicitly: with y alone the line would be drawn at x = 0, 1, 2
    # and would not line up with the scattered mismatch counts.
    ax.plot(fit_x, polyval(pf1, fit_x))
    f.savefig(myplots.figpath('enrichment_vs_mm.pdf'))

    # NOTE(review): requested after the figure is saved and never used;
    # looks like a leftover -- kept so the figure state is unchanged.
    ax2 = f.add_subplot(121)
def analyze():
    """Plot Cluc/Gluc enrichment as a heat map and against mismatch count.

    Reads the 'gluc' and 'cluc' entries of ``get_data()`` and saves two
    figures through ``myplots.figpath``:

    * 'corr_matrix.pdf' -- image of ``cluc / gluc``.
    * 'enrichment_vs_mm.pdf' -- scatter of the flattened ratio (last six
      entries dropped) against a hard-coded mismatch count, with a
      first-order polynomial fit drawn on top.

    Returns None.
    """
    data = get_data()  # fetch once; both arrays come from the same result
    gl = data['gluc']
    cl = data['cluc']

    f = myplots.fignum(1)
    ax = f.add_subplot(111)
    ax.imshow(cl / gl, aspect='auto', interpolation='nearest')
    ax.set_title('Enrichment of Cluc over control Gluc')
    f.savefig(myplots.figpath('corr_matrix.pdf'))

    f.clear()
    ax = f.add_subplot(121)
    # The last six flattened entries are excluded from the scatter and fit.
    enrichment = cl.flatten()[:-6] / gl.flatten()[:-6]
    # Each listed mismatch count is repeated for three consecutive entries.
    n_mm = array([[e, e, e]
                  for e in [0, 2, 2, 2, 3, 3, 3, 0, 2, 2, 2, 3, 3, 3]],
                 float).flatten()
    ax.set_title('cluc enrichment vs mm count')
    ax.set_xlabel('mismatch count')
    ax.set_ylabel('fold enrichment ocluc')
    ax.scatter(n_mm, enrichment)

    pf1 = polyfit(n_mm, enrichment, 1)
    fit_x = [0, 2, 3]
    # Pass x explicitly: with y alone the line would be drawn at x = 0, 1, 2
    # and would not line up with the scattered mismatch counts.
    ax.plot(fit_x, polyval(pf1, fit_x))
    f.savefig(myplots.figpath('enrichment_vs_mm.pdf'))

    # NOTE(review): requested after the figure is saved and never used;
    # looks like a leftover -- kept so the figure state is unchanged.
    ax2 = f.add_subplot(121)
def peak_thr_histograms(**kwargs):
    '''Plot log10 histograms of simplified peak distances and scores.

    Keyword arguments are forwarded (through ``mem.rc``) to
    ``wp.get_simple_thr`` together with fixed dthr/sthr/dsign values.
    Each value of the returned mapping is read for its 'scores' and
    'dists' entries.  Two files are written: 'chip_simple_distance.pdf'
    (saved when only the distance panel exists) and
    'chip_simple_scores.pdf' (saved with both panels).
    '''
    dthr = 1500
    sthr = 1e-2
    dsign = -1
    simple = wp.get_simple_thr(
        **mem.rc(kwargs, dthr=dthr, sthr=sthr, dsign=dsign))

    # Overall score extrema across all entries; -1 means "not set yet".
    min_score = -1
    max_score = -1
    for entry in simple.itervalues():
        hi = np.max(entry['scores'])
        lo = np.min(entry['scores'])
        if max_score == -1 or hi > max_score:
            max_score = hi
        if min_score == -1 or lo < min_score:
            min_score = lo

    # Score bins are whole decades between the extrema.
    log_lo = int(floor(log10(min_score)))
    log_hi = int(ceil(log10(max_score)))
    sbin_mids = range(log_lo, log_hi + 1)
    sbins = zeros(len(sbin_mids))

    # Distance bins are dbin_size wide and cover [-dthr, dthr).
    dbin_size = 50
    dbin_mids = range(-dthr, dthr, dbin_size)
    dbins = zeros(len(dbin_mids))

    for entry in simple.itervalues():
        for dist in entry['dists']:
            dbins[int(dist + dthr) // dbin_size] += 1
        for score in entry['scores']:
            sbins[int(log10(score) - log_lo)] += 1

    fig = myplots.fignum(1, (10, 6))

    dist_ax = fig.add_subplot(121)
    dist_ax.set_ylabel('log 10 counts')
    dist_ax.set_xlabel('distance')
    dist_ax.set_title('simplified tss distances (d<{0})'.format(dthr))
    dist_ax.plot(dbin_mids, log10(dbins), color='black')
    fig.savefig(myplots.figpath('chip_simple_distance.pdf'))

    score_ax = fig.add_subplot(122)
    score_ax.set_title('simplified tss scores (s<{0:2.2})'.format(sthr))
    score_ax.set_ylabel('log10 counts')
    score_ax.set_xlabel('log10 peak score')
    score_ax.plot(sbin_mids, log10(sbins), color='black')
    fig.savefig(myplots.figpath('chip_simple_scores.pdf'))
def snp_counts(indy_arr, indy_info):
    """Scatter sub-sampled, scaled region values for a subset of SNP rows.

    Saves the figure as 'regional_snp_counts_first.pdf' (path built by
    myplots.figpath) and returns None.

    NOTE(review): ct, skip, ofs, n_snps, rset, rcounts, cs and inds are
    assigned but never read in this function.
    """
    regions = indy_regions(indy_arr, indy_info)
    ct = mycolors.getct(len(regions))
    skip = 1
    ofs = 4
    f = myplots.fignum(4, (8,8))
    ax = f.add_subplot(111)
    n_snps = 20
    rset = set(regions)
    rcounts = zeros((len(rset), n_snps))
    xs = []
    ys = []
    cs = []
    rs = []
    # Every fifth row of indy_arr.T starting at index 4, at most 50 of them.
    for i, snp in enumerate(indy_arr.T[4::5][:50]):
        # Every 100th region value divided by the max of the first 20
        # regions; the value does not depend on the loop variables.
        rsub = array(regions[::100],float) / max(regions[:20])
        inds = argsort(rsub)
        ys.extend([i] *len(rsub))
        # NOTE(review): snp[:2:100] selects only snp[0], and wrapping it in
        # a list adds a single element per iteration while xs/ys grow by
        # len(rsub) -- confirm this is intended (snp[::100] would line up
        # with the regions[::100] sampling above).
        rs.extend([10 + 30 * (1 - snp[:2:100])])
        xs.extend(rsub)
        print i
    # rs is passed positionally as the third argument of scatter.
    ax.scatter(xs,ys,rs)
    f.savefig(myplots.figpath('regional_snp_counts_first.pdf'))
    return
def run(meth = 'moment'):
    """Run ``bs.run0`` on the module-level ``arr`` and plot the result.

    The returned matrix is shown as an image; tick labels are taken from
    the module-level ``rows`` / ``cols`` reordered by the sort orders that
    ``bs.run0`` returns.  The figure is saved as
    'correlation_plot_2_4_<meth>.pdf'.

    Args:
        meth: passed through to ``bs.run0`` and used in the file name.

    Returns None.
    """
    out, srts = bs.run0(arr=arr, itr=2, meth=meth)
    f = myplots.fignum(3, (12, 6))
    ax = f.add_subplot(111)

    # Use the first returned ordering whose length matches each axis.
    col_order = [s for s in srts if len(s) == len(cols)][0]
    row_order = [s for s in srts if len(s) == len(rows)][0]
    row_labels = [rows[i] for i in row_order]
    col_labels = [cols[i] for i in col_order]

    im = ax.imshow(out,
                   interpolation='nearest',
                   cmap=plt.get_cmap('OrRd'))

    #flip the rows and columns... looks better.
    ax.set_xticks(arange(len(cols)) + .25)
    ax.set_yticks(arange(len(rows)) + .25)
    ax.set_yticklabels(list(row_labels))
    ax.set_xticklabels(col_labels)

    # NOTE(review): output kept as before -- the "rows" heading lists the
    # labels taken from ``cols`` and vice versa; confirm this is intended.
    print('rows: \n{0}'.format(', '.join([e.strip() for e in col_labels])))
    print('')
    print('cols: \n{0}'.format(', '.join([e.strip() for e in row_labels])))

    plt.colorbar(im)
    # Format the file name before it is handed to figpath, so the
    # placeholder is filled in the name and not in the resolved path.
    f.savefig(myplots.figpath('correlation_plot_2_4_{0}.pdf'.format(meth)))
    return
def make_degree_plots_0():
    """Plot per-type degree histograms and a send/receive degree scatter.

    Left panel: for every connection type, a histogram of
    log10(1 + degree) with a KDE curve from ``make_kde`` scaled to the
    histogram counts.  Right panel: jittered scatter of the log degrees
    of type 'S' against type 'R', titled with their correlation
    coefficient and the node with the largest combined log degree.

    The figure is saved as 'degree_histograms_<edge_set>'.

    NOTE(review): ``edge_set`` is not assigned in this function;
    presumably a module-level name -- confirm.
    """
    cxns = get_synapse_array()
    # NOTE(review): result unused; call kept in case it has side effects.
    rows = get_rows()
    imaps = get_array_imaps()
    ctypes = imaps['ctypes']
    ctypes_imap = imaps['ctypes_imap']
    nnames = imaps['nnames']
    nnames_imap = imaps['nnames_imap']

    f2 = myplots.fignum(2, (12, 6))
    ax1 = f2.add_subplot(121)
    ax2 = f2.add_subplot(122)

    var_degs = np.sum(cxns, 1)
    maxval = log10(np.max(var_degs) + 1)
    ct = mycolors.getct(len(ctypes))

    # Bin edges are the same for every type, so build them once.
    xax = linspace(0, maxval, 10)
    for z in range(len(ctypes)):
        vals = log10(1 + var_degs[:, z])
        count, kde = make_kde(vals)
        h = histogram(vals, xax)
        ax1.hist(vals, xax, color=ct[z], zorder=10, alpha=.25)
        # Scale the KDE by the number of binned values to overlay counts.
        ax1.plot(xax, kde(xax) * sum(h[0]),
                 label=ctypes[z], color=ct[z], zorder=5)
    # Braces are required: mathtext '_10' would subscript only the '1'.
    ax1.set_xlabel('$log_{10}$ of edge degrees of various types')
    ax1.legend()

    send_col = ctypes_imap['S']
    recv_col = ctypes_imap['R']
    logxy = [log10(1 + var_degs[:, send_col]),
             log10(1 + var_degs[:, recv_col])]
    max_inode = np.argmax(logxy[0] + logxy[1])
    max_nodename = [k for k, v in nnames_imap.iteritems()
                    if v == max_inode][0]

    # Small random jitter so coincident points remain visible.
    ax2.scatter(logxy[0] + .15 * random.rand(len(nnames)),
                logxy[1] + .15 * random.rand(len(nnames)),
                color='red', alpha=.3)
    ax2.set_xlabel('Sending Degree')
    ax2.set_ylabel('Receiving Degree')

    r2 = corrcoef(logxy[0], logxy[1])[1, 0]
    title = ('correlation coeff: {0:2.2},\n'
             'max {1} has {2} $e_{{out}}$, {3} $e_{{in}}$').format(
                 r2, max_nodename,
                 var_degs[max_inode, send_col],
                 var_degs[max_inode, recv_col])
    myplots.maketitle(ax2, title)
    myplots.maketitle(ax1, 'histogram and KDE of\nvarious edge degrees')
    f2.savefig(myplots.figpath('degree_histograms_{0}'.format(edge_set)))
def align_len_histogram(parsed):
    """Build a normalised histogram of alignment 'bits' values.

    Takes the first value of ``parsed``; for each of its entries the
    'bits' values are sorted ascending and the largest one is dropped.
    The pooled values are rescaled as ``2 * (x - min) + 8``, binned into
    seven bins between 8 and the maximum, normalised to sum to one and
    plotted.

    Raises:
        Exception: always, after the axes are set up and before anything
            is saved.  NOTE(review): this looks like a debugging stop;
            it is kept so callers see unchanged behaviour.  The
            statements that used to follow it were unreachable and
            referenced names not defined here, so they were removed.
    """
    p0 = parsed.values()[0]
    # Concatenate the per-entry lists directly so this also works when all
    # lists happen to have the same length.
    bitlens = array(list(it.chain.from_iterable(
        sorted([e['bits'] for e in val.values()])[:-1]
        for val in p0.values())))
    bitlens = 2 * (bitlens - np.min(bitlens)) + 8

    mind = 8
    maxd = max(bitlens)
    bins = linspace(mind, maxd, 8)
    h_paths, bin_edges = histogram(bitlens, bins)
    h_paths = array(h_paths, float)
    h_paths /= sum(h_paths)

    f = myplots.fignum(3, (8, 8))
    ax = f.add_subplot(111)
    ax.plot(bins[:-1], h_paths, color='red')
    ax.set_xlabel('alignment hit length')
    ax.set_ylabel('frequency')
    ax.set_title('best matched substring lengths')
    raise Exception()
def align_len_histogram(parsed):
    """Build a normalised histogram of alignment 'bits' values.

    Takes the first value of ``parsed``; for each of its entries the
    'bits' values are sorted ascending and the largest one is dropped.
    The pooled values are rescaled as ``2 * (x - min) + 8``, binned into
    seven bins between 8 and the maximum, normalised to sum to one and
    plotted.

    Raises:
        Exception: always, after the axes are set up and before anything
            is saved.  NOTE(review): this looks like a debugging stop;
            it is kept so callers see unchanged behaviour.  The
            statements that used to follow it were unreachable and
            referenced names not defined here, so they were removed.
    """
    p0 = parsed.values()[0]
    # Concatenate the per-entry lists directly so this also works when all
    # lists happen to have the same length.
    bitlens = array(list(it.chain.from_iterable(
        sorted([e['bits'] for e in val.values()])[:-1]
        for val in p0.values())))
    bitlens = 2 * (bitlens - np.min(bitlens)) + 8

    mind = 8
    maxd = max(bitlens)
    bins = linspace(mind, maxd, 8)
    h_paths, bin_edges = histogram(bitlens, bins)
    h_paths = array(h_paths, float)
    h_paths /= sum(h_paths)

    f = myplots.fignum(3, (8, 8))
    ax = f.add_subplot(111)
    ax.plot(bins[:-1], h_paths, color='red')
    ax.set_xlabel('alignment hit length')
    ax.set_ylabel('frequency')
    ax.set_title('best matched substring lengths')
    raise Exception()
def plot_easy_inference():
    """Draw the graph from ``io.getGraph()`` and save it.

    Node positions come from ``gd.getpos``; the drawing is done by
    ``gd.easy_draw`` and the figure is written to 'worm_chip_graph.pdf'.
    """
    graph = io.getGraph()
    layout = gd.getpos(graph)

    fig = myplots.fignum(4, (8, 8))
    axes = fig.add_subplot(111)
    axes.set_title('putative worm chip network')

    gd.easy_draw(graph, layout)
    out_path = myplots.figpath('worm_chip_graph.pdf')
    fig.savefig(out_path)
def plot_mers(mer_cts):
    """Plot the log of a 20-bin histogram of the values in ``mer_cts``.

    The filled curve is drawn at the left edge of each bin and the figure
    is saved as 'mer_ct_hist'.  Returns None.
    """
    fig = myplots.fignum(3, (8, 8))
    axes = fig.add_subplot(111)

    counts, edges = histogram(mer_cts.values(), 20)
    left_edges = edges[:-1]
    log_counts = log(counts)
    axes.fill_between(left_edges, log_counts,
                      edgecolor='black', linewidth=5)

    axes.set_xlabel('mer rediscovery rate')
    axes.set_ylabel('$log(n)$')
    axes.set_title('Frequencies of 5-mer reoccurence across 10,000 walks.')

    out_path = myplots.figpath('mer_ct_hist')
    fig.savefig(out_path)
    return
def peak_distance_histogram(**kwargs):
    """Plot binned peak distances for 'primaries' and 'secondaries'.

    Keyword arguments are forwarded (through ``mem.rc``) to
    ``wp.get_assay_gprops``; 'atype' (default ``wp.default_atype``) is
    only used for the title and file name.  One green line per chip is
    drawn for the 'primaries' entries and one red line for the
    'secondaries'.  Indices outside the histogram are clamped to the
    first or last bin.
    """
    atype = kwargs.get('atype', wp.default_atype)
    chips = wp.get_assay_gprops(**mem.rc(kwargs))
    chiplist = chips.values()

    # Largest absolute secondary distance over all chips; not used below.
    sec_spread = np.max([
        np.max([np.max(np.abs([hit['dist'] for hit in peak['secondaries']]))
                for peak in chip.values()])
        for chip in chips.values()])

    hist_spread = 10000
    bin_wid = 200
    bin_mids = arange(-hist_spread, hist_spread, bin_wid)
    nb = len(bin_mids)
    half = nb // 2  # offset that puts distance 0 in the middle bin

    prim_hists = zeros((len(chips), nb))
    sec_hists = zeros((len(chips), nb))
    targets = (('primaries', prim_hists), ('secondaries', sec_hists))

    for row, chip in enumerate(chiplist):
        for peak in chip.itervalues():
            for key, hists in targets:
                idx = array([hit['dist'] / bin_wid for hit in peak[key]],
                            int)
                idx += half
                idx[less(idx, 0)] = 0
                idx[greater(idx, nb - 1)] = nb - 1
                for b in idx:
                    hists[row][b] += 1

    fig = myplots.fignum(1, (8, 6))
    axes = fig.add_subplot(111)
    axes.set_title(
        'chip peak distances to primary/sec tss for {0}'.format(atype))
    for counts in prim_hists:
        axes.plot(bin_mids, counts, color='green')
    for counts in sec_hists:
        axes.plot(bin_mids, counts, color='red')
    fig.savefig(
        myplots.figpath('chip_distance_hists_for{0}.pdf'.format(atype)))
def plot_mers(mer_cts):
    """Plot the log of a 20-bin histogram of the values in ``mer_cts``.

    The filled curve is drawn at the left edge of each bin and the figure
    is saved as 'mer_ct_hist'.  Returns None.
    """
    fig = myplots.fignum(3, (8, 8))
    axes = fig.add_subplot(111)

    counts, edges = histogram(mer_cts.values(), 20)
    left_edges = edges[:-1]
    log_counts = log(counts)
    axes.fill_between(left_edges, log_counts,
                      edgecolor='black', linewidth=5)

    axes.set_xlabel('mer rediscovery rate')
    axes.set_ylabel('$log(n)$')
    axes.set_title('Frequencies of 5-mer reoccurence across 10,000 walks.')

    out_path = myplots.figpath('mer_ct_hist')
    fig.savefig(out_path)
    return
def load(res = 25):
    """Sample up to 10000 records from the atlas file and plot them in 3-D.

    The file is read sparsely: after two header lines, the reader jumps
    forward by ``size // n`` bytes, discards the partial line it landed
    in and parses the next complete comma-separated line (first three
    fields: coordinates, fourth: value).

    Args:
        res: resolution; only 25 is supported.

    Returns:
        list of (x, y, z) tuples for the sampled records.

    Raises:
        ValueError: if ``res`` is not 25 (a subclass of the ``Exception``
            raised previously).
    """
    if res != 25:
        raise ValueError('unsupported resolution: {0}'.format(res))
    fpath = '/data/brain_atlas/AtlasAnnotation25.sva'
    print('path:  {0}'.format(fpath))

    size = os.path.getsize(fpath)
    n = 10000
    skip = size // n  # seek() needs an integer offset

    coords = []
    evals = []
    # Context manager so the handle is closed even if parsing fails.
    with open(fpath) as f:
        f.readline()
        f.readline()
        while len(coords) < n:
            f.seek(skip, 1)
            f.readline()  # partial line after the jump; discarded
            l = f.readline()
            if l == '':  # end of file
                break
            lvals = [float(v) for v in l.split(',')]
            coords.append(tuple(lvals[0:3]))
            evals.append(lvals[3])
    print('len:  {0}'.format(len(coords)))

    fig = myplots.fignum(1, (8, 6))
    ax = fig.add_subplot(111, projection='3d')
    xyvals = array([[x[0], x[1], x[2]] for x in coords])
    evals = array(evals)
    # Scale values to [.., 1] and use them as the red channel of the colour.
    evals = (evals / np.max(evals))[:, newaxis] * array([1., 0, 0])
    print(shape(xyvals))
    ax.scatter(xyvals[:, 0], xyvals[:, 1], xyvals[:, 2],
               s=5, edgecolor='none', facecolor=evals)

    path = myplots.figpath('brainmap_spatial_coords_{0}'.format(res))
    fig.savefig(path)
    return coords
def process_rc(cc, rows, cols, meth="binary"):
    """Reorder ``cc`` by row/column membership and save it as an image.

    Each vector in ``rows`` / ``cols`` is assigned the index of its
    largest entry, or ``len(vector)`` when no entry is positive.  ``cc``
    is then shown with rows and columns sorted by that index and saved as
    'biclustered_expr_<meth>.pdf'.

    Args:
        cc: 2-D array indexed by (row, column).
        rows: one membership vector per row of ``cc``.
        cols: one membership vector per column of ``cc``.
        meth: label used in the output file name.

    Raises:
        Exception: always, after the figure has been saved.
            NOTE(review): looks like a debugging stop; kept so callers
            see unchanged behaviour.
    """
    # Sized from the vectors themselves; the column memberships were
    # previously allocated with len(rows), which is wrong whenever the two
    # counts differ.
    rmembers = array(
        [argmax(r) if np.max(r) > 0 else len(r) for r in rows], float)
    cmembers = array(
        [argmax(c) if np.max(c) > 0 else len(c) for c in cols], float)
    rorder = argsort(rmembers)
    corder = argsort(cmembers)

    f = myplots.fignum(3, (8, 8))
    ax = f.add_subplot(111)
    ax.imshow(cc[rorder][:, corder], aspect="auto", interpolation="nearest")
    f.savefig(myplots.figpath("biclustered_expr_{0}.pdf".format(meth)))
    raise Exception()
def plot_charges(coords, charges, strand_coords):
    """Scatter charged positions over the two strands in the XY plane.

    Points from ``coords`` are red where the matching charge is positive
    and blue otherwise.  Both strands are drawn twice in black: opaque
    underneath the charges and half-transparent on top of them.  The
    figure is saved under the name 'tal_xy_charge_scatter' formatted
    through ``figt``.
    """
    fig = mp.fignum(1, (6, 6))
    axes = fig.add_subplot(111)

    colors = []
    for q in charges:
        colors.append('red' if q > 0 else 'blue')
    axes.scatter(*coords[:, :2].T, c=colors, s=50, zorder=5)

    # First pass: solid layer below; second pass: translucent layer above.
    for alpha, zorder in ((1, -1), (.5, 6)):
        for idx in (0, 1):
            strand_xy = strand_coords[idx][:, :2].T
            axes.scatter(*strand_xy, c='black', alpha=alpha, zorder=zorder)

    fp = mp.figpath(figt.format('tal_xy_charge_scatter'))
    fig.savefig(fp)
    return
def plot_grids(params, name = 'tasks_followed.data'):
    """Render sampled grids from a whitespace-separated data file as PNGs.

    The file ``data/<name>`` is read as rows of space-separated tokens
    (blank lines skipped).  A grid is ``cols`` consecutive rows, where
    ``cols`` is the number of tokens in the first row.  Starting at row
    0, every 100th grid is passed to ``color_grid``; when that returns a
    truthy value and the grid differs from the last one saved, the
    current figure is written as '<name>iter=<offset>.png' and cleared.

    Args:
        params: passed through to ``color_grid``.
        name: file name inside the 'data' directory.
    """
    path = os.path.join('data', name)
    # Context manager so the handle is always closed.
    with open(path) as fopen:
        lines = [l.strip().split(' ') for l in fopen if l.strip() != '']

    f = plt.gcf()
    f.clear()
    if not lines:
        # Nothing to draw; avoids an IndexError on lines[0].
        return

    ofs = 0
    cols = len(lines[0])
    last = None
    while ofs + cols < len(lines):
        sub = lines[ofs:ofs + cols]
        # Advance first: the offset in the file name and printout below is
        # the start of the *next* sampled grid.
        ofs += cols * 100
        if sub == last:
            continue
        sxs = color_grid(sub, params)
        if not sxs:
            continue
        f.savefig(myplots.figpath(name + 'iter={0}.png'.format(ofs)),
                  format='png')
        f.clear()
        last = sub
        print(ofs)
def check_results(locii, results, n_runs = 400):
    """Collect (locus, 'Mean z-score') pairs per key of ``results``.

    For every key, entries of ``results[key]`` with at least 19 items are
    paired with the matching entry of ``locii[key]`` and stored in a
    2 x N float array (row 0: locus, row 1: z-score).

    NOTE(review): the function raises a bare Exception right after that;
    the scatter/savefig statements below it are unreachable as written.
    a0, names, vec and ax are not used by the reachable code apart from
    ``len(a0[0])``.
    """
    a0 = fetch_num_ali()
    names = fetch_alinames()
    f = myplots.fignum(3,(8,8))
    ax = f.add_subplot(211)
    vec = zeros(len(a0[0]))
    xys = {}
    for k in results.keys():
        # .T turns the list of [locus, z-score] pairs into two rows.
        xys[k] = array([[l,v['Mean z-score']] for v,l in zip(results[k],locii[k]) if len(v) >= 19],float).T
    # NOTE(review): presumably a debugging stop -- confirm before relying
    # on the plotting code below.
    raise Exception()
    ax2 = f.add_subplot(111)
    ax2.scatter(xys[3][0],xys[3][1])
    #ax2.scatter(xys[8][0],xys[8][1], color = 'red')
    f.savefig(myplots.figpath('run0_zscores_{0}runs'.format(n_runs)))
def get_coexpression(gc, **kwargs):
    """Return the correlation matrix of the normalised columns of ``gc``.

    ``gc`` is divided by its sum over the first axis, then
    ``corrcoef(gc.T)`` is computed.  The top-left 1000 x 1000 corner of
    the result is saved as an image in 'coex_counts_per_tissue.pdf'.

    Args:
        gc: 2-D array.
        **kwargs: accepted for interface compatibility; not used.

    Returns:
        The full correlation matrix.
    """
    # A no-op loop over gc[:1] and a second, unreachable ``return`` that
    # referenced names not defined here have been removed.
    gc = gc / sum(gc, 0)

    f = myplots.fignum(3, (8, 8))
    ax = f.add_subplot(111)
    cc = corrcoef(gc.T)
    ax.imshow(cc[:1000, :1000], aspect="auto")
    f.savefig(myplots.figpath("coex_counts_per_tissue.pdf"))
    return cc
def show_fixations(all_fixations, cmaps):
    """Summarise fixation times per mutation rate and save a heat map.

    For each mutation rate in ``mut_vals`` and each condition found under
    it, the mean relative change between rolled task times is stored and
    later drawn as one image row.  Only figure 3
    ('graph_evolution_acceleration.pdf') is saved; figure 2 is created and
    labelled, but its scatter call is commented out.

    NOTE(review): ``all_fixations`` is indexed by mutation rate and each
    value is iterated as a sequence of dicts keyed by task -- schema
    inferred from the indexing below, confirm against the caller.
    ``cmaps`` is only used to turn task identifiers into tick labels.
    """
    fstats = fixation_stats(all_fixations)
    ranked_tasks = fstats['ranked_ages']
    names = ['s_star', 'torus', 'torus_repl']
    #argsort(ages, key = lambda)
    mut_vals = [0.0005, 0.001, 0.002, 0.003, 0.004]
    xs = []
    ys = []
    cs = []
    name_colors = mycolors.getct(3)
    task_colors = mycolors.getct(9)
    # One slot per (mutation rate, condition); 8 deltas per slot.
    all_deltas = zeros((len(mut_vals), len(all_fixations.values()[0]), 8))
    all_fracs = zeros((len(mut_vals), len(all_fixations.values()[0])))
    all_counts = zeros((len(mut_vals), len(all_fixations.values()[0])))
    mut_spool = []
    for i, m in enumerate(mut_vals):
        fix_map = all_fixations[m]
        for j, e in enumerate(fix_map):
            name_c = name_colors[j]
            # Rows follow ranked_tasks; columns are the items stored under
            # key '1' for that task.
            task_10ptime = array([[item[1] for item in e[rt[0]]['1']] for rt in ranked_tasks])
            # Columns in which every task has a value greater than zero.
            idxs_allowed = nonzero(greater(np.min(task_10ptime, 0), 0))[0]
            frac_allowed = float(len(idxs_allowed)) / shape(task_10ptime)[1]
            '''roll deltas for all sxsful completions'''
            if len(idxs_allowed) != 0:
                nrml = task_10ptime[:, idxs_allowed]
                #nrml = 1
                # NOTE(review): roll is called without an axis, so numpy
                # rolls the flattened array -- confirm whether axis=0 (the
                # next task) was intended.
                deltas = np.mean((roll(task_10ptime[:,idxs_allowed],-1) \
                                  - task_10ptime[:,idxs_allowed]) / \
                                 nrml ,1)
                all_deltas[i, j, :] = deltas[:-1]
            # NOTE(review): the extent of the ``if`` body above could not be
            # determined from the source; these three statements are assumed
            # to run for every condition.
            all_counts[i, j] = len(idxs_allowed)
            all_fracs[i, j] = frac_allowed
            mut_spool.append({'tuple': (i, j), 'mut': m, 'name': j})
            for k, e2 in enumerate(e.iteritems()):
                t, v = e2
                task_c = task_colors[k]
                # Items under key '5' whose first field is not -1.
                p10_times = [item[1] for item in v['5'] if item[0] != -1]
                n = len(p10_times)
                # Horizontal jitter around the mutation-rate index.
                these_x = (zeros(n) + i) + random.uniform(size=n) / 3
                xs.extend(these_x)
                ys.extend(p10_times)
                cs.extend([task_c] * n)
    f = myplots.fignum(2, (6, 6))
    ax = f.add_subplot(111)
    ax.set_title('fixation times for all tasks')
    ax.set_xlabel('mutation rate')
    ax.set_ylabel('fixation time')
    #ax.scatter(xs, ys, 20, color = cs,alpha = .4)
    f2 = myplots.fignum(3, (8, 8))
    ax = f2.add_axes([.3, .3, .6, .6])
    ax.set_title('fixation time (fold change over previous tasks)')
    ax.set_xlabel('task')
    ax.set_ylabel('condition')
    # Consecutive task pairs in ranked order; the wrap-around pair is cut.
    xlabels = [(cmaps[e[0][0]], cmaps[e[1][0]]) for e in zip(ranked_tasks, roll(ranked_tasks, -1, 0))][0:-1]
    ax.set_xticks(range(len(xlabels)))
    ax.set_xticklabels(['{0} -> {1}'.format(*xl) for xl in xlabels], rotation=-90, va='top', ha='left')
    rows = []
    labels = []
    # Rows are grouped by condition index ('name').
    for ms in sorted(mut_spool, key=lambda x: x['name']):
        tup = ms['tuple']
        rows.append(all_deltas[tup[0], tup[1], :])
        ct = all_counts[tup]
        frac = all_fracs[tup]
        mut = ms['mut']
        # frac is passed to format() but the template has no {3} field.
        labels.append('{0}: mut rate={1};n={2}'.\
                      format(names[ms['name']],mut,int(ct),frac) )
    im = ax.imshow(rows, interpolation='nearest')
    f2.colorbar(im)
    ax.set_yticks(range(len(mut_spool)))
    ax.set_yticklabels(labels)
    f2.savefig(myplots.figpath('graph_evolution_acceleration.pdf'))
def show_fixations(all_fixations,cmaps):
    """Summarise fixation times per mutation rate and save a heat map.

    For each mutation rate in ``mut_vals`` and each condition found under
    it, the mean relative change between rolled task times is stored and
    later drawn as one image row.  Only figure 3
    ('graph_evolution_acceleration.pdf') is saved; figure 2 is created and
    labelled, but its scatter call is commented out.

    NOTE(review): ``all_fixations`` is indexed by mutation rate and each
    value is iterated as a sequence of dicts keyed by task -- schema
    inferred from the indexing below, confirm against the caller.
    ``cmaps`` is only used to turn task identifiers into tick labels.
    """
    fstats = fixation_stats(all_fixations)
    ranked_tasks = fstats['ranked_ages']
    names = ['s_star','torus','torus_repl']
    #argsort(ages, key = lambda)
    mut_vals = [0.0005, 0.001, 0.002, 0.003, 0.004]
    xs = []
    ys = []
    cs = []
    name_colors = mycolors.getct(3)
    task_colors = mycolors.getct(9)
    # One slot per (mutation rate, condition); 8 deltas per slot.
    all_deltas = zeros((len(mut_vals), len(all_fixations.values()[0]), 8))
    all_fracs = zeros((len(mut_vals), len(all_fixations.values()[0])))
    all_counts = zeros((len(mut_vals), len(all_fixations.values()[0])))
    mut_spool = []
    for i, m in enumerate(mut_vals):
        fix_map = all_fixations[m]
        for j, e in enumerate(fix_map):
            name_c= name_colors[j]
            # Rows follow ranked_tasks; columns are the items stored under
            # key '1' for that task.
            task_10ptime = array([ [item[1] for item in e[rt[0]]['1']] for rt in ranked_tasks])
            # Columns in which every task has a value greater than zero.
            idxs_allowed = nonzero(greater(np.min(task_10ptime, 0),0))[0]
            frac_allowed =float( len(idxs_allowed))/ shape(task_10ptime)[1]
            '''roll deltas for all sxsful completions'''
            if len(idxs_allowed )!= 0:
                nrml = task_10ptime[:,idxs_allowed]
                #nrml = 1
                # NOTE(review): roll is called without an axis, so numpy
                # rolls the flattened array -- confirm whether axis=0 (the
                # next task) was intended.
                deltas = np.mean((roll(task_10ptime[:,idxs_allowed],-1) \
                                  - task_10ptime[:,idxs_allowed]) / \
                                 nrml ,1)
                all_deltas[i,j,:] = deltas[:-1]
            # NOTE(review): the extent of the ``if`` body above could not be
            # determined from the source; these three statements are assumed
            # to run for every condition.
            all_counts[i,j] = len(idxs_allowed)
            all_fracs[i,j] = frac_allowed
            mut_spool.append({'tuple':(i,j), 'mut':m, 'name': j})
            for k, e2 in enumerate(e.iteritems()):
                t,v = e2
                task_c = task_colors[k]
                # Items under key '5' whose first field is not -1.
                p10_times = [item[1] for item in v['5'] if item[0] != -1]
                n= len(p10_times)
                # Horizontal jitter around the mutation-rate index.
                these_x = (zeros(n) + i ) + random.uniform(size = n)/3
                xs.extend(these_x)
                ys.extend(p10_times)
                cs.extend([task_c] * n)
    f = myplots.fignum(2,(6,6))
    ax = f.add_subplot(111)
    ax.set_title('fixation times for all tasks')
    ax.set_xlabel('mutation rate')
    ax.set_ylabel('fixation time')
    #ax.scatter(xs, ys, 20, color = cs,alpha = .4)
    f2 = myplots.fignum(3, (8,8))
    ax = f2.add_axes([.3,.3,.6,.6])
    ax.set_title('fixation time (fold change over previous tasks)')
    ax.set_xlabel('task')
    ax.set_ylabel('condition')
    # Consecutive task pairs in ranked order; the wrap-around pair is cut.
    xlabels = [(cmaps[e[0][0]],cmaps[e[1][0]]) for e in zip(ranked_tasks, roll(ranked_tasks,-1,0))][0:-1]
    ax.set_xticks(range(len(xlabels)))
    ax.set_xticklabels(['{0} -> {1}'.format(*xl) for xl in xlabels], rotation = -90, va = 'top', ha = 'left')
    rows = []
    labels = []
    # Rows are grouped by condition index ('name').
    for ms in sorted(mut_spool, key = lambda x:x['name']):
        tup = ms['tuple']
        rows.append(all_deltas[tup[0],tup[1],:])
        ct = all_counts[tup]
        frac =all_fracs[tup]
        mut = ms['mut']
        # frac is passed to format() but the template has no {3} field.
        labels.append('{0}: mut rate={1};n={2}'.\
                      format(names[ms['name']],mut,int(ct),frac) )
    im = ax.imshow(rows,interpolation = 'nearest')
    f2.colorbar(im)
    ax.set_yticks(range(len(mut_spool)))
    ax.set_yticklabels(labels)
    f2.savefig(myplots.figpath('graph_evolution_acceleration.pdf'))
def show_clusters(mods, genes, tfs, switch_axes = False, axes = 'space'):
    """Draw cluster ellipses shaded by module membership and save a PDF.

    Modules are ranked by size (largest first); for the three largest, a
    per-cluster score is computed and used as the alpha and z-order of the
    matching ellipse on the first two of four subplots.

    NOTE(review): tsrt, time_val, spatial_idxs, rows and na are read but
    not assigned here -- presumably module-level names, confirm.  ``genes``
    and ``tfs`` are not used.  The function ends with an unconditional
    ``raise Exception()`` after the figure is saved.
    """
    # Largest module first.
    mod_srt = sorted(mods.iteritems(), key =lambda x: len(x[1]))[::-1]
    mod_ofinterest = mod_srt[0]
    f = myplots.fignum(3, (14,8))
    ids,tis, cs = [[] for i in range(3)]
    n_tis = 60; ct = mycolors.getct(n_tis)
    for i,t in enumerate(tsrt[:n_tis]):
        # Every fifth id whose second field equals time_val.
        nucs = [x[0] for x in t[1]['cts'] if x[1] == time_val][::5]
        ids.extend(nucs)
        tis.extend([i] * len(nucs))
        cs.extend([ct[i] for j in range( len(nucs))])
    #GET COORDS
    coords = array(list(get_coords(axes = axes, time_val = time_val, spatial_idxs = spatial_idxs, rows = rows, ids = ids)))
    print shape(coords)
    print np.min(coords), np.max(coords)
    #GET ELPSES
    all_elps = cluster_elps(coords, tis)
    module_scores = {}
    mods_of_interest = mod_srt[:3]
    for m in mods_of_interest:
        mcounts = []
        for i, e in enumerate(all_elps[0]):
            t = tsrt[i]
            tkey = t[0]
            # Number of module elements whose 'tissue' equals this key.
            mcounts.append( len([e for elt in m[1] if elt['tissue'] == tkey]))
        # Counts scaled so the largest is 1.
        mvals = array(mcounts, float) / max(mcounts)
        module_scores[m[0]] = mvals
    #PLOT EM
    all_axes = [f.add_subplot(ax_str) for ax_str in ['221', '222', '223', '224']]
    ax0 = all_axes[0]
    ax1 = all_axes[1]
    cnv = mpl.colors.ColorConverter()
    for j, ax in enumerate([ ax0, ax1]):
        if switch_axes:
            myplots.padded_limits(ax, coords[0,:], coords[j+1,:])
        else:
            myplots.padded_limits(ax, coords[0,:], coords[1,:])
        elps = all_elps[j]
        for i, e in enumerate(elps):
            # yellow, red and hsv are computed but not applied to the patch.
            yellow =squeeze(\
                mpl.colors.rgb_to_hsv( array(cnv.to_rgb('yellow'))[na, na, :]))
            red = array([0.,1,1])
            # NOTE(review): picks the j-th dict value, so the module shown
            # depends on dict ordering -- confirm.
            mscore = module_scores.values()[j][i]
            hsv = yellow * (1 - mscore) + red * mscore
            e.set_alpha(.75)
            e.width = e.width/3
            # NOTE(review): uses the already-shrunk width, so the height
            # becomes a ninth of the original width -- confirm intent.
            e.height = e.width/3
            e.set_zorder(mscore)
            color = squeeze([1,0,0])
            alpha = mscore
            e.set_facecolor(color)
            # Overrides the .75 alpha set above.
            e.set_alpha(alpha)
            e.set_edgecolor('none')
            ax.add_patch(e)
    for annote_axis , plane in zip(*[[ax0, ax1],['XY','XZ']]):
        annote_axis.set_title('\n'.join(tw.wrap('''PCA projection in gene space of'''+\
            ' blastoderm nuclei for time = {0}.'.format(time_val)+\
            'Colors represent clusters used in'+\
            'model building\n{1} Axes, {2} Plane'.\
            format(time_val,axes, plane),50)))
        annote_axis.set_xticks([])
        annote_axis.set_yticks([])
    # delete = True is a keyword argument of myplots.figpath.
    f.savefig(myplots.figpath('first_filtering_time{0}_Axis={1}.pdf'.\
        format(time_val,axes), delete = True))
    raise Exception()
def map_pca(indy_arr, indy_info, pca = None, metagroup = 'all', **kwargs):
    '''Project individuals onto two principal components and plot them.

    output of indy_arr must be set to linear.

    Args:
        indy_arr: array with one row per individual.
        indy_info: mapping whose values (taken in key order) describe the
            individuals; 'region_symb' and 'region_desc' are read for the
            legend.
        pca: optional component matrix; its first two columns are used.
            When None, it is computed from ``indy_arr`` via ``cc`` and
            ``cc_svd``.
        metagroup: 'all', or a key of ``region_groups(indy_info)`` that
            selects a subset through its 'indy_idxs' entry.
        **kwargs: accepted for interface compatibility; not used.

    The figure is saved as 'pca_grouping=<metagroup>.pdf'.  Returns None.
    '''
    metagroups = region_groups(indy_info)
    if metagroup == 'all':
        inds_allowed = arange(len(indy_arr))
    else:
        inds_allowed = metagroups[metagroup]['indy_idxs']

    # Full-population lookups first, then restrict to the metagroup.
    regions = array(indy_regions(indy_arr, indy_info))
    iis = [v for k, v in sorted(indy_info.iteritems(),
                                key=lambda kv: kv[0])]
    indy_arr = indy_arr[inds_allowed]
    regions = regions[inds_allowed]
    iis = [iis[ia] for ia in inds_allowed]

    # Identity test: ``== None`` on an ndarray is an element-wise
    # comparison and cannot be used as a condition.
    if pca is None:
        corr = cc(indy_arr)
        indy_pcs, gene_pcs, s = cc_svd(corr, indy_arr)
        pca = gene_pcs

    # 2 x N coordinates in the plane of the first two components.
    xys = dot(indy_arr, pca[:, :2]).T

    ct = array(mycolors.getct(max(regions) + 1))
    # NOTE(review): result unused; call kept in case it has side effects.
    r_abbrevs = region_abbrevs()
    f = myplots.fignum(1, (14, 8))
    ax = f.add_subplot(111)
    ax.set_title(
        'top two principle component projection of individual genotypes')

    # One scatter call (and legend entry) per region.
    by_region = sorted(enumerate(regions), key=lambda x: x[1])
    for k, g in it.groupby(by_region, key=lambda x: x[1]):
        members = [e[0] for e in g]
        i0 = iis[members[0]]
        ax.scatter(xys[0, members], xys[1, members],
                   color=ct[k],
                   label='{0} - {1}'.format(i0['region_symb'],
                                            i0['region_desc']))

    #plot regionwide ellipses
    c_imap = dict((v, k) for k, v in enumerate(set(regions)))
    c_rmap = dict((v, k) for k, v in c_imap.iteritems())
    clusters = array([c_imap[r] for r in regions])
    csort = argsort(clusters)
    elps, infos = elpsfit.cluster_ellipses(xys[:, csort], clusters[csort])
    for i, e in enumerate(elps):
        e.set_alpha(.75)
        e.set_facecolor(ct[c_rmap[i]])
        e.set_edgecolor('none')
        # Width and height are swapped and doubled, as before.
        maj = e.width
        minor = e.height
        e.height = maj * 2
        e.width = minor * 2
        ax.add_patch(e)

    ax.legend()
    f.savefig(myplots.figpath('pca_grouping={0}.pdf'.format(metagroup)))
    return
def load(plots = defplots, reset = False):
    """Load the graph, optionally draw it, and plot scaled degree distances.

    ``plots`` is queried for the boolean keys 'basic_structure',
    'feed_forward' and 'degrees'.  Regardless of those flags, a figure with
    a scatter of transformed (out degree, in degree) pairs and their
    distance matrix is saved as 'distances_<edge_set>'.

    Returns the graph ``g``.

    NOTE(review): the extent of each ``if`` body was inferred; confirm the
    nesting against the intended control flow.
    """
    kwargs = dict(reset = reset)
    edge_set = get_edge_set()
    g = get_graph(**mem.sr(kwargs))
    pos = get_pos(**mem.sr(kwargs))
    # Triples (k2, k3, k1) where k2 and k3 are both keys of g[k1] and k3 is
    # also a key of g[k2].
    trips = set([])
    for k1 in g:
        for k2 in g[k1].keys():
            for k3 in g[k1].keys():
                if g[k2].has_key(k3):
                    trips.add((k2,k3,k1))
    # Maps each (k2, k3) pair to the position of k1.
    tripoints = dict([((e[0],e[1]),pos[e[2]]) for e in trips])
    if plots.get('basic_structure', False):
        f = myplots.fignum(1)
        ax = f.add_subplot(111)
        gd.easy_draw(g, pos)
        f.savefig(myplots.figpath('basic_structure_edges={0}'.format(edge_set)))
    if plots.get('feed_forward', True):
        gd.overlay(g,pos,g.edges(), tripoints = tripoints, alphas = dict([(e,.1) for e in g.edges()]))
        # NOTE(review): ``f`` is only assigned in the 'basic_structure'
        # branch above; with that flag off this line would fail -- confirm.
        f.savefig(myplots.figpath('feed_forward_edges={0}'.format(edge_set)))
    if plots.get('degrees' , False):
        make_degree_plots_0();
    # Result is not used below.
    maxflow = nx.algorithms.ford_fulkerson(g, 'AVAL','PVPL','weight')
    imaps = get_array_imaps()
    nnames = imaps['nnames']
    node_stats = dict([(k,{}) for k in nnames])
    # Degrees are counted by scanning the full edge list for every node.
    for k,v in node_stats.iteritems():
        v['out_degree'] = len([e for e in g.edges() if e[0] == k])
        v['in_degree'] = len([e for e in g.edges() if e[1] == k])
    f = myplots.fignum(3, (12,6))
    outs = [v['out_degree'] for k, v in node_stats.iteritems()]
    ins =[v['in_degree'] for k , v in node_stats.iteritems()]
    raw_data= array([outs,ins]).T
    # Return value ignored; presumably prepares state that transform_data
    # uses -- confirm.
    make_data_transform(raw_data)
    data = transform_data(raw_data)
    kd = make_kdtree(data)
    k = 5
    nn = compute_nns(kd, k)
    knn= nn['nn']
    knn_dists = nn['dists']
    dists = compute_dists(data)
    # Mean over neighbour columns 1 onward (column 0 is skipped).
    mean_dists = np.mean(knn_dists[:,1:],1)
    # Red channel only: square root of the mean distance scaled by its max.
    mean_colors =sqrt(mean_dists[:,newaxis] * [1/np.max(mean_dists), 0,0])
    ax = f.add_subplot(121)
    ax.scatter(data[:,0],data[:,1],s = 15, facecolor = mean_colors, edgecolor = 'none' )
    ax.set_xlabel('scaled out degree')
    ax.set_ylabel('scaled in degree')
    ax2 = f.add_subplot(122)
    ax2.imshow(dists, interpolation = 'nearest', aspect = 'auto')
    ax2.set_title('distance matrix for scaled degrees')
    f.savefig(myplots.figpath('distances_{0}'.format(edge_set)))
    return g
def run():
    """Plot, per RVD group, how close its residues sit to the DNA atoms.

    Uses the three chains returned by ``parse_all()``: the first supplies
    the residues, the other two supply the DNA atoms.  For each residue
    yielded by ``rvd_residues``, the smallest per-atom neighbour values are
    collected and drawn as one line per group, coloured by the two-letter
    tag cut from ``seq()``.  The figure is saved under the name
    'tal_rvd_distances' formatted through ``figt``.  Returns None.

    NOTE(review): charges, coords, strand_charges and strand_coords are
    only needed by the commented-out plot_charges call at the end.
    """
    chains = parse_all()
    c0 = chains[0]
    ksrt =sorted(c0.keys())
    # Per key of c0: summed 'charge' values and mean atom coordinate.
    charges = array([ sum([float(e['charge']) for e in c0[k]['pqr']]) for k in ksrt])
    coords = array([ mean([e.get_coord() for e in c0[k]['pdb']], 0 ) for k in ksrt])
    c1,c2 = chains[1:]
    strand_charges = []
    strand_coords = []
    for c in [c1, c2]:
        ksrt =sorted(c.keys())
        strand_charges.append(array([ sum([float(e['charge']) for e in c[k]['pqr']]) for k in ksrt]))
        strand_coords.append(array([ mean([e.get_coord() for e in c[k]['pdb']], 0 ) for k in ksrt]))
    k1 = sorted([nt for nt in c1])
    k2 = sorted([nt for nt in c2])
    # All atom coordinates of both strands, in sorted key order.
    s1_atoms = list(it.chain(*[ [e.get_coord() for e in c1[k]['pdb'] ] for k in k1]))
    s2_atoms = list(it.chain(*[ [e.get_coord() for e in c2[k]['pdb'] ] for k in k2]))
    dna_atoms = []
    dna_atoms.extend(s1_atoms)
    dna_atoms.extend(s2_atoms)
    dna_atoms = array(dna_atoms)
    #nearest neighbor params:
    kres =0
    katoms_dna = 3
    kres_atoms = 3
    rvd_res = rvd_residues(c0,kres)
    xs = []
    ys = []
    cs = []
    ss = []
    ecs = []
    rdists = []
    rvd_groups = [];
    for i, rvd in enumerate(rvd_res):
        for r in rvd:
            atoms = array([e.get_coord() for e in r['pdb']])
            # |a|^2 + |b|^2 - 2 a.b for every (residue atom, DNA atom) pair:
            # these are squared distances, no square root is taken.
            dists = sum(square(atoms),1)[:,newaxis] + \
                sum(square(dna_atoms),1)[newaxis,:] - \
                2 *sum(atoms[:,newaxis,:] * dna_atoms[newaxis,:,:],2)
            atom_srt_dists = np.sort(dists, 1)
            # Per residue atom: mean over its katoms_dna closest DNA atoms.
            atom_knn_avgdist = np.mean(atom_srt_dists[:,:katoms_dna],1)
            res_srt_dists = np.sort(atom_knn_avgdist)
            # The kres_atoms smallest of those per-atom means.
            res_k_avgdist = res_srt_dists[:kres_atoms]
            xs.append(mean(atoms[:,0]))
            ys.append(mean(atoms[:,1]))
            #colors = array([1,0,0]) * 1/atom_knn_avgdist[:,newaxis]
            cs.append(1/res_k_avgdist)
            rdists.append(res_k_avgdist)
            ss.append(50)
            ecs.append('none')
            rvd_groups.append(i)
    # Hard-coded off: the neighbourhood scatter below is skipped.
    show_helix = False
    if show_helix:
        cs = array(cs)
        cs /= np.max(cs)
        f = mp.fignum(1, (12,12))
        ax = f.add_subplot(111)
        ax.scatter(xs,ys,c = cs, s= ss, edgecolor = ecs)
        f.savefig(mp.figpath(figt.format('tal_rvd_neighborhoods')))
    # Group the per-residue values by RVD index; groupby relies on
    # rvd_groups already being in order, which the loop above guarantees.
    rvd_dists = [(k,[e[1] for e in list(g)]) for k,g in it.groupby(zip(rvd_groups,rdists), lambda x: x[0])]
    rs = rvds()
    # Two-character tag starting at each position returned by rvds().
    tags = [ seq()[r:r+2] for r in rs ]
    nt = len(set(tags))
    tag_idx_in_ct = dict([(e,i) for i,e in enumerate(set(tags))])
    rvd_ct_map = dict([(i,tag_idx_in_ct[e]) for i,e in enumerate(tags)])
    ct = mycolors.getct(nt)
    f = mp.fignum(3, (12,12))
    ax= f.add_subplot(111)
    ax.set_xlabel('linear distance')
    ax.set_ylabel('nearest neighbor distance to DNA')
    labels_set = set([])
    for k, g in rvd_dists:
        # Only the first line of each tag gets a legend label.
        if tags[k] in labels_set:
            ax.plot(g, color = ct[rvd_ct_map[k]])
        else:
            labels_set.add(tags[k])
            print 'labelling'
            ax.plot(g, color = ct[rvd_ct_map[k]],label = tags[k])
    ax.legend()
    f.savefig(mp.figpath(figt.format('tal_rvd_distances')))
    #plot_charges(coords, charges, strand_coords)
    return