def align_len_histogram(parsed):
    '''Plot the frequency distribution of rescaled alignment bit scores.

    Uses the first value of ``parsed``, which must be a dict of dicts whose
    innermost entries carry a ``'bits'`` field.  For every inner dict the
    ``'bits'`` values are sorted and the largest one is dropped; the pooled
    remainder is rescaled to ``2 * (x - min(x)) + 8`` and histogrammed over
    seven equal-width bins between 8 and the maximum.  The normalized
    counts are drawn in red on figure 3.

    Raises:
        Exception: always, once the plot has been drawn.
            NOTE(review): this looks like a leftover debugging halt; it is
            kept because callers currently observe it.  The statements that
            used to follow it were unreachable and referenced names that
            are not defined in this function (``paths``, ``deg_c``), so
            they have been removed.
    '''
    p0 = list(parsed.values())[0]

    # Pool the per-entry score lists directly instead of going through an
    # intermediate (possibly ragged) array, which only worked when the
    # inner lists had different lengths.
    pooled = it.chain.from_iterable(
        sorted(e['bits'] for e in val.values())[:-1]
        for val in p0.values())
    bitlens = array(list(pooled))
    bitlens = 2 * (bitlens - np.min(bitlens)) + 8

    mind = 8
    maxd = max(bitlens)
    bins = linspace(mind, maxd, 8)
    h_paths = array(histogram(bitlens, bins)[0], float)
    h_paths /= sum(h_paths)

    f = myplots.fignum(3, (8, 8))
    ax = f.add_subplot(111)
    # bins holds 8 edges; plot each count at the left edge of its bin
    ax.plot(bins[:-1], h_paths, color='red')
    ax.set_xlabel('alignment hit length')
    ax.set_ylabel('frequency')
    ax.set_title('best matched substring lengths')

    raise Exception()
def align_len_histogram(parsed):
    '''Draw a normalized histogram of rescaled alignment ``'bits'`` values.

    Only the first value of ``parsed`` is used.  It is treated as a dict of
    dicts; for each inner dict the ``'bits'`` entries are sorted ascending
    and the last (largest) one is discarded.  All remaining values are
    pooled, mapped to ``2 * (x - min(x)) + 8`` and binned into seven
    equal-width bins spanning 8 to the maximum.  The relative frequencies
    are plotted in red on figure 3.

    Raises:
        Exception: unconditionally, after plotting.
            NOTE(review): apparently a debugging stop left in place; it is
            preserved so caller-visible behavior does not change.  The
            unreachable statements that followed it used undefined names
            (``paths``, ``deg_c``) and have been dropped.
    '''
    p0 = list(parsed.values())[0]

    # Chain the trimmed lists straight together; wrapping them in an array
    # first required the lists to be ragged for the later unpacking to work.
    trimmed = (sorted(e['bits'] for e in val.values())[:-1]
               for val in p0.values())
    bitlens = array(list(it.chain.from_iterable(trimmed)))
    bitlens = 2 * (bitlens - np.min(bitlens)) + 8

    mind = 8
    maxd = max(bitlens)
    bins = linspace(mind, maxd, 8)
    h_paths, _ = histogram(bitlens, bins)
    h_paths = array(h_paths, float)
    h_paths /= sum(h_paths)

    f = myplots.fignum(3, (8, 8))
    ax = f.add_subplot(111)
    ax.plot(bins[:-1], h_paths, color='red')
    ax.set_xlabel('alignment hit length')
    ax.set_ylabel('frequency')
    ax.set_title('best matched substring lengths')

    raise Exception()
def snp_counts(indy_arr, indy_info):
    '''Scatter sub-sampled region values against SNP index and save a PDF.

    ``indy_regions(indy_arr, indy_info)`` supplies the region values.  Every
    100th region value, divided by the maximum of the first 20, gives the
    x coordinates; these are repeated for each of up to 50 SNPs taken from
    every fifth row of ``indy_arr.T`` starting at row 4.  The SNP's
    position in that selection is the y coordinate.  The figure is written
    to ``regional_snp_counts_first.pdf`` and nothing is returned.
    '''
    regions = indy_regions(indy_arr, indy_info)
    f = myplots.fignum(4, (8, 8))
    ax = f.add_subplot(111)

    # The x positions do not depend on the SNP, so compute them once.
    rsub = array(regions[::100], float) / max(regions[:20])

    xs = []
    ys = []
    rs = []
    for i, snp in enumerate(indy_arr.T[4::5][:50]):
        ys.extend([i] * len(rsub))
        # NOTE(review): snp[:2:100] selects at most one element, so rs gets
        # one entry per SNP while xs/ys get len(rsub) entries.  snp[::100]
        # may have been intended -- confirm before changing.
        rs.extend([10 + 30 * (1 - snp[:2:100])])
        xs.extend(rsub)
        print(i)

    ax.scatter(xs, ys, rs)
    f.savefig(myplots.figpath('regional_snp_counts_first.pdf'))
    return
def plotPeaks(num = 1):
    '''Plot peak counts as a function of distance from promoter midpoints.

    For chromosome ``num`` every promoter midpoint is compared with every
    peak midpoint; offsets smaller than 5000 in absolute value are
    histogrammed into 20 bins over [-5000, 5000] and summed across
    promoters.  The histogram is obtained through ``mem.getOrSet`` and
    drawn on figure 1.
    '''
    import cb.utils.plots as myplots

    # NOTE: the helper keeps its original name and signature because
    # mem.getOrSet may derive its storage key from them (not visible here).
    def setHist(**kwargs):
        '''Return (bin edges, summed counts) for chromosome ``num``.'''
        peaks = getPeaks()['chr{0}'.format(num)]
        proms = getTrackChrPromoters(num = num)

        # Peak midpoints are the same for every promoter; compute them once.
        peak_mids = [(p['start'] + p['end']) / 2 for p in peaks]

        all_hits = zeros(20)
        # Seed the edges so they are defined even when there are no
        # promoters (previously that case raised UnboundLocalError).
        bin_offsets = histogram([], 20, [-5000, 5000])[1]
        for v in proms.values():
            mid = (v[0] + v[1]) / 2
            deltas = [pmid - mid for pmid in peak_mids
                      if abs(pmid - mid) < 5000]
            hits, bin_offsets = histogram(deltas, 20, [-5000, 5000])
            all_hits += hits
        return bin_offsets, all_hits

    bin_offsets, hits = mem.getOrSet(setHist, num = num)

    f = myplots.fignum(1)
    ax = f.add_subplot(111)
    ax.set_xlabel('distance from promoter')
    ax.set_ylabel('counts')
    # 21 edges for 20 bins: plot each count at its bin's left edge
    ax.plot(bin_offsets[:-1], hits)
def analyze():
    '''Plot Cluc/Gluc enrichment as an image and against mismatch count.

    Two PDFs are written from figure 1:

    * ``corr_matrix.pdf`` -- image of ``cluc / gluc``.
    * ``enrichment_vs_mm.pdf`` -- scatter of the flattened ratio (last six
      values dropped) against a hard-coded mismatch count per sample,
      with a first-order polynomial fit drawn at mismatch counts 0, 2, 3.
    '''
    f = myplots.fignum(1)
    data = get_data()
    gl = data['gluc']
    cl = data['cluc']

    ax = f.add_subplot(111)
    ax.imshow(cl / gl, aspect='auto', interpolation='nearest')
    ax.set_title('Enrichment of Cluc over control Gluc')
    f.savefig(myplots.figpath('corr_matrix.pdf'))

    f.clear()
    ax = f.add_subplot(121)
    glf = gl.flatten()[:-6]
    clf = cl.flatten()[:-6]
    # Each mismatch count is repeated three times (42 values in total);
    # presumably one per replicate -- TODO confirm against get_data().
    n_mm = array([[e, e, e]
                  for e in [0, 2, 2, 2, 3, 3, 3, 0, 2, 2, 2, 3, 3, 3]],
                 float).flatten()
    enrichment = clf / glf

    ax.set_title('cluc enrichment vs mm count')
    ax.set_xlabel('mismatch count')
    ax.set_ylabel('fold enrichment ocluc')
    ax.scatter(n_mm, enrichment)

    pf1 = polyfit(n_mm, enrichment, 1)
    # Draw the fit at the mismatch counts it was evaluated for.  Without
    # explicit x values the line was drawn at x = 0, 1, 2 and did not line
    # up with the scatter.
    fit_x = [0, 2, 3]
    ax.plot(fit_x, polyval(pf1, fit_x))

    f.savefig(myplots.figpath('enrichment_vs_mm.pdf'))
def analyze():
    '''Visualize Cluc enrichment over the Gluc control.

    Figure 1 is used twice.  First the element-wise ratio ``cluc / gluc``
    is shown as an image and saved as ``corr_matrix.pdf``.  The figure is
    then cleared and the flattened ratio (minus its last six entries) is
    scattered against a hard-coded list of mismatch counts, together with
    a linear fit evaluated at 0, 2 and 3 mismatches; that plot is saved as
    ``enrichment_vs_mm.pdf``.
    '''
    f = myplots.fignum(1)
    data = get_data()
    gl, cl = data['gluc'], data['cluc']

    ax = f.add_subplot(111)
    ax.imshow(cl / gl, aspect = 'auto', interpolation = 'nearest')
    ax.set_title('Enrichment of Cluc over control Gluc')
    path = myplots.figpath('corr_matrix.pdf')
    f.savefig(path)
    f.clear()

    ax = f.add_subplot(121)
    glf = gl.flatten()[:-6]
    clf = cl.flatten()[:-6]
    mm_per_group = [0, 2, 2, 2, 3, 3, 3, 0, 2, 2, 2, 3, 3, 3]
    # three consecutive samples share each mismatch count
    n_mm = array([[e, e, e] for e in mm_per_group], float).flatten()
    fold = clf / glf

    ax.set_title('cluc enrichment vs mm count')
    ax.set_xlabel('mismatch count')
    ax.set_ylabel('fold enrichment ocluc')
    ax.scatter(n_mm, fold)

    pf1 = polyfit(n_mm, fold, 1)
    # Pass the x positions explicitly; otherwise matplotlib uses 0, 1, 2
    # and the fitted line is shifted relative to the scattered points.
    fit_x = [0, 2, 3]
    ax.plot(fit_x, polyval(pf1, fit_x))

    path = myplots.figpath('enrichment_vs_mm.pdf')
    f.savefig(path)
def run(meth = 'moment'):
    '''Run ``bs.run0`` on the module-level ``arr`` and plot its output.

    The returned matrix is shown as an 'OrRd' image on figure 3.  Of the
    orderings returned alongside it, the first whose length equals
    ``len(cols)`` and the first whose length equals ``len(rows)`` are used
    to build the tick labels.  The label lists are also printed, and the
    figure is saved under a name that includes ``meth``.
    '''
    out, srts = bs.run0(arr = arr, itr = 2, meth = meth)
    f = myplots.fignum(3, (12, 6))
    ax = f.add_subplot(111)

    col_order = [s for s in srts if len(s) == len(cols)][0]
    row_order = [s for s in srts if len(s) == len(rows)][0]

    # NOTE: the names are deliberately crossed as in the original code --
    # cprint holds row names (used on the y axis), rprint holds column
    # names (used on the x axis).
    cprint = [rows[idx] for idx in row_order]
    rprint = [cols[idx] for idx in col_order]

    im = ax.imshow(out,
                   interpolation = 'nearest',
                   cmap = plt.get_cmap('OrRd'))

    #flip the rows and columns... looks better.
    ax.set_xticks(arange(len(cols)) + .25)
    ax.set_yticks(arange(len(rows)) + .25)
    ax.set_yticklabels(list(cprint))
    ax.set_xticklabels(rprint)

    row_text = ', '.join([e.strip() for e in rprint])
    col_text = ', '.join([e.strip() for e in cprint])
    print('rows: \n{0}'.format(row_text))
    print('')
    print('cols: \n{0}'.format(col_text))

    plt.colorbar(im)
    # the template is filled in after figpath() has built the path
    target = myplots.figpath('correlation_plot_2_4_{0}.pdf').format(meth)
    f.savefig(target)
    return
def gdraw0(graphs, plotname = 'default_name', measure = 'cosine'):
    '''Plot each graph's similarity to the ``'kg'`` graph; save two PDFs.

    Every graph's adjacency matrix is scaled by the square root of the sum
    of its squared entries and compared with the scaled ``'kg'`` matrix via
    ``nfu.cosine_adj``.  Similarities are scattered against the rank of
    each graph's key in sorted order and colored by substrings of the key.
    One PDF is saved without tick labels and one with labels plus a
    key/color listing.

    Args:
        graphs: dict of networkx graphs; must contain the key ``'kg'``.
        plotname: inserted into the output file names.
        measure: only ``'cosine'`` is supported.

    Raises:
        ValueError: if ``measure`` is not ``'cosine'``.
        KeyError: if ``graphs`` has no ``'kg'`` entry.
    '''
    # Fail before doing any work (previously a bare Exception was raised
    # only after all adjacency matrices had been built).
    if measure != 'cosine':
        raise ValueError('unsupported measure: {0!r}'.format(measure))
    if 'kg' not in graphs:
        raise KeyError('kg')

    def key_color(name):
        '''Map a graph key to a marker color (first matching rule wins).'''
        if 'flt' in name and name.count('thr') > 1:
            return 'orange'
        if 'flt' in name:
            return 'red'
        if 'thr' in name:
            return 'yellow'
        if 'fg' in name:
            return 'green'
        if 'su' in name:
            return 'blue'
        return 'black'

    gnames = list(graphs.keys())

    nrms = []
    for g in graphs.values():
        a = array(nx.adj_matrix(g))
        nrms.append(a / sqrt(sum(a**2)))

    kg_nrm = nrms[gnames.index('kg')]
    sims = array([round(nfu.cosine_adj(a1, kg_nrm), 8) for a1 in nrms])

    srto = argsort(gnames)
    #XVALs give ranks of each key index.
    xvals = argsort(srto)
    cols = [key_color(x) for x in gnames]
    yvals = sims

    f = myplots.fignum(3, (.25 * len(sims), 10))
    f.clear()
    ax = f.add_subplot(111)
    # NOTE(review): yvals is an ndarray, so "+ [0.]" adds zero element-wise
    # rather than appending a 0; kept as in the original.
    myplots.padded_limits(ax, xvals, yvals + [0.], margin = [.02, .02])
    ax.scatter(xvals, yvals, 100, color = cols)
    ax.set_ylabel('red fly similarity ({0})'.format(measure))
    ax.set_xlabel('networks')
    ax.set_xticklabels([])
    ax.set_xticks([])

    # Reference lines: 0, median, mean, second-largest similarity, 1.
    mark_ys = [0, median(sims), mean(sims), sort(sims)[::-1][1], 1]
    ax.hlines(mark_ys, *ax.get_xlim(), linestyle = ':', alpha = .2)
    f.savefig(cfg.dataPath(
        'figs/meAWG/filter_{0}_meth_{1}_nolabels.pdf'.format(
            plotname, measure)))

    ax.set_xticks(range(len(srto)))
    ax.annotate('\n'.join(' '.join(z) for z in zip(gnames, cols)),
                [0, 1], xycoords = 'axes fraction', va = 'top')
    ax.set_xticklabels([gnames[i] for i in srto],
                       rotation = 45, size = 'xx-small', ha = 'right')
    f.savefig(cfg.dataPath(
        'figs/meAWG/filter_{0}_meth_{1}_labels.pdf'.format(
            plotname, measure)))
def make_edge_comparisons(cgraphs, bgraphs):
    '''Bar-plot edge-set similarity of every graph in ``cgraphs`` against
    every graph in ``bgraphs``.

    For each reference graph in ``bgraphs`` one figure with three stacked
    axes is produced, one axis per metric:

    * ``jaccard`` -- shared edges / edges in the union
    * ``spec``    -- shared edges / edges in the compared graph
    * ``sens``    -- shared edges / edges in the reference graph

    Bars are positioned by the rank of their height and colored per
    compared graph.  Each figure is saved through ``figtemplate`` under
    ``edges_vs_<reference name>``.

    NOTE(review): the original indentation was lost; the legend call is
    assumed to run once per figure, after the three metric axes are drawn.
    '''
    cgsets = dict((k, set(v.edges())) for k, v in cgraphs.iteritems())
    # Compared graphs are always visited in sorted-name order.
    names = sorted(cgraphs.keys())

    for bname, bg in bgraphs.iteritems():
        f = myplots.fignum(3, (8, 8))
        f.clear()
        axes = [f.add_subplot(311), f.add_subplot(312), f.add_subplot(313)]
        ccolors = dict(zip(cgraphs.keys(), mycolors.getct(len(cgraphs))))
        bgset = set(bg.edges())

        yvals = {'jaccard':[], 'spec':[], 'sens':[]}
        for cname in names:
            cgset = cgsets[cname]
            # the intersection size is shared by all three metrics
            n_shared = float(len(bgset.intersection(cgset)))
            yvals['jaccard'].append(n_shared / len(bgset.union(cgset)))
            yvals['spec'].append(n_shared / len(cgset))
            yvals['sens'].append(n_shared / len(bgset))

        bar_colors = [ccolors[n] for n in names]
        for j, (metric_name, heights) in enumerate(yvals.iteritems()):
            print(heights)
            ax = axes[j]
            # double argsort turns heights into ranks
            xvals = argsort(argsort(heights))
            ax.bar(xvals, heights, color = bar_colors)
            ax.set_title('edge similarity vs {0}, metric: {1}'.format(
                bname, metric_name))

        myplots.color_legend(f, ccolors.values(), ccolors.keys())
        f.savefig(figtemplate.format('edges_vs_{0}'.format(bname)))
def plot_city_posts(**kwargs):
    '''Scatter cities by longitude/latitude, sized by their post count.

    ``kwargs`` are passed through ``mem.sr`` and on to ``city_posts``,
    whose result must provide ``"lons"``, ``"lats"`` and ``"posts"``.
    The marker size of each point is the length of the matching
    ``"posts"`` entry.  Draws on figure 3; nothing is saved or returned.
    '''
    city_data = city_posts(**mem.sr(kwargs))
    lons = city_data["lons"]
    lats = city_data["lats"]
    sizes = []
    for posts in city_data["posts"]:
        sizes.append(len(posts))

    fig = myplots.fignum(3, (4, 4))
    axes = fig.add_subplot(111)
    axes.scatter(lons, lats, s=sizes)
def make_degree_plots_0():
    '''Plot per-type degree histograms and a send/receive degree scatter.

    Degrees are obtained by summing the synapse array over axis 1.

    * Left axis: for each connection type, a histogram and a scaled KDE
      of ``log10(1 + degree)``.
    * Right axis: jittered scatter of the log degrees of type ``'S'``
      against type ``'R'``.  The title reports their correlation
      coefficient and the node with the largest summed log degree.

    The figure is saved as ``degree_histograms_<edge_set>``, where
    ``edge_set`` is a module-level name.
    '''
    cxns = get_synapse_array()
    imaps = get_array_imaps()
    ctypes = imaps['ctypes']
    ctypes_imap = imaps['ctypes_imap']
    nnames = imaps['nnames']
    nnames_imap = imaps['nnames_imap']

    f2 = myplots.fignum(2, (12, 6))
    ax1 = f2.add_subplot(121)
    ax2 = f2.add_subplot(122)

    var_degs = np.sum(cxns, 1)
    maxval = log10(np.max(var_degs) + 1)
    ct = mycolors.getct(len(ctypes))

    # The bin edges are identical for every connection type.
    xax = linspace(0, maxval, 10)
    for z, ctype in enumerate(ctypes):
        vals = log10(1 + var_degs[:, z])
        _, kde = make_kde(vals)
        h = histogram(vals, xax)
        ax1.hist(vals, xax, color = ct[z], zorder = 10, alpha = .25)
        # scale the density by the number of binned values so it is
        # comparable with the histogram counts
        ax1.plot(xax, kde(xax) * sum(h[0]),
                 label = ctype, color = ct[z], zorder = 5)

    # Braces are required for a two-character subscript; "$log_10$"
    # rendered as log-sub-1 followed by a full-size 0.
    ax1.set_xlabel('$log_{10}$ of edge degrees of various types')
    ax1.legend()

    s_col = ctypes_imap['S']
    r_col = ctypes_imap['R']
    logxy = [log10(1 + var_degs[:, s_col]),
             log10(1 + var_degs[:, r_col])]
    max_inode = np.argmax(logxy[0] + logxy[1])
    max_nodename = [k for k, v in nnames_imap.iteritems()
                    if v == max_inode][0]

    # small random jitter so coincident points stay visible
    ax2.scatter(logxy[0] + .15 * random.rand(len(nnames)),
                logxy[1] + .15 * random.rand(len(nnames)),
                color = 'red', alpha = .3)
    ax2.set_xlabel('Sending Degree')
    ax2.set_ylabel('Receiving Degree')

    r2 = corrcoef(logxy[0], logxy[1])[1, 0]
    myplots.maketitle(ax2, ('correlation coeff: {0:2.2},\n'
                            'max {1} has {2} $e_{{out}}$, {3} $e_{{in}}$')
                      .format(r2, max_nodename,
                              var_degs[max_inode, s_col],
                              var_degs[max_inode, r_col]))
    myplots.maketitle(ax1, 'histogram and KDE of\nvarious edge degrees')

    f2.savefig(myplots.figpath('degree_histograms_{0}'.format(edge_set)))
def plot_easy_inference():
    '''Draw the graph returned by ``io.getGraph()`` and save it as a PDF.

    Node positions come from ``gd.getpos`` and the drawing is done by
    ``gd.easy_draw`` while figure 4 is current.  The result is written to
    ``worm_chip_graph.pdf``.
    '''
    graph = io.getGraph()
    layout = gd.getpos(graph)

    fig = myplots.fignum(4, (8, 8))
    axes = fig.add_subplot(111)
    axes.set_title('putative worm chip network')
    gd.easy_draw(graph, layout)

    out_path = myplots.figpath('worm_chip_graph.pdf')
    fig.savefig(out_path)
def snp_count_plot(indy_arr, indy_info):
    '''Plot SNP rows reordered by region value, plus the region curve.

    The first 500 region values from ``indy_regions`` are divided by their
    maximum and argsorted.  For up to 50 rows taken from every fifth row
    of ``indy_arr.T`` (starting at row 4) the first 500 entries are
    reordered with that permutation, jittered, and plotted on figure 3.
    The sorted, scaled region values are then drawn as a thick line.

    NOTE(review): the original indentation was lost; the thick line is
    assumed to be drawn once, after the loop.
    '''
    regions = indy_regions(indy_arr, indy_info)
    fig = myplots.fignum(3, (8, 8))
    axes = fig.add_subplot(111)

    snp_rows = indy_arr.T[4::5][:50]
    for snp_row in snp_rows:
        rsub = array(regions[:500], float) / max(regions[:500])
        inds = argsort(rsub)
        reordered = snp_row[:500][inds]
        # jitter separates overlapping traces
        axes.plot(reordered + random.rand(500) * .1)

    axes.plot(rsub[inds], linewidth = 5)
    return
def peak_thr_histograms(**kwargs):
    '''Plot log10 count histograms of peak distances and peak scores.

    Data come from ``wp.get_simple_thr`` called with a distance threshold
    of 1500, a score threshold of 1e-2 and ``dsign=-1`` merged into
    ``kwargs`` by ``mem.rc``.  Distances are binned in steps of 50 over
    [-1500, 1500); scores are binned by the integer part of their log10.
    Both panels share figure 1, which is saved once after each panel
    (``chip_simple_distance.pdf``, then ``chip_simple_scores.pdf``).
    '''
    dthr = 1500
    sthr = 1e-2
    dsign = -1
    simple = wp.get_simple_thr(
        **mem.rc(kwargs, dthr = dthr, sthr = sthr, dsign = dsign))

    # Track the global score extremes; -1 marks "nothing seen yet".
    min_score = -1
    max_score = -1
    for entry in simple.values():
        hi = np.max(entry['scores'])
        if max_score == -1 or hi > max_score:
            max_score = hi
        lo = np.min(entry['scores'])
        if min_score == -1 or lo < min_score:
            min_score = lo

    lrange = [int(floor(log10(min_score))), int(ceil(log10(max_score)))]
    sbin_mids = range(lrange[0], lrange[1] + 1)
    sbins = zeros(len(sbin_mids))

    dbin_size = 50
    dbin_mids = range(-dthr, dthr, dbin_size)
    dbins = zeros(len(dbin_mids))

    for entry in simple.values():
        for dist in entry['dists']:
            # shift to a non-negative offset, then integer-divide into bins
            dbins[int(dist + dthr) // dbin_size] += 1
        for score in entry['scores']:
            sbins[int(log10(score) - lrange[0])] += 1

    f = myplots.fignum(1, (10, 6))

    dist_ax = f.add_subplot(121)
    dist_ax.set_ylabel('log 10 counts')
    dist_ax.set_xlabel('distance')
    dist_ax.set_title('simplified tss distances (d<{0})'.format(dthr))
    dist_ax.plot(dbin_mids, log10(dbins), color = 'black')
    f.savefig(myplots.figpath('chip_simple_distance.pdf'))

    score_ax = f.add_subplot(122)
    score_ax.set_title('simplified tss scores (s<{0:2.2})'.format(sthr))
    score_ax.set_ylabel('log10 counts')
    score_ax.set_xlabel('log10 peak score')
    score_ax.plot(sbin_mids, log10(sbins), color = 'black')
    f.savefig(myplots.figpath('chip_simple_scores.pdf'))
def plot_mers(mer_cts):
    '''Plot the log-count histogram of the values in ``mer_cts``.

    The values are binned into 20 bins; the log of each bin count is drawn
    as a filled curve over the left bin edges on figure 3, and the figure
    is saved as ``mer_ct_hist``.
    '''
    fig = myplots.fignum(3, (8, 8))
    axes = fig.add_subplot(111)

    counts, edges = histogram(mer_cts.values(), 20)
    left_edges = edges[:-1]
    log_counts = log(counts)
    axes.fill_between(left_edges, log_counts,
                      edgecolor='black', linewidth=5)

    axes.set_xlabel('mer rediscovery rate')
    axes.set_ylabel('$log(n)$')
    axes.set_title('Frequencies of 5-mer reoccurence across 10,000 walks.')

    fig.savefig(myplots.figpath('mer_ct_hist'))
    return
def position_activities(cons, seqs, activities, show_wt = False):
    '''Plot induction of sequences mutated at each consensus position.

    Induction is ``a[0] / a[1]`` for every entry of ``activities``.  For
    each column of ``seqs`` the inductions of all sequences whose letter
    differs from ``cons`` at that position are collected; their logs are
    scattered (with jitter) and a fixed set of percentiles is drawn as red
    lines across positions.  Positions with no mutants contribute NaNs.
    The figure is saved through ``figtemplate`` as
    ``single_position_percentiles``.

    ``show_wt`` is accepted but not used.
    '''
    induction = array([a[0] / a[1] for a in activities])

    # One list of inductions per position, restricted to mutated sequences.
    mut = []
    for i, column in enumerate(seqs.T):
        mutated = [induction[j]
                   for j, let in enumerate(column) if let != cons[i]]
        mut.append(mutated)

    # Low percentiles, 40, and their mirror images (100 - p).
    low = [10**x for x in range(-5, 2)] + [40]
    percentiles = low + [100 - p for p in low]

    f = myplots.fignum(2, (8, 8))
    f.clear()
    ax = f.add_subplot(111)

    mtiles = []
    for i, mt in enumerate(mut):
        if mt:
            xpos = zeros(len(mt)) + i
            ypos = log(mt) + random.random(len(mt)) * .1
            ax.scatter(xpos, ypos, 2, color = 'black', alpha = .05)
            mtiles.append([percentile(mt, p) for p in percentiles])
        else:
            mtiles.append([nan] * len(percentiles))
    mtiles = array(mtiles)

    # one line per percentile, running across positions
    for tile_row in mtiles.T:
        ax.plot(arange(len(tile_row)), log(tile_row),
                color = 'red', linewidth = 3, alpha = .6)

    ax.set_xlabel('position mutated (~10 per sequence)')
    ax.set_ylabel('log induction (induced expr/uninduced)')
    ax.set_title('Induction ratios for sequences mutated at points')

    figtitle = 'single_position_percentiles'
    f.savefig(figtemplate.format(figtitle))
def mut_counts(cons, seqs, name = promoter_type):
    '''Plot how often each non-consensus letter occurs at each position.

    Builds a 4 x len(cons) table with rows ordered A, T, G, C.  For every
    position the entry of the consensus letter is 0; the other entries
    count occurrences of that letter in the matching column of ``seqs``.
    The table is drawn with ``seismic.seismic`` on figure 1 and saved
    through ``figtemplate`` as ``<name>_mut_counts``.
    '''
    n_pos = len(cons)
    figtitle = '{0}_mut_counts'.format(name)

    f = myplots.fignum(1, (8, 8))
    ax = f.add_subplot(111)

    lets = ['A', 'T', 'G', 'C']
    counts = zeros((4, n_pos))
    for pos in range(n_pos):
        column_counts = []
        for let in lets:
            if let == cons[pos]:
                column_counts.append(0)
            else:
                column_counts.append(list(seqs[:, pos]).count(let))
        counts[:, pos] = column_counts

    seismic.seismic(counts, ax = ax)
    f.savefig(figtemplate.format(figtitle))
def peak_distance_histogram(**kwargs):
    '''Plot per-chip histograms of peak distances to primary/secondary TSS.

    Chip data come from ``wp.get_assay_gprops(**mem.rc(kwargs))``.  For
    every chip, the ``'dist'`` of each entry under ``'primaries'`` and
    ``'secondaries'`` is divided by the bin width (200), shifted so that
    distance 0 falls in the middle bin, and clipped into the outermost
    bins.  Primary histograms are drawn in green and secondary ones in
    red on figure 1, which is saved as
    ``chip_distance_hists_for<atype>.pdf``.

    Args:
        **kwargs: forwarded via ``mem.rc``; ``atype`` (default
            ``wp.default_atype``) is only used for the title and file name.
    '''
    atype = kwargs.get('atype', wp.default_atype)
    chips = wp.get_assay_gprops(**mem.rc(kwargs))

    # The former "sec_spread" reduction was never used and raised on chips
    # whose entries have no secondaries, so it has been removed.

    hist_spread = 10000
    bin_wid = 200
    bin_mids = arange(-1 * hist_spread, 1 * hist_spread, bin_wid)
    nb = len(bin_mids)
    half = nb // 2

    prim_hists = zeros((len(chips), nb))
    sec_hists = zeros((len(chips), nb))
    targets = (('primaries', prim_hists), ('secondaries', sec_hists))

    for i, chip in enumerate(chips.values()):
        for v in chip.values():
            for field, hists in targets:
                # NOTE: "/" is kept as in the original; for integer
                # distances under Python 2 this floors.
                bins = array([e2['dist'] / bin_wid for e2 in v[field]], int)
                bins += half
                # clip out-of-range distances into the edge bins
                bins[less(bins, 0)] = 0
                bins[greater(bins, nb - 1)] = nb - 1
                for b in bins:
                    hists[i][b] += 1

    f = myplots.fignum(1, (8, 6))
    ax = f.add_subplot(111)
    ax.set_title(
        'chip peak distances to primary/sec tss for {0}'.format(atype))
    for p in prim_hists:
        ax.plot(bin_mids, p, color = 'green')
    for s in sec_hists:
        ax.plot(bin_mids, s, color = 'red')
    f.savefig(myplots.figpath(
        'chip_distance_hists_for{0}.pdf'.format(atype)))
def plot_mers(mer_cts):
    '''Draw and save a log-scaled histogram of ``mer_cts`` values.

    ``mer_cts.values()`` is split into 20 bins.  The natural log of the
    bin counts is filled in over the left edge of each bin on figure 3,
    and the figure is written to ``mer_ct_hist``.
    '''
    f = myplots.fignum(3, (8, 8))
    ax = f.add_subplot(111)

    binned = histogram(mer_cts.values(), 20)
    hist = binned[0]
    bin_edges = binned[1]

    ax.fill_between(
        bin_edges[:-1],
        log(hist),
        edgecolor = 'black',
        linewidth = 5,
    )
    ax.set_xlabel('mer rediscovery rate')
    ax.set_ylabel('$log(n)$')
    ax.set_title('Frequencies of 5-mer reoccurence across 10,000 walks.')

    out_path = myplots.figpath('mer_ct_hist')
    f.savefig(out_path)
    return
def load(res = 25):
    '''Sample up to 10000 rows from the brain-atlas file and plot them in 3D.

    The file is read by repeatedly seeking forward ``size // 10000`` bytes,
    discarding the (probably partial) line at that position and parsing
    the next line as comma-separated floats.  The first three values of
    each sampled line become a coordinate; the fourth is used for
    coloring (scaled by the maximum into the red channel).  The scatter
    is saved as ``brainmap_spatial_coords_<res>``.

    Args:
        res: atlas resolution; only 25 is supported.

    Returns:
        list of 3-tuples with the sampled coordinates.

    Raises:
        ValueError: if ``res`` is not 25.
    '''
    if res != 25:
        raise ValueError('unsupported atlas resolution: {0!r}'.format(res))
    fpath = '/data/brain_atlas/AtlasAnnotation25.sva'

    print('path:  {0}'.format(fpath))
    size = os.path.getsize(fpath)
    n = 10000
    skip = size // n

    coords = []
    evals = []
    # The handle was never closed before; the with-block guarantees it.
    with open(fpath) as f:
        # skip the first two lines of the file
        f.readline()
        f.readline()
        while len(coords) < n:
            f.seek(skip, 1)
            f.readline()      # discard the line we landed in the middle of
            l = f.readline()
            if l == '':
                break         # end of file
            lvals = [float(v) for v in l.split(',')]
            coords.append(tuple(lvals[0:3]))
            evals.append(lvals[3])

    print('len:  {0}'.format(len(coords)))

    fig = myplots.fignum(1, (8, 6))
    ax = fig.add_subplot(111, projection='3d')
    xyvals = array([[x[0], x[1], x[2]] for x in coords])
    evals = array(evals)
    # one RGB triple per point: value / max in red, zero green and blue
    evals = (evals / np.max(evals))[:, newaxis] * array([1., 0, 0])
    print(shape(xyvals))

    ax.scatter(xyvals[:, 0], xyvals[:, 1], xyvals[:, 2],
               s = 5, edgecolor = 'none', facecolor = evals)

    path = myplots.figpath('brainmap_spatial_coords_{0}'.format(res))
    fig.savefig(path)
    return coords
def motif_num_occurrence_vs_induction(mgroup = None, mtuple = None,
                                      hit = True, induction_type = 'ratio'):
    '''Scatter log induction against the number of motifs per mutant.

    The mutants to plot are the indices listed in ``get_motif_dicts()``
    for every tuple of ``motif_grps(mgroup, hit=hit)`` when ``mgroup`` is
    given, and for ``mtuple`` otherwise.

    Args:
        mgroup: motif group name, or None to use ``mtuple``.
        mtuple: key into ``get_motif_dicts()``; used when ``mgroup`` is None.
        hit: forwarded to ``motif_grps``.
        induction_type: ``'ratio'`` (column 0 / column 1 of the mutant
            values), ``'on'`` (column 0) or ``'off'`` (column 1).

    Raises:
        ValueError: if ``induction_type`` is not one of the three options
            (previously this surfaced as an UnboundLocalError).
    '''
    if mgroup is not None:
        mtuples = motif_grps(mgroup, hit = hit)
        mdict = get_motif_dicts()
        muts_allowed = set(it.chain(*[mdict[k] for k in mtuples]))
    else:
        muts_allowed = set(get_motif_dicts()[mtuple])

    motifs = get_motifs()
    seqs, rndvals, keys = get_mutants()

    if induction_type == 'ratio':
        mut_inductions = rndvals[:, 0] / rndvals[:, 1]
    elif induction_type == 'on':
        mut_inductions = rndvals[:, 0]
    elif induction_type == 'off':
        mut_inductions = rndvals[:, 1]
    else:
        raise ValueError(
            'unknown induction_type: {0!r}'.format(induction_type))

    # The result of get_mean_induction() used to be fetched here and then
    # immediately overwritten; only the per-mutant values are used.
    inductions = dict((keys[i], mut_inductions[i])
                      for i in range(len(rndvals)))

    if mgroup is None:
        figtitle = 'motifs/ind_type={1}/occurence_v_induction_tuple={0}'.\
            format(mtuple, induction_type)
    else:
        figtitle = 'motifs/ind_type={1}/occurence_v_induction_group={0}'.\
            format(mgroup, induction_type)

    f = myplots.fignum(3, (8, 8))
    ax = f.add_subplot(111)
    points = [(log(inductions[keys[i]]), len(motifs[keys[i]]))
              for i in muts_allowed]
    ax.scatter(*zip(*points))
    ax.set_ylabel('number of motifs found')
    ax.set_xlabel('log induction')
    ax.annotate(figtitle, [1, 1],
                va = 'bottom', ha = 'right',
                xycoords = 'figure fraction')

    # figtitle contains sub-directories; create them on first use
    fpath = figtemplate.format(figtitle)
    if not os.path.isdir(os.path.dirname(fpath)):
        os.makedirs(os.path.dirname(fpath))
    f.savefig(fpath)
def process_rc(cc, rows, cols, meth="binary"):
    '''Reorder ``cc`` by row/column membership and save it as an image.

    Each entry of ``rows`` and ``cols`` is a vector.  Its membership is
    the index of its maximum, or the vector length if the maximum is not
    positive.  ``cc`` is permuted by the argsort of the row memberships
    and of the column memberships, shown on figure 3 and saved as
    ``biclustered_expr_<meth>.pdf``.

    Raises:
        Exception: always, after the figure has been saved.
            NOTE(review): this looks like a leftover debugging halt; it is
            kept so caller-visible behavior is unchanged.
    '''
    rmembers = zeros(len(rows))
    # Was sized with len(rows): that raised IndexError when there were more
    # columns than rows and left stale entries in the ordering otherwise.
    cmembers = zeros(len(cols))

    for i, r in enumerate(rows):
        rmembers[i] = argmax(r) if np.max(r) > 0 else len(r)
    for i, c in enumerate(cols):
        cmembers[i] = argmax(c) if np.max(c) > 0 else len(c)

    rorder = argsort(rmembers)
    corder = argsort(cmembers)

    f = myplots.fignum(3, (8, 8))
    ax = f.add_subplot(111)
    ax.imshow(cc[rorder][:, corder], aspect="auto", interpolation="nearest")
    f.savefig(myplots.figpath("biclustered_expr_{0}.pdf".format(meth)))

    raise Exception()
def plot_charges(coords, charges, strand_coords):
    '''Scatter charge positions and both strands in the x/y plane.

    Charges are drawn from the first two columns of ``coords``, red where
    the charge is positive and blue otherwise.  The two strands
    (``strand_coords[0]`` and ``strand_coords[1]``) are drawn twice in
    black: opaque underneath the charges and half-transparent on top of
    them.  The figure is saved via ``mp.figpath`` and ``figt`` as
    ``tal_xy_charge_scatter``.
    '''
    fig = mp.fignum(1, (6, 6))
    ax = fig.add_subplot(111)

    charge_colors = []
    for q in charges:
        charge_colors.append('red' if q > 0 else 'blue')
    ax.scatter(*coords[:, :2].T, c = charge_colors, s = 50, zorder = 5)

    # First pass: opaque, below the charges.  Second pass: translucent,
    # above them.
    for opacity, layer in ((1, -1), (.5, 6)):
        for strand_idx in (0, 1):
            strand_xy = strand_coords[strand_idx][:, :2].T
            ax.scatter(*strand_xy, c = 'black', alpha = opacity,
                       zorder = layer)

    out_path = mp.figpath(figt.format('tal_xy_charge_scatter'))
    fig.savefig(out_path)
    return
def make_transitivity(graphs):
    '''Plot the transitivity of every graph in ``graphs``.

    Each graph is converted with ``nx.Graph`` before
    ``nx.algorithms.transitivity`` is computed.  Values are plotted in the
    dictionary's iteration order with the graph names as tick labels, and
    the figure is saved through ``figtemplate`` as ``transitivity``.
    '''
    fig = myplots.fignum(3, (8, 8))

    undirected = dict((name, nx.Graph(g))
                      for name, g in graphs.iteritems())
    clusters = dict((name, nx.algorithms.transitivity(ug))
                    for name, ug in undirected.iteritems())

    axes = fig.add_subplot(111)
    positions = range(len(clusters.keys()))
    axes.plot(positions, clusters.values())
    axes.set_title('transitivity for network graphs')
    axes.set_xticks(positions)
    axes.set_xticklabels(clusters.keys())

    fig.savefig(figtemplate.format('transitivity'))
def plot_times(**kwargs):
    '''Scatter the creation times of the records returned by ``last_5``.

    Every record's ``created_at`` is reduced to a second count using
    month * 30 days + day, hour, minute and second (the year is ignored
    and all months count as 30 days).  The values are plotted against
    their running index on figure 3.

    A ``raise Exception()`` at the top of the inner loop, apparently left
    over from debugging, made this computation unreachable for any
    non-empty input; it has been removed.

    Returns:
        list of the computed second counts, in iteration order.
    '''
    vals = last_5(**mem.rc(kwargs))

    ys = []
    for v in vals:
        for r in v:
            ca = r.created_at
            catime = (((((ca.month * 30) + ca.day) * 24 + ca.hour) * 60
                       + ca.minute) * 60) + ca.second
            ys.append(catime)
    xs = range(len(ys))

    f = myplots.fignum(3, (4, 4))
    ax = f.add_subplot(111)
    ax.scatter(xs, ys)
    return ys
def show_binned_data_1d(descriptors, datum, cmaps):
    '''
    Grab metadata and parsed grids from binned_data_1d and plot them.

    One subplot per entry of ``datum`` is stacked vertically on figure 1.
    Each entry is summed over its first axis; every column of the result
    is then plotted as ``log(2 + values)``, using every 20th value, with
    one color per task.

    Args:
        descriptors: sequence whose i-th element's first item is the
            title of the i-th subplot; its length sets the row count.
        datum: sequence of arrays; the last dimension of the first one
            gives the number of task colors requested.
        cmaps: per-task labels, indexed by task number.
    '''
    f = myplots.fignum(1, (6, 8))
    gl = len(descriptors)
    ntasks = shape(datum[0])[-1]
    task_colors = mycolors.getct(ntasks)

    for i, e in enumerate(datum):
        # Numeric arguments instead of a '<rows>1<index>' string: the
        # string form is ambiguous once either number has two digits.
        ax = f.add_subplot(gl, 1, i + 1)
        ax.set_title(descriptors[i][0])
        sums = np.sum(e, 0)
        # separate index name so the outer loop variable is not shadowed
        for t, task in enumerate(sums.T):
            ax.plot(log(2 + task[::20]),
                    color=task_colors[t],
                    label=cmaps[t])
def check_results(locii, results, n_runs = 400):
    '''Collect (locus, mean z-score) pairs per key of ``results``.

    For every key, results with at least 19 fields are paired with the
    matching entry of ``locii`` and stored as a 2 x N float array.

    NOTE(review): the function then raises unconditionally, which looks
    like a debugging halt; the scatter plot and save that follow it are
    unreachable and are kept unchanged.  The original indentation was
    lost; the raise is assumed to sit after the loop.
    '''
    a0 = fetch_num_ali()
    names = fetch_alinames()
    f = myplots.fignum(3, (8, 8))
    ax = f.add_subplot(211)
    vec = zeros(len(a0[0]))

    xys = {}
    for k in results.keys():
        pairs = []
        for v, l in zip(results[k], locii[k]):
            if len(v) >= 19:
                pairs.append([l, v['Mean z-score']])
        xys[k] = array(pairs, float).T

    raise Exception()

    ax2 = f.add_subplot(111)
    ax2.scatter(xys[3][0], xys[3][1])
    #ax2.scatter(xys[8][0],xys[8][1], color = 'red')
    f.savefig(myplots.figpath('run0_zscores_{0}runs'.format(n_runs)))
def align_heatmap(parsed):
    '''Show a heat map of inverse alignment expect values between nodes.

    Uses the first value of ``parsed``, a dict mapping a node to a dict of
    partner nodes whose entries carry an ``'expect'`` field.  A square
    matrix over all nodes is filled with ``1 / (0.0001 + expect)``; rows
    are sorted by their sum, both axes are reversed, and the top-left
    100 x 100 block is drawn on figure 3.
    '''
    p0 = list(parsed.values())[0]

    # An unused array of sorted 'expect' values used to be built here; it
    # did nothing for the plot and failed on unequal list lengths with
    # newer numpy, so it has been removed.

    f = myplots.fignum(3, (8, 8))
    ax = f.add_subplot(111)

    nodes = set(p0.keys())
    for v in p0.values():
        nodes = nodes.union(set(v.keys()))

    # node -> matrix index
    r_nmap = dict((k, i) for i, k in enumerate(nodes))

    z = zeros((len(nodes), len(nodes)))
    for k, v in p0.iteritems():
        i = r_nmap[k]
        for k2, v2 in v.iteritems():
            # the small offset avoids division by zero for expect == 0
            z[i, r_nmap[k2]] = 1 / (.0001 + v2['expect'])

    ax.imshow(z[argsort(sum(z, 1)), :][::-1, ::-1][:100, :100])
def align_heatmap(parsed):
    '''Draw pairwise inverse ``'expect'`` values as an image.

    Only the first value of ``parsed`` is used.  It maps each node to a
    dict of partner nodes, and each partner entry has an ``'expect'``
    field.  Every (node, partner) pair sets one matrix cell to
    ``1 / (0.0001 + expect)``.  Rows are ordered by their sum, the matrix
    is flipped along both axes, and the first 100 rows and columns are
    shown on figure 3.
    '''
    p0 = list(parsed.values())[0]

    # The unused "bitlens" array that used to be built here has been
    # dropped: it was never read and broke on ragged input.

    f = myplots.fignum(3, (8, 8))
    ax = f.add_subplot(111)

    # Every node that appears as a key at either level gets an index.
    nodes = set(p0.keys())
    for partners in p0.values():
        nodes = nodes.union(set(partners.keys()))
    index_of = dict((node, idx) for idx, node in enumerate(nodes))

    z = zeros((len(nodes), len(nodes)))
    for k, partners in p0.iteritems():
        i = index_of[k]
        for k2, v2 in partners.iteritems():
            j = index_of[k2]
            z[i, j] = 1 / (.0001 + v2['expect'])

    row_order = argsort(sum(z, 1))
    ax.imshow(z[row_order, :][::-1, ::-1][:100, :100])
def get_coexpression(gc, **kwargs):
    '''Compute and plot the correlation matrix of normalized counts.

    ``gc`` is divided by its sum along axis 0, and the correlation
    coefficients of the transposed result are computed.  The first
    1000 x 1000 block is drawn on figure 3 and saved as
    ``coex_counts_per_tissue.pdf``.

    Args:
        gc: 2-D numeric array.
        **kwargs: accepted but not used.

    Returns:
        The full correlation matrix.
    '''
    gc = gc / sum(gc, 0)

    f = myplots.fignum(3, (8, 8))
    ax = f.add_subplot(111)

    # Removed: a loop whose only live statement was an unused assignment,
    # and a second return after "return cc" that could never run and
    # referenced undefined names.
    cc = corrcoef(gc.T)
    ax.imshow(cc[:1000, :1000], aspect="auto")

    f.savefig(myplots.figpath("coex_counts_per_tissue.pdf"))
    return cc
def show_binned_data_1d(descriptors, datum, cmaps):
    '''
    Grab metadata and parsed grids from binned_data_1d and plot them.

    Figure 1 gets ``len(descriptors)`` vertically stacked axes, one per
    element of ``datum``.  Each element is reduced with a sum over axis 0;
    the columns of the reduced array are drawn as ``log(2 + column)``,
    sub-sampled at every 20th point, and colored and labelled per task.
    The title of axis i is ``descriptors[i][0]``.
    '''
    f = myplots.fignum(1, (6,8))
    n_panels = len(descriptors)
    dshapes = [shape(d) for d in datum]
    ntasks = dshapes[0][-1]
    task_colors = mycolors.getct(ntasks)

    for i, e in enumerate(datum):
        # Use the (rows, cols, index) form: building a digit string such
        # as '211' stops working as soon as there are ten or more panels.
        ax = f.add_subplot(n_panels, 1, i + 1)
        ax.set_title(descriptors[i][0])
        sums = np.sum(e, 0)
        # task_idx instead of re-using "i", which shadowed the panel index
        for task_idx, task in enumerate(sums.T):
            ax.plot(log(2 + task[::20]),
                    color = task_colors[task_idx],
                    label = cmaps[task_idx])
def show_fixations(all_fixations,cmaps):
    '''Plot relative changes in fixation time between ranked tasks.

    all_fixations is indexed by mutation rate (the five values in
    mut_vals below); each value is iterated as a sequence of per-condition
    dicts keyed by task.  cmaps maps a task id to a display name.

    Figure 2 only receives a title and axis labels (its scatter call is
    commented out).  Figure 3 shows one image row per
    (mutation rate, condition) entry and is saved as
    'graph_evolution_acceleration.pdf'.

    NOTE(review): the original indentation of this function was lost.
    The nesting below is a reconstruction; in particular it is assumed
    that the all_counts / all_fracs bookkeeping and mut_spool.append run
    for every condition, not only when idxs_allowed is non-empty --
    confirm against the original file.
    '''
    fstats = fixation_stats(all_fixations)
    ranked_tasks = fstats['ranked_ages']
    # display names of the conditions, indexed by position in fix_map
    names = ['s_star','torus','torus_repl']
    #argsort(ages, key = lambda)
    mut_vals = [0.0005, 0.001, 0.002, 0.003, 0.004]
    # xs / ys / cs are filled below but only used by the commented-out scatter
    xs = []
    ys = []
    cs = []
    name_colors = mycolors.getct(3)
    task_colors = mycolors.getct(9)
    # last axis has 8 slots: one per consecutive pair of ranked tasks
    # (presumably 9 tasks -- TODO confirm)
    all_deltas = zeros((len(mut_vals), len(all_fixations.values()[0]), 8))
    all_fracs = zeros((len(mut_vals), len(all_fixations.values()[0])))
    all_counts = zeros((len(mut_vals), len(all_fixations.values()[0])))
    mut_spool = []
    for i, m in enumerate(mut_vals):
        fix_map = all_fixations[m]
        for j, e in enumerate(fix_map):
            name_c= name_colors[j]
            # rows: ranked tasks; columns: entries of that task's '1' list
            task_10ptime = array([ [item[1] for item in e[rt[0]]['1']] for rt in ranked_tasks])
            # keep only columns in which every task has a positive time
            idxs_allowed = nonzero(greater(np.min(task_10ptime, 0),0))[0]
            frac_allowed =float( len(idxs_allowed))/ shape(task_10ptime)[1]
            '''roll deltas for all sxsful completions'''
            if len(idxs_allowed )!= 0:
                nrml = task_10ptime[:,idxs_allowed]
                #nrml = 1
                # NOTE(review): roll() is called without an axis, which per
                # the numpy docs rolls the flattened array -- verify that a
                # shift along the task axis was not intended.
                deltas = np.mean((roll(task_10ptime[:,idxs_allowed],-1) \
                                  - task_10ptime[:,idxs_allowed]) / \
                                 nrml ,1)
                # drop the last entry (the wrap-around pair)
                all_deltas[i,j,:] = deltas[:-1]
            all_counts[i,j] = len(idxs_allowed)
            all_fracs[i,j] = frac_allowed
            mut_spool.append({'tuple':(i,j), 'mut':m, 'name': j})
            for k, e2 in enumerate(e.iteritems()):
                t,v = e2
                task_c = task_colors[k]
                # skip entries whose first field is the -1 marker
                p10_times = [item[1] for item in v['5'] if item[0] != -1]
                n= len(p10_times)
                # x = mutation-rate index plus jitter in [0, 1/3)
                these_x = (zeros(n) + i ) + random.uniform(size = n)/3
                xs.extend(these_x)
                ys.extend(p10_times)
                cs.extend([task_c] * n)
    f = myplots.fignum(2,(6,6))
    ax = f.add_subplot(111)
    ax.set_title('fixation times for all tasks')
    ax.set_xlabel('mutation rate')
    ax.set_ylabel('fixation time')
    #ax.scatter(xs, ys, 20, color = cs,alpha = .4)
    f2 = myplots.fignum(3, (8,8))
    ax = f2.add_axes([.3,.3,.6,.6])
    ax.set_title('fixation time (fold change over previous tasks)')
    ax.set_xlabel('task')
    ax.set_ylabel('condition')
    # label each column "task -> next task"; the final wrap-around pair is dropped
    xlabels = [(cmaps[e[0][0]],cmaps[e[1][0]]) for e in zip(ranked_tasks, roll(ranked_tasks,-1,0))][0:-1]
    ax.set_xticks(range(len(xlabels)))
    ax.set_xticklabels(['{0} -> {1}'.format(*xl) for xl in xlabels], rotation = -90, va = 'top', ha = 'left')
    rows = []
    labels = []
    # group image rows by condition
    for ms in sorted(mut_spool, key = lambda x:x['name']):
        tup = ms['tuple']
        rows.append(all_deltas[tup[0],tup[1],:])
        ct = all_counts[tup]
        frac =all_fracs[tup]
        mut = ms['mut']
        # NOTE(review): frac is passed to format() but the template has no {3}
        labels.append('{0}: mut rate={1};n={2}'.\
                      format(names[ms['name']],mut,int(ct),frac) )
    im = ax.imshow(rows,interpolation = 'nearest')
    f2.colorbar(im)
    ax.set_yticks(range(len(mut_spool)))
    ax.set_yticklabels(labels)
    f2.savefig(myplots.figpath('graph_evolution_acceleration.pdf'))
def motif_name_vs_induction(mgroup = None, mtuple = None, hit = True, induction_type = 'ratio'):
    '''Relate per-motif scores to log induction and plot motif positions.

    The mutants considered are the indices listed in get_motif_dicts()
    for every tuple of motif_grps(mgroup, hit = hit) when mgroup is given,
    and for mtuple otherwise.  induction_type selects 'ratio'
    (column 0 / column 1 of the mutant values), 'on' (column 0) or
    'off' (column 1); any other value leaves mut_inductions undefined.

    Top axis: for each motif seen more than 200 times, a linear fit of
    log induction against summed motif score (scores above 0.15 only).
    Bottom axis: start/stop spans of that motif for the first 100
    sequences, red when the fitted slope is negative and blue otherwise.
    The figure is saved through figtemplate; missing directories are
    created.

    NOTE(review): the original indentation of this function was lost; the
    nesting below is a reconstruction.
    '''
    if mgroup != None:
        mtuples = motif_grps(mgroup, hit = hit)
        mdict = get_motif_dicts()
        muts_allowed = set(list(it.chain(*[mdict[k] for k in mtuples])))
    else:
        muts_allowed = set(get_motif_dicts()[mtuple])
    motifs = get_motifs()
    seqs, rndvals, keys = get_mutants()
    #USE ONLY FILTERED SEQS!
    seqs, rndvals, keys = \
        [seqs[i] for i in muts_allowed],\
        array([rndvals[i] for i in muts_allowed]),\
        [keys[i] for i in muts_allowed]
    keys_allowed = set(keys)
    if induction_type == 'ratio':
        mut_inductions =(rndvals[:,0] / rndvals[:,1])
    elif induction_type == 'on':
        mut_inductions = rndvals[:,0]
    elif induction_type == 'off':
        mut_inductions = rndvals[:,1]
    # sequence key -> induction value
    inductions = dict([(keys[i], mut_inductions[i] ) for i in range(len(rndvals)) ] )
    if mgroup == None:
        figtitle = 'motifs/ind_type={1}/mname_v_induction_tuple={0}'.\
            format(mtuple, induction_type)
    else:
        figtitle = 'motifs/ind_type={1}/mname_v_induction_group={0}'.\
            format(mgroup, induction_type)
    f = myplots.fignum(3, (8,8))
    # every motif name occurring in an allowed sequence, with repeats
    m_occurences = list(it.chain(*[[elt['motif'] for elt in seq_motifs] for k, seq_motifs in motifs.iteritems() if k in keys_allowed]))
    unq_keys = list(set(m_occurences))
    kcount =[ m_occurences.count(m ) for m in unq_keys ]
    msort = nonzero(greater(kcount, 200))[0]
    #TAKING ONLY THE 10 MOST COMMON MOTIFS
    # NOTE(review): the comment above is stale -- the code keeps every
    # motif with more than 200 occurrences, not the ten most common.
    unq_keys = [unq_keys[i] for i in msort[0:]]
    # motif -> one record per allowed sequence: summed score and the
    # start/end positions of that motif's hits
    m_total_scores = dict([(mkey, [{'seq':skey, 'score':sum([ elt['score'] for elt in seq_motifs if elt['motif'] == mkey ]), 'starts':[elt['start'] for elt in seq_motifs if elt['motif'] == mkey], 'stops':[elt['end'] for elt in seq_motifs if elt['motif'] == mkey]} for skey, seq_motifs in motifs.iteritems() if skey in keys_allowed]) for mkey in unq_keys])
    ax = f.add_subplot(211)
    ax2 = f.add_subplot(212)
    # running index into colors; advanced even for motifs that are skipped
    count = -1
    colors = mycolors.getct(len(unq_keys))
    for mname, scores in m_total_scores.iteritems():
        count += 1
        thr = .15
        inds = [log(inductions[elt['seq']]) for elt in scores if elt['score'] > 
                thr]
        # need at least three points for a meaningful linear fit
        if len(inds)< 3:
            continue
        these_scores = [ v['score'] for v in scores if v['score'] > thr]
        xax = linspace(min(these_scores), max(these_scores),5)
        pfit = polyfit(these_scores, inds, 1)
        ax.plot(xax, polyval(pfit,xax), color = colors[count], linewidth = 3)
        ofs = 0
        xseq = arange(len(seqs[0]))
        for seqelt in scores[:100]:
            for start,stop in zip(*[seqelt['starts'], seqelt['stops']]):
                # each hit is drawn slightly higher than the previous one
                ofs += .25
                ax2.plot([start,stop], [ofs, ofs+.2], alpha = .5,color = 'red' if pfit[0] < 0 else 'blue')
    # figtitle contains sub-directories; create them if needed
    fpath = figtemplate.format(figtitle)
    if not os.path.isdir(os.path.dirname(fpath)):
        os.makedirs(os.path.dirname(fpath))
    f.savefig(figtemplate.format(figtitle))
    pass
def motif_dist_v_cooperativity(mgroup=None, mtuple=None, hit=True,
                               induction_type='ratio', midpoint=None):
    '''Plot the 90th percentile of log induction vs. motif distance.

    The mutants to use are taken from ``motif_grps(mgroup, hit=hit)`` when
    ``mgroup`` is given, otherwise from ``get_motif_dicts()[mtuple]``.

    Motif hits with score > 0.25 are counted over the selected mutants;
    motifs with more than 5 and fewer than 500 hits are kept, most frequent
    first (at most 1001 motifs are processed).  Each kept hit contributes
    the pair ``(midpoint - mean([start, end]), log(induction))``.  For each
    distinct distance the 90th percentile of the log inductions is plotted,
    and the figure is saved to ``figtemplate.format(figtitle)``.

    induction_type selects the induction measure from the two columns of
    the mutants' rndvals: 'ratio' (column 0 / column 1), 'on' (column 0)
    or 'off' (column 1).

    Raises ValueError when midpoint is None, when induction_type is not
    recognised, or when no motif hit survives the filters.
    '''
    if midpoint is None:
        raise ValueError('midpoint is required')
    if induction_type not in ('ratio', 'on', 'off'):
        raise ValueError(
            'unknown induction_type: {0!r}'.format(induction_type))

    if mgroup is not None:
        mtuples = motif_grps(mgroup, hit=hit)
        mdict = get_motif_dicts()
        muts_allowed = set(it.chain(*[mdict[k] for k in mtuples]))
    else:
        muts_allowed = set(get_motif_dicts()[mtuple])

    motifs = get_motifs()
    _, rndvals, keys = get_mutants()

    # Use only the filtered mutants.
    rndvals = array([rndvals[i] for i in muts_allowed])
    keys = [keys[i] for i in muts_allowed]
    keys_allowed = set(keys)

    if induction_type == 'ratio':
        mut_inductions = rndvals[:, 0] / rndvals[:, 1]
    elif induction_type == 'on':
        mut_inductions = rndvals[:, 0]
    else:
        mut_inductions = rndvals[:, 1]
    inductions = dict(zip(keys, mut_inductions))

    if mgroup is None:
        figtitle = 'motifs/ind_type={1}/motif_dist_v_cooperativity_tuple={0}'.\
            format(mtuple, induction_type)
    else:
        figtitle = 'motifs/ind_type={1}/motif_dist_v_cooperativity_group={0}{2}'.\
            format(mgroup, induction_type, '_hit' if hit else '')

    thr = .25
    m_occurences = list(it.chain(*[[elt['motif'] for elt in seq_motifs
                                    if elt['score'] > thr]
                                   for k, seq_motifs in motifs.items()
                                   if k in keys_allowed]))
    kcounts = dict([(k, len(list(g)))
                    for k, g in it.groupby(sorted(m_occurences))])

    # Keep moderately frequent motifs, most frequent first.  The
    # sort-then-reverse form is kept so that ties are ordered as before.
    unq_keys = [m for m in list(kcounts) if 5 < kcounts[m] < 500]
    unq_keys = sorted(unq_keys, key=lambda m: kcounts[m])[::-1]

    f = myplots.fignum(3, (8, 8))
    ax = f.add_subplot(111)

    # The previous loop stopped once its counter exceeded 1000, i.e. after
    # 1001 motifs.
    max_motifs = 1001
    all_vals = []
    for mkey in unq_keys[:max_motifs]:
        for skey in keys_allowed:
            hits = [elt for elt in motifs[skey]
                    if elt['motif'] == mkey and elt['score'] > thr]
            if not hits:
                continue
            log_induction = log(inductions[skey])
            all_vals.extend([(midpoint - mean([elt['start'], elt['end']]),
                              log_induction)
                             for elt in hits])

    if not all_vals:
        raise ValueError('no motif hits to plot for {0}'.format(figtitle))

    # groupby only merges *adjacent* equal keys, so it must run on the
    # sorted values; grouping the unsorted list overwrote each distance
    # with the percentile of its last run only.
    vsrt = sorted(all_vals, key=lambda v: v[0])
    xv = [v[0] for v in vsrt]

    # Distances are floats; truncate explicitly because newer NumPy rejects
    # float sizes and indices (older releases truncated silently).
    # NOTE(review): negative distances index from the end of the array and
    # fractional distances share a slot with their integer part (the last
    # group wins) -- confirm this is intended.
    means = zeros(int(max(xv) + 1))
    for dist, grp in it.groupby(vsrt, key=lambda v: v[0]):
        means[int(dist)] = percentile([v[1] for v in grp], 90)

    elts = nonzero(means)[0]
    ax.plot(elts, [means[e] for e in elts])

    ax.set_xlabel('distance from strongest promoters')
    ax.set_ylabel('induction')
    ax.annotate(figtitle, [0, 0],
                xycoords='figure fraction',
                va='bottom', ha='left')

    fpath = figtemplate.format(figtitle)
    figdir = os.path.dirname(fpath)
    if not os.path.isdir(figdir):
        os.makedirs(figdir)
    f.savefig(fpath)
def show_fixations(all_fixations, cmaps):
    '''Draw a heat map of the relative change in fixation time between tasks.

    all_fixations is indexed by mutation rate (the five values in
    ``mut_vals``); each entry is a sequence of per-condition dicts indexed
    by task id.  Each task entry holds lists under the keys '1' and '5'
    whose items carry a time at position 1.
    cmaps maps a task id (``rt[0]`` of each ranked task) to the label used
    on the x axis.

    For every (mutation rate, condition) pair, only the runs in which every
    ranked task has a positive time are kept.  The value shown for task k is
    the mean over those runs of ``(time[k+1] - time[k]) / time[k]`` with
    tasks in ``fixation_stats(all_fixations)['ranked_ages']`` order.  The
    heat map is written to
    ``myplots.figpath('graph_evolution_acceleration.pdf')``.

    NOTE(review): presumably the columns of each '1' list are independent
    runs -- confirm against the producer of all_fixations.
    '''
    fstats = fixation_stats(all_fixations)
    ranked_tasks = fstats['ranked_ages']
    names = ['s_star', 'torus', 'torus_repl']
    mut_vals = [0.0005, 0.001, 0.002, 0.003, 0.004]

    # Scatter data for the per-task figure; the scatter call itself is
    # currently disabled below.
    xs = []
    ys = []
    cs = []
    task_colors = mycolors.getct(9)

    n_conditions = len(list(all_fixations.values())[0])
    # One delta per consecutive pair of ranked tasks (was hard-coded to 8).
    n_steps = len(ranked_tasks) - 1
    all_deltas = zeros((len(mut_vals), n_conditions, n_steps))
    all_fracs = zeros((len(mut_vals), n_conditions))
    all_counts = zeros((len(mut_vals), n_conditions))

    mut_spool = []
    for i, m in enumerate(mut_vals):
        fix_map = all_fixations[m]
        for j, e in enumerate(fix_map):
            task_10ptime = array([[item[1] for item in e[rt[0]]['1']]
                                  for rt in ranked_tasks])
            # Columns in which every task has a positive time.
            idxs_allowed = nonzero(greater(np.min(task_10ptime, 0), 0))[0]
            frac_allowed = float(len(idxs_allowed)) / shape(task_10ptime)[1]

            # Roll deltas for all successful completions.
            if len(idxs_allowed) != 0:
                completed = task_10ptime[:, idxs_allowed]
                # Roll along the task axis so row k is compared with row
                # k + 1; without an axis numpy.roll shifts the flattened
                # array, which compared neighbouring runs instead.
                # NOTE(review): if the times are integers this is integer
                # division under Python 2 -- confirm the dtype.
                deltas = np.mean((roll(completed, -1, 0) - completed)
                                 / completed, 1)
                # The last entry is the wrap-around (last -> first) pair.
                all_deltas[i, j, :] = deltas[:-1]

            all_counts[i, j] = len(idxs_allowed)
            all_fracs[i, j] = frac_allowed
            mut_spool.append({'tuple': (i, j), 'mut': m, 'name': j})

            for k, v in enumerate(e.values()):
                task_c = task_colors[k]
                p10_times = [item[1] for item in v['5'] if item[0] != -1]
                n = len(p10_times)
                # Jitter the x position within each mutation-rate column.
                these_x = (zeros(n) + i) + random.uniform(size=n) / 3
                xs.extend(these_x)
                ys.extend(p10_times)
                cs.extend([task_c] * n)

    f = myplots.fignum(2, (6, 6))
    ax = f.add_subplot(111)
    ax.set_title('fixation times for all tasks')
    ax.set_xlabel('mutation rate')
    ax.set_ylabel('fixation time')
    #ax.scatter(xs, ys, 20, color = cs,alpha = .4)

    f2 = myplots.fignum(3, (8, 8))
    ax = f2.add_axes([.3, .3, .6, .6])
    ax.set_title('fixation time (fold change over previous tasks)')
    ax.set_xlabel('task')
    ax.set_ylabel('condition')

    # Pair every ranked task with its successor; drop the wrap-around pair.
    xlabels = [(cmaps[e[0][0]], cmaps[e[1][0]])
               for e in zip(ranked_tasks, roll(ranked_tasks, -1, 0))][0:-1]
    ax.set_xticks(range(len(xlabels)))
    ax.set_xticklabels(['{0} -> {1}'.format(*xl) for xl in xlabels],
                       rotation=-90, va='top', ha='left')

    rows = []
    labels = []
    for ms in sorted(mut_spool, key=lambda x: x['name']):
        tup = ms['tuple']
        rows.append(all_deltas[tup[0], tup[1], :])
        ct = all_counts[tup]
        frac = all_fracs[tup]
        mut = ms['mut']
        labels.append('{0}: mut rate={1};n={2}'.
                      format(names[ms['name']], mut, int(ct), frac))

    im = ax.imshow(rows, interpolation='nearest')
    f2.colorbar(im)
    ax.set_yticks(range(len(mut_spool)))
    ax.set_yticklabels(labels)

    f2.savefig(myplots.figpath('graph_evolution_acceleration.pdf'))
def site_energy_deltas(showtype = 'first_part_energies',
                       muts_allowed = None,
                       mkey = None,
                       figtitle = 'sed',
                       smoothing = None,
                       sub_means = True,
                       en_type = 'double',
                       induction_type = 'ratio'):
    '''Plot mutant induction per site, split by pairing-energy change.

    For every site in ``site_mut_inds()`` the mutants at that site are
    compared with the consensus sequence (``get_cons()``) over the window
    returned by ``position_triplet(site)``.  The energy of a window is the
    sum of ``get_energies()[pair][2]`` over adjacent pairs.

    showtype selects what is drawn:
      'show_avgs'           -- per site, mean induction of mutants with
                               higher (red) and lower-or-equal (blue)
                               energy than the consensus window.
      'show_all'            -- one jittered point per mutant, colored the
                               same way.
      'smoothed_avg'        -- the two per-site log averages as curves,
                               optionally minus the log whole-site average
                               (sub_means) and smoothed with
                               ``sgs.smooth(..., smoothing)``.
      'first_part_energies' -- two matrices indexed by (consensus element,
                               mutant element): mean log induction and the
                               number of observations.
    Any other value raises Exception, but only after the per-site loop has
    run and the figure has been created.

    muts_allowed: optional set of mutant indices to restrict to; replaced
        by ``set(get_motif_dicts()[mkey])`` when mkey is given.
    figtitle: base of the output name; showtype, mkey, smoothing, en_type
        and induction_type are appended, and the figure is saved to
        ``figtemplate.format(figtitle)``.
    en_type: 'double' uses element 2 of each ``get_energies()`` entry,
        anything else uses ``get_sing_energies()`` ('first_part_energies'
        only).
    induction_type: 'ratio' (column 0 / column 1 of the mutant values),
        'on' (column 0) or 'off' (column 1).

    NOTE(review): an induction_type outside these three leaves
    mut_inductions unassigned -- confirm callers never pass one.
    NOTE(review): this layout was reconstructed from a flattened source;
    the nesting of the '_etype=' and '_ind=' title suffixes and of the
    final axis-limit restore should be confirmed.
    '''
    seqs, seqs_rndvals, keys = get_mutants()
    mut_inds = site_mut_inds()
    mean_induction = get_mean_induction()

    # Accumulators: scatter x / y / sizes / colors, per-site plus / minus /
    # whole averages, and the sites whose averages were NaN.
    xs, ys, rs, cs , pas, mas, was, bads = [[] for i in range(8)]
    cons = get_cons()
    l = len(cons)

    energies = get_energies()
    # Energy of a sequence window: sum over its adjacent pairs.
    enfun = lambda x : sum([energies[''.join(x[pos:pos+2])][2] for pos in range(len(x) -1)])

    flip = 0
    figtitle = figtitle + '_' + showtype
    if not mkey == None:
        # A motif key overrides any explicitly passed muts_allowed.
        muts_allowed = set(get_motif_dicts()[mkey])
        figtitle = figtitle + '_{0}'.format(mkey)
    if not smoothing == None:
        figtitle = figtitle + '_sm={0}'.format(smoothing)

    if showtype == 'first_part_energies':
        if en_type == 'double':
            ens = get_energies()
            gibbs = dict([(k,v[2]) for k,v in ens.iteritems()])
        else:
            gibbs = get_sing_energies()
        figtitle = figtitle + '_etype={0}'.format(en_type)
        # Length of one energy key; sets the sliding-window width below.
        enlen = len(gibbs.keys()[0])
        doubles = gibbs.keys()
        # argsort(argsort(v)) is the rank of each value, so keysrt maps
        # every key to the rank of its energy.
        keysrt = dict([(doubles[i], elt) for i,elt in enumerate( argsort(argsort(gibbs.values())))])
        # dub_muts[consensus rank][mutant rank] collects the inductions.
        dub_muts = [[[] for j in range(len(keysrt)) ] for i in range(len(keysrt))]

    figtitle = figtitle + '_ind=' + induction_type

    for site, muts in enumerate(mut_inds):
        if muts_allowed != None:
            muts = list(muts_allowed.intersection(muts))
        mut_avg = mean( seqs_rndvals[muts,0] / seqs_rndvals[muts,1])
        trip_rng = position_triplet(site)
        wt_seq = array([cons[s] for s in trip_rng])
        mut_seqs = seqs[muts][:,trip_rng]

        if induction_type == 'ratio':
            mut_inductions =(seqs_rndvals[muts,0] / seqs_rndvals[muts,1])
        elif induction_type == 'on':
            mut_inductions = seqs_rndvals[muts,0]
        elif induction_type == 'off':
            mut_inductions = seqs_rndvals[muts,1]

        wt_e = enfun(wt_seq)
        mut_es = [enfun(seq) for seq in mut_seqs]

        if showtype == 'show_avgs':
            # Mean induction of mutants above / at-or-below the consensus
            # energy.
            plus_avg = mean(mut_inductions[nonzero(greater(mut_es, wt_e))[0]])
            minus_avg = mean(mut_inductions[nonzero(less_equal(mut_es, wt_e))[0]])

            cs.extend(['red', 'blue'])
            xs.extend([site] * 2)
            ys.extend([plus_avg, minus_avg])
            # Appends a single [30, 30] entry rather than two scalars.
            rs.extend([[30] * 2])
        elif showtype == 'show_all':
            cs.extend(['red' if e > wt_e else 'blue' for e in mut_es])
            xs.extend([site] * len(mut_es))
            # Small random jitter added to each induction value.
            ys.extend(mut_inductions + np.random.rand(len(mut_es)) * .1)
            rs.extend([10] * len(mut_es))
        elif showtype == 'smoothed_avg':
            plus_avg = mean(mut_inductions[nonzero(greater(mut_es, wt_e))[0]])
            minus_avg = mean(mut_inductions[nonzero(less_equal(mut_es, wt_e))[0]])
            whole_avg = mean(mut_inductions)

            # NaN averages are replaced by 1 (log 0) and the site is
            # recorded so it can be zeroed out before plotting.
            if isnan(plus_avg):
                bads.append(site)
                plus_avg = 1
            if isnan(minus_avg):
                bads.append(site)
                minus_avg = 1
            if isnan(whole_avg):
                bads.append(site)
                whole_avg = 1

            pas.append(plus_avg)
            mas.append(minus_avg)
            was.append(whole_avg)

            cs.extend(['red', 'blue'])
            xs.extend([site] * 2)
            ys.extend([plus_avg, minus_avg])
            rs.extend([[30] * 2])
        elif showtype == 'first_part_energies':
            for idx, trip in enumerate(mut_seqs):
                # Skip two-element windows and sites outside 1..28.
                if len(trip) == 2 : continue
                if site < 1: continue
                if site > 28: continue
                # Record the induction under (consensus rank, mutant rank)
                # for each window position.
                for pos in range(len(trip) - enlen):
                    midx = keysrt[''.join(trip[pos:pos+enlen])]
                    cidx = keysrt[''.join(wt_seq[pos:pos+enlen])]
                    dub_muts[cidx][midx].append( mut_inductions[idx] )

    f = myplots.fignum(3,(8,8))
    ax = f.add_subplot(111)

    if showtype in ['show_avgs', 'show_all']:
        ydat = np.log(ys)
        xdat = array(xs)
        cs = array(cs)
        rs = array(rs)

        ax.scatter(xdat, ydat, array(rs), c= cs, alpha = .2)
        xlim = [min(xs), max(xs)]
        ylim = [min(ydat), max(ydat)]

        ax.set_title('Average induction of site mutants (red mutants have stronger pairing)')
        ax.set_ylabel('fold induction')
        ax.set_xlabel('mutation site')
    elif showtype == 'smoothed_avg':
        pas =log( array(pas))
        mas =log( array(mas))
        was =log(array(was))

        # Optionally express both curves relative to the whole-site average.
        if sub_means:
            subvec = was
        else:
            subvec = zeros(len(was))

        if smoothing == None:
            plus = pas - subvec
            minus =mas - subvec
        else:
            plus =sgs.smooth(pas -subvec, smoothing)
            minus =sgs.smooth(mas -subvec, smoothing)

        # Zero out the sites whose averages were NaN.
        goods = ones(len(plus))
        goods[[b for b in bads]] = 0
        plus = plus * goods
        minus = minus * goods

        ydat = plus
        xdat = arange(len(plus))

        # Wide white lines drawn under the colored ones.
        ax.plot(plus , color = 'white', linewidth = 5)
        ax.plot(minus, color = 'white', linewidth = 5)
        ax.plot(plus, color = 'red', linewidth = 3)
        ax.plot(minus, color = 'blue', linewidth = 3)

        # Shade between the curves with the color of the upper one.
        ax.fill_between(arange(l), plus, minus,
                        where = greater_equal(minus, plus),
                        color = 'blue', alpha = .5,
                        interpolate = True)
        ax.fill_between(arange(l), minus, plus,
                        where = less_equal(minus, plus),
                        color = 'red', alpha = .5,
                        interpolate = True)
        #ax.set_xlim([min(xdat),max(xdat)]); ax.set_ylim([min(ydat), max(ydat)])

        ax.set_title('Average induction of site mutants (red mutants have stronger pairing)')
        ax.set_ylabel('fold induction')
        ax.set_xlabel('mutation site')
    elif showtype == 'first_part_energies':
        # Replace the square figure with a cleared wide one holding two
        # panels.
        f = myplots.fignum(3,(8,4))
        f.clear()
        ax = f.add_subplot(121)
        # Mean log induction per cell; cells whose inductions sum to zero
        # (including empty ones) become NaN.
        img =array( [[mean(log(inductions)) if sum(inductions) != 0 else nan for inductions in row] for row in dub_muts])
        # Replace infinite cells with the smallest finite value.
        img[isinf(img)] = min(img[isfinite(img)])
        ax.imshow(img,
                  interpolation = 'nearest',
                  cmap = plt.get_cmap('OrRd'))

        ax2 = f.add_subplot(122)
        # Number of observations per cell.
        ax2.imshow([[len(log(inductions)) for inductions in row] for row in dub_muts],
                   interpolation = 'nearest',
                   cmap = plt.get_cmap('OrRd'))

        for a in [ax,ax2]:
            a.set_xticks(keysrt.values())
            a.set_xticklabels(keysrt.keys())
            a.set_yticks(keysrt.values())
            a.set_yticklabels(keysrt.keys())

        ax.set_title('mean induction change')
        ax2.set_title('transition counts')
        ax.set_ylabel('wildtype base')
        ax.set_xlabel('mut base')
    else:
        raise Exception()

    if showtype in ['show_avgs', 'show_all', 'smoothed_avg']:
        # Remember the axis limits so the motif overlays do not rescale
        # the plot.
        l0 = [ax.get_ylim(), ax.get_xlim()]
        for rng in motif_rngs():
            ax.plot(rng,[0]*2, linewidth = 6, color = 'black')
            ax.fill_betweenx(ax.get_ylim(), [rng[0]] * 2, [rng[1]] * 2, alpha = .2, color = 'black')
            for r in rng:
                ax.vlines(r,*ax.get_ylim(),alpha = .75)
        ax.set_ylim(l0[0]); ax.set_xlim(l0[1])

    ax.annotate(figtitle, [0,0],
                xycoords ='figure fraction',
                va = 'bottom', ha = 'left')

    f.savefig(figtemplate.format(figtitle))
def run():
    '''Plot, per RVD group, how close its residues sit to the DNA strands.

    ``parse_all()`` supplies three chains: the first is searched for RVD
    residues (``rvd_residues``), the other two provide the DNA atoms.  For
    each residue the squared distances from its atoms to every DNA atom are
    computed, the 3 smallest are averaged per atom, and the 3 smallest of
    those averages are kept.  One line per RVD group is drawn, colored and
    labelled by the two-character tag read from ``seq()`` at the positions
    given by ``rvds()``, and the figure is saved as 'tal_rvd_distances'.
    '''
    chains = parse_all()
    protein = chains[0]

    # Per-residue totals for the first chain (only consumed by the
    # disabled plot_charges call at the end).
    residue_keys = sorted(protein.keys())
    charges = array([sum([float(atom['charge'])
                          for atom in protein[key]['pqr']])
                     for key in residue_keys])
    coords = array([mean([atom.get_coord()
                          for atom in protein[key]['pdb']], 0)
                    for key in residue_keys])

    c1, c2 = chains[1:]
    strand_charges = []
    strand_coords = []
    for strand in [c1, c2]:
        strand_keys = sorted(strand.keys())
        strand_charges.append(array([sum([float(atom['charge'])
                                          for atom in strand[key]['pqr']])
                                     for key in strand_keys]))
        strand_coords.append(array([mean([atom.get_coord()
                                          for atom in strand[key]['pdb']], 0)
                                    for key in strand_keys]))

    # All atom coordinates of strand 1 followed by those of strand 2.
    dna_atoms = array([atom.get_coord()
                       for strand in (c1, c2)
                       for key in sorted(strand)
                       for atom in strand[key]['pdb']])

    # Nearest neighbor params:
    kres = 0
    katoms_dna = 3
    kres_atoms = 3

    rvd_res = rvd_residues(protein, kres)

    xs, ys, cs, ss, ecs = [], [], [], [], []
    rdists = []
    rvd_groups = []
    for group_idx, rvd in enumerate(rvd_res):
        for residue in rvd:
            atoms = array([atom.get_coord() for atom in residue['pdb']])

            # Pairwise squared distances via |a|^2 + |b|^2 - 2 a.b
            atom_sq = sum(square(atoms), 1)[:, newaxis]
            dna_sq = sum(square(dna_atoms), 1)[newaxis, :]
            cross = sum(atoms[:, newaxis, :] * dna_atoms[newaxis, :, :], 2)
            dists = atom_sq + dna_sq - 2 * cross

            # Average of the closest DNA atoms for each residue atom, then
            # the closest residue atoms by that average.
            nearest_dna = np.sort(dists, 1)[:, :katoms_dna]
            atom_knn_avgdist = np.mean(nearest_dna, 1)
            res_k_avgdist = np.sort(atom_knn_avgdist)[:kres_atoms]

            xs.append(mean(atoms[:, 0]))
            ys.append(mean(atoms[:, 1]))
            cs.append(1 / res_k_avgdist)
            rdists.append(res_k_avgdist)
            ss.append(50)
            ecs.append('none')
            rvd_groups.append(group_idx)

    # Optional overview scatter of the residue positions; switched off.
    show_helix = False
    if show_helix:
        cs = array(cs)
        cs /= np.max(cs)
        f = mp.fignum(1, (12, 12))
        ax = f.add_subplot(111)
        ax.scatter(xs, ys, c=cs, s=ss, edgecolor=ecs)
        f.savefig(mp.figpath(figt.format('tal_rvd_neighborhoods')))

    # Collect the distance vectors of consecutive residues that belong to
    # the same RVD group.
    rvd_dists = []
    for group_idx, members in it.groupby(zip(rvd_groups, rdists),
                                         lambda pair: pair[0]):
        rvd_dists.append((group_idx, [dist for _, dist in members]))

    rs = rvds()
    tags = [seq()[r:r + 2] for r in rs]

    # One color per distinct tag.
    unique_tags = set(tags)
    nt = len(unique_tags)
    tag_idx_in_ct = dict((tag, idx) for idx, tag in enumerate(unique_tags))
    rvd_ct_map = dict((idx, tag_idx_in_ct[tag])
                      for idx, tag in enumerate(tags))
    ct = mycolors.getct(nt)

    f = mp.fignum(3, (12, 12))
    ax = f.add_subplot(111)
    ax.set_xlabel('linear distance')
    ax.set_ylabel('nearest neighbor distance to DNA')

    labels_set = set([])
    for group_idx, group_dists in rvd_dists:
        tag = tags[group_idx]
        line_kwargs = {'color': ct[rvd_ct_map[group_idx]]}
        if tag not in labels_set:
            # Only the first line of each tag gets a legend entry.
            labels_set.add(tag)
            print('labelling')
            line_kwargs['label'] = tag
        ax.plot(group_dists, **line_kwargs)

    ax.legend()
    f.savefig(mp.figpath(figt.format('tal_rvd_distances')))
    #plot_charges(coords, charges, strand_coords)
    return