예제 #1
0
파일: plots.py 프로젝트: bh0085/projects
def align_len_histogram(parsed):
    '''
    Plot a normalized histogram of rescaled 'bits' values on figure 3.

    Only the first value of ``parsed`` is used.

    NOTE(review): the function raises ``Exception`` unconditionally once the
    axes are labelled, so the figure is never saved and everything after the
    ``raise`` is unreachable. That tail also reads ``paths`` and ``deg_c``,
    which are not defined anywhere in this function.
    '''

    p0 = parsed.values()[0]
    # For each inner mapping: sort its 'bits' values and drop the largest one.
    bitlens = array( [sorted([e['bits'] for e in val.values()])[:-1] 
                      for val in p0.values()]).flatten()
    # Concatenate the per-mapping lists into one flat array.
    # NOTE(review): chaining only works if the array above holds list objects
    # (i.e. the per-mapping lists differ in length) -- confirm with real input.
    bitlens = array(list(it.chain(*bitlens)))
    # Shift so the smallest value maps to 8 and double the spread.
    bitlens = 2 * (bitlens - np.min(bitlens.flatten())) + 8
    
    mind=  8 #min(deg_c.values())+.00001
    maxd = max(bitlens) #max(deg_c.values())/3
    # 8 evenly spaced edges, i.e. 7 bins.
    bins = linspace(mind,maxd,8)
    
    h_paths,bin_edges = histogram(bitlens,bins)

    # Convert counts to fractions of the total.
    h_paths = array(h_paths,float)
    h_paths/= sum(h_paths)

    f = myplots.fignum(3, (8,8))

    ax = f.add_subplot(111)

    # Frequencies are drawn against the left edge of each bin.
    ax.plot(bins[:-1], h_paths, color = 'red')
    ax.set_xlabel('alignment hit length')
    ax.set_ylabel('frequency')
    ax.set_title('best matched substring lengths')

    # Unconditional stop: nothing below this line ever runs.
    raise Exception()

    f.savefig(myplots.figpath('walk_centrality'))
     

    paths_cat = paths.flat
    n = len(paths_cat)
    
    degs = [deg_c[p]for p in paths_cat[::10]]
    
    mind=  min(deg_c.values())+.00001
    maxd = max(deg_c.values())/3
    bins = linspace(mind,maxd,8)
    
    h_paths,bin_edges = histogram(degs,bins)
    h_rand,bin_edges  = histogram(deg_c.values(), bins)

    h_paths = array(h_paths,float)
    h_rand = array(h_rand,float)
    h_paths/= sum(h_paths)
    h_rand/= sum(h_rand)

    f = myplots.fignum(3, (8,8))

    ax = f.add_subplot(111)

    ax.plot(bins[:-1], h_paths, color = 'red')
    ax.plot(bins[:-1], h_rand, color = 'black')
    ax.set_xlabel('node centrality')
    ax.set_ylabel('frequency')
    ax.set_title('distribution of centrality in walks vs. random')
예제 #2
0
def align_len_histogram(parsed):
    """Draw the frequency curve of rescaled 'bits' values on figure 3.

    Only the first value of ``parsed`` is read.  An unconditional ``raise``
    follows the plotting calls, so the statements after it never execute.
    """
    first_entry = parsed.values()[0]

    # Sorted 'bits' of every inner mapping, with the largest value dropped.
    trimmed = []
    for val in first_entry.values():
        ordered = sorted([e['bits'] for e in val.values()])
        trimmed.append(ordered[:-1])
    bitlens = array(trimmed).flatten()
    bitlens = array(list(it.chain(*bitlens)))
    bitlens = 2 * (bitlens - np.min(bitlens.flatten())) + 8

    low = 8
    high = max(bitlens)
    edges = linspace(low, high, 8)

    counts, bin_edges = histogram(bitlens, edges)
    freqs = array(counts, float)
    freqs /= sum(freqs)

    fig = myplots.fignum(3, (8, 8))
    axes = fig.add_subplot(111)

    axes.plot(edges[:-1], freqs, color='red')
    axes.set_xlabel('alignment hit length')
    axes.set_ylabel('frequency')
    axes.set_title('best matched substring lengths')

    raise Exception()

    # ---- unreachable from here on (kept as in the original) ----
    fig.savefig(myplots.figpath('walk_centrality'))

    flat_paths = paths.flat
    n = len(flat_paths)

    sampled_degs = [deg_c[p] for p in flat_paths[::10]]

    low = min(deg_c.values()) + .00001
    high = max(deg_c.values()) / 3
    edges = linspace(low, high, 8)

    walk_counts, bin_edges = histogram(sampled_degs, edges)
    rand_counts, bin_edges = histogram(deg_c.values(), edges)

    walk_freqs = array(walk_counts, float)
    rand_freqs = array(rand_counts, float)
    walk_freqs /= sum(walk_freqs)
    rand_freqs /= sum(rand_freqs)

    fig = myplots.fignum(3, (8, 8))
    axes = fig.add_subplot(111)

    axes.plot(edges[:-1], walk_freqs, color='red')
    axes.plot(edges[:-1], rand_freqs, color='black')
    axes.set_xlabel('node centrality')
    axes.set_ylabel('frequency')
    axes.set_title('distribution of centrality in walks vs. random')
예제 #3
0
파일: hapmap.py 프로젝트: bh0085/compbio
def snp_counts(indy_arr, indy_info):
    '''
    Scatter normalized region values per SNP and save the figure as a PDF.

    Every 5th column of ``indy_arr`` starting at column 4 (at most 50 of
    them) becomes one horizontal band of points: x is the region value of
    every 100th individual divided by ``max(regions[:20])``, y is the band
    index, and the marker size is ``10 + 30 * (1 - value)`` of the SNP for
    that same individual.

    Args:
        indy_arr: 2-D array; it is transposed, so presumably rows are
            individuals and columns are SNPs -- TODO confirm.
        indy_info: passed through unchanged to ``indy_regions``.
    '''
    regions = indy_regions(indy_arr, indy_info)

    f = myplots.fignum(4, (8,8))
    ax = f.add_subplot(111)

    # Same sampling stride for regions and SNP values so that every plotted
    # point gets its own marker size.
    stride = 100

    xs = []
    ys = []
    rs = []

    for i, snp in enumerate(indy_arr.T[4::5][:50]):
        rsub = array(regions[::stride], float) / max(regions[:20])
        ys.extend([i] * len(rsub))
        # NOTE(review): the original sliced snp[:2:100], which yields a single
        # value per SNP and left the size list shorter than xs/ys; sampling
        # with the region stride is the presumed intent -- confirm.
        rs.extend(10 + 30 * (1 - snp[::stride]))
        xs.extend(rsub)

        print(i)

    ax.scatter(xs, ys, rs)

    f.savefig(myplots.figpath('regional_snp_counts_first.pdf'))

    return
예제 #4
0
파일: genes.py 프로젝트: bh0085/compbio
def plotPeaks(num = 1):
    '''
    Plot how many peak midpoints fall at each offset from promoter midpoints.

    For chromosome ``num``, every peak whose midpoint lies within 5000 of a
    promoter midpoint contributes its signed offset to a 20-bin histogram
    over [-5000, 5000]; the per-promoter histograms are summed and drawn on
    figure 1. The histogram is obtained through ``mem.getOrSet``.

    Args:
        num: chromosome number used to select peaks and promoters.
    '''
    import cb.utils.plots as myplots

    def setHist(**kwargs):
        # kwargs are not read here; the chromosome comes from the enclosing
        # scope's ``num``.
        n_bins = 20
        window = 5000
        span = [-window, window]

        peaks = getPeaks()['chr{0}'.format(num)]
        proms = getTrackChrPromoters(num = num)

        # The bin edges depend only on n_bins and span, so take them once up
        # front; this keeps bin_offsets defined even with no promoters.
        _, bin_offsets = histogram([], n_bins, span)
        all_hits = zeros(n_bins)

        for v in proms.values():
            mid = (v[0] + v[1]) / 2
            deltas = []
            for p in peaks:
                pmid = (p['start'] + p['end']) / 2
                if abs(pmid - mid) < window:
                    deltas.append(pmid - mid)
            hits, _ = histogram(deltas, n_bins, span)
            all_hits += hits
        return bin_offsets, all_hits

    bin_offsets, hits = mem.getOrSet(setHist,
                                     num = num)
    f = myplots.fignum(1)
    ax = f.add_subplot(111)
    ax.set_xlabel('distance from promoter')
    ax.set_ylabel('counts')
    # One count per bin, drawn at the bin's left edge.
    ax.plot(bin_offsets[:-1], hits)
예제 #5
0
def analyze():
    """Plot Cluc/Gluc enrichment as a matrix, then against mismatch count.

    Two PDFs are written through ``myplots.figpath``: ``corr_matrix.pdf``
    (image of ``cluc / gluc``) and ``enrichment_vs_mm.pdf`` (scatter of the
    flattened ratio, last 6 entries dropped, against the mismatch count of
    each entry, plus a straight-line fit).
    """
    f = myplots.fignum(1)

    data = get_data()
    gl = data['gluc']
    cl = data['cluc']

    ax = f.add_subplot(111)
    ax.imshow(cl / gl, aspect='auto', interpolation='nearest')
    ax.set_title('Enrichment of Cluc over control Gluc')
    path = myplots.figpath('corr_matrix.pdf')
    f.savefig(path)

    f.clear()
    ax = f.add_subplot(121)
    enrichment = cl.flatten()[:-6] / gl.flatten()[:-6]

    # Each mismatch level is repeated 3 times.
    # NOTE(review): presumably three replicates per construct -- confirm.
    mm_levels = [0, 2, 2, 2, 3, 3, 3, 0, 2, 2, 2, 3, 3, 3]
    n_mm = array([[e, e, e] for e in mm_levels], float).flatten()

    ax.set_title('cluc enrichment vs mm count')
    ax.set_xlabel('mismatch count')
    ax.set_ylabel('fold enrichment ocluc')
    ax.scatter(n_mm, enrichment)

    # Draw the linear fit at the mismatch counts themselves so the line sits
    # on the same x axis as the scatter points.
    pf1 = polyfit(n_mm, enrichment, 1)
    fit_x = [0, 2, 3]
    ax.plot(fit_x, polyval(pf1, fit_x))

    path = myplots.figpath('enrichment_vs_mm.pdf')
    f.savefig(path)

    # NOTE(review): requested after the save and never used in the visible
    # code -- presumably a leftover; confirm before removing.
    ax2 = f.add_subplot(121)
예제 #6
0
파일: results.py 프로젝트: bh0085/zhang
def analyze():
    '''
    Save two figures describing Cluc enrichment over the Gluc control.

    ``corr_matrix.pdf`` shows ``cluc / gluc`` as an image.
    ``enrichment_vs_mm.pdf`` scatters the flattened ratio (last 6 entries
    dropped) against a per-entry mismatch count and overlays a linear fit.
    '''
    f = myplots.fignum(1)

    measurements = get_data()
    gl = measurements['gluc']
    cl = measurements['cluc']

    ax = f.add_subplot(111)
    ax.imshow(cl / gl, aspect = 'auto', interpolation = 'nearest')
    ax.set_title('Enrichment of Cluc over control Gluc')
    f.savefig(myplots.figpath('corr_matrix.pdf'))

    f.clear()
    ax = f.add_subplot(121)
    glf = gl.flatten()[:-6]
    clf = cl.flatten()[:-6]
    fold = clf / glf

    # Mismatch count of every flattened entry: each level appears 3 times.
    levels = [0,2,2,2,3,3,3,0,2,2,2,3,3,3]
    n_mm = array([[e,e,e] for e in levels], float).flatten()

    ax.set_title('cluc enrichment vs mm count')
    ax.set_xlabel('mismatch count')
    ax.set_ylabel('fold enrichment ocluc')
    ax.scatter(n_mm, fold)

    # Evaluate and draw the straight-line fit at the mismatch counts used on
    # the x axis; without explicit x values the line would be drawn at
    # positions 0, 1, 2 instead of 0, 2, 3.
    pf1 = polyfit(n_mm, fold, 1)
    mm_axis = [0,2,3]
    ax.plot(mm_axis, polyval(pf1, mm_axis))

    f.savefig(myplots.figpath('enrichment_vs_mm.pdf'))

    # NOTE(review): unused in the visible code; kept because it touches the
    # figure after saving -- confirm whether it can be dropped.
    ax2 = f.add_subplot(121)
예제 #7
0
def run(meth = 'moment'):
    '''
    Run ``bs.run0`` on the module-level ``arr`` and save the result as a heatmap.

    The orderings returned by ``bs.run0`` are matched to ``cols`` and ``rows``
    by length and used to build the tick labels. The label lists are also
    printed.
    '''
    out, srts = bs.run0(arr = arr, itr = 2, meth = meth)
    fig = myplots.fignum(3, (12, 6))
    axes = fig.add_subplot(111)

    # Pick the ordering whose length matches each axis.
    col_order = [s for s in srts if len(s) == len(cols)][0]
    row_order = [s for s in srts if len(s) == len(rows)][0]
    y_labels = [rows[r] for r in row_order]
    x_labels = [cols[c] for c in col_order]

    image = axes.imshow(out,
                        interpolation = 'nearest',
                        cmap = plt.get_cmap('OrRd'))

    axes.set_xticks(arange(len(cols)) + .25)
    axes.set_yticks(arange(len(rows)) + .25)

    axes.set_yticklabels(list(y_labels))
    axes.set_xticklabels(x_labels)

    # The 'rows' line lists the x-axis labels and the 'cols' line the y-axis
    # labels, exactly as the headings were paired before.
    print('rows: \n{0}'.format(', '.join([e.strip() for e in x_labels])))
    print('')
    print('cols: \n{0}'.format(', '.join([e.strip() for e in y_labels])))

    plt.colorbar(image)

    out_path = myplots.figpath('correlation_plot_2_4_{0}.pdf').format(meth)
    fig.savefig(out_path)
    return
예제 #8
0
파일: run.py 프로젝트: bh0085/compbio
def gdraw0(graphs, plotname = 'default_name', measure = 'cosine'):
    '''
    Scatter each network's similarity to the 'kg' network and save two PDFs.

    Args:
        graphs: mapping of network name to graph; must contain the key 'kg'.
        plotname: inserted into both output file names.
        measure: similarity measure; only 'cosine' is handled, anything else
            raises ``Exception``.

    The figure is saved once without x tick labels and once with network
    names and a colour key.
    '''
    # NOTE(review): pos is computed but never used below.
    pos = nx.graphviz_layout(graphs['kg'])


    # Adjacency matrix of every graph, in graphs.values() order.
    adjs = [ array(nx.adj_matrix(g)) for g in graphs.values() ]
    nrms = []
    for a in adjs:
            # Scale each matrix by sqrt(sum(a**2)).
            # NOTE(review): whether this is a scalar norm depends on which
            # `sum` is in scope (builtin vs numpy) -- confirm.
            n = sqrt(sum(a**2))
            nrms.append(a / n)
    
    # Position of 'kg' in keys(); relies on keys() and values() sharing order.
    kgelt = graphs.keys().index('kg')
    if measure == 'cosine':
        # Similarity of every network to 'kg', rounded to 8 decimals.
        sims = array([round(nfu.cosine_adj(a1,nrms[kgelt]),8) for a1 in nrms])
    else:
        raise Exception()

    # NOTE(review): kg is not used below.
    kg = graphs['kg']
    # Indices that would sort the network names.
    srto = argsort(graphs.keys()) 
    #XVALs give ranks of each key index.
    xvals = argsort(srto)


    # Colour by name substrings; the first rule that matches wins and
    # 'black' is the fallback.
    cols = map(lambda x: 
               ('flt' in x and x.count('thr') > 1) and 'orange' or
               ('flt' in x) and 'red' or
               ('thr' in x) and 'yellow' or
               ('fg' in x) and 'green' or 
               ('su' in x) and 'blue' or 
               'black', graphs.keys())

    yvals = sims

    # The gcf() result is replaced immediately by the fignum() figure.
    f = plt.gcf()
    f = myplots.fignum(3, (.25 * len(sims),10))
    f.clear()
    ax = f.add_subplot(111)
    # NOTE(review): yvals is a numpy array, so `+ [0.]` adds 0 elementwise
    # rather than appending a value -- presumably meant to include 0 in the
    # limits; confirm.
    myplots.padded_limits(ax,xvals,yvals + [0.], margin = [.02,.02])
    ax.scatter(xvals,yvals,100, color = cols)
    ax.set_ylabel('red fly similarity ({0})'.format(measure))
    ax.set_xlabel('networks')
    ax.set_xticklabels([])
    ax.set_xticks([])
    # Reference lines at 0, the median, the mean, the second-largest
    # similarity and 1.
    mark_ys = [0, median(sims), mean(sims), sort(sims)[::-1][1],1]
    ax.hlines(mark_ys, *ax.get_xlim(), linestyle = ':',alpha = .2)
    

    # First save: no x tick labels.
    f.savefig(cfg.dataPath('figs/meAWG/filter_{0}_meth_{1}_nolabels.pdf'.\
                               format(plotname,measure)))


    # Second save: add ticks, a name/colour listing and rotated tick labels.
    ax.set_xticks(range(len(srto)))
    ax.annotate('\n'.join(' '.join(z) for z in zip(graphs.keys(),cols)),
                [0,1],xycoords = 'axes fraction', va = 'top')
    
    ax.set_xticklabels([graphs.keys()[i] for i in srto], 
                       rotation = 45, size = 'xx-small',ha = 'right')

    f.savefig(cfg.dataPath('figs/meAWG/filter_{0}_meth_{1}_labels.pdf'.\
                               format(plotname,measure)))
예제 #9
0
def make_edge_comparisons(cgraphs, bgraphs):
    '''
    Compare the edge set of every graph in ``cgraphs`` against each graph in
    ``bgraphs`` and save one three-panel bar figure per ``bgraphs`` entry.

    The three metrics are intersection over union ('jaccard'), intersection
    over the ``cgraphs`` edge count ('spec') and intersection over the
    ``bgraphs`` edge count ('sens').
    '''
    cgsets = dict([(name, set(graph.edges()))
                   for name, graph in cgraphs.iteritems()])

    for bname, bg in bgraphs.iteritems():
        fig = myplots.fignum(3, (8, 8))
        fig.clear()
        panels = [fig.add_subplot(spec) for spec in (311, 312, 313)]
        ccolors = dict(zip(cgraphs.keys(), mycolors.getct(len(cgraphs))))

        bgset = set(bg.edges())

        yvals = {'jaccard':[], 'spec':[], 'sens':[]}
        names = []
        # Visit the comparison graphs in name order.
        for cname, cg in sorted(cgraphs.iteritems(), key = lambda x: x[0]):
            edges = cgsets[cname]
            shared = float(len(bgset.intersection(edges)))
            yvals['jaccard'].append(shared / len(bgset.union(edges)))
            yvals['spec'].append(shared / len(edges))
            yvals['sens'].append(shared / len(bgset))
            names.append(cname)

        for j, (metric_name, heights) in enumerate(yvals.iteritems()):
            print(heights)
            ax = panels[j]
            # Bars are placed at the rank of their height.
            ranks = argsort(argsort(heights))
            ax.bar(ranks, heights, color = [ccolors[n] for n in names])
            ax.set_title('edge similarity vs {0}, metric: {1}'.\
                             format(bname, metric_name))

            myplots.color_legend(fig, ccolors.values(), ccolors.keys())

        fig.savefig(figtemplate.format('edges_vs_{0}'.format(bname)))
예제 #10
0
파일: ows_tools.py 프로젝트: bh0085/synql
def plot_city_posts(**kwargs):
    """Scatter the 'lons'/'lats' of ``city_posts`` on figure 3.

    Each marker's size is the length of the matching entry in 'posts'.
    Keyword arguments are filtered through ``mem.sr`` before being passed on.
    """
    city_data = city_posts(**mem.sr(kwargs))
    lons = city_data["lons"]
    lats = city_data["lats"]
    sizes = [len(entry) for entry in city_data["posts"]]

    fig = myplots.fignum(3, (4, 4))
    axes = fig.add_subplot(111)
    axes.scatter(lons, lats, s=sizes)
예제 #11
0
파일: worm.py 프로젝트: bh0085/compbio
def make_degree_plots_0():
        '''
        Plot degree distributions per connection type and an 'S' vs 'R'
        degree scatter, then save the two-panel figure.

        Left panel: histogram plus scaled KDE of log10(1 + degree) for every
        entry of ``ctypes``. Right panel: jittered scatter of log10(1 + degree)
        for the 'S' and 'R' types, titled with their correlation coefficient
        and the node with the largest combined value.

        NOTE(review): the output file name uses ``edge_set``, which is not
        defined in this function -- presumably a module-level name.
        '''
        cxns = get_synapse_array()
        # NOTE(review): rows is not used below.
        rows = get_rows()

        imaps = get_array_imaps()
        ctypes =imaps['ctypes']
        ctypes_imap = imaps['ctypes_imap']
        nnames = imaps['nnames']
        nnames_imap = imaps['nnames_imap']
            
        f2 = myplots.fignum(2, (12,6))
        ax1 = f2.add_subplot(121)
        ax2 = f2.add_subplot(122)
        
        # Sum the connection array over axis 1; the result is indexed below
        # as [node, type].
        var_degs = np.sum(cxns,1)
        maxval = log10(np.max(var_degs) + 1)
        ct = mycolors.getct(len(ctypes))
        for z in range(len(ctypes)):
            vals = var_degs[:,z]
            vals = log10(1 + vals)
            # count is not used below; kde is evaluated on the bin edges.
            count,kde = make_kde(vals)
            # 10 edges from 0 to the global maximum.
            xax = linspace(0,maxval,10)
            h = histogram(vals, xax)
            ax1.hist(vals,xax, 
                     color = ct[z],
                     zorder = 10,
                     alpha = .25)
            # Scale the KDE by the number of binned values so it overlays
            # the histogram.
            ax1.plot(xax,kde(xax)*sum(h[0]),
                             label = ctypes[z],
                             color = ct[z],
                             zorder = 5)
            ax1.set_xlabel('$log_10$ of edge degrees of various types')
        ax1.legend()
        
        logxy = [ log10(1 +var_degs[:,ctypes_imap['S']]),
                  log10(1 +var_degs[:,ctypes_imap['R']])]
        # Node with the largest sum of the two log degrees.
        max_inode =np.argmax(logxy[0] + logxy[1])
        # Reverse lookup of that node's name from its index.
        max_nodename = [k 
                        for k,v in nnames_imap.iteritems() 
                        if v == max_inode][0]
        

        # Uniform jitter in [0, .15) is added to both coordinates.
        ax2.scatter(logxy[0]+.15*random.rand(len(nnames))
                    ,logxy[1] + .15*random.rand(len(nnames)),
                    color = 'red',
                    alpha = .3)
        ax2.set_xlabel('Sending Degree')
        ax2.set_ylabel('Receiving Degree')
        # Off-diagonal entry of the 2x2 correlation matrix.
        r2 = corrcoef(logxy[0],logxy[1])[1,0]

        myplots.maketitle(ax2, ('correlation coeff: {0:2.2},\n'+\
                              'max {1} has {2} $e_{{out}}$, {3} $e_{{in}}$')\
                              .format(r2, max_nodename, 
                                      var_degs[max_inode, ctypes_imap['S']],
                                      var_degs[max_inode, ctypes_imap['R']]))
        myplots.maketitle(ax1, 'histogram and KDE of\nvarious edge degrees')        
        f2.savefig(myplots.figpath('degree_histograms_{0}'.format(edge_set)))
예제 #12
0
파일: plot0.py 프로젝트: bh0085/compbio
def plot_easy_inference():
    '''Draw the graph from ``io.getGraph`` on figure 4 and save it as a PDF.'''
    graph = io.getGraph()
    layout = gd.getpos(graph)

    fig = myplots.fignum(4, (8,8))
    axes = fig.add_subplot(111)
    axes.set_title('putative worm chip network')
    gd.easy_draw(graph, layout)

    out_path = myplots.figpath('worm_chip_graph.pdf')
    fig.savefig(out_path)
예제 #13
0
파일: hapmap.py 프로젝트: bh0085/compbio
def snp_count_plot(indy_arr, indy_info):
    '''
    Overlay jittered SNP values and normalized region values on figure 3.

    Every 5th column of ``indy_arr`` from column 4 on (at most 50) is drawn
    for the first 500 individuals, ordered by region value.
    '''
    regions = indy_regions(indy_arr, indy_info)
    fig = myplots.fignum(3, (8,8))
    axes = fig.add_subplot(111)

    n_shown = 500
    for row in indy_arr.T[4::5][:50]:
        # Region values of the first individuals, scaled to a maximum of 1.
        scaled = array(regions[:n_shown], float) / max(regions[:n_shown])
        order = argsort(scaled)
        jittered = row[:n_shown][order] + random.rand(n_shown) * .1
        axes.plot(jittered)
        axes.plot(scaled[order], linewidth = 5)
    return
예제 #14
0
파일: plot0.py 프로젝트: bh0085/compbio
def peak_thr_histograms(**kwargs):
    '''
    Plot log10 counts of distances and of log10 scores for the entries
    returned by ``wp.get_simple_thr`` and save one PDF after each panel.
    '''
    dthr = 1500
    sthr = 1e-2
    dsign = -1
    thr_args = mem.rc(kwargs, dthr = dthr, sthr = sthr, dsign = dsign)
    simple = wp.get_simple_thr(**thr_args)

    # Global score extremes; -1 marks "not set yet".
    min_score = -1
    max_score = -1
    for entry in simple.values():
        scores = entry['scores']
        hi = np.max(scores)
        if max_score == -1 or hi > max_score:
            max_score = hi
        lo = np.min(scores)
        if min_score == -1 or lo < min_score:
            min_score = lo

    # One score bin per integer power of ten between the extremes.
    lrange = [int(floor(log10(min_score))), int(ceil(log10(max_score)))]
    sbin_mids = range(lrange[0], lrange[1] + 1)
    sbins = zeros((len(sbin_mids)))

    # Distance bins of width 50 starting at -dthr.
    dbin_size = 50
    dbin_mids = range(-dthr, dthr, dbin_size)
    dbins = zeros((len(dbin_mids)))

    for entry in simple.values():
        for dist in entry['dists']:
            dbins[int(dist + dthr)/dbin_size] += 1
        for score in entry['scores']:
            sbins[int(log10(score) - lrange[0])] += 1

    fig = myplots.fignum(1, (10,6))

    dist_ax = fig.add_subplot(121)
    dist_ax.set_ylabel('log 10 counts')
    dist_ax.set_xlabel('distance')
    dist_ax.set_title('simplified tss distances (d<{0})'.format(dthr))
    dist_ax.plot(dbin_mids, log10(dbins), color = 'black')
    fig.savefig(myplots.figpath('chip_simple_distance.pdf'))

    score_ax = fig.add_subplot(122)
    score_ax.set_title('simplified tss scores (s<{0:2.2})'.format(sthr))
    score_ax.set_ylabel('log10 counts')
    score_ax.set_xlabel('log10 peak score')
    score_ax.plot(sbin_mids, log10(sbins), color = 'black')
    fig.savefig(myplots.figpath('chip_simple_scores.pdf'))
예제 #15
0
def plot_mers(mer_cts):
    """Plot the log of a 20-bin histogram of ``mer_cts`` values and save it."""
    fig = myplots.fignum(3, (8, 8))
    axes = fig.add_subplot(111)

    counts, edges = histogram(mer_cts.values(), 20)
    log_counts = log(counts)
    # Filled curve drawn at the left edge of every bin.
    axes.fill_between(edges[:-1], log_counts, edgecolor='black', linewidth=5)

    axes.set_xlabel('mer rediscovery rate')
    axes.set_ylabel('$log(n)$')
    axes.set_title('Frequencies of 5-mer reoccurence across 10,000 walks.')

    out_path = myplots.figpath('mer_ct_hist')
    fig.savefig(out_path)

    return
예제 #16
0
파일: analyze.py 프로젝트: bh0085/compbio
def position_activities(cons, seqs, activities,
                        show_wt = False):
    '''
    Plot, per position, the induction ratios of sequences that differ from
    ``cons`` at that position, with percentile lines, and save the figure.

    Args:
        cons: consensus letters, indexed by position.
        seqs: array of sequences; it is transposed and compared letter by
            letter against ``cons`` (assumes one row per sequence -- TODO
            confirm).
        activities: per-sequence pairs; element 0 is divided by element 1.
        show_wt: not used in this function.
    '''

    
    # Induction ratio of every sequence.
    induction = array([a[0] / a[1] for a in activities])
    # NOTE(review): l is not used below.
    l = len(cons)
    
    #wt = [ mean([
    #            val for val in induction[:,i]] if )[nonzero[ for i in range(l)]
    #[for i, let in enumerate(seq)] for seq in seqs.T

    #wt = [[induction[j] 
    #            for j, let in enumerate(seq) if let == cons[i] ] 
    #      for i, seq in enumerate(seqs.T)]
    # mut[i]: induction values of the sequences whose letter at position i
    # differs from the consensus.
    mut = [[induction[j] 
                 for j, let in enumerate(seq) if let != cons[i] ]
           for i, seq in enumerate(seqs.T)]
    
    # 1e-5 ... 10 and 40, followed by their mirror images 100 - p.
    percentiles = [10**x for x in range(-5,2) ]+ [40] 
    percentiles = percentiles + [100 -p for p in percentiles]

    f = myplots.fignum(2, (8,8))
    f.clear()
    ax = f.add_subplot(111)
    

    mtiles = [] 
    for i,mt in enumerate( mut):
        if not mt:
            # No mutants at this position: keep a row of NaNs so the rows
            # stay aligned with positions.
            mtiles.append([nan] * len(percentiles))
        else:
            # Faint points at x = i, with a small vertical jitter.
            ax.scatter( zeros(len(mt)) + i, \
                            log(mt) + random.random(len(mt))*.1,\
                        2 , color = 'black',alpha = .05)
            mtiles.append([percentile(mt,p) for p in percentiles])

    #ax.plot(arange(len(wmeans)), wmeans, color = 'blue', linewidth = 6,alpha = .2)
    
    mtiles = array(mtiles)
    # One red line per percentile, running across positions.
    for mmeans in mtiles.T:
        ax.plot(arange(len(mmeans)), log(mmeans), color = 'red', linewidth = 3, alpha = .6)
    #ax.annotate('$R^2 = {0}$'.format(rsquared),
    #            [1,1], xycoords = 'axes fraction', 
    #            ha = 'right', va = 'top')
    ax.set_xlabel('position mutated (~10 per sequence)')
    ax.set_ylabel('log induction (induced expr/uninduced)')
    ax.set_title('Induction ratios for sequences mutated at points')
    
    figtitle = 'single_position_percentiles'
    f.savefig(figtemplate.format(figtitle))
예제 #17
0
파일: analyze.py 프로젝트: bh0085/compbio
def mut_counts(cons, seqs, name = promoter_type):
    '''
    Count, per position, how often each non-consensus letter occurs in
    ``seqs`` and draw the 4 x len(cons) table with ``seismic.seismic``.

    The consensus letter itself is always recorded as 0.
    '''
    n_positions = len(cons)
    figtitle = '{0}_mut_counts'.format(name)
    fig = myplots.fignum(1, (8,8))
    axes = fig.add_subplot(111)

    lets = ['A','T','G', 'C']
    counts = zeros((4, n_positions))
    for pos in range(n_positions):
        column = list(seqs[:,pos])
        per_letter = []
        for let in lets:
            if let == cons[pos]:
                per_letter.append(0)
            else:
                per_letter.append(column.count(let))
        counts[:,pos] = per_letter
    seismic.seismic(counts, ax = axes)

    fig.savefig(figtemplate.format(figtitle))
예제 #18
0
파일: plot0.py 프로젝트: bh0085/compbio
def peak_distance_histogram(**kwargs):    
    '''
    Plot, for every assay returned by ``wp.get_assay_gprops``, a histogram of
    the 'dist' values of its 'primaries' (green) and 'secondaries' (red),
    and save the figure as a PDF named after ``atype``.
    '''

    atype = kwargs.get('atype', wp. default_atype)
    chips = wp.get_assay_gprops(**mem.rc(kwargs))
    chiplist = chips.values()
    # NOTE(review): chipkeys, xs and ys are not used below.
    chipkeys = chips.keys()
    xs = []
    ys = []
    

    # Largest absolute secondary distance over all assays.
    # NOTE(review): computed but never used below.
    sec_spread = np.max([
            np.max([ 
                    np.max(np.abs([e['dist'] for e in v2['secondaries']]))
                    for v2 in v.values()])
            for v in chips.values()
            ])

    # Bins of width 200 starting at -10000 and stopping before 10000.
    hist_spread = 10000
    bin_wid = 200
    bin_mids = arange(-1* hist_spread, 1*hist_spread,bin_wid)
    bin_starts = bin_mids - bin_wid/2
    nb = len(bin_starts)

    # One histogram row per assay.
    prim_hists = zeros((len(chips), len(bin_starts)))
    sec_hists = zeros((len(chips), len(bin_starts)))
    for i,e in enumerate(chiplist):
        for k,v in e.iteritems():
            # Bin index = dist / bin_wid cast to int, shifted by nb / 2.
            pbins = array([e2['dist']/bin_wid for e2 in v['primaries']],int)
            sbins = array([e2['dist']/bin_wid for e2 in v['secondaries']],int)
            pbins += nb /2
            sbins += nb /2

            # Out-of-range indices are clamped into the first/last bin.
            sbins[less(sbins,0)] = 0
            sbins[greater(sbins,nb-1)] = nb-1
            
            pbins[less(pbins,0)] = 0
            pbins[greater(pbins,nb-1)] = nb-1
            for b in pbins: prim_hists[i][b]+=1
            for b in sbins: sec_hists[i][b]+=1
            
    f= myplots.fignum(1,(8,6))
    ax = f.add_subplot(111)
    ax.set_title('chip peak distances to primary/sec tss for {0}'.format(atype))
    for p in prim_hists:
        ax.plot(bin_mids,p, color = 'green')
    for s in sec_hists:
        ax.plot(bin_mids,s, color = 'red')
    f.savefig(myplots.figpath('chip_distance_hists_for{0}.pdf'.format(atype)))
예제 #19
0
파일: plots.py 프로젝트: bh0085/projects
def plot_mers(mer_cts):
    '''
    Save a filled curve of log bin counts for the values of ``mer_cts``.

    The values are binned into 20 bins; the log of each count is drawn at
    the bin's left edge.
    '''
    figure = myplots.fignum(3, (8,8))
    panel = figure.add_subplot(111)

    bin_counts, bin_edges = histogram(mer_cts.values(), 20)
    left_edges = bin_edges[:-1]
    panel.fill_between(left_edges, log(bin_counts),
                       edgecolor = 'black',
                       linewidth = 5)

    panel.set_xlabel('mer rediscovery rate')
    panel.set_ylabel('$log(n)$')
    panel.set_title('Frequencies of 5-mer reoccurence across 10,000 walks.')

    figure.savefig(myplots.figpath('mer_ct_hist'))

    return
예제 #20
0
파일: parse.py 프로젝트: bh0085/compbio
def load(res = 25):
    '''
    Sample up to 10000 rows from the atlas annotation file and plot them in 3-D.

    The file is read by skipping a fixed number of bytes between samples,
    discarding the (probably partial) line at the landing position and
    parsing the following line as comma-separated floats: the first three
    values are the coordinates, the fourth drives the red channel of the
    marker colour.

    Args:
        res: resolution; only 25 is supported.

    Returns:
        List of (x, y, z) tuples that were sampled.

    Raises:
        ValueError: if ``res`` is not 25.
    '''
    if res == 25: fpath = '/data/brain_atlas/AtlasAnnotation25.sva'
    else: raise ValueError('unsupported resolution: {0!r}'.format(res))

    print('path:  {0}'.format(fpath))
    size = os.path.getsize(fpath)
    n = 10000
    # Byte distance jumped between two samples.
    skip = size // n

    coords = []
    evals = []
    # The with-block guarantees the handle is closed, even if a line fails
    # to parse.
    with open(fpath) as f:
        # Skip the two leading lines of the file.
        f.readline()
        f.readline()
        while len(coords) < n:
            f.seek(skip, 1)
            # The seek lands mid-line; drop the remainder of that line.
            f.readline()
            l = f.readline()
            if l == '':
                break
            lvals = [float(v) for v in l.split(',')]
            coords.append(tuple(lvals[0:3]))
            evals.append(lvals[3])

    print('len:  {0}'.format(len(coords)))
    fig = myplots.fignum(1,(8,6))

    ax = fig.add_subplot(111, projection='3d')

    xyvals = array(coords)
    evals = array(evals)
    # Scale to [.., 1] and expand to RGB with only the red channel set.
    evals = (evals / np.max(evals))[:,newaxis] * array([1.,0,0])
    print(shape(xyvals))
    ax.scatter(xyvals[:,0], xyvals[:,1], xyvals[:,2],
               s = 5,
               edgecolor = 'none',
               facecolor = evals)

    path = myplots.figpath('brainmap_spatial_coords_{0}'.format(res))
    fig.savefig(path)

    return coords
예제 #21
0
파일: analyze.py 프로젝트: bh0085/compbio
def motif_num_occurrence_vs_induction(mgroup = None,
                                      mtuple = None,
                                      hit = True, 
                                      induction_type = 'ratio'):
    '''
    Scatter the number of motifs found per mutant against its log induction.

    Args:
        mgroup: motif group name; when given, the mutants of every motif
            tuple returned by ``motif_grps`` are plotted.
        mtuple: single motif key, used only when ``mgroup`` is None.
        hit: passed through to ``motif_grps``.
        induction_type: 'ratio' (column 0 / column 1 of the mutant values),
            'on' (column 0) or 'off' (column 1).

    Raises:
        ValueError: if ``induction_type`` is not one of the three names.
    '''
    if induction_type not in ('ratio', 'on', 'off'):
        raise ValueError(
            'unknown induction_type: {0!r}'.format(induction_type))

    if mgroup is not None:
        mtuples = motif_grps(mgroup, hit = hit)
        mdict = get_motif_dicts()
        muts_allowed = set(it.chain(*[mdict[k] for k in mtuples]))
    else:
        muts_allowed = set(get_motif_dicts()[mtuple])

    # NOTE(review): the result of this call was always overwritten before
    # use; the call is kept in case it has side effects -- confirm and drop.
    get_mean_induction()
    motifs = get_motifs()
    seqs, rndvals, keys = get_mutants()

    if induction_type == 'ratio':
        mut_inductions = rndvals[:,0] / rndvals[:,1]
    elif induction_type == 'on':
        mut_inductions = rndvals[:,0]
    else:
        mut_inductions = rndvals[:,1]

    inductions = dict([(keys[i], mut_inductions[i])
                       for i in range(len(rndvals))])

    if mgroup is None:
        figtitle = 'motifs/ind_type={1}/occurence_v_induction_tuple={0}'.\
            format(mtuple, induction_type)
    else:
        figtitle = 'motifs/ind_type={1}/occurence_v_induction_group={0}'.\
            format(mgroup, induction_type)

    f = myplots.fignum(3, (8,8))
    ax = f.add_subplot(111)
    ax.scatter(*zip(*[(log(inductions[keys[i]]), len(motifs[keys[i]])) 
                     for i in muts_allowed]))

    ax.set_ylabel('number of motifs found')
    ax.set_xlabel('log induction')

    ax.annotate(figtitle, [1,1], 
                va = 'bottom', ha = 'right',
                xycoords = 'figure fraction')

    # The title contains sub-directories, so make sure they exist first.
    fpath = figtemplate.format(figtitle)
    out_dir = os.path.dirname(fpath)
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)
    f.savefig(fpath)
예제 #22
0
파일: inference.py 프로젝트: bh0085/compbio
def process_rc(cc, rows, cols, meth="binary"):
    """Reorder ``cc`` by row and column membership and save it as an image.

    Every vector in ``rows`` / ``cols`` is assigned the index of its largest
    entry, or ``len(vector)`` when no entry is positive. ``cc`` is then
    reordered by those assignments along both axes.

    Args:
        cc: 2-D array to display.
        rows: one membership vector per row of ``cc``.
        cols: one membership vector per column of ``cc``.
        meth: label inserted into the output file name.

    Raises:
        Exception: always, after the figure has been saved.
    """
    rmembers = zeros(len(rows))
    # Sized by the number of columns; sizing it by len(rows) breaks the
    # column ordering whenever the two counts differ.
    cmembers = zeros(len(cols))
    for i, r in enumerate(rows):
        rmembers[i] = argmax(r) if np.max(r) > 0 else len(r)
    for i, c in enumerate(cols):
        cmembers[i] = argmax(c) if np.max(c) > 0 else len(c)

    rorder = argsort(rmembers)
    corder = argsort(cmembers)

    f = myplots.fignum(3, (8, 8))
    ax = f.add_subplot(111)
    ax.imshow(cc[rorder][:, corder], aspect="auto", interpolation="nearest")

    f.savefig(myplots.figpath("biclustered_expr_{0}.pdf".format(meth)))
    # NOTE(review): unconditional stop kept from the original -- presumably a
    # debugging leftover; confirm before removing, callers currently never
    # get a normal return.
    raise Exception()
예제 #23
0
파일: tal_struct.py 프로젝트: bh0085/zhang
def plot_charges(coords, charges, strand_coords):
    '''
    Scatter the first two coordinate columns coloured by charge sign,
    together with both strands, and save the figure.

    Positive charges are red, all others blue. Each strand is drawn twice:
    opaque below the charges and half-transparent above them.
    '''
    fig = mp.fignum(1, (6,6))
    axes = fig.add_subplot(111)

    colors = []
    for q in charges:
        colors.append('red' if q > 0 else 'blue')

    charge_xy = coords[:,:2].T
    axes.scatter(*charge_xy, c = colors, s = 50, zorder = 5)

    # First pass: opaque, underneath; second pass: translucent, on top.
    for alpha, zorder in ((1, -1), (.5, 6)):
        for strand in (strand_coords[0], strand_coords[1]):
            strand_xy = strand[:,:2].T
            axes.scatter(*strand_xy, c = 'black', alpha = alpha,
                         zorder = zorder)

    out_path = mp.figpath(figt.format('tal_xy_charge_scatter'))
    fig.savefig(out_path)

    return
예제 #24
0
def make_transitivity(graphs):
    '''
    Plot the transitivity of every graph in ``graphs`` and save the figure.

    Each graph is first converted with ``nx.Graph``; tick labels are the
    graph names.
    '''
    fig = myplots.fignum(3,(8,8))

    undirected = {}
    for name, graph in graphs.iteritems():
        undirected[name] = nx.Graph(graph)

    clusters = {}
    for name, graph in undirected.iteritems():
        clusters[name] = nx.algorithms.transitivity(graph)

    axes = fig.add_subplot(111)
    positions = range(len(clusters))
    axes.plot(positions, clusters.values())
    axes.set_title('transitivity for network graphs')
    axes.set_xticks(positions)
    axes.set_xticklabels(clusters.keys())

    fpath = figtemplate.format('transitivity')
    fig.savefig(fpath)
예제 #25
0
파일: ows_tools.py 프로젝트: bh0085/synql
def plot_times(**kwargs):
    """Scatter creation times of the records returned by ``last_5``.

    The loop body starts with an unconditional ``raise``, so the first record
    encountered aborts the function; the time arithmetic after it never runs.

    Returns:
        The list of computed time values (empty whenever the function
        returns at all).
    """
    vals = last_5(**mem.rc(kwargs))

    ys = []
    for group in vals:
        for record in group:

            raise Exception()
            # Unreachable: seconds from month/day/hour/minute/second, with
            # every month counted as 30 days.
            stamp = record.created_at
            days = (stamp.month * 30) + stamp.day
            hours = days * 24 + stamp.hour
            minutes = hours * 60 + stamp.minute
            ys.append((minutes * 60) + stamp.second)

    xs = range(len(ys))

    fig = myplots.fignum(3, (4, 4))
    axes = fig.add_subplot(111)
    axes.scatter(xs, ys)

    return ys
예제 #26
0
파일: stats.py 프로젝트: bh0085/projects
def show_binned_data_1d(descriptors, datum, cmaps):
    '''
    Plot one stacked panel per grid in ``datum``.

    Each grid is summed over its first axis; every column of the result is
    drawn as ``log(2 + value)`` for every 20th sample, one line per task.

    Args:
        descriptors: one entry per grid; element 0 is used as panel title.
        datum: sequence of arrays whose last axis indexes tasks.
        cmaps: per-task line labels.
    '''
    f = myplots.fignum(1, (6, 8))
    n_panels = len(descriptors)
    ntasks = shape(datum[0])[-1]
    task_colors = mycolors.getct(ntasks)

    for i, e in enumerate(datum):
        # Explicit (rows, cols, index) form: unlike a concatenated string
        # spec it also works with ten or more panels.
        ax = f.add_subplot(n_panels, 1, i + 1)
        ax.set_title(descriptors[i][0])

        sums = np.sum(e, 0)
        # Separate index name so the panel index is not shadowed.
        for t, task in enumerate(sums.T):
            ax.plot(log(2 + task[::20]),
                    color=task_colors[t],
                    label=cmaps[t])
예제 #27
0
파일: day0.py 프로젝트: bh0085/compbio
def check_results(locii, results, n_runs = 400):
    '''
    Pair loci with their 'Mean z-score' per result key, then stop.

    An unconditional ``raise`` follows the pairing step, so the scatter plot
    and the save below it never execute.
    '''
    a0 = fetch_num_ali()
    names = fetch_alinames()

    fig = myplots.fignum(3,(8,8))
    ax = fig.add_subplot(211)
    vec = zeros(len(a0[0]))

    xys = {}
    for key in results.keys():
        # Keep only results with at least 19 entries.
        pairs = [[locus, res['Mean z-score']]
                 for res, locus in zip(results[key], locii[key])
                 if len(res) >= 19]
        xys[key] = array(pairs, float).T

    raise Exception()

    # ---- unreachable from here on (kept as in the original) ----
    ax2 = fig.add_subplot(111)

    ax2.scatter(xys[3][0], xys[3][1])

    out_path = myplots.figpath('run0_zscores_{0}runs'.format(n_runs))
    fig.savefig(out_path)
예제 #28
0
파일: plots.py 프로젝트: bh0085/projects
def align_heatmap(parsed):
    '''
    Show a 100 x 100 corner of the pairwise 1 / (.0001 + expect) matrix
    built from the first value of ``parsed``.

    Rows are ordered by their sum and both axes are reversed before the
    corner is taken.
    '''
    p0 = parsed.values()[0]
    # Result is not used further down; kept as in the original.
    bitlens = array([sorted([e['expect'] for e in val.values()])[:-1]
                     for val in p0.values()])
    fig = myplots.fignum(3, (8,8))
    axes = fig.add_subplot(111)

    # Every key that appears at either nesting level.
    nodes = set(p0.keys())
    for inner in p0.values():
        nodes = nodes.union(set(inner.keys()))

    # Matrix index of every node.
    index_of = dict([(node, idx) for idx, node in enumerate(nodes)])

    size = len(nodes)
    z = zeros((size, size))
    for src, targets in p0.iteritems():
        row = index_of[src]
        for dst, hit in targets.iteritems():
            z[row, index_of[dst]] = 1 / (.0001 + hit['expect'])

    by_row_sum = z[argsort(sum(z,1)),:]
    flipped = by_row_sum[::-1,::-1]
    axes.imshow(flipped[:100,:100])
예제 #29
0
def align_heatmap(parsed):
    """Display inverse 'expect' values between nodes as an image.

    Only the first value of ``parsed`` is used. The matrix entry for a pair
    is ``1 / (.0001 + expect)``; rows are sorted by row sum, both axes are
    reversed, and the leading 100 x 100 block is shown on figure 3.
    """
    first = parsed.values()[0]

    # Not read again below; retained from the original.
    trimmed = []
    for val in first.values():
        expects = sorted([e['expect'] for e in val.values()])
        trimmed.append(expects[:-1])
    bitlens = array(trimmed)

    figure = myplots.fignum(3, (8, 8))
    panel = figure.add_subplot(111)

    nodes = set(first.keys())
    for val in first.values():
        nodes = nodes.union(set(val.keys()))

    # Node -> matrix index lookup.
    position = {}
    for idx, node in enumerate(nodes):
        position[node] = idx

    z = zeros((len(nodes), len(nodes)))
    for outer_key, inner in first.iteritems():
        i = position[outer_key]
        for inner_key, record in inner.iteritems():
            j = position[inner_key]
            z[i, j] = 1 / (.0001 + record['expect'])

    row_order = argsort(sum(z, 1))
    ordered = z[row_order, :]
    panel.imshow(ordered[::-1, ::-1][:100, :100])
예제 #30
0
파일: inference.py 프로젝트: bh0085/compbio
def get_coexpression(gc, **kwargs):
    """Return the correlation matrix of the columns of normalized ``gc``.

    ``gc`` is divided by ``sum(gc, 0)``, the correlation coefficients of its
    transpose are computed, and the leading 1000 x 1000 block is saved as
    ``coex_counts_per_tissue.pdf``.

    Args:
        gc: 2-D array of counts.
        **kwargs: accepted but not used.

    Returns:
        The full correlation matrix.
    """
    gc = gc / sum(gc, 0)
    f = myplots.fignum(3, (8, 8))
    ax = f.add_subplot(111)

    cc = corrcoef(gc.T)
    # Only the leading block is drawn; the full matrix is returned.
    ax.imshow(cc[:1000, :1000], aspect="auto")

    f.savefig(myplots.figpath("coex_counts_per_tissue.pdf"))

    return cc
예제 #31
0
파일: stats.py 프로젝트: bh0085/projects
def show_binned_data_1d(descriptors, datum, cmaps):
    '''
    Grab metadata and parsed grids from binned_data_1d and plot them.

    One panel is drawn per grid in ``datum``, stacked vertically. Inside a
    panel the grid is summed over its first axis and each column of the
    result is plotted as ``log(2 + value)`` for every 20th sample.

    Args:
        descriptors: one entry per grid; element 0 becomes the panel title.
        datum: sequence of arrays whose last axis indexes tasks.
        cmaps: label for every task line.
    '''
    f = myplots.fignum(1, (6,8))
    gl = len(descriptors)
    ntasks = shape(datum[0])[-1]
    task_colors = mycolors.getct(ntasks)

    for panel_idx, grid in enumerate(datum):
        # Three-argument subplot spec: a concatenated string such as
        # '{rows}1{index}' stops being valid at ten or more panels.
        ax = f.add_subplot(gl, 1, panel_idx + 1)
        ax.set_title(descriptors[panel_idx][0])

        sums = np.sum(grid, 0)
        # A distinct index keeps the panel counter from being overwritten.
        for task_idx, task in enumerate(sums.T):
            ax.plot(log(2 + task[::20]),
                    color = task_colors[task_idx],
                    label = cmaps[task_idx])
예제 #32
0
파일: stats.py 프로젝트: bh0085/projects
def show_fixations(all_fixations,cmaps):
    '''
    Draw a heat map of the relative change in fixation time between
    consecutive ranked tasks and save it as
    'graph_evolution_acceleration.pdf'.

    all_fixations: mapping indexed by the mutation rates listed in
                   ``mut_vals``.  Each value is a sequence with one dict
                   per condition (positions correspond to ``names``); the
                   dict maps a task id to a dict whose '1' and '5' entries
                   are sequences of items, with item[1] read as a time and
                   item[0] == -1 treated as "skip" for the '5' entries.
    cmaps:         lookup from task id to the label shown on the x axis.

    The task order comes from fixation_stats(all_fixations)['ranked_ages'].
    NOTE(review): the delta array is hard-wired to 8 transitions (9 tasks)
    and the condition names to three entries.
    '''

    fstats = fixation_stats(all_fixations)
    ranked_tasks = fstats['ranked_ages']

    names = ['s_star','torus','torus_repl']
    mut_vals =  [0.0005, 0.001, 0.002, 0.003, 0.004]

    # Jittered scatter data; only consumed by the disabled scatter call
    # further down, but still collected so re-enabling it is a one-liner.
    xs = []
    ys = []
    cs = []

    task_colors = mycolors.getct(9)

    n_conditions = len(list(all_fixations.values())[0])
    all_deltas = zeros((len(mut_vals), n_conditions, 8))
    all_fracs =  zeros((len(mut_vals), n_conditions))
    all_counts = zeros((len(mut_vals), n_conditions))
    mut_spool = []

    for i, m in enumerate(mut_vals):
        fix_map = all_fixations[m]
        for j, e in enumerate(fix_map):
            # rows: ranked tasks, columns: individual entries of the '1' list
            task_10ptime = array([ [item[1] for item in e[rt[0]]['1']]
                                   for rt in ranked_tasks])
            # keep only the columns in which every task has a positive time
            idxs_allowed = nonzero(greater(np.min(task_10ptime, 0),0))[0]
            frac_allowed = float(len(idxs_allowed)) / shape(task_10ptime)[1]

            if len(idxs_allowed) != 0:
                allowed = task_10ptime[:,idxs_allowed]
                # Roll along the task axis so each row is compared with the
                # NEXT ranked task.  Without an axis numpy rolls the
                # flattened array, which compares neighbouring columns of
                # the same task instead.
                deltas = np.mean((roll(allowed, -1, 0) - allowed) / allowed, 1)
                # the last row wraps around to the first task; drop it
                all_deltas[i,j,:] = deltas[:-1]
                all_counts[i,j] = len(idxs_allowed)
                all_fracs[i,j] = frac_allowed
                mut_spool.append({'tuple':(i,j),
                                  'mut':m,
                                  'name': j})

            for k, v in enumerate(e.values()):
                task_c = task_colors[k]
                p10_times = [item[1]
                             for item in v['5'] if item[0] != -1]
                n = len(p10_times)
                these_x = (zeros(n) + i ) + random.uniform(size = n)/3
                xs.extend(these_x)
                ys.extend(p10_times)
                cs.extend([task_c] * n)

    f = myplots.fignum(2,(6,6))
    ax = f.add_subplot(111)
    ax.set_title('fixation times for all tasks')
    ax.set_xlabel('mutation rate')
    ax.set_ylabel('fixation time')
    #ax.scatter(xs, ys, 20, color = cs,alpha = .4)

    f2 = myplots.fignum(3, (8,8))
    ax = f2.add_axes([.3,.3,.6,.6])
    ax.set_title('fixation time (fold change over previous tasks)')
    ax.set_xlabel('task')
    ax.set_ylabel('condition')

    # (task, next task) label pairs; the wrap-around pair is dropped
    xlabels = [(cmaps[e[0][0]],cmaps[e[1][0]])
               for e in zip(ranked_tasks, roll(ranked_tasks,-1,0))][0:-1]

    ax.set_xticks(range(len(xlabels)))
    ax.set_xticklabels(['{0} -> {1}'.format(*xl) for xl in xlabels],
                       rotation = -90,
                       va = 'top',
                       ha = 'left')

    rows = []
    labels = []
    for ms in sorted(mut_spool, key = lambda x:x['name']):
        tup = ms['tuple']
        rows.append(all_deltas[tup[0],tup[1],:])
        labels.append('{0}: mut rate={1};n={2}'.\
                          format(names[ms['name']], ms['mut'],
                                 int(all_counts[tup])))

    im = ax.imshow(rows,interpolation = 'nearest')
    f2.colorbar(im)

    ax.set_yticks(range(len(mut_spool)))
    ax.set_yticklabels(labels)

    f2.savefig(myplots.figpath('graph_evolution_acceleration.pdf'))
예제 #33
0
파일: analyze.py 프로젝트: bh0085/compbio
def motif_name_vs_induction(mgroup = None,
                            mtuple = None,
                            hit = True,
                            induction_type = 'ratio'):
    '''
    For every frequently occurring motif, fit log(induction) against the
    per-sequence motif score (top panel) and sketch where the motif hits
    lie along the first 100 sequences (bottom panel).

    mgroup:         motif group handed to motif_grps(); when given, the
                    mutants of every tuple in the group are pooled.
    mtuple:         key into get_motif_dicts(); used only when mgroup is
                    None.
    hit:            forwarded to motif_grps().
    induction_type: 'ratio' (column 0 / column 1 of the mutant values),
                    'on' (column 0) or 'off' (column 1).

    Raises ValueError for any other induction_type.  The figure is saved
    to figtemplate.format(<title>), creating the directory first when it
    does not exist.  Returns None.
    '''
    if induction_type not in ('ratio', 'on', 'off'):
        raise ValueError('unknown induction_type: {0!r}'.format(induction_type))

    if mgroup is not None:
        mtuples = motif_grps(mgroup, hit = hit)
        mdict = get_motif_dicts()
        muts_allowed = set(it.chain(*[mdict[k] for k in mtuples]))
    else:
        muts_allowed = set(get_motif_dicts()[mtuple])

    motifs = get_motifs()
    _, rndvals, keys = get_mutants()
    # Use only the filtered mutants.
    rndvals = array([rndvals[i] for i in muts_allowed])
    keys = [keys[i] for i in muts_allowed]
    keys_allowed = set(keys)

    if induction_type == 'ratio':
        mut_inductions = rndvals[:,0] / rndvals[:,1]
    elif induction_type == 'on':
        mut_inductions = rndvals[:,0]
    else:
        mut_inductions = rndvals[:,1]

    inductions = dict(zip(keys, mut_inductions))

    if mgroup is None:
        figtitle = 'motifs/ind_type={1}/mname_v_induction_tuple={0}'.\
            format(mtuple, induction_type)
    else:
        figtitle = 'motifs/ind_type={1}/mname_v_induction_group={0}'.\
            format(mgroup, induction_type)

    f = myplots.fignum(3, (8,8))

    # Motif annotations restricted to the allowed sequences.
    allowed = [(skey, seq_motifs)
               for skey, seq_motifs in motifs.items()
               if skey in keys_allowed]

    # Single counting pass instead of list.count() once per motif.
    occurrences = {}
    for skey, seq_motifs in allowed:
        for elt in seq_motifs:
            occurrences[elt['motif']] = occurrences.get(elt['motif'], 0) + 1

    # Keep every motif seen more than 200 times; sorted so that the colour
    # given to each motif is reproducible between runs.
    unq_keys = sorted(m for m, c in occurrences.items() if c > 200)

    ax = f.add_subplot(211)
    ax2 = f.add_subplot(212)
    colors = mycolors.getct(len(unq_keys))
    thr = .15

    for count, mkey in enumerate(unq_keys):
        # Per sequence: summed score and start/stop of every hit of mkey.
        scores = []
        for skey, seq_motifs in allowed:
            hits = [elt for elt in seq_motifs if elt['motif'] == mkey]
            scores.append({'seq': skey,
                           'score': sum([h['score'] for h in hits]),
                           'starts': [h['start'] for h in hits],
                           'stops': [h['end'] for h in hits]})

        strong = [s for s in scores if s['score'] > thr]
        # too few points for a meaningful line fit
        if len(strong) < 3: continue
        these_scores = [s['score'] for s in strong]
        inds = [log(inductions[s['seq']]) for s in strong]

        xax = linspace(min(these_scores), max(these_scores),5)
        pfit = polyfit(these_scores, inds, 1)
        ax.plot(xax, polyval(pfit,xax),
                color = colors[count], linewidth = 3)

        # red: induction falls with score, blue: it rises (or is flat)
        hit_color = 'red' if pfit[0] < 0 else 'blue'
        ofs = 0
        for seqelt in scores[:100]:
            for start,stop in zip(seqelt['starts'], seqelt['stops']):
                ofs += .25
                ax2.plot([start,stop], [ofs, ofs+.2],
                         alpha = .5, color = hit_color)

    fpath = figtemplate.format(figtitle)
    if not os.path.isdir(os.path.dirname(fpath)): os.makedirs(os.path.dirname(fpath))
    f.savefig(fpath)
예제 #34
0
파일: analyze.py 프로젝트: bh0085/compbio
def motif_dist_v_cooperativity(mgroup = None,
                               mtuple = None,
                               hit = True,
                               induction_type = 'ratio',
                               midpoint = None):
    '''
    Plot the 90th percentile of log(induction) against the distance of
    motif hits from ``midpoint``.

    mgroup:         motif group handed to motif_grps(); when given, the
                    mutants of every tuple in the group are pooled.
    mtuple:         key into get_motif_dicts(); used only when mgroup is
                    None.
    hit:            forwarded to motif_grps(); also adds '_hit' to the
                    figure title in group mode.
    induction_type: 'ratio' (column 0 / column 1 of the mutant values),
                    'on' (column 0) or 'off' (column 1).
    midpoint:       required reference position; the distance of a hit is
                    midpoint - mean(start, end), truncated to an integer
                    for binning.

    Only motifs with more than 5 and fewer than 500 hits scoring above
    0.25 are used, most frequent first, capped at 1001 motifs.

    Raises AssertionError when midpoint is None and ValueError for an
    unknown induction_type.  Saves the figure to
    figtemplate.format(<title>); returns None.
    '''
    assert midpoint is not None, 'midpoint is required'
    if induction_type not in ('ratio', 'on', 'off'):
        raise ValueError('unknown induction_type: {0!r}'.format(induction_type))

    if mgroup is not None:
        mtuples = motif_grps(mgroup, hit = hit)
        mdict = get_motif_dicts()
        muts_allowed = set(it.chain(*[mdict[k] for k in mtuples]))
    else:
        muts_allowed = set(get_motif_dicts()[mtuple])

    motifs = get_motifs()
    _, rndvals, keys = get_mutants()
    # Use only the filtered mutants.
    rndvals = array([rndvals[i] for i in muts_allowed])
    keys = [keys[i] for i in muts_allowed]
    keys_allowed = set(keys)

    if induction_type == 'ratio':
        mut_inductions = rndvals[:,0] / rndvals[:,1]
    elif induction_type == 'on':
        mut_inductions = rndvals[:,0]
    else:
        mut_inductions = rndvals[:,1]

    inductions = dict(zip(keys, mut_inductions))

    if mgroup is None:
        figtitle = 'motifs/ind_type={1}/motif_dist_v_cooperativity_tuple={0}'.\
            format(mtuple, induction_type)
    else:
        figtitle = 'motifs/ind_type={1}/motif_dist_v_cooperativity_group={0}{2}'.\
            format(mgroup, induction_type,
                   '_hit' if  hit else '')

    thr = .25
    max_motifs = 1001

    # Count hits above the score threshold per motif name.
    kcounts = {}
    for skey, seq_motifs in motifs.items():
        if skey not in keys_allowed: continue
        for elt in seq_motifs:
            if elt['score'] > thr:
                kcounts[elt['motif']] = kcounts.get(elt['motif'], 0) + 1

    # Moderately frequent motifs only, most frequent first; the name is a
    # tie-breaker so the order is reproducible.
    unq_keys = sorted([m for m, c in kcounts.items() if 5 < c < 500],
                      key = lambda m: (-kcounts[m], m))

    f = myplots.fignum(3, (8,8))
    ax = f.add_subplot(111)

    # (distance from midpoint, log induction) for every qualifying hit
    all_vals = []
    for mkey in unq_keys[:max_motifs]:
        for skey in keys_allowed:
            for elt in motifs[skey]:
                if elt['motif'] == mkey and elt['score'] > thr:
                    all_vals.append((midpoint - mean([elt['start'], elt['end']]),
                                     log(inductions[skey])))

    if all_vals:
        # groupby only merges ADJACENT equal keys, so the data must be
        # sorted by distance first.  Integer truncation is monotone, so
        # sorting by the raw distance keeps every integer bin contiguous.
        all_vals.sort(key = lambda v: v[0])
        dists = []
        p90 = []
        for dist, grp in it.groupby(all_vals, key = lambda v: int(v[0])):
            dists.append(dist)
            p90.append(percentile([v[1] for v in grp], 90))
        # Plotted against the real (possibly negative) distance instead of
        # being used as an array index, where negatives would wrap around.
        ax.plot(dists, p90)

    ax.set_xlabel('distance from strongest promoters')
    ax.set_ylabel('induction')

    ax.annotate(figtitle, [0,0],
                xycoords ='figure fraction',
                va = 'bottom', ha = 'left')
    fpath = figtemplate.format(figtitle)
    if not os.path.isdir(os.path.dirname(fpath)): os.makedirs(os.path.dirname(fpath))
    f.savefig(fpath)
예제 #35
0
파일: stats.py 프로젝트: bh0085/projects
def show_fixations(all_fixations, cmaps):
    '''
    Draw a heat map of the relative change in fixation time between
    consecutive ranked tasks and save it as
    'graph_evolution_acceleration.pdf'.

    all_fixations: mapping indexed by the mutation rates listed in
                   ``mut_vals``.  Each value is a sequence with one dict
                   per condition (positions correspond to ``names``); the
                   dict maps a task id to a dict whose '1' and '5' entries
                   are sequences of items, with item[1] read as a time and
                   item[0] == -1 treated as "skip" for the '5' entries.
    cmaps:         lookup from task id to the label shown on the x axis.

    The task order comes from fixation_stats(all_fixations)['ranked_ages'].
    NOTE(review): the delta array is hard-wired to 8 transitions (9 tasks)
    and the condition names to three entries.
    '''

    fstats = fixation_stats(all_fixations)
    ranked_tasks = fstats['ranked_ages']

    names = ['s_star', 'torus', 'torus_repl']
    mut_vals = [0.0005, 0.001, 0.002, 0.003, 0.004]

    # Jittered scatter data; only consumed by the disabled scatter call
    # further down, but still collected so re-enabling it is a one-liner.
    xs = []
    ys = []
    cs = []

    task_colors = mycolors.getct(9)

    n_conditions = len(list(all_fixations.values())[0])
    all_deltas = zeros((len(mut_vals), n_conditions, 8))
    all_fracs = zeros((len(mut_vals), n_conditions))
    all_counts = zeros((len(mut_vals), n_conditions))
    mut_spool = []

    for i, m in enumerate(mut_vals):
        fix_map = all_fixations[m]
        for j, e in enumerate(fix_map):
            # rows: ranked tasks, columns: individual entries of the '1' list
            task_10ptime = array([[item[1] for item in e[rt[0]]['1']]
                                  for rt in ranked_tasks])
            # keep only the columns in which every task has a positive time
            idxs_allowed = nonzero(greater(np.min(task_10ptime, 0), 0))[0]
            frac_allowed = float(len(idxs_allowed)) / shape(task_10ptime)[1]

            if len(idxs_allowed) != 0:
                allowed = task_10ptime[:, idxs_allowed]
                # Roll along the task axis so each row is compared with the
                # NEXT ranked task.  Without an axis numpy rolls the
                # flattened array, which compares neighbouring columns of
                # the same task instead.
                deltas = np.mean((roll(allowed, -1, 0) - allowed) / allowed, 1)
                # the last row wraps around to the first task; drop it
                all_deltas[i, j, :] = deltas[:-1]
                all_counts[i, j] = len(idxs_allowed)
                all_fracs[i, j] = frac_allowed
                mut_spool.append({'tuple': (i, j), 'mut': m, 'name': j})

            for k, v in enumerate(e.values()):
                task_c = task_colors[k]
                p10_times = [item[1] for item in v['5'] if item[0] != -1]
                n = len(p10_times)
                these_x = (zeros(n) + i) + random.uniform(size=n) / 3
                xs.extend(these_x)
                ys.extend(p10_times)
                cs.extend([task_c] * n)

    f = myplots.fignum(2, (6, 6))
    ax = f.add_subplot(111)
    ax.set_title('fixation times for all tasks')
    ax.set_xlabel('mutation rate')
    ax.set_ylabel('fixation time')
    #ax.scatter(xs, ys, 20, color = cs,alpha = .4)

    f2 = myplots.fignum(3, (8, 8))
    ax = f2.add_axes([.3, .3, .6, .6])
    ax.set_title('fixation time (fold change over previous tasks)')
    ax.set_xlabel('task')
    ax.set_ylabel('condition')

    # (task, next task) label pairs; the wrap-around pair is dropped
    xlabels = [(cmaps[e[0][0]], cmaps[e[1][0]])
               for e in zip(ranked_tasks, roll(ranked_tasks, -1, 0))][0:-1]

    ax.set_xticks(range(len(xlabels)))
    ax.set_xticklabels(['{0} -> {1}'.format(*xl) for xl in xlabels],
                       rotation=-90,
                       va='top',
                       ha='left')

    rows = []
    labels = []
    for ms in sorted(mut_spool, key=lambda x: x['name']):
        tup = ms['tuple']
        rows.append(all_deltas[tup[0], tup[1], :])
        labels.append('{0}: mut rate={1};n={2}'.format(
            names[ms['name']], ms['mut'], int(all_counts[tup])))

    im = ax.imshow(rows, interpolation='nearest')
    f2.colorbar(im)

    ax.set_yticks(range(len(mut_spool)))
    ax.set_yticklabels(labels)

    f2.savefig(myplots.figpath('graph_evolution_acceleration.pdf'))
예제 #36
0
파일: analyze.py 프로젝트: bh0085/compbio
def site_energy_deltas(showtype = 'first_part_energies',
                       muts_allowed = None,
                       mkey = None,
                       figtitle = 'sed',
                       smoothing = None,
                       sub_means = True,
                       en_type = 'double',
                       induction_type = 'ratio'):
    '''
    Plot how the induction of site mutants relates to their pairing energy
    and save the figure to figtemplate.format(<assembled title>).

    showtype:       which view to draw
                    'show_avgs'           - per site, mean induction of
                                            mutants with energy above vs.
                                            not above the wildtype energy
                    'show_all'            - every mutant as one jittered
                                            scatter point
                    'smoothed_avg'        - per-site log averages as lines,
                                            optionally smoothed
                    'first_part_energies' - wildtype-by-mutant grid of mean
                                            log induction and of counts
                    Any other value raises a bare Exception, but only after
                    the per-site loop has run and the figure was created.
    muts_allowed:   optional set of mutant indices; each site's mutants are
                    intersected with it.  Replaced when mkey is given.
    mkey:           optional key into get_motif_dicts(); its entry becomes
                    muts_allowed.
    figtitle:       base of the output name; showtype, mkey, smoothing,
                    en_type (first_part_energies only) and induction_type
                    are appended.
    smoothing:      passed to sgs.smooth() in 'smoothed_avg'; None disables
                    smoothing.
    sub_means:      in 'smoothed_avg', subtract the log of the whole-site
                    average from both curves.
    en_type:        'double' takes element [2] of the get_energies()
                    entries, anything else uses get_sing_energies(); only
                    read for 'first_part_energies'.
    induction_type: 'ratio' (column 0 / column 1 of the mutant values),
                    'on' (column 0) or 'off' (column 1).
                    NOTE(review): any other value leaves mut_inductions
                    unassigned, so its first use fails.
    '''
    seqs, seqs_rndvals, keys = get_mutants()
    mut_inds = site_mut_inds()
    
    # NOTE(review): mean_induction is not read anywhere below.
    mean_induction = get_mean_induction()
    # scatter accumulators (xs, ys, rs, cs), per-site averages for
    # 'smoothed_avg' (pas, mas, was) and the sites whose average was NaN
    xs, ys, rs, cs , pas, mas, was, bads = [[] for i in range(8)]

    cons = get_cons()
    l = len(cons)
    energies = get_energies()
    # energy of a sequence: sum of element [2] of the energies entry of
    # every adjacent pair of characters
    enfun = lambda x : sum([energies[''.join(x[pos:pos+2])][2] for pos in range(len(x) -1)])
    
    # NOTE(review): flip is not read anywhere below.
    flip = 0
    
    # Build the output name from the options in effect.
    figtitle = figtitle + '_' + showtype
    if not mkey == None:
        muts_allowed = set(get_motif_dicts()[mkey])
        figtitle = figtitle + '_{0}'.format(mkey)
    if not smoothing  == None:
        figtitle = figtitle + '_sm={0}'.format(smoothing)
        
    if showtype == 'first_part_energies':
        if en_type == 'double':
            ens = get_energies()
            gibbs = dict([(k,v[2]) for k,v in ens.iteritems()])
        else:
            gibbs = get_sing_energies()
        figtitle = figtitle + '_etype={0}'.format(en_type)
        # length of one energy key; used as the window width below
        enlen = len(gibbs.keys()[0])
        doubles = gibbs.keys()
        # key -> rank of its energy value (argsort of argsort yields ranks)
        keysrt = dict([(doubles[i], elt) for i,elt in enumerate( argsort(argsort(gibbs.values())))])
        # square grid of lists, indexed [wildtype rank][mutant rank], that
        # collects the inductions observed for each transition
        dub_muts = [[[] for j in range(len(keysrt)) ] for i in range(len(keysrt))]

    figtitle = figtitle + '_ind=' + induction_type

    for site, muts in enumerate(mut_inds):
        if muts_allowed != None: muts = list(muts_allowed.intersection(muts))
        # NOTE(review): mut_avg is not read anywhere below.
        mut_avg = mean( seqs_rndvals[muts,0] / seqs_rndvals[muts,1])
        
        # positions compared at this site and the consensus characters there
        trip_rng = position_triplet(site)
        wt_seq = array([cons[s] for s in trip_rng])

        # same positions for every mutant of this site
        # (assumes seqs supports list and 2-D indexing -- TODO confirm)
        mut_seqs = seqs[muts][:,trip_rng]
        
        if induction_type == 'ratio':  mut_inductions =(seqs_rndvals[muts,0] / seqs_rndvals[muts,1])
        elif induction_type == 'on': mut_inductions =  seqs_rndvals[muts,0]
        elif induction_type == 'off': mut_inductions = seqs_rndvals[muts,1]

        wt_e = enfun(wt_seq)
        mut_es = [enfun(seq) for seq in mut_seqs]
        
        if showtype == 'show_avgs':
            
            # mean induction of mutants with energy above the wildtype's
            # versus at or below it
            plus_avg = mean(mut_inductions[nonzero(greater(mut_es, wt_e))[0]])
            minus_avg = mean(mut_inductions[nonzero(less_equal(mut_es, wt_e))[0]])
            cs.extend(['red', 'blue'])
            xs.extend([site] * 2)
            ys.extend([plus_avg, minus_avg])
            # NOTE(review): this appends ONE nested [30, 30] entry while xs
            # and ys grow by two scalars; 'show_all' extends rs with a flat
            # list.  Confirm the nesting is intended.
            rs.extend([[30] * 2])
            
        elif showtype == 'show_all':
                                                    
            cs.extend(['red' if e > wt_e else 'blue' for e in mut_es])
            xs.extend([site] * len(mut_es))
            # small random offset added to every induction value
            ys.extend(mut_inductions + np.random.rand(len(mut_es)) * .1)
            rs.extend([10] * len(mut_es))
    
        elif showtype == 'smoothed_avg':
            
            plus_avg = mean(mut_inductions[nonzero(greater(mut_es, wt_e))[0]])
            minus_avg = mean(mut_inductions[nonzero(less_equal(mut_es, wt_e))[0]])
            whole_avg = mean(mut_inductions)

            # A NaN average is replaced by 1 (log 0 later on) and the site
            # is remembered so both curves can be zeroed there.
            if isnan(plus_avg): 
                bads.append(site)
                plus_avg = 1
            if isnan(minus_avg):
                bads.append(site)
                minus_avg = 1
            if isnan(whole_avg): 
                bads.append(site)
                whole_avg = 1

            pas.append(plus_avg)
            mas.append(minus_avg)
            was.append(whole_avg)


            cs.extend(['red', 'blue'])
            xs.extend([site] * 2)
            ys.extend([plus_avg, minus_avg])
            # NOTE(review): same nested-list extension as in 'show_avgs'.
            rs.extend([[30] * 2])
                        
        elif showtype == 'first_part_energies':
            for idx, trip in enumerate(mut_seqs):
                # skip two-character windows and sites outside 1..28
                if len(trip) == 2 : continue
                if site < 1: continue
                if site > 28: continue

                # File each mutant's induction under
                # (wildtype window rank, mutant window rank).  With three
                # characters and two-character keys only pos 0 is visited.
                for pos in range(len(trip) - enlen):
                    midx = keysrt[''.join(trip[pos:pos+enlen])]
                    cidx = keysrt[''.join(wt_seq[pos:pos+enlen])]
                    dub_muts[cidx][midx].append( mut_inductions[idx] )
            
    
    f = myplots.fignum(3,(8,8))
    ax = f.add_subplot(111)

    

    if showtype in ['show_avgs', 'show_all']: 
        # scatter of log induction against site
        ydat = np.log(ys)
        xdat = array(xs)
        cs = array(cs)
        rs = array(rs)
        ax.scatter(xdat, ydat, array(rs), c= cs, alpha = .2)
        
        # NOTE(review): xlim and ylim are computed but never applied.
        xlim = [min(xs), max(xs)]
        ylim = [min(ydat), max(ydat)]
        ax.set_title('Average induction of site mutants (red mutants have stronger pairing)')
        ax.set_ylabel('fold induction')
        ax.set_xlabel('mutation site')
            

    elif showtype == 'smoothed_avg':
        pas =log( array(pas))
        mas =log( array(mas))
        was =log(array(was))
        # optionally express both curves relative to the whole-site average
        if sub_means: subvec = was
        else: subvec = zeros(len(was))
        if smoothing == None:
            plus = pas - subvec
            minus =mas - subvec 
        else:
            plus =sgs.smooth(pas  -subvec, smoothing)
            minus =sgs.smooth(mas -subvec, smoothing)
    
        # zero both curves at the sites that had a NaN average
        goods = ones(len(plus))
        goods[[b for b in bads]] = 0
        plus = plus * goods
        minus = minus * goods

        # only referenced by the commented-out axis limits further down
        ydat = plus
        xdat = arange(len(plus))

        # wide white lines first so the coloured lines get an outline
        ax.plot(plus , color = 'white', linewidth = 5)
        ax.plot(minus, color = 'white', linewidth = 5)
        ax.plot(plus, color = 'red', linewidth = 3)
        ax.plot(minus, color = 'blue', linewidth = 3)
        # shade between the curves: blue where minus >= plus, red where
        # minus <= plus
        ax.fill_between(arange(l), plus, minus, where = greater_equal(minus, plus),
                        color = 'blue', alpha = .5, interpolate = True)
        ax.fill_between(arange(l), minus, plus, where = less_equal(minus, plus),
                        color = 'red', alpha = .5, interpolate = True)
            
        #ax.set_xlim([min(xdat),max(xdat)]); ax.set_ylim([min(ydat), max(ydat)])
        ax.set_title('Average induction of site mutants (red mutants have stronger pairing)')
        ax.set_ylabel('fold induction')
        ax.set_xlabel('mutation site')
    elif showtype == 'first_part_energies':
        # this mode uses its own wide two-panel figure
        f = myplots.fignum(3,(8,4))
        f.clear()
        ax = f.add_subplot(121)
        # mean log induction per (wildtype, mutant) cell; cells whose
        # inductions sum to 0 (including empty cells) become NaN
        img =array( [[mean(log(inductions)) if sum(inductions) != 0 else nan for inductions in row] for  row in dub_muts])
        # replace infinite entries by the smallest finite value
        img[isinf(img)] = min(img[isfinite(img)]) 
        ax.imshow(img,
                  interpolation = 'nearest',
                  cmap = plt.get_cmap('OrRd'))
        ax2 = f.add_subplot(122)
        # number of observations per cell
        ax2.imshow([[len(log(inductions)) for inductions in row] for row in dub_muts],
                  interpolation = 'nearest',
                  cmap = plt.get_cmap('OrRd'))
        # tick positions are the energy ranks, tick labels the energy keys
        for a in [ax,ax2]:
            a.set_xticks(keysrt.values())
            a.set_xticklabels(keysrt.keys())
            a.set_yticks(keysrt.values())
            a.set_yticklabels(keysrt.keys())
        ax.set_title('mean induction change')
        ax2.set_title('transition counts')
        ax.set_ylabel('wildtype base')
        ax.set_xlabel('mut base')

    else:
        raise Exception()

    if showtype in ['show_avgs', 'show_all', 'smoothed_avg']:
        # Remember the limits, overlay the motif ranges, then restore the
        # limits so the overlay does not rescale the axes.
        l0 = [ax.get_ylim(), ax.get_xlim()]
        
        for rng in motif_rngs():
            ax.plot(rng,[0]*2, linewidth = 6, color = 'black') 
            ax.fill_betweenx(ax.get_ylim(), [rng[0]] * 2, [rng[1]] * 2, alpha = .2, color = 'black')
            for r in rng:
                ax.vlines(r,*ax.get_ylim(),alpha = .75)
        ax.set_ylim(l0[0]); ax.set_xlim(l0[1])
        


    # stamp the assembled title in the bottom-left corner of the figure
    ax.annotate(figtitle, [0,0],
                xycoords ='figure fraction',
                va = 'bottom', ha = 'left')

    f.savefig(figtemplate.format(figtitle))
예제 #37
0
파일: tal_struct.py 프로젝트: bh0085/zhang
def run():
    '''
    Measure how close each RVD residue lies to the DNA atoms and plot the
    result per RVD, coloured by RVD tag.

    parse_all() has to return exactly three chains: chain 0 is the one
    handed to rvd_residues(), chains 1 and 2 supply the atoms collected in
    ``dna_atoms``.  The figure is saved through
    mp.figpath(figt.format('tal_rvd_distances')).  Returns None.

    NOTE(review): the "distances" are squared Euclidean distances (no
    square root is taken) although the axis label says distance -- confirm
    that this is intended.
    '''
    def total_charges(chain):
        # summed 'pqr' charge of every residue, in sorted key order
        return array([sum([float(e['charge']) for e in chain[k]['pqr']])
                      for k in sorted(chain.keys())])

    def centroids(chain):
        # mean 'pdb' atom coordinate of every residue, in sorted key order
        return array([mean([e.get_coord() for e in chain[k]['pdb']], 0)
                      for k in sorted(chain.keys())])

    chains = parse_all()
    c0 = chains[0]
    # unpacking fails unless exactly two further chains are present
    c1, c2 = chains[1:]

    # Only consumed by the disabled plot_charges() call at the bottom.
    charges = total_charges(c0)
    coords = centroids(c0)
    strand_charges = [total_charges(c) for c in (c1, c2)]
    strand_coords = [centroids(c) for c in (c1, c2)]

    # coordinates of every atom of both strands, strand 1 first
    dna_atoms = array([e.get_coord()
                       for strand in (c1, c2)
                       for k in sorted(strand)
                       for e in strand[k]['pdb']])

    #nearest neighbor params:
    kres = 0
    katoms_dna = 3
    kres_atoms = 3

    rvd_res = rvd_residues(c0, kres)

    xs = []
    ys = []
    cs = []
    ss = []
    ecs = []
    rdists = []
    rvd_groups = []

    for i, rvd in enumerate(rvd_res):
        for r in rvd:
            atoms = array([e.get_coord() for e in r['pdb']])
            # pairwise squared distances, |a|^2 + |b|^2 - 2 a.b,
            # rows: residue atoms, columns: DNA atoms
            dists = sum(square(atoms),1)[:,newaxis] + \
                sum(square(dna_atoms),1)[newaxis,:] - \
                2 *sum(atoms[:,newaxis,:] * dna_atoms[newaxis,:,:],2)
            # per residue atom: mean over its katoms_dna nearest DNA atoms
            atom_srt_dists = np.sort(dists, 1)
            atom_knn_avgdist = np.mean(atom_srt_dists[:,:katoms_dna],1)
            # per residue: the kres_atoms smallest of those values
            # (kept as a vector, not averaged)
            res_k_avgdist = np.sort(atom_knn_avgdist)[:kres_atoms]

            xs.append(mean(atoms[:,0]))
            ys.append(mean(atoms[:,1]))
            cs.append(1/res_k_avgdist)
            rdists.append(res_k_avgdist)
            ss.append(50)
            ecs.append('none')
            rvd_groups.append(i)

    show_helix = False
    if show_helix:
        cs = array(cs)
        cs /= np.max(cs)
        f = mp.fignum(1, (12,12))
        ax = f.add_subplot(111)
        ax.scatter(xs,ys,c = cs, s= ss, edgecolor = ecs)
        f.savefig(mp.figpath(figt.format('tal_rvd_neighborhoods')))

    # rvd_groups is non-decreasing by construction, so groupby yields
    # exactly one group per RVD
    rvd_dists = [(k, [e[1] for e in g])
                 for k, g in it.groupby(zip(rvd_groups, rdists), lambda x: x[0])]

    # two-character tag of every RVD; seq() is fetched once, not per RVD
    sequence = seq()
    tags = [sequence[r:r+2] for r in rvds()]
    unique_tags = set(tags)
    tag_color_idx = dict((tag, idx) for idx, tag in enumerate(unique_tags))
    ct = mycolors.getct(len(unique_tags))

    f = mp.fignum(3, (12,12))
    ax = f.add_subplot(111)
    ax.set_xlabel('linear distance')
    ax.set_ylabel('nearest neighbor distance to DNA')

    labelled = set()
    for k, g in rvd_dists:
        tag = tags[k]
        color = ct[tag_color_idx[tag]]
        if tag in labelled:
            ax.plot(g, color = color)
        else:
            # label only the first curve of each tag so that the legend
            # shows every tag once
            labelled.add(tag)
            print('labelling')
            ax.plot(g, color = color, label = tag)
    ax.legend()

    f.savefig(mp.figpath(figt.format('tal_rvd_distances')))

    #plot_charges(coords, charges, strand_coords)