예제 #1
0
파일: parse.py 프로젝트: bh0085/compbio
    def set_simple_description(**kwargs):
        """Flatten the primary gene hits of every factor into parallel sequences.

        For each factor key returned by ``get_assay_gprops`` the ``'primaries'``
        of all of its assays are concatenated (in ``assays.keys()`` order) and
        stored under:

        - ``'gnames'``: text sliced from each gene's second ``db_xref``
          qualifier, from character offset 9 onward
        - ``'genes'``:  the gene objects themselves
        - ``'dists'``:  array of the hits' ``'dist'`` values
        - ``'scores'``: array of the ``'score'`` values of the corresponding
          peaks returned by ``get_assay_info``

        Returns a dict mapping factor key -> dict with those four entries.
        """
        props = get_assay_gprops(**mem.sr(kwargs))
        chips = get_assay_info(**mem.sr(kwargs))

        def gene_id(gene):
            # NOTE(review): presumably drops a fixed 9-character database
            # prefix from the xref string -- confirm against the annotations.
            return gene.qualifiers['db_xref'][1][9:]

        simple = {}
        for tf, assays in props.iteritems():
            assay_keys = assays.keys()
            hits = [hit
                    for key in assay_keys
                    for hit in assays[key]['primaries']]
            peaks = [peak
                     for key in assay_keys
                     for peak in chips[tf][key]]
            simple[tf] = {
                'gnames': [gene_id(hit['gene']) for hit in hits],
                'genes': [hit['gene'] for hit in hits],
                'dists': array([hit['dist'] for hit in hits]),
                'scores': array([peak['score'] for peak in peaks]),
            }
        return simple
예제 #2
0
파일: worm.py 프로젝트: bh0085/compbio
    def set_synapse_array(**kwargs):
        """Accumulate synapse rows into a 3-d array.

        The result has shape ``(len(nnames), len(nnames), len(ctypes))``; each
        row of ``get_synapse_dict`` adds ``row[3]`` at the cell addressed by
        the dict key, ``row[1]`` and ``row[2]`` (translated through the index
        maps from ``get_array_imaps``).
        """
        imaps = get_array_imaps(**mem.sr(kwargs))
        n_types = len(imaps['ctypes'])
        type_index = imaps['ctypes_imap']
        n_names = len(imaps['nnames'])
        name_index = imaps['nnames_imap']

        all_out_cxns = get_synapse_dict(**mem.sr(kwargs))

        cxns = zeros((n_names, n_names, n_types))
        for source, rows in all_out_cxns.iteritems():
            for row in rows:
                target, ctype, amount = row[1], row[2], row[3]
                cell = (name_index[source], name_index[target],
                        type_index[ctype])
                cxns[cell] += amount

        return cxns
예제 #3
0
파일: ows_tools.py 프로젝트: bh0085/synql
def plot_city_posts(**kwargs):
    """Scatter the ``city_posts`` entries at (lon, lat).

    Marker size is the number of items in each entry of ``'posts'``.
    The plot is drawn on figure 3; nothing is returned.
    """
    city_data = city_posts(**mem.sr(kwargs))
    lons, lats = city_data["lons"], city_data["lats"]
    sizes = [len(posts) for posts in city_data["posts"]]

    fig = myplots.fignum(3, (4, 4))
    axes = fig.add_subplot(111)
    axes.scatter(lons, lats, s=sizes)
예제 #4
0
파일: parse.py 프로젝트: bh0085/compbio
    def set_assay_gprops(**kwargs):
        """Pick, for every assay peak, its two nearest flanking genes.

        Each peak record from ``get_assay_info`` names four candidate genes
        (``fdown_gene``, ``fup_gene``, ``rdown_gene``, ``rup_gene``).  The
        distance from the peak ``'mean'`` is measured to
        ``location.start.position`` for the two forward candidates and to
        ``location.end.position`` for the two reverse candidates.  The
        candidate with the smallest absolute distance becomes the peak's
        primary hit, the runner-up its secondary hit.

        Returns:
            dict: ``{factor: {assay: {'primaries': [...], 'secondaries': [...]}}}``
            where every list holds one ``{'gene': ..., 'dist': ...}`` dict per
            peak.  ``'dist'`` is the raw delta, negated for the forward
            candidates.
        """
        chips = get_assay_info(**mem.sr(kwargs))
        genes = parse_genes()
        tf_stats = {}
        for k, v in chips.iteritems():
            tf_stats[k] = {}
            for k2, v2 in v.iteritems():
                # Progress report: number of assays processed so far.  The
                # comprehension variable is deliberately not called ``v`` so
                # it cannot rebind the outer loop variable under Python 2.
                print('n_exps = {0}'.format(
                    np.sum([len(done) for done in tf_stats.values()])))

                f_gups = [genes[e['chr']][e['fup_gene']] for e in v2]
                f_gdowns = [genes[e['chr']][e['fdown_gene']] for e in v2]
                r_gups = [genes[e['chr']][e['rup_gene']] for e in v2]
                r_gdowns = [genes[e['chr']][e['rdown_gene']] for e in v2]

                # Forward candidates are measured from the gene start,
                # reverse candidates from the gene end.
                fup_deltas = [e['mean'] - gene.location.start.position
                              for e, gene in zip(v2, f_gups)]
                fdown_deltas = [e['mean'] - gene.location.start.position
                                for e, gene in zip(v2, f_gdowns)]
                rup_deltas = [e['mean'] - gene.location.end.position
                              for e, gene in zip(v2, r_gups)]
                rdown_deltas = [e['mean'] - gene.location.end.position
                                for e, gene in zip(v2, r_gdowns)]

                # Column order of ``deltas`` and entry order of ``candidates``
                # must agree: 0 = fdown, 1 = fup, 2 = rdown, 3 = rup.
                candidates = (f_gdowns, f_gups, r_gdowns, r_gups)
                deltas = array([fdown_deltas,
                                fup_deltas,
                                rdown_deltas,
                                rup_deltas]).T

                # Per peak: candidate columns ordered by absolute distance.
                csrt = argsort(np.abs(deltas), 1)

                primaries = []
                secondaries = []
                for i, order in enumerate(csrt):
                    for rank, col in enumerate(order[:2]):
                        hit = {'gene': candidates[col][i],
                               # flip the sign for the forward columns (0, 1)
                               'dist': deltas[i, col] * (-1 if col < 2 else 1)}
                        (primaries if rank == 0 else secondaries).append(hit)

                tf_stats[k][k2] = {'primaries': primaries,
                                   'secondaries': secondaries}
        return tf_stats
예제 #5
0
파일: worm.py 프로젝트: bh0085/compbio
 def set_graph(**kwargs):
     """Build a weighted directed graph from the rows of ``get_rows``.

     Rows are grouped by their first field; only rows whose third field is in
     ``get_edge_set()`` are kept.  Every kept row contributes the edge
     ``row[0] -> row[1]`` with weight ``row[3]``.
     """
     edge_set = get_edge_set()
     rows = get_rows(**mem.sr(kwargs))

     def source_of(row):
         return row[0]

     # groupby only merges adjacent items, hence the sort on the same key.
     sub_cxns = {}
     for source, group in it.groupby(sorted(rows, key=source_of),
                                     key=source_of):
         sub_cxns[source] = [row for row in group if row[2] in edge_set]

     g = nx.DiGraph()
     for kept in sub_cxns.values():
         weighted = [(row[0], row[1], row[3]) for row in kept]
         g.add_weighted_edges_from(weighted)
     return g
예제 #6
0
파일: utils.py 프로젝트: bh0085/compbio
 def set_term_network(**kwargs):
     """Build an undirected network joining genes that share a term group.

     Keyword Args:
         name: which gene set seeds the nodes -- ``'bdtnp'`` (keys of
             ``nio.getBDTNP()``) or ``'kn'`` (nodes of ``graphs['kn']``).
         Remaining keyword arguments are forwarded to ``term_groups`` through
         ``mem.sr``.

     Returns:
         The graph containing every gene of the chosen set as a node and, for
         each group from ``term_groups``, an edge between every ordered pair
         of the group's members (self pairs included, as produced by the
         full cross product).

     Raises:
         ValueError: if ``name`` is neither ``'bdtnp'`` nor ``'kn'``.
             Previously this surfaced later as an UnboundLocalError.
     """
     name = kwargs.get('name')
     if name == 'bdtnp':
         gene_list = nio.getBDTNP().keys()
     elif name == 'kn':
         gene_list = graphs['kn'].nodes()
     else:
         # Fail before doing any work instead of hitting an unbound
         # ``gene_list`` after ``term_groups`` has already run.
         raise ValueError(
             "unsupported network name {0!r}; expected 'bdtnp' or 'kn'"
             .format(name))

     grps = term_groups(**mem.sr(kwargs, name=name))

     network = nx.Graph()
     network.add_nodes_from(gene_list)

     for g in grps:
         # g[1] holds the group's members; the first field of each member
         # is used as the node id.
         members = [member[0] for member in g[1]]
         network.add_edges_from([[a, b] for a in members for b in members])
     return network
예제 #7
0
파일: worm.py 프로젝트: bh0085/compbio
    def set_array_imaps(**kwargs):
        """Return name/type lists and their name -> position lookup tables.

        ``nnames`` is every key of ``get_synapse_dict`` plus the second field
        of every row stored under those keys (deduplicated).  ``ctypes`` is a
        fixed list of six type codes.  The ``*_imap`` entries map each item
        to its index in the corresponding list.
        """
        sdict = get_synapse_dict(**mem.sr(kwargs))

        nameset = set()
        for source, rows in sdict.iteritems():
            nameset.add(source)
            nameset.update(row[1] for row in rows)
        nnames = list(nameset)

        ctypes = [u'Rp', u'EJ', u'Sp', u'S', u'R', u'NMJ']

        return {'ctypes': ctypes,
                'ctypes_imap': {ctype: idx for idx, ctype in enumerate(ctypes)},
                'nnames': nnames,
                'nnames_imap': {nname: idx for idx, nname in enumerate(nnames)}}
예제 #8
0
파일: ows_tools.py 프로젝트: bh0085/synql
def plot_ronn(**kwargs):
    """Draw a stacked ``seismic`` plot from the ``last_5`` data.

    One row is built per value of ``last_5``; each cell is the sum, over the
    items in that slot, of the time of day of ``created_at`` expressed in
    seconds.  Empty slots stay at zero.  Only the last 20 columns are kept,
    and each row is shifted so that its minimum is zero before plotting.
    """
    l5 = last_5(**mem.sr(kwargs))
    histories = list(l5.values())

    ys = zeros((len(l5), len(histories[0])))
    for i, history in enumerate(histories):
        for j, entries in enumerate(history):
            if len(entries) != 0:
                stamps = [entry.created_at for entry in entries]
                ys[i, j] = np.sum([(t.hour * 60 + t.minute) * 60 + t.second
                                   for t in stamps])

    recent = ys[:, -20:]
    row_minima = np.min(recent, 1)
    ys = recent - row_minima[:, newaxis]

    colors = mycolors.getct(len(ys))
    seismic.seismic(ys, stacked=True, colors=colors)
예제 #9
0
파일: parse.py 프로젝트: bh0085/compbio
 def set_simple_thr(**kwargs):
     """Filter the simple per-factor description by score and distance.

     Keyword Args (all three keys are required; pass ``None`` to disable):
         sthr: keep hits whose score is strictly below this value.
         dsign: keep hits for which ``dist * dsign`` is strictly positive.
         dthr: keep hits whose absolute distance is strictly below this value.

     Returns:
         dict mapping each factor key of ``get_simple_description`` to a dict
         with the same ``'genes'``, ``'gnames'``, ``'dists'`` and ``'scores'``
         entries, restricted to the hits that pass every enabled filter.

     Raises:
         KeyError: if ``dthr``, ``dsign`` or ``sthr`` is missing from kwargs.
     """
     dthr = kwargs['dthr']
     dsign = kwargs['dsign']
     sthr = kwargs['sthr']
     simple = get_simple_description(**mem.sr(kwargs))
     out = {}
     for k, v in simple.iteritems():
         scores = v['scores']
         dists = v['dists']

         # Boolean mask, and-ed with every enabled criterion.  Identity
         # tests against None are used so that a threshold of 0 still
         # counts as "enabled" and no __ne__ overload can interfere.
         keep = ones(len(scores), dtype=bool)
         if sthr is not None:
             keep &= less(scores, sthr)
         if dsign is not None:
             keep &= greater(dists * dsign, 0)
         if dthr is not None:
             keep &= less(abs(dists), dthr)

         allowed = nonzero(keep)[0]
         out[k] = {'genes': [v['genes'][i] for i in allowed],
                   'gnames': [v['gnames'][i] for i in allowed],
                   'dists': dists[allowed],
                   'scores': scores[allowed]}
     return out
예제 #10
0
파일: genes.py 프로젝트: bh0085/compbio
    def setAllGenes(**kwargs):
        """Attach nearby peaks to every gene of chromosomes 1-19 and X.

        For each gene the reference position is ``'start'`` when the strand
        is 1 and ``'end'`` otherwise.  A peak is recorded as a hit when either
        of its ends lies within 2000 of that position (strand-oriented
        offset) or when the two offsets have opposite signs, i.e. the peak
        spans the position.  Hits are sorted by their smaller offset.

        Returns:
            dict: ``{'chrN': {gene name: gene record}}`` where each record
            carries ``'dnase_peaks'``, ``'name'``, ``'gene_info'``,
            ``'start'``, ``'end'`` and ``'strand'``.
        """
        allPeaks = getPeaks()
        all_results = {}
        for num in list(range(1, 20)) + ['X']:
            chrom = 'chr{0}'.format(num)
            print('Parsing Chromosome: chr{0}'.format(num))
            genes_dict = {}
            all_results[chrom] = genes_dict
            chrgenes = getTrackChrGenes(**mem.sr(kwargs, num=num))

            peaks = allPeaks[chrom]
            for i, g in enumerate(chrgenes):
                name = g['name']
                strand = g['strand']
                startpos = g['start'] if strand == 1 else g['end']

                hits = []
                for p in peaks:
                    stranded_offset = array([strand * (p['start'] - startpos),
                                             strand * (p['end'] - startpos)])
                    close_by = np.min(abs(stranded_offset)) < 2000
                    if not close_by and not (np.prod(stranded_offset) < 0):
                        continue
                    stranded_offset.sort()
                    hits.append({'peak_info': p,
                                 'peak_stranded_offset': stranded_offset})

                hits.sort(key=lambda hit: hit['peak_stranded_offset'][0])
                genes_dict[name] = {
                    'dnase_peaks': hits,
                    'name': name,
                    'gene_info': g,
                    'start': g['start'],
                    'end': g['end'],
                    'strand': g['strand'],
                }

                # progress report every 100 genes
                if i % 100 == 0:
                    print('Gene {0}: {1}, {2} hits'.format(i, g['name'],
                                                           len(hits)))

        return all_results
예제 #11
0
파일: analyze.py 프로젝트: bh0085/compbio
 def set_cons(**kwargs):
     """Return ``consensus_seq`` of every 100th sequence from ``get_mutants``."""
     seqs, _rndvals, _keys = get_mutants(**mem.sr(kwargs))
     subsample = seqs[::100]
     return consensus_seq(subsample)
예제 #12
0
파일: parse.py 프로젝트: bh0085/compbio
    def set_assay_info(**kwargs):
        """Annotate every assay peak with its flanking forward/reverse genes.

        Keyword Args:
            atype: ``'tfchip'`` (peaks from ``tf_chip_peaks``) or
                ``'wormtile'`` (peaks from ``tiling_peaks``).  The remaining
                keyword arguments are forwarded through ``mem.sr``.

        Assay keys are grouped by a name derived from the key itself: the
        text before the first ``':'`` for ``'tfchip'``, the text following
        ``'Tissue='`` up to the next ``'('`` for ``'wormtile'``.

        Returns:
            dict: ``{name: {assay key: [peak record, ...]}}``.  Each record
            holds ``'fdown_gene'``, ``'fup_gene'``, ``'rdown_gene'``,
            ``'rup_gene'`` (entries of the ``get_tss`` gene lists), the mapped
            chromosome ``'chr'``, ``'start'``, ``'end'``, their midpoint
            ``'mean'`` and the peak ``'score'``.  Peaks whose chromosome is
            not one of I, II, III, IV, V, X are skipped.

        Raises:
            ValueError: if ``atype`` is not a supported assay type.
            KeyError: if ``atype`` is missing from kwargs.
        """
        atype = kwargs['atype']
        if atype == 'tfchip':
            assay_peaks = tf_chip_peaks(**mem.sr(kwargs))
        elif atype == 'wormtile':
            assay_peaks = tiling_peaks(**mem.sr(kwargs))
        else:
            raise ValueError('unsupported atype: {0!r}'.format(atype))

        # Compiled once instead of on every key; raw string keeps the
        # pattern text identical while avoiding an invalid-escape warning.
        tissue_re = re.compile(r'Tissue=([^\(]*)')
        keyvalfuns = {'tfchip': lambda x: x[:x.index(':')],
                      'wormtile': lambda x: tissue_re.search(x).group(1)}
        keyval = keyvalfuns[atype]

        # index and uniquify the names derived from the assay keys
        pkeyvals = dict([(k, keyval(k)) for k in assay_peaks.keys()])
        tfnames = set(pkeyvals.values())
        tfkeys = dict([(name, [k for k, v in pkeyvals.iteritems()
                               if v == name])
                       for name in tfnames])

        chrmap = dict(zip(['I', 'II', 'III', 'IV', 'V', 'X'],
                          chromosome_names()))

        all_tss = get_tss()
        # NOTE(review): the parsed genes are not used in this function; the
        # call is kept in case parse_genes() has caching side effects --
        # confirm and drop.
        parse_genes()

        bind_infos = {}
        # loop through names, assays, and finally peaks
        for tfname in tfnames:
            print(tfname)
            bind_infos[tfname] = {}
            for ekey in tfkeys[tfname]:
                print('n_exps = {0}'.format(
                    np.sum([len(done) for done in bind_infos.values()])))
                records = []
                bind_infos[tfname][ekey] = records
                for e in assay_peaks[ekey]:
                    start = e['start']
                    end = e['end']
                    c = e['chr']

                    # if we have non chromosomal DNA, skip it!
                    if c not in chrmap:
                        continue
                    crkey = chrmap[c]
                    tss = all_tss[crkey]
                    mid = (start + end) / 2

                    # Forward: first TSS at or after the peak midpoint.
                    # Reverse: last TSS at or before the peak midpoint.
                    fdownstream_idx = searchsorted(tss['fwd_tss'], mid)
                    rdownstream_idx = searchsorted(tss['rev_tss'], mid,
                                                   'right') - 1

                    # Peak past the last forward gene: clamp to the last one
                    # and let the upstream slot fall on the same gene.
                    fup_ofs = -1
                    if fdownstream_idx == len(tss['fwd_genes']):
                        fdownstream_idx = fdownstream_idx - 1
                        fup_ofs = 0

                    # Peak before the first reverse gene: the index is -1,
                    # which would silently wrap around to the LAST reverse
                    # gene.  Clamp to the first one instead (mirror image of
                    # the forward case above).  The original re-tested the
                    # forward index here, so this branch never ran.
                    rup_ofs = 1
                    if rdownstream_idx < 0:
                        rdownstream_idx = 0
                        rup_ofs = 0

                    # look up the flanking genes in the per-chromosome lists
                    fdown_gene = tss['fwd_genes'][fdownstream_idx]
                    if fdownstream_idx > 0:
                        fup_gene = tss['fwd_genes'][fdownstream_idx + fup_ofs]
                    else:
                        fup_gene = fdown_gene

                    rdown_gene = tss['rev_genes'][rdownstream_idx]
                    if rdownstream_idx < len(tss['rev_genes']) - 1:
                        rup_gene = tss['rev_genes'][rdownstream_idx + rup_ofs]
                    else:
                        rup_gene = rdown_gene

                    records.append({
                        'rup_gene': rup_gene,
                        'rdown_gene': rdown_gene,
                        'fup_gene': fup_gene,
                        'fdown_gene': fdown_gene,
                        'chr': crkey,
                        'start': start,
                        'end': end,
                        'mean': mid,
                        'score': e['score'],
                    })

        return bind_infos
예제 #13
0
파일: worm.py 프로젝트: bh0085/compbio
def load(plots = defplots,
         reset = False):
    """Load the connection graph, draw the requested plots and return it.

    Args:
        plots: mapping of plot name -> bool.  ``'basic_structure'`` and
            ``'degrees'`` default to off when missing, ``'feed_forward'``
            defaults to on.
        reset: forwarded (via ``mem.sr``) to ``get_graph`` and ``get_pos``.

    Side effects:
        Saves the enabled figures plus a scaled in/out-degree scatter and
        distance-matrix figure through ``myplots.figpath``.

    Returns:
        The graph returned by ``get_graph``.
    """
    kwargs = dict(reset = reset)
    edge_set = get_edge_set()
    g = get_graph(**mem.sr(kwargs))
    pos = get_pos(**mem.sr(kwargs))

    # Collect (k2, k3, k1) where k1 has edges to both k2 and k3 and k2 has
    # an edge to k3.
    trips = set([])
    for k1 in g:
        for k2 in g[k1].keys():
            for k3 in g[k1].keys():
                if k3 in g[k2]:
                    trips.add((k2, k3, k1))

    tripoints = dict([((e[0], e[1]), pos[e[2]]) for e in trips])

    f = None
    if plots.get('basic_structure', False):
        f = myplots.fignum(1)
        f.add_subplot(111)
        gd.easy_draw(g, pos)

        f.savefig(myplots.figpath('basic_structure_edges={0}'.format(edge_set)))

    if plots.get('feed_forward', True):
        if f is None:
            # Without the basic-structure plot there was no figure to save
            # and this branch died with a NameError; open the same figure
            # the basic-structure branch would have used.
            f = myplots.fignum(1)
            f.add_subplot(111)
        gd.overlay(g, pos, g.edges(),
                   tripoints = tripoints,
                   alphas = dict([(e, .1) for e in g.edges()]))

        f.savefig(myplots.figpath('feed_forward_edges={0}'.format(edge_set)))

    if plots.get('degrees', False):
        make_degree_plots_0()

    # NOTE(review): the result is never used below; kept so that the call
    # (and any error it raises for missing nodes) is unchanged -- confirm
    # whether it can be dropped.
    maxflow = nx.algorithms.ford_fulkerson(g, 'AVAL', 'PVPL', 'weight')

    # NOTE(review): called without kwargs, so ``reset`` is not forwarded
    # here -- confirm this is intended.
    imaps = get_array_imaps()
    nnames = imaps['nnames']

    # Count degrees in one pass over the edges instead of rescanning the
    # whole edge list for every node.
    out_counts = {}
    in_counts = {}
    for e in g.edges():
        out_counts[e[0]] = out_counts.get(e[0], 0) + 1
        in_counts[e[1]] = in_counts.get(e[1], 0) + 1
    node_stats = dict([(k, {'out_degree': out_counts.get(k, 0),
                            'in_degree': in_counts.get(k, 0)})
                       for k in nnames])

    f = myplots.fignum(3, (12, 6))
    outs = [v['out_degree'] for v in node_stats.values()]
    ins = [v['in_degree'] for v in node_stats.values()]
    raw_data = array([outs, ins]).T

    make_data_transform(raw_data)
    data = transform_data(raw_data)
    kd = make_kdtree(data)
    k = 5
    nn = compute_nns(kd, k)
    knn_dists = nn['dists']
    dists = compute_dists(data)

    # Mean neighbour distance, skipping the first column.
    # NOTE(review): presumably column 0 is each point's distance to itself
    # -- confirm against compute_nns.
    mean_dists = np.mean(knn_dists[:, 1:], 1)
    # Red channel only, scaled by the largest mean distance.
    mean_colors = sqrt(mean_dists[:, newaxis] * [1/np.max(mean_dists), 0, 0])

    ax = f.add_subplot(121)
    ax.scatter(data[:, 0], data[:, 1], s = 15,
               facecolor = mean_colors,
               edgecolor = 'none')
    ax.set_xlabel('scaled out degree')
    ax.set_ylabel('scaled in degree')

    ax2 = f.add_subplot(122)
    ax2.imshow(dists,
               interpolation = 'nearest',
               aspect = 'auto')
    ax2.set_title('distance matrix for scaled degrees')

    f.savefig(myplots.figpath('distances_{0}'.format(edge_set)))

    return g
예제 #14
0
파일: worm.py 프로젝트: bh0085/compbio
 def set_pos(**kwargs):
     """Return ``gd.getpos`` applied to the graph from ``get_graph``."""
     graph = get_graph(**mem.sr(kwargs))
     return gd.getpos(graph)