Example #1
0
  def ali_in_tree(self,aliname = 'group2.stk',
                  rank = 'genus',
                  **kwargs):
    """Mask the tree's and the alignment's taxa down to the bacterial ones.

    The taxon at ``rank`` is fetched for every alignment entry (through
    ``ali.get_taxon_forall``) and for every tree leaf (through
    ``self.getTaxon``). Each distinct taxon is mapped to its superkingdom
    with ``ncbi.get_taxon``; only taxa whose superkingdom equals the one
    that ``ncbi.sciname`` names 'Bacteria' are kept.

    Args:
      aliname: alignment name handed to the ``ali`` helpers.
      rank: rank name at which taxa are fetched.
      **kwargs: passed through ``mem.sr`` to the two taxon lookups.

    Returns:
      ``(leaf_bacteria, ali_bacteria, leafnodes, alinodes)``. The first two
      are the per-leaf and per-alignment-entry taxon lists with every entry
      outside the bacterial set replaced by None; the last two are the
      results of ``self.leafNodes`` and ``ali.get_taxnodes``.

    Raises:
      IndexError: if no leaf taxon has a superkingdom named 'Bacteria'.
    """
    alinodes = ali.get_taxnodes(aliname)

    # NOTE(review): `reset` is not bound anywhere in this method, so this
    # only works if a module-level `reset` exists -- confirm, or read it
    # from kwargs instead.
    leafnodes = self.leafNodes(reset = mod(reset, 2))

    ali_families = ali.get_taxon_forall(rank = rank,aliname = aliname,
                                        **mem.sr(kwargs))
    leaf_families = self.getTaxon(rank = rank, **mem.sr(kwargs))

    # Resolve the superkingdom once per distinct taxon, not once per entry.
    a_domains = [(node, ncbi.get_taxon(node, 'superkingdom'))
                 for node in set(ali_families)]
    l_domains = [(node, ncbi.get_taxon(node, 'superkingdom'))
                 for node in set(leaf_families)]

    # The bacterial superkingdom is identified from the leaves only.
    bacterial = [dom for _, dom in l_domains
                 if ncbi.sciname(dom) == 'Bacteria']
    if not bacterial:
      raise IndexError('no leaf taxon at rank %r belongs to the '
                       'superkingdom Bacteria' % (rank,))
    bac_domain = bacterial[0]

    l_bacs = set(node for node, dom in l_domains if dom == bac_domain)
    a_bacs = set(node for node, dom in a_domains if dom == bac_domain)

    leaf_bacteria = [leaf if leaf in l_bacs else None for leaf in leaf_families]
    ali_bacteria = [a if a in a_bacs else None for a in ali_families]

    return leaf_bacteria, ali_bacteria, leafnodes, alinodes
Example #2
0
def make_anc_batches(aliname, rank_name = 'phylum', 
                     do_bsub = False,
                     run = False, nrun = 0, 
                     **kwargs):
  """Build one 'run_anc' command per taxon shared by the tree and the alignment.

  Taxa at ``rank_name`` are fetched from the BTOL tree and from the
  alignment; for every truthy taxon present in both, a parameter dict
  (taxid, rank_name, aliname) is saved with ``bsub.save_inp`` and a command
  targeting this source file is built with ``bsub.cmd``.

  Args:
    aliname: alignment name.
    rank_name: rank at which taxa are compared.
    do_bsub: forwarded to ``bsub.cmd``.
    run: when true, each command is executed through the shell and its
      return code printed.
    nrun: not used by this function.
    **kwargs: passed through ``mem.sr`` to ``getBTOL``.

  Returns:
    The list of command strings, in batch order.
  """
  btol = getBTOL(**mem.sr(kwargs))
  tree_taxa = set(btol.getTaxon(rank_name))
  ali_taxa = set(ali.get_taxon_forall(aliname, rank_name))

  # Only taxa found on both sides are batched; falsy entries are dropped.
  shared_taxids = [taxon.id
                   for taxon in tree_taxa.intersection(ali_taxa)
                   if taxon]

  cmds = []
  for idx, taxid in enumerate(shared_taxids):
    params = dict(taxid = taxid,
                  rank_name = rank_name,
                  aliname = aliname)
    run_id = bsub.get_run_id(idx, prefix = rank_name)
    bsub.save_inp(params, run_id)
    # Path of the file this function lives in, as seen by the current frame.
    this_file = os.path.abspath(inspect.stack()[0][1])
    cmds.append(bsub.cmd(this_file, 'run_anc', run_id,
                         do_bsub = do_bsub, run_id = run_id))

  if run:
    # Commands are handed to the shell verbatim.
    for command in cmds:
      print(subprocess.call(command, shell = True))

  return cmds
Example #3
0
 def set_taxnodes(**kwargs):
   """Return one taxonomy node (or None) per sequence from ``get_seqs``.

   A sequence whose ``source_taxon`` is falsy yields None; any other
   sequence yields ``ncbi.get_node(source_taxon)``. ``dbname`` is taken
   from the enclosing scope.
   """
   nodes = []
   for seq in get_seqs(dbname,**mem.sr(kwargs)):
     taxid = seq.source_taxon
     if taxid:
       nodes.append(ncbi.get_node(taxid))
     else:
       nodes.append(None)
   return nodes
Example #4
0
 def setBTOL(**kwargs):
   """Construct a BTOL, initializing and saving its tree if needed.

   The tree is built and saved only when ``treeInitialized()`` reports
   false; progress is printed around both steps. Returns the BTOL instance.
   """
   btol = BTOL(**mem.sr(kwargs))
   if btol.treeInitialized():
     return btol

   print('Underlying tree structure apparently uninitialized: initializing\n...')
   btol.initTree()
   print('...\nDone\nSaving\n...')
   btol.saveTree()
   print('...\nDone')
   return btol
Example #5
0
def check_bdtnp(**kwargs):
    """Report, per gene id, which of the three data sources contain it.

    Args:
        **kwargs: passed through ``mem.sr`` to ``id_map``.

    Returns:
        A dict keyed by every id in ``id_map``. Each value is a dict that
        holds ``'stf'``, ``'btg'`` and/or ``'stg'`` set to True when the id
        is a key of, respectively, the first mapping from ``nio.getNet``,
        the mapping from ``nio.getBDTNP``, and the second mapping from
        ``nio.getNet``. Ids found nowhere map to an empty dict.
    """
    ids = id_map(**mem.sr(kwargs))

    btgs = nio.getBDTNP()
    stfs, stgs = nio.getNet()

    sxs = {}
    for gid in ids:
        flags = {}
        if gid in stfs:
            flags['stf'] = True
        if gid in btgs:
            flags['btg'] = True
        if gid in stgs:
            flags['stg'] = True
        sxs[gid] = flags

    return sxs
Example #6
0
 def getTaxon(self,rank = rank,
              **kwargs):
   """Return the taxon at ``rank`` for every leaf node, via ``mem.getOrSet``.

   The default for ``rank`` is whatever the name ``rank`` is bound to in
   the enclosing scope when this ``def`` is executed. The computation is
   delegated to the nested ``setTaxon`` through ``mem.getOrSet``
   (presumably a memoizing wrapper -- confirm against ``mem``), registered
   under ``rank`` with ``on_fail = 'compute'`` and ``hardcopy = False``.
   """
   def setTaxon(BTInstance = None, rank = None, **kwargs):
     # Both arguments are mandatory despite their None defaults.
     assert rank
     assert BTInstance
     taxa = []
     for node in BTInstance.leafNodes(**mem.sr(kwargs)):
       # Falsy leaf nodes have no taxon to look up.
       taxa.append(ncbi.get_taxon(node, rank=rank) if node else None)
     return taxa

   memo_args = mem.sr(kwargs,
                      rank = rank,
                      BTInstance = self,
                      on_fail = 'compute',
                      hardcopy = False,
                      register = rank)
   return mem.getOrSet(setTaxon, **memo_args)
Example #7
0
def run(rfid,run_id, inp_run_id, reset = True,
        draw_alis = draw_all_easy):
    """Evaluate every medium-sized sequence group of ``rfid``.

    Groups with fewer than 4 members contribute None; groups with more
    than 40 members are skipped with a message and contribute nothing;
    all others are passed to ``eval_seq_group`` with a run id of the form
    ``'<run_id>_<group size>'``.

    Returns:
        The list of per-group results.
    """
    # NOTE(review): oversized groups add no entry, so the result list does
    # not line up index-for-index with the groups -- confirm this is wanted.
    results = []
    for group in get_seq_groups(rfid = rfid, **mem.sr({},reset = False)):
        size = len(group)
        if size < 4:
            results.append(None)
        elif size > 40:
            print('skipping cuz it takes too long!')
        else:
            print('Not Skipping')
            group_run_id = '{0}_{1}'.format(run_id, size)
            results.append(eval_seq_group(group, rfid, group_run_id,
                                          inp_run_id, reset = reset,
                                          draw_alis = draw_alis))
    return results
Example #8
0
def run(**kwargs):
  """Align a phylum's sequences, build a tree, and reconstruct ancestors.

  Pipeline: ``investigatePhylum`` -> ``seq_recs`` -> ``align_seqnodes`` ->
  ``phyml.tree`` -> ``paml.run_paml`` -> ``paml.rst_parser``. Every
  non-terminal of the parsed tree becomes a ``SeqRecord`` carrying the
  node's ``m['seq']`` and, under the 'scores' annotation, its ``m['probs']``.

  Returns:
    ``((tree, anc_tree), (align, anc_alignment))``.
  """
  btol = getBTOL(**mem.sr(kwargs))
  # Note: kwargs go to investigatePhylum as given, not through mem.sr.
  phylum_nodes = btol.investigatePhylum(**kwargs)

  # Only the records are used below; the other two results are discarded.
  recs, _seqelts, _seqtuples = seq_recs(phylum_nodes)
  align = align_seqnodes(recs)
  tree = phyml.tree(align)
  anc_tree = paml.rst_parser(paml.run_paml(tree, align))

  anc_alignment = []
  for anc in anc_tree.get_nonterminals():
    anc_alignment.append(SeqRecord(anc.m['seq'],
                                   id = None,
                                   name = anc.name,
                                   annotations = {'scores': anc.m['probs']}))

  return (tree, anc_tree), (align, anc_alignment)
Example #9
0
 def set_datafiles(**kwargs):
     """Load the numeric table behind every entry of ``id_map``.

     For each ``(key, value)`` of ``id_map``, the file at ``value['file']``
     is read; every line whose first character is a decimal digit is split
     on whitespace and converted to floats. Other lines are ignored.

     Args:
         **kwargs: passed through ``mem.sr`` to ``id_map``.

     Returns:
         A dict mapping each key to the ``array`` built from its rows.
     """
     # Compiled once instead of once per line.
     whitespace = re.compile(r'\s+')

     out = {}
     idmap = id_map(**mem.sr(kwargs))
     for k, v in idmap.items():
         # The context manager closes the handle even if a float() fails.
         with open(v['file']) as fh:
             rows = [[float(tok) for tok in whitespace.split(line.strip())]
                     for line in fh
                     if line[0] in '0123456789']
         out[k] = array(rows)
     return out
Example #10
0
 def __init__(self,
              **kwargs):
   """Store the tree returned by ``sqt.init`` on ``self.t``.

   ``kwargs`` are passed through ``mem.sr`` before reaching ``sqt.init``.
   """
   init_args = mem.sr(kwargs)
   self.t = sqt.init(**init_args)
Example #11
0
  def investigatePhylum(self, 
                        aliname = 'group2.stk',
                        p_node = None, **kwargs):
    """Collect a SeqNode for every tree leaf and alignment entry in a phylum.

    Entries are selected by comparing their phylum-rank taxon with
    ``p_node``. For each one, the genus and species taxa are looked up and
    16S sequences are fetched from the '16s' database by taxon id. When a
    node has no sequences of its own, its ancestors are tried in turn.

    Args:
      aliname: alignment name handed to the ``ali`` helpers.
      p_node: phylum taxon to select; when falsy, the phylum named
        'Thermotogae' is used.
      **kwargs: passed through ``mem.sr`` to the lookups.

    Returns:
      A list of SeqNode objects: the tree leaves first, then the alignment
      entries. A node's sequence list is empty only when neither it nor any
      ancestor has a 16S record.
    """
    if not p_node: p_node = ncbi.taxon_with_name('phylum', 'Thermotogae')

    ali_seqs = ali.get_seqs(aliname, **mem.sr(kwargs))
    ali_nodes = array(ali.get_taxnodes(aliname, **mem.sr(kwargs)))
    ali_phyla = array(ali.get_taxon_forall(aliname,**mem.sr(kwargs, rank = 'phylum')))
    ali_inds = nonzero(equal(ali_phyla, p_node))[0]

    leaf_terminals = self.t.get_terminals()
    leaf_nodes = array(self.leafNodes(**mem.sr(kwargs)))
    leaf_phyla = array(self.getTaxon('phylum', **mem.sr(kwargs)))
    leaf_inds = nonzero(equal(leaf_phyla, p_node))[0]

    ag_sub = array(ali.get_taxon_forsome(ali_nodes[ali_inds],'genus','thermo',
                                         **mem.sr(kwargs)))
    lg_sub = array(self.getTaxon('genus', **mem.sr(kwargs)))[leaf_inds]

    # NOTE(review): unlike the genus lookup above, this call does not
    # forward kwargs -- confirm whether that is deliberate.
    as_sub = array(ali.get_taxon_forsome(ali_nodes[ali_inds], 'species', 'thermo'))
    ls_sub = array(self.getTaxon('species',**mem.sr(kwargs)))[leaf_inds]

    db16 = cbdb.getName('16s')

    def seqs_for_taxid(taxid):
      # All 16S records whose source taxon is `taxid`.
      return db16.S.q(db16.Sequence).filter_by(source_taxon = taxid).all()

    def seqs_with_fallback(node):
      # Records for `node`, or for its nearest ancestor that has any.
      found = seqs_for_taxid(node.id)
      while not found:
        parent = node.parent
        # Stop at the top of the taxonomy: a missing parent, or a root that
        # is its own parent (assumed to be how ncbi models it -- confirm).
        if parent is None or parent.id == node.id:
          break
        node = parent
        found = seqs_for_taxid(node.id)
      return found

    a_16s = [seqs_with_fallback(n) for n in ali_nodes[ali_inds]]
    l_16s = [seqs_with_fallback(n) for n in leaf_nodes[leaf_inds]]

    def seq_tuples(records):
      # (sequence, genbank id, source taxon id, rank of the source taxon)
      return [(x.sequence, x.gb_id, x.source_taxon,
               ncbi.get_node(x.source_taxon).rank)
              for x in records]

    leaf_sns = [ SeqNode(lg_sub[i], ls_sub[i], leaf_nodes[idx],
                         seq_tuples(l_16s[i]),
                         src = leaf_terminals[idx],
                         node_id = 'btol:default:{0}'.format(leaf_terminals[idx].m['id']))
                 for i, idx in enumerate(leaf_inds)]
    ali_sns = [ SeqNode(ag_sub[i], as_sub[i], ali_nodes[idx],
                        seq_tuples(a_16s[i]),
                        src = ali_seqs[idx],
                        node_id = 'ali:{0}:{1}'.format(aliname,ali_seqs[idx].id))
                for i, idx in enumerate(ali_inds)]

    return list(it.chain(leaf_sns, ali_sns))
Example #12
0
 def setTaxon(BTInstance = None, rank = None, **kwargs):
   """Return the taxon at ``rank`` for every leaf node of ``BTInstance``.

   Both ``BTInstance`` and ``rank`` must be truthy. Falsy leaf nodes yield
   None in the result; the others yield ``ncbi.get_taxon(node, rank=rank)``.
   """
   assert rank
   assert BTInstance
   taxa = []
   for node in BTInstance.leafNodes(**mem.sr(kwargs)):
     if node:
       taxa.append(ncbi.get_taxon(node, rank=rank))
     else:
       taxa.append(None)
   return taxa
Example #13
0
 def setTaxon(aliname = None, rank = None,**kwargs):
   """Return the taxon at ``rank`` for every taxonomy node of an alignment.

   Args:
     aliname: alignment name passed to ``get_taxnodes``; must not be None.
     rank: rank name passed to ``ncbi.get_taxon``; must not be None.
     **kwargs: passed through ``mem.sr`` to ``get_taxnodes``.

   Returns:
     A list with one entry per node: None for falsy nodes, otherwise
     ``ncbi.get_taxon(node, rank=rank)``.
   """
   assert aliname is not None and rank is not None, \
       'setTaxon requires both aliname and rank'
   nodes = get_taxnodes(aliname,**mem.sr(kwargs))
   return [ncbi.get_taxon(node, rank=rank) if node else None
           for node in nodes]
Example #14
0
 def setC2(**kwargs):
   """Apply ``c2`` twice: once with no positional input, then to its own result.

   Each call receives its own ``mem.sr(kwargs)``. Returns the second result.
   """
   first_pass = c2(**mem.sr(kwargs))
   second_args = mem.sr(kwargs)
   return c2(first_pass, **second_args)