Python profiles Examples

Programming Language: Python

Namespace/Package Name: infernal

Method/Function: profiles

Examples at hotexamples.com: 2

Python profiles - 2 examples found. These are the top-rated real-world Python examples of infernal.profiles, extracted from open-source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

def setProfiles( seq = None, structs = None, run_id = None,
                 **kwargs ):
    '''
    Build infernal profiles for the structures of one sequence.

    All three of seq, structs and run_id are required and must be
    truthy; they are checked in that order and an AssertionError is
    raised for the first one that is missing.  Extra keyword
    arguments are accepted and ignored.

    Returns whatever infernal.profiles(seq, structs, run_id) returns.
    '''
    for required_arg in (seq, structs, run_id):
        assert required_arg
    return infernal.profiles(seq, structs, run_id)

Example #2

Show file

File: rna_ali2d.py Project: bh0085/projects

def get_consensus(rfid = 'RF00', mweight = .5, 
                  refseq_method = 'root', sp_method = 'sample',
                  aff_type = 'pairs',  reset = True,
                  do_plot = False,  run_id = 'CONS_TEST'):
    '''
    Load an Rfam family, annotate its tree, and return the family's
    exemplar structures.

    As written, the live part of this function:
      1. fetches the alignment, tree and infos for rfid via rfam.get_fam;
      2. gives every terminal of the tree an ``m`` dict holding its
         alignment record ('seq') and a list of ones of the same
         length ('probs');
      3. computes a reference sequence with subtree_refseq and
         rutils.ungapped_seq;
      4. returns family_exemplar_structs(rfid, ...) straight away.

    NOTE(review): everything after that return statement is
    unreachable.  The dead code also reads exemplar_structs, inds,
    pca_vecs and title, none of which is assigned in this function.
    It looks like the return was a temporary short-circuit that
    replaced the commented-out
    ``pca_vecs, exemplar_structs = family_exemplar_structs(...)``
    assignment -- confirm before relying on the per-clade analysis.

    Parameters:
      rfid          -- family id handed to rfam.get_fam,
                       rutils.ungapped_seq and family_exemplar_structs.
      refseq_method -- forwarded to subtree_refseq and
                       family_exemplar_structs.
      sp_method, aff_type -- forwarded to family_exemplar_structs.
      run_id        -- only read by the unreachable code.
      mweight, reset, do_plot -- not read by any live statement here.

    Returns:
      Whatever family_exemplar_structs returns (presumably a
      (pca_vecs, exemplar_structs) pair, judging by the commented-out
      assignment -- TODO confirm).

    Raises:
      AttributeError if a terminal name contains no '_..._' segment
      (the regex search yields None); ValueError if the extracted id
      is not the name of any alignment record.
    '''

    ali, tree, infos = rfam.get_fam(rfid)
    ali_ids = [a.name for a in ali]

    # Attach the matching alignment record to every leaf of the tree.
    for i, n in enumerate(tree.get_terminals()):
        # The id is the text between the first pair of underscores in the name.
        term_id = re.compile('_([^_]*)_').search(n.name).group(1) 
        this_seq = ali[ali_ids.index(term_id)]
        n.m = {'seq':this_seq,
               'probs':[1 for j in range(len(this_seq))]}

    #if do_plot : rplots.plot_clusters(inds,{'pca embedding':pca_vecs},title = title,plot3d = True)
    

    big_refnode, big_refseq = \
        subtree_refseq(tree, method = refseq_method)
    ungapped_ref = rutils.ungapped_seq(big_refseq, rfid)
    #pca_vecs,exemplar_structs =
    # NOTE(review): unconditional return -- nothing below this call ever runs.
    return family_exemplar_structs(rfid,
                                   sp_method = sp_method,
                                   refseq_method = refseq_method,
                                   aff_type = aff_type,
                                   )
    # ------------------------------------------------------------------
    # UNREACHABLE from here on (see the return above).  The comments
    # below describe what the statements would do if they were reached.
    # ------------------------------------------------------------------
    struct_profiles = infernal.profiles(ungapped_ref,exemplar_structs, run_id)

    clades = split_tree(tree)
    # One empty list per (clade, structure) pair under each of three keys.
    all_vecs = {'all_time':[ [ [] for i in range(len(struct_profiles))] 
			     for j in range(len(clades)) ],
		'all_mut':[ [ [] for i in range(len(struct_profiles))] 
			     for j in range(len(clades)) ],
		'fiftyfifty':[ [ [] for i in range(len(struct_profiles))] 
			     for j in range(len(clades)) ]}

    # Per-clade accumulators; aairr and aagaps are filled but never added to outputs.
    aamuts, aatimes, aairr, aagaps = [], [], [], []
    for idx_clade, c in enumerate(clades):
        # Skip clades with fewer than three terminals.
        if len(c.get_terminals()) < 3:
		print 'SKIPPPING CUZ SUBTREE TOO SMALL'
		continue
	c_ids = [ n.m['seq'].name for n in c.get_terminals() ]
        # Skip clades in which any sequence name occurs more than once.
	if len(nonzero(greater([len(list(g)) for k, g in it.groupby(sorted(c_ids))],1))[0])>0:
		print 'SKIPPING CUZ THERE ARE TWO COPIES OF SOME F*****G SEQUENCE IN TREE'
		continue          
        all_muts, all_times , all_gaps, all_irr = [], [], [], []
	print
	print 'Clade: {0}'.format(idx_clade)
        for idx_struct, struct_info in enumerate( zip( struct_profiles, exemplar_structs)):
          struct_profile, ex_struct = struct_info
          # ngaps is assigned here but never read afterwards.
	  ngaps = 0

          #OLD ALIGNMENTS
          calis = ba.MultipleSeqAlignment(\
              [n.m['seq'] for n in c.get_terminals() ])
          #NEW ALIGNMENTS AND REF STRUCTURE
          c_new_ali , stk, struct = infernal.alignment(calis, struct_profile, rfid)
          #REF STRUCTURE PAIRS
          pairs = rutils.stk_pairs(struct)
          # The pair count must equal len(ex_struct); a mismatch raises a bare Exception.
	  if len(pairs) != len(ex_struct):
		  raise Exception()
           
          # Rename terminals to N0, N1, ... and attach the re-aligned record to each.
          cterms = c.get_terminals()
          for i2, ct in enumerate(cterms):
              lilid =  'N{0}'.format(i2)
              ct.name = lilid
              ct.m['str_seq'] = c_new_ali[i2]
              ct.m['str_seq'].id = lilid
	      ct.m['probs'] = ones(len(c_new_ali[i2]))
          
          #BUILD A TREE
          tr = phy.BaseTree.Tree(c)

          #RUN PAML
          # NOTE(review): both fields use index 0, so idx_struct is ignored and
          # every structure of a clade gets the same paml_run_id -- the second
          # field was presumably meant to be {1:03}; confirm.
          paml_run_id = 'ali_anc_c{0:04}_s{0:03}'.format(idx_clade,idx_struct)
          rstfile= paml.run_paml(tr, c_new_ali, run_id = paml_run_id)
          anc_tree = paml.rst_parser(rstfile) 

          #Label extent and internal nodes with sequences.
          for term in anc_tree.get_terminals():
              #Terminals have old (rfam) alis and new (infernal) alis
              # Indexing the filter() result relies on Python 2, where it is a list.
              term.m = filter( lambda x: x.name == term.name, cterms)[0].m
          for node in anc_tree.get_nonterminals():
              #Internals only have new alis. m['seq'] = m['str_seq']
              node.m['str_seq'] = node.m['seq']
              # Replace every T with U in the internal node's sequence.
              node.m['str_seq'].seq = node.m['str_seq'].seq.replace('T', 'U')
          subtree = anc_tree
              
 
          #Evaluate all of the structs on the first pass
          #to have access to mean frequencies of different
          #mutational types in the final score computation
	  
          # refnode and refseq are assigned here but never read afterwards.
          refnode, refseq = subtree_refseq(subtree, method = refseq_method)
          muts, times, gaps, irresolvables = subtree_count_struct(subtree, pairs)
          all_muts.append(muts)
          all_times.append(times)
	  all_gaps.append(gaps)
	  all_irr.append(irresolvables)
        
        # Presumably fills all_vecs[...][idx_clade] in place (return value unused) -- confirm.
	compute_signatures(all_vecs,idx_clade,
			   all_muts,all_times,
			   exemplar_structs,ungapped_ref )
				      
	aamuts.append(all_muts)
	aatimes.append(all_times)
	aairr.append(all_irr)
	aagaps.append(all_gaps)
    # NOTE(review): inds, pca_vecs and title are not assigned anywhere in this
    # function; unless they are module-level globals this dict would raise
    # NameError if the code were reachable.
    outputs = {
	    'all_vecs':all_vecs,
	    'all_muts':aamuts,
	    'all_times':aatimes,
	    'exemplar_structs':exemplar_structs,
	    'reference_seq':ungapped_ref,
	    'thermo_ex_inds':inds,
	    'thermo_embedding':pca_vecs,
	    'title':title,
	    'thermo_aff_type':aff_type,
	    'tree':tree,
	    'run_id':run_id
	    }
	 
    # NOTE(review): the file is opened in text mode and never explicitly closed.
    pickle.dump(outputs, open(cfg.dataPath('cs874/runs/{0}.pickle'.format(run_id)),'w'))
    return(outputs)