def build_hmm_from_tree(base, tree_name, aln_name, msa_dir, hmm_dir):
    '''
    Read a tree and its alignment and create an MSA and an HMM for each
    internal node, then concatenate the HMM files into one file.

    Parameters:
        base      -- prefix of the concatenated output file name
                     (``<hmm_dir><base>_concat.hmm``).
        tree_name -- tree passed to PhyloTree.
        aln_name  -- FASTA alignment passed to PhyloTree.
        msa_dir   -- string prepended verbatim to ``<node>.aln``; it is NOT
                     joined with a path separator, so it has to end with one
                     if it names a directory.
        hmm_dir   -- string prepended verbatim to the HMM name, same rule
                     as msa_dir.

    Returns:
        The PhyloTree; every internal node carries an ``hmm`` feature holding
        the node name (``node0``, ``node1``, ...) used for its files.

    Raises:
        subprocess.CalledProcessError if the HMM builder exits non-zero;
        IOError/OSError if a file cannot be read or written.
    '''
    # Local imports: only this function needs them, and the file-level import
    # block is not visible from here.
    import glob
    import shutil

    # Annotate internal nodes with name of corresponding HMM.
    pt = PhyloTree(tree_name, alignment=aln_name, alg_format="fasta")
    i_node = 0
    for node in pt.traverse():
        if node.is_leaf():
            continue
        # Internal nodes are numbered in the order traverse() yields them.
        node_name = 'node%s' % (str(i_node))
        node.add_features(hmm=node_name)
        i_node += 1

        # make msa for node: FASTA records of every leaf below it
        msa_lines = []
        for leaf in node.iter_leaves():
            msa_lines.append(">%s" % leaf.name)
            msa_lines.append(str(leaf.sequence))
        msa_path = '%s%s.aln' % (msa_dir, node_name)
        # Context manager closes the file even if write() fails.
        with open(msa_path, 'w') as msa:
            msa.write('\n'.join(msa_lines))

        # build HMM for node
        # NOTE(review): presumably the script writes <hmm_dir><node_name>.hmm;
        # confirm against build_hmmer3_hmm_from_alignment.py.
        check_call(['python', 'build_hmmer3_hmm_from_alignment.py',
                    '--name', '%s%s' % (hmm_dir, node_name),
                    msa_path])

    # concatenate HMMs into one file for Hmmscan
    # Done in Python instead of `cat ... > ...` through a shell: paths with
    # spaces or shell metacharacters work, and I/O errors raise instead of
    # being silently dropped.
    concat_path = '%s%s_concat.hmm' % (hmm_dir, base)
    concat_abs = os.path.abspath(concat_path)
    # A concat file left over from a previous run matches the pattern too;
    # it must not be used as an input while it is being rewritten.
    hmm_paths = sorted(
        path for path in glob.glob('%s*.hmm' % hmm_dir)
        if os.path.abspath(path) != concat_abs
    )
    with open(concat_path, 'wb') as concat:
        for hmm_path in hmm_paths:
            with open(hmm_path, 'rb') as hmm:
                shutil.copyfileobj(hmm, concat)
    return pt
def integrate_pwids_into_tree(tree, alignment):
    '''Load a tree with its FASTA alignment and annotate every node.

    Each node visited by traverse() receives:
        node_kerf_name -- 'node' plus the traversal index zero-padded to
                          three digits.
        kerf_pass      -- False.
        sh_pass        -- False.
        min_pwid       -- 1.0 for a leaf; for an internal node, the value of
                          get_min_pwid_of_leaves() over the node's leaves.

    Returns the annotated PhyloTree.
    '''
    annotated = PhyloTree(tree, alignment=alignment, alg_format="fasta")
    for position, current in enumerate(annotated.traverse()):
        padded_index = str(position).zfill(3)
        current.node_kerf_name = 'node%s' % padded_index
        # Both flags start cleared; for later kerf and sh functions.
        current.kerf_pass = False
        current.sh_pass = False
        current.min_pwid = (
            1.0 if current.is_leaf()
            else get_min_pwid_of_leaves(current.get_leaves())
        )
    return annotated
def build_hmm_from_tree(tree_name, aln_name, msa_dir, hmm_dir):
    '''
    Read a tree and its alignment and create an MSA and an HMM for each
    internal node, then concatenate the HMM files into one file.

    Parameters:
        tree_name -- tree passed to PhyloTree.
        aln_name  -- FASTA alignment passed to PhyloTree.
        msa_dir   -- string prepended verbatim to ``<node>.aln``; it is NOT
                     joined with a path separator, so it has to end with one
                     if it names a directory.
        hmm_dir   -- string prepended verbatim to the HMM name and to
                     ``concat.hmm``, same rule as msa_dir.

    Returns:
        The PhyloTree; every internal node carries an ``hmm`` feature holding
        the node name (``node0``, ``node1``, ...) used for its files.

    Raises:
        subprocess.CalledProcessError if the HMM builder exits non-zero;
        IOError/OSError if a file cannot be read or written.
    '''
    # Local imports: only this function needs them, and the file-level import
    # block is not visible from here.
    import glob
    import shutil

    # Annotate internal nodes with name of corresponding HMM.
    pt = PhyloTree(tree_name, alignment=aln_name, alg_format="fasta")
    i_node = 0
    for node in pt.traverse():
        if node.is_leaf():
            continue
        # Internal nodes are numbered in the order traverse() yields them.
        node_name = 'node%s' % (str(i_node))
        node.add_features(hmm=node_name)
        i_node += 1

        # make msa for node: FASTA records of every leaf below it
        msa_lines = []
        for leaf in node.iter_leaves():
            msa_lines.append(">%s" % leaf.name)
            msa_lines.append(str(leaf.sequence))
        msa_path = '%s%s.aln' % (msa_dir, node_name)
        # Context manager closes the file even if write() fails.
        with open(msa_path, 'w') as msa:
            msa.write('\n'.join(msa_lines))

        # build HMM for node
        # NOTE(review): presumably the script writes <hmm_dir><node_name>.hmm;
        # confirm against build_hmmer3_hmm_from_alignment.py.
        check_call([
            'build_hmmer3_hmm_from_alignment.py',
            '--name', '%s%s' % (hmm_dir, node_name),
            msa_path,
        ])

    # concatenate HMMs into one file for Hmmscan
    # Done in Python instead of `cat ... > ...` through a shell: paths with
    # spaces or shell metacharacters work, and I/O errors raise instead of
    # being silently dropped.
    concat_path = '%sconcat.hmm' % hmm_dir
    concat_abs = os.path.abspath(concat_path)
    # A concat file left over from a previous run matches the pattern too;
    # it must not be used as an input while it is being rewritten.
    hmm_paths = sorted(
        path for path in glob.glob('%s*.hmm' % hmm_dir)
        if os.path.abspath(path) != concat_abs
    )
    with open(concat_path, 'wb') as concat:
        for hmm_path in hmm_paths:
            with open(hmm_path, 'rb') as hmm:
                shutil.copyfileobj(hmm, concat)
    return pt
######################################################################################### print "Reading complete ancestral sequence data generated through R" rtree=PhyloTree(intree) # Tree for "R" generated patterns tree=ape.read_tree(intree) rlist=[] ropf = open(ressurect_file, "r") # rdata.dat is a rgp.R generated output file for tab in ropf.readlines(): tab=tab.rstrip() rlist.append(tab.split(" ")) ropf.close() ori=np.array(rlist) for node in rtree.traverse("postorder"): # Patterns are being linked to their corresponding nodes if node.is_leaf(): node.add_features(data=[None for i in range(len(rlist[0])-1)]) # Its rlist[0]-1, because nucleotides begins with name of species for i in range(len(ori[:,0])): if '"'+node.name+'"' == ori[:,0][i] : node.add_features(rtoken=i+1) else : node.add_features(data=[None for i in range(len(rlist[0])-1)]) node.add_features(rtoken=None) for node in rtree.traverse("postorder"): if node.is_leaf(): node.data=map(lambda x: x, rlist[node.rtoken-1][1:]) # Its the sequence after name node.up.rtoken=int(ph.Ancestors(tree, node.rtoken, "parent")[0]) else: try:
time.time() - start_time) start_time = time.time() rtree = PhyloTree(intree) # Tree for "R" generated patterns tree = ape.read_tree(intree) rlist = [] ropf = open(ressurect_file, "r") # rdata.dat is a rgp.R generated output file for tab in ropf.readlines(): tab = tab.rstrip() rlist.append(tab.split(" ")) ropf.close() ori = np.array(rlist) for node in rtree.traverse( "postorder"): # Patterns are being linked to their corresponding nodes if node.is_leaf(): node.add_features(data=[ None for i in range(len(rlist[0]) - 1) ]) # Its rlist[0]-1, because nucleotides begins with name of species for i in range(len(ori[:, 0])): if '"' + node.name + '"' == ori[:, 0][i]: node.add_features(rtoken=i + 1) else: node.add_features(data=[None for i in range(len(rlist[0]) - 1)]) node.add_features(rtoken=None) for node in rtree.traverse("postorder"): if node.is_leaf(): node.data = map(lambda x: x, rlist[node.rtoken -