Exemple #1
0
def build_hmm_from_tree(base,tree_name,aln_name,msa_dir,hmm_dir):
    '''
    Write a FASTA alignment and build an HMM for every internal node of a tree.

    The tree is loaded together with its alignment, every internal node is
    tagged with an ``hmm`` feature ("node0", "node1", ... in traversal order),
    the sequences of the leaves below that node are written to
    ``<msa_dir><node>.aln`` and ``build_hmmer3_hmm_from_alignment.py`` is run
    on that file.  Finally every ``<hmm_dir>*.hmm`` file is concatenated into
    ``<hmm_dir><base>_concat.hmm``.

    Parameters:
        base:      prefix used for the name of the concatenated HMM file.
        tree_name: tree passed straight to ``PhyloTree``.
        aln_name:  FASTA alignment passed straight to ``PhyloTree``.
        msa_dir:   path prefix for the per-node alignments.  It is joined by
                   plain string concatenation, so a directory must include
                   its trailing separator.
        hmm_dir:   path prefix for the per-node HMMs (same convention).

    Returns:
        The annotated ``PhyloTree`` object.

    Raises:
        subprocess.CalledProcessError: presumably, if the HMM builder exits
            with a non-zero status (assumes ``check_call`` is
            ``subprocess.check_call`` -- confirm against the file's imports).
        IOError/OSError: if an alignment or HMM file cannot be read/written.
    '''
    # Imported locally so this block does not rely on module-level imports
    # that are not visible from here.
    import glob
    import shutil

    # Annotate internal nodes with name of corresponding HMM.
    pt = PhyloTree(tree_name,alignment=aln_name,alg_format="fasta")
    i_node = 0
    for node in pt.traverse():
        if node.is_leaf():
            continue

        node_name = 'node%s' % i_node
        node.add_features(hmm=node_name)
        i_node += 1

        # make msa for node: one ">name" header line plus one sequence line
        # per leaf, joined without a trailing newline (as before).
        fasta_lines = []
        for leaf in node.iter_leaves():
            fasta_lines.append(">%s" % leaf.name)
            fasta_lines.append(str(leaf.sequence))
        msa_path = '%s%s.aln' % (msa_dir, node_name)
        # 'with' guarantees the handle is closed even if write() fails.
        with open(msa_path, 'w') as msa:
            msa.write('\n'.join(fasta_lines))

        # build HMM for node
        check_call(['python', 'build_hmmer3_hmm_from_alignment.py', '--name',
                    '%s%s' % (hmm_dir, node_name),
                    msa_path])

    # Concatenate HMMs into one file for Hmmscan.  Done in Python rather than
    # through a shell 'cat' so that paths containing spaces or shell
    # metacharacters are safe and I/O errors are not silently ignored.
    concat_path = '%s%s_concat.hmm' % (hmm_dir, base)
    concat_abs = os.path.abspath(concat_path)
    # A concatenated file left over from a previous run also matches the
    # pattern; it must not be fed back into itself.
    hmm_paths = sorted(path for path in glob.glob('%s*.hmm' % hmm_dir)
                       if os.path.abspath(path) != concat_abs)
    with open(concat_path, 'wb') as concat:
        for path in hmm_paths:
            with open(path, 'rb') as hmm_file:
                shutil.copyfileobj(hmm_file, concat)
    return pt
def integrate_pwids_into_tree(tree, alignment):
    '''Load a tree with its FASTA alignment and annotate every node.

    Each node visited by ``traverse()`` receives:
      * ``node_kerf_name`` -- "node" followed by the zero-padded (3 digits)
        position of the node in the traversal;
      * ``kerf_pass`` and ``sh_pass`` -- both initialised to False for the
        later kerf and sh functions;
      * ``min_pwid`` -- 1.0 for a leaf, otherwise the value returned by
        ``get_min_pwid_of_leaves`` for the leaves below the node.

    Returns the annotated PhyloTree.'''
    annotated = PhyloTree(tree, alignment=alignment, alg_format="fasta")
    position = 0
    for current in annotated.traverse():
        current.node_kerf_name = 'node' + str(position).zfill(3)
        # Flags consumed by the later kerf and sh functions.
        current.kerf_pass = False
        current.sh_pass = False
        if current.is_leaf():
            current.min_pwid = 1.0
        else:
            leaves_below = current.get_leaves()
            current.min_pwid = get_min_pwid_of_leaves(leaves_below)
        position += 1
    return annotated
Exemple #3
0
def build_hmm_from_tree(tree_name, aln_name, msa_dir, hmm_dir):
    '''
    Write a FASTA alignment and build an HMM for every internal node of a tree.

    The tree is loaded together with its alignment, every internal node is
    tagged with an ``hmm`` feature ("node0", "node1", ... in traversal order),
    the sequences of the leaves below that node are written to
    ``<msa_dir><node>.aln`` and the ``build_hmmer3_hmm_from_alignment.py``
    executable is run on that file.  Finally every ``<hmm_dir>*.hmm`` file is
    concatenated into ``<hmm_dir>concat.hmm``.

    Parameters:
        tree_name: tree passed straight to ``PhyloTree``.
        aln_name:  FASTA alignment passed straight to ``PhyloTree``.
        msa_dir:   path prefix for the per-node alignments.  It is joined by
                   plain string concatenation, so a directory must include
                   its trailing separator.
        hmm_dir:   path prefix for the per-node HMMs (same convention).

    Returns:
        The annotated ``PhyloTree`` object.

    Raises:
        subprocess.CalledProcessError: presumably, if the HMM builder exits
            with a non-zero status (assumes ``check_call`` is
            ``subprocess.check_call`` -- confirm against the file's imports).
        IOError/OSError: if an alignment or HMM file cannot be read/written.
    '''
    # Imported locally so this block does not rely on module-level imports
    # that are not visible from here.
    import glob
    import shutil

    # Annotate internal nodes with name of corresponding HMM.
    pt = PhyloTree(tree_name, alignment=aln_name, alg_format="fasta")
    i_node = 0
    for node in pt.traverse():
        if node.is_leaf():
            continue

        node_name = 'node%s' % i_node
        node.add_features(hmm=node_name)
        i_node += 1

        # make msa for node: one ">name" header line plus one sequence line
        # per leaf, joined without a trailing newline (as before).
        fasta_lines = []
        for leaf in node.iter_leaves():
            fasta_lines.append(">%s" % leaf.name)
            fasta_lines.append(str(leaf.sequence))
        msa_path = '%s%s.aln' % (msa_dir, node_name)
        # 'with' guarantees the handle is closed even if write() fails.
        with open(msa_path, 'w') as msa:
            msa.write('\n'.join(fasta_lines))

        # build HMM for node
        check_call([
            'build_hmmer3_hmm_from_alignment.py', '--name',
            '%s%s' % (hmm_dir, node_name),
            msa_path
        ])

    # Concatenate HMMs into one file for Hmmscan.  Done in Python rather than
    # through a shell 'cat' so that paths containing spaces or shell
    # metacharacters are safe and I/O errors are not silently ignored.
    concat_path = '%sconcat.hmm' % hmm_dir
    concat_abs = os.path.abspath(concat_path)
    # A concat.hmm left over from a previous run also matches the pattern;
    # it must not be fed back into itself.
    hmm_paths = sorted(path for path in glob.glob('%s*.hmm' % hmm_dir)
                       if os.path.abspath(path) != concat_abs)
    with open(concat_path, 'wb') as concat:
        for path in hmm_paths:
            with open(path, 'rb') as hmm_file:
                shutil.copyfileobj(hmm_file, concat)
    return pt
Exemple #4
0
#########################################################################################
print "Reading complete ancestral sequence data generated through R"

rtree = PhyloTree(intree)    # Tree for "R" generated patterns
tree = ape.read_tree(intree)

# Read the space-separated table produced by the R script (rgp.R): the first
# field of each row is the (double-quoted) species name, the remaining fields
# are the per-site states.  'with' closes the file even if reading fails.
with open(ressurect_file, "r") as ropf:
    rlist = [tab.rstrip().split(" ") for tab in ropf]

ori = np.array(rlist)

# Number of data columns: row length minus one, because every row starts
# with the species name.
n_sites = len(rlist[0]) - 1

# Quoted species name -> 1-based row number.  Built once instead of
# rescanning the whole table for every leaf; when a name occurs more than
# once the last row wins, exactly as with the original linear scan.
row_by_name = {row[0]: row_number for row_number, row in enumerate(rlist, 1)}

# Patterns are being linked to their corresponding nodes.
for node in rtree.traverse("postorder"):
    # Every node starts with an empty (all-None) pattern of its own.
    node.add_features(data=[None] * n_sites)
    if node.is_leaf():
        # Names in the table carry literal double quotes around them.
        leaf_row = row_by_name.get('"' + node.name + '"')
        # A leaf missing from the table gets no rtoken at all (unchanged).
        if leaf_row is not None:
            node.add_features(rtoken=leaf_row)
    else:
        node.add_features(rtoken=None)

for node in rtree.traverse("postorder"):
    if node.is_leaf():
        node.data=map(lambda x: x, rlist[node.rtoken-1][1:])    # Its the sequence after name
        node.up.rtoken=int(ph.Ancestors(tree, node.rtoken, "parent")[0])
    else:
        try:
Exemple #5
0
    time.time() - start_time)
start_time = time.time()

rtree = PhyloTree(intree)  # Tree for "R" generated patterns
tree = ape.read_tree(intree)

# Read the space-separated table produced by the R script (rgp.R): the first
# field of each row is the (double-quoted) species name, the remaining fields
# are the per-site states.  'with' closes the file even if reading fails.
with open(ressurect_file, "r") as ropf:
    rlist = [tab.rstrip().split(" ") for tab in ropf]

ori = np.array(rlist)

# Number of data columns: row length minus one, because every row starts
# with the species name.
n_sites = len(rlist[0]) - 1

# Quoted species name -> 1-based row number.  Built once instead of
# rescanning the whole table for every leaf; when a name occurs more than
# once the last row wins, exactly as with the original linear scan.
row_by_name = {}
for row_number, row in enumerate(rlist, 1):
    row_by_name[row[0]] = row_number

# Patterns are being linked to their corresponding nodes.
for node in rtree.traverse("postorder"):
    # Every node starts with an empty (all-None) pattern of its own.
    node.add_features(data=[None] * n_sites)
    if not node.is_leaf():
        node.add_features(rtoken=None)
        continue
    # Names in the table carry literal double quotes around them.
    quoted_name = '"' + node.name + '"'
    # A leaf missing from the table gets no rtoken at all (unchanged).
    if quoted_name in row_by_name:
        node.add_features(rtoken=row_by_name[quoted_name])

for node in rtree.traverse("postorder"):
    if node.is_leaf():
        node.data = map(lambda x: x,
                        rlist[node.rtoken -