Beispiel #1
0
 def dummy(self, reftree, alignment):
     """Run ``self.run`` on ``alignment`` extended with an artificial "dummy" sequence.

     The dummy sequence is the sequence of the first alignment entry without
     its last 50 characters, followed by 50 "A" characters. The extended
     alignment is written as FASTA to a temporary file under ``self.tmppath``,
     handed to ``self.run`` together with ``reftree``, and deleted afterwards.

     Args:
         reftree: forwarded unchanged to ``self.run``.
         alignment: FASTA input accepted by ``SeqGroup(sequences=...)``.

     Returns:
         The path string ``<tmppath>/RAxML_portableTree.<name>.jplace``
         (presumably the file produced by ``self.run`` -- confirm).
     """
     aln = SeqGroup(sequences=alignment, format='fasta')
     template_seq = aln.get_entries()[0][1]
     # Keep the template's prefix and pad the tail with a fixed run of "A".
     padded_seq = template_seq[:-50] + "A"*50
     aln.set_seq(name="dummy", seq=padded_seq)

     tmp_fasta = self.tmppath + "/dummy" + self.name + ".fa"
     aln.write(format='fasta', outfile=tmp_fasta)

     self.run(reftree=reftree, alignment=tmp_fasta)
     self.clean()
     # The temporary alignment is only needed for the run above.
     os.remove(tmp_fasta)

     jplace_path = self.tmppath + "/" + "RAxML_portableTree." + self.name + ".jplace"
     return jplace_path
Beispiel #2
0
def epa_2_ptp(epa_jp, ref_jp, full_alignment, min_lw=0.5, debug=False):
    """Group placed taxa by placement branch and delimit species per branch.

    Every placement is assigned to the branch given by its first "p" entry,
    but only if that entry's weight is at least ``min_lw``; placements below
    the threshold are ignored. Taxa sharing a branch form one group. Groups
    with fewer than four taxa are reported as they are; larger groups are
    turned into a ``SeqGroup`` sub-alignment and passed to
    ``build_tree_run_ptp``.

    Args:
        epa_jp: object whose ``get_placement()`` returns an iterable of
            placement dicts with the keys "p" (list of edge records) and
            "n" (list of taxa names).
        ref_jp: object providing ``get_rate()``; the rate is forwarded to
            ``build_tree_run_ptp``.
        full_alignment: object providing ``get_seq(name)`` for every placed
            taxon.
        min_lw: minimum weight of the first edge record for a placement to
            be kept.
        debug: unused; kept so existing callers keep working.

    Returns:
        A list with one entry per small group (the list of its taxa names)
        plus all entries returned by ``build_tree_run_ptp`` for the larger
        groups, in order of first appearance of each branch.
    """
    # Groups smaller than this are reported as-is instead of being run
    # through PTP (presumably too few taxa for a meaningful tree -- confirm).
    min_ptp_group_size = 4

    # Branch id -> names of all taxa placed on that branch. A single pass
    # with setdefault keeps the same key order as creating the keys first.
    placemap = {}
    for placement in epa_jp.get_placement():
        edges = placement["p"]
        taxa_names = placement["n"]
        # NOTE(review): assumes index 0 of an edge record is the edge number
        # and index 2 the likelihood weight ratio -- confirm against the
        # jplace "fields" declaration.
        first_edge = edges[0]
        if first_edge[2] >= min_lw:
            placemap.setdefault(first_edge[0], []).extend(taxa_names)

    species_list = []
    for seqset in placemap.values():
        if len(seqset) < min_ptp_group_size:
            species_list.append(seqset)
            continue
        branch_alignment = SeqGroup()
        for taxa in seqset:
            branch_alignment.set_seq(taxa, full_alignment.get_seq(taxa))
        species_list.extend(build_tree_run_ptp(branch_alignment, ref_jp.get_rate()))
    return species_list
 def get_ref_alignment(self):
     """Return a ``SeqGroup`` built from ``self.jdata["sequences"]``.

     Each stored record is indexed positionally: element 0 is passed to
     ``set_seq`` as the sequence name and element 1 as the sequence.
     """
     records = self.jdata["sequences"]
     ref_seqs = SeqGroup()
     for record in records:
         seq_name, seq_data = record[0], record[1]
         ref_seqs.set_seq(seq_name, seq_data)
     return ref_seqs