def calculate_distance_of_one_leaf_string_input(tree1_str, tree2_str, leaf_label):
    '''
    String-input variant of calculate_distance_of_one_leaf_between_trees
    (see that function for the full description).

    Both arguments are handed to Tree(...) to build the two trees.  For every
    leaf name of the first tree, the distance between leaf_label and that leaf
    is looked up in both trees with get_distance(); the absolute differences
    are summed and returned.

    tree1_str  -- input passed to Tree() for the first tree
    tree2_str  -- input passed to Tree() for the second tree
    leaf_label -- name of the leaf all distances are measured from
    '''
    first_tree = Tree(tree1_str)
    second_tree = Tree(tree2_str)
    # Only the leaves of the first tree drive the iteration; each of them is
    # looked up by name in the second tree as well.
    return sum(
        abs(first_tree.get_distance(leaf_label, other_leaf)
            - second_tree.get_distance(leaf_label, other_leaf))
        for other_leaf in first_tree.iter_leaf_names()
    )
def _prepare_output_path(outdir, category, prefix, filename):
    '''
    Return <outdir>/<category>/<prefix>/<filename>, calling check_dir() on
    both directory levels first.
    '''
    category_dir = path.join(outdir, category)
    check_dir(category_dir)
    sub_dir = path.join(category_dir, prefix)
    check_dir(sub_dir)
    return path.join(sub_dir, filename)


def _translate_gapped_cdna(coding_dna):
    '''
    Translate an aligned cDNA sequence codon by codon and return the result
    as a plain string.
    '''
    residues = []
    for codon in grouper(coding_dna, 3):
        codon_str = "".join(codon)
        if codon_str == "---":
            # An all-gap codon becomes a single gap character instead of
            # being passed to translate().
            residues.append("-")
        else:
            residues.append(translate(codon_str))
    return "".join(residues)


def _fasta_to_relaxed_phylip(fasta_path, phylip_path):
    '''
    Re-write the FASTA alignment at fasta_path in "phylip-relaxed" format at
    phylip_path.
    '''
    # Plain "r" instead of "rU": the "U" flag was removed in Python 3.11 and
    # the input was just written by this module in text mode.
    with open(fasta_path, "r") as fasta_handle, \
            open(phylip_path, "w") as phylip_handle:
        alignments = AlignIO.parse(fasta_handle, "fasta")
        AlignIO.write(alignments, phylip_handle, "phylip-relaxed")


def format_alignment(fasta, tree, outdir):
    '''
    Rename the sequences of each cDNA alignment after the leaves of its tree
    and write the result as FASTA and relaxed PHYLIP, both as nucleotides and
    translated to amino acids.

    Every file matching <fasta>/*/*prank.best.fas is processed.  Its basename
    is the first two "_"-separated fields of the file name (cut at the first
    "."), its prefix the first two characters of that basename, and its tree
    is read from <tree>/<prefix>/<basename>.nh.

    A sequence is kept when its id is a substring of a tree leaf name; it is
    then renamed to the first such leaf name.  Sequences matching no leaf are
    not written.  Output files:

        <outdir>/fasta/<prefix>/<basename>.fa       renamed nucleotide records
        <outdir>/fasta_AA/<prefix>/<basename>.fa    codon-wise translations
        <outdir>/phylip/<prefix>/<basename>.phy     "phylip-relaxed" of fasta
        <outdir>/phylip_AA/<prefix>/<basename>.phy  "phylip-relaxed" of fasta_AA

    fasta  -- root directory holding the alignment sub-directories
    tree   -- root directory holding the per-prefix tree directories
    outdir -- root directory for all output (passed to check_dir())

    Raises IndexError when an alignment file name contains no "_".
    '''
    treeroot = tree
    fastaroot = path.join(fasta, "*/*prank.best.fas")

    check_dir(outdir)

    for infile in glob(fastaroot):

        # print progress (parenthesised form prints the same text on Python 2
        # and is also valid on Python 3)
        print(infile)

        name_fields = path.basename(infile).partition('.')[0].split("_")
        basename = name_fields[0] + "_" + name_fields[1]
        prefix = name_fields[0][:2]

        treefile = path.join(path.join(treeroot, prefix), basename + '.nh')

        # The tree does not change while the alignment is processed, so its
        # leaf names are read once here rather than once per sequence.
        leaf_names = list(Tree(newick=treefile).iter_leaf_names())

        fasta_out_file_path = _prepare_output_path(
            outdir, "fasta", prefix, basename + ".fa")
        fasta_AA_out_file_path = _prepare_output_path(
            outdir, "fasta_AA", prefix, basename + ".fa")

        # Context managers guarantee both handles are closed even if parsing
        # or translation raises.
        with open(fasta_out_file_path, "w") as fasta_out_file, \
                open(fasta_AA_out_file_path, "w") as fasta_AA_out_file:

            for record in SeqIO.parse(infile, "fasta",
                                      alphabet=IUPAC.unambiguous_dna):

                for leaf_name in leaf_names:
                    if record.id not in leaf_name:
                        continue

                    record.id = leaf_name
                    record.description = ""

                    # write the normal fasta out
                    SeqIO.write(record, fasta_out_file, "fasta")

                    # translate cDNA and write AA fasta; a separate variable
                    # keeps the nucleotide record intact
                    protein_record = SeqRecord(
                        Seq(_translate_gapped_cdna(record.seq), IUPAC.protein),
                        id=record.id, name=record.name)
                    protein_record.description = ""
                    SeqIO.write(protein_record, fasta_AA_out_file, "fasta")

                    # One leaf per sequence: stop at the first match so a
                    # record is never written (or translated) twice.
                    break

        phy_out_file_path = _prepare_output_path(
            outdir, "phylip", prefix, basename + ".phy")
        phy_AA_out_file_path = _prepare_output_path(
            outdir, "phylip_AA", prefix, basename + ".phy")

        _fasta_to_relaxed_phylip(fasta_out_file_path, phy_out_file_path)
        _fasta_to_relaxed_phylip(fasta_AA_out_file_path, phy_AA_out_file_path)
# Example #3
# 0
def format_trees(treeroot, fastaroot, outroot):
    '''
    Prune each tree down to the sequences present in its alignment and write
    it out once as-is ("rooted") and once after unroot() ("unrooted").

    Every file matching <fastaroot>/*/*.fa is processed.  Its basename is the
    first two "_"-separated fields of the file name (cut at the first "."),
    its prefix the first two characters of that basename, and its tree is
    read from <treeroot>/<prefix>/<basename>.nh.

    A leaf is kept when at least one sequence id of the alignment is a
    substring of the leaf name; every other leaf is removed with delete().
    After pruning, the tree is written with format=6 to

        <outroot>/rooted/<prefix>/<basename>.nh
        <outroot>/unrooted/<prefix>/<basename>.nh   (after tree.unroot())

    treeroot  -- root directory holding the per-prefix tree directories
    fastaroot -- root directory holding the alignment sub-directories
    outroot   -- root directory for all output; created if missing

    Raises IndexError when an alignment file name contains no "_".
    '''
    fastafiles = path.join(fastaroot, "*/*.fa")

    if not os.path.exists(outroot):
        os.makedirs(outroot)

    rooted_out_dir = path.join(outroot, "rooted")
    check_dir(rooted_out_dir)
    unrooted_out_dir = path.join(outroot, "unrooted")
    check_dir(unrooted_out_dir)

    for infile in glob(fastafiles):

        # progress output (parenthesised form prints the same text on
        # Python 2 and is also valid on Python 3)
        print(infile)

        name_fields = path.basename(infile).partition('.')[0].split("_")
        basename = name_fields[0] + "_" + name_fields[1]
        prefix = name_fields[0][:2]

        treefile = path.join(path.join(treeroot, prefix), basename + '.nh')

        # make the tree object
        tree = Tree(newick=treefile)

        # The alignment ids are the same for every leaf, so read them once.
        alignment_ids = [record.id for record in SeqIO.parse(infile, "fasta")]

        # Delete leaves that are not in the alignment.  The leaf names are
        # snapshotted first because the tree is modified inside the loop.
        for leaf_name in list(tree.iter_leaf_names()):
            if any(seq_id in leaf_name for seq_id in alignment_ids):
                continue
            tree.search_nodes(name=leaf_name)[0].delete()

        # Writing happens once, after pruning is complete, so a tree is
        # produced even when no leaf had to be removed and the "rooted" file
        # is never overwritten with an already unrooted tree.
        rooted_out_sub_dir = path.join(rooted_out_dir, prefix)
        check_dir(rooted_out_sub_dir)
        rooted_out_file = path.join(rooted_out_sub_dir, basename + ".nh")
        tree.write(outfile=rooted_out_file, format=6)

        unrooted_out_sub_dir = path.join(unrooted_out_dir, prefix)
        check_dir(unrooted_out_sub_dir)
        unrooted_out_file = path.join(unrooted_out_sub_dir, basename + ".nh")
        # unroot the tree
        tree.unroot()
        tree.write(outfile=unrooted_out_file, format=6)