else:
                frameshift_cases.append([cluster, genome_id, protein_id])

        # Per-cluster step: skip clusters with too few sequences, then translate,
        # align and write the unaligned/aligned sequence files for this cluster.

        # NOTE(review): the earlier comment here said "3 sequences or more", but
        # the condition only rejects clusters with fewer than 2 sequences --
        # confirm which threshold is intended.
        if len(curated_protein_list) < 2:  # Skip clusters with fewer than 2 sequences
            clusters_too_short.append(cluster)
            continue

        # Alignments and output data

        # Despite its name, curated_protein_list is loaded as DNA sequences here.
        unaligned_DNA = LoadSeqs(data=curated_protein_list, moltype=DNA, aligned=False)

        # Translate the nucleotide sequences to obtain the amino-acid sequences.
        unaligned_AA = unaligned_DNA.getTranslation()

        # Generate the protein alignment (the helper's name indicates MAFFT, not
        # MUSCLE as an earlier comment stated).
        aligned_AA = mafft_align_unaligned_seqs(unaligned_AA, PROTEIN)

        # Replace the amino-acid sequences in the alignment with the nucleotide
        # sequences, giving a DNA alignment derived from the protein alignment.
        aligned_DNA = aligned_AA.replaceSeqs(unaligned_DNA)

        # Output file paths, one file per cluster in each output folder.
        aligned_dna_file = dna_aligned_folder + "/" + cluster + ".fna"
        aligned_aa_file = protein_alignment_folder + "/" + cluster + ".faa"
        # NOTE(review): the two tree paths below are built but not used in the
        # lines visible here -- presumably consumed later; verify.
        protein_tree_output = protein_tree_folder + "/" + cluster + ".tre"
        nucleotide_tree_output = dna_tree_folder + "/" + cluster + ".tre"

        # Write unaligned and aligned sequences (DNA and protein) as FASTA.
        unaligned_DNA.writeToFile(dna_unaligned_folder + "/" + cluster + ".fna", format="fasta")
        unaligned_AA.writeToFile(protein_unaligned_folder + "/" + cluster + ".faa", format="fasta")
        aligned_DNA.writeToFile(aligned_dna_file, format="fasta")
        aligned_AA.writeToFile(aligned_aa_file, format="fasta")
Beispiel #2
0
#!/usr/bin/env python
# taken from http://pycogent.sourceforge.net/
from cogent.app.mafft import align_unaligned_seqs as mafft_align_unaligned_seqs
from cogent.core.moltype import DNA
from cogent import LoadSeqs
from cogent.app.raxml import build_tree_from_alignment as raxml_build_tree
# Load the raw sequences from the FASTA test file as an unaligned collection.
unaligned_seqs = LoadSeqs(
    aligned=False,
    filename='data/test2.fasta',
)

# Align the collection with MAFFT, treating the sequences as DNA.
aln = mafft_align_unaligned_seqs(
    unaligned_seqs,
    DNA,
)

# Optional follow-up (left disabled): build a tree from the alignment with RAxML.
#raxml_tree = raxml_build_tree(aln, DNA)
#print raxml_tree
Beispiel #3
0
        # Per-cluster step: skip clusters with too few sequences, then translate,
        # align and build/write the protein tree for this cluster.

        # NOTE(review): the earlier comment here said "3 sequences or more", but
        # the condition only rejects clusters with fewer than 2 sequences --
        # confirm which threshold is intended.
        if len(curated_protein_list
               ) < 2:  # Skip clusters with fewer than 2 sequences
            clusters_too_short.append(cluster)
            continue

        # Alignments and output data

        # Despite its name, curated_protein_list is loaded as DNA sequences here.
        unaligned_DNA = LoadSeqs(data=curated_protein_list,
                                 moltype=DNA,
                                 aligned=False)

        # Translate the nucleotide sequences to obtain the amino-acid sequences.
        unaligned_AA = unaligned_DNA.getTranslation()

        # Generate the protein alignment (the helper's name indicates MAFFT, not
        # MUSCLE as an earlier comment stated).
        aligned_AA = mafft_align_unaligned_seqs(unaligned_AA, PROTEIN)

        # Replace the amino-acid sequences in the alignment with the nucleotide
        # sequences, giving a DNA alignment derived from the protein alignment.
        aligned_DNA = aligned_AA.replaceSeqs(unaligned_DNA)

        # Make protein trees using FastTree and write them in Newick format
        # (with branch distances) to <protein_tree_folder>/<cluster>.tre.
        protein_tree = build_tree_fasttree(aligned_AA, PROTEIN)
        # NOTE(review): the handle is opened and closed manually; if write()
        # raises, close() is never reached. A `with` block would guarantee it.
        protein_tree_output = open(
            protein_tree_folder + "/" + cluster + ".tre", 'w')
        protein_tree_output.write(protein_tree.getNewick(with_distances=True))
        protein_tree_output.close()

        #Make nucleotide trees using FastTree
        nucleotide_tree = build_tree_fasttree(aligned_DNA, DNA)
        nucleotide_tree_output = open(dna_tree_folder + "/" + cluster + ".tre",
                                      'w')