else: frameshift_cases.append([cluster, genome_id, protein_id]) if len(curated_protein_list) < 2: # Only take those clusters with 3 sequences or more clusters_too_short.append(cluster) continue #Alignments and output data unaligned_DNA = LoadSeqs(data=curated_protein_list, moltype=DNA, aligned=False) unaligned_AA = unaligned_DNA.getTranslation() #Generate alignments using muscle aligned_AA = mafft_align_unaligned_seqs(unaligned_AA, PROTEIN) #Replace the aminoacid sequences with the nucleotide sequence aligned_DNA = aligned_AA.replaceSeqs(unaligned_DNA) #Output files aligned_dna_file = dna_aligned_folder + "/" + cluster + ".fna" aligned_aa_file = protein_alignment_folder + "/" + cluster + ".faa" protein_tree_output = protein_tree_folder + "/" + cluster + ".tre" nucleotide_tree_output = dna_tree_folder + "/" + cluster + ".tre" unaligned_DNA.writeToFile(dna_unaligned_folder + "/" + cluster + ".fna", format="fasta") unaligned_AA.writeToFile(protein_unaligned_folder + "/" + cluster + ".faa", format="fasta") aligned_DNA.writeToFile(aligned_dna_file, format="fasta") aligned_AA.writeToFile(aligned_aa_file, format="fasta")
#!/usr/bin/env python
# taken from http://pycogent.sourceforge.net/
#
# Minimal PyCogent example: read a FASTA file as an unaligned sequence
# collection and align it with the MAFFT application wrapper.
from cogent.app.mafft import align_unaligned_seqs as mafft_align_unaligned_seqs
from cogent.core.moltype import DNA
from cogent import LoadSeqs
from cogent.app.raxml import build_tree_from_alignment as raxml_build_tree

# aligned=False asks LoadSeqs for an unaligned collection rather than an
# alignment; the path is relative to the current working directory.
unaligned_seqs = LoadSeqs(aligned=False, filename='data/test2.fasta')

# Align the collection, passing DNA as the molecule type.
aln = mafft_align_unaligned_seqs(unaligned_seqs, DNA)

# Tree-building step left disabled, as in the original example
# (this is why raxml_build_tree is imported above):
# raxml_tree = raxml_build_tree(aln, DNA)
# print raxml_tree
if len(curated_protein_list ) < 2: # Only take those clusters with 3 sequences or more clusters_too_short.append(cluster) continue #Alignments and output data unaligned_DNA = LoadSeqs(data=curated_protein_list, moltype=DNA, aligned=False) unaligned_AA = unaligned_DNA.getTranslation() #Generate alignments using muscle aligned_AA = mafft_align_unaligned_seqs(unaligned_AA, PROTEIN) #Replace the aminoacid sequences with the nucleotide sequence aligned_DNA = aligned_AA.replaceSeqs(unaligned_DNA) #Make protein trees using FastTree protein_tree = build_tree_fasttree(aligned_AA, PROTEIN) protein_tree_output = open( protein_tree_folder + "/" + cluster + ".tre", 'w') protein_tree_output.write(protein_tree.getNewick(with_distances=True)) protein_tree_output.close() #Make nucleotide trees using FastTree nucleotide_tree = build_tree_fasttree(aligned_DNA, DNA) nucleotide_tree_output = open(dna_tree_folder + "/" + cluster + ".tre", 'w')