def test_Prank_simple_with_NEXUS_output(self):
    """Simple round-trip through app with infile, output in NEXUS.

    output.?.??? files written to cwd - no way to redirect
    """
    records = list(SeqIO.parse(self.infile1, "fasta"))
    # Try using keyword argument,
    cmdline = PrankCommandline(prank_exe, d=self.infile1)
    # Try using a property,
    cmdline.d = self.infile1
    cmdline.f = 17  # NEXUS format
    cmdline.set_parameter("dots", True)
    self.assertEqual(
        str(cmdline), _escape_filename(prank_exe) + " -d=Fasta/fa01 -f=17 -dots"
    )
    # The repr must evaluate back to an equivalent command line.
    self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
    stdout, stderr = cmdline()
    self.assertIn("Total time", stdout)
    self.assertEqual(stderr, "")
    if os.path.isfile("output.best.nex"):
        # Prank v.130820 and perhaps earlier use ".best.*" output names
        nex_fname = "output.best.nex"
    elif os.path.isfile("output.2.nex"):
        # Older Prank versions use ".2.*" output names
        nex_fname = "output.2.nex"
    else:
        raise RuntimeError("Can't find PRANK's NEXUS output (*.nex)")
    try:
        # Only the parse itself is guarded, so nothing else can be
        # mistaken for the known parser limitation.
        align = AlignIO.read(nex_fname, "nexus")
    except NexusError:
        # See bug 3119,
        # Bio.Nexus can't parse output from prank v100701 (1 July 2010)
        return
    for old, new in zip(records, align):
        # Old versions of Prank reduced name to 9 chars
        self.assertTrue(old.id == new.id or old.id[:9] == new.id)
        # infile1 has alignment gaps in it
        self.assertEqual(
            str(new.seq).replace("-", ""), str(old.seq).replace("-", "")
        )
def test_Prank_simple_with_NEXUS_output(self):
    """Simple round-trip through app with infile, output in NEXUS.

    output.?.??? files written to cwd - no way to redirect
    """
    records = list(SeqIO.parse(self.infile1, "fasta"))
    # Try using keyword argument,
    cmdline = PrankCommandline(prank_exe, d=self.infile1, noxml=True)
    # Try using a property,
    cmdline.d = self.infile1
    cmdline.f = 17  # NEXUS format
    cmdline.set_parameter("notree", True)
    self.assertEqual(
        str(cmdline),
        _escape_filename(prank_exe) + " -d=Fasta/fa01 -f=17 -noxml -notree",
    )
    # The repr must evaluate back to an equivalent command line.
    self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
    stdout, stderr = cmdline()
    self.assertIn("Total time", stdout)
    self.assertEqual(stderr, "")
    try:
        # Only the parse itself is guarded, so nothing else can be
        # mistaken for the known parser limitation.
        align = AlignIO.read("output.2.nex", "nexus")
    except NexusError:
        # See bug 3119,
        # Bio.Nexus can't parse output from prank v100701 (1 July 2010)
        return
    for old, new in zip(records, align):
        # Old versions of Prank reduced name to 9 chars
        self.assertTrue(old.id == new.id or old.id[:9] == new.id)
        # infile1 has alignment gaps in it
        self.assertEqual(
            str(new.seq).replace("-", ""), str(old.seq).replace("-", "")
        )
def test_Prank_simple_with_NEXUS_output(self):
    """Simple round-trip through app with infile, output in NEXUS.

    output.?.??? files written to cwd - no way to redirect
    """
    # Close the input handle explicitly instead of relying on garbage
    # collection.
    in_handle = open(self.infile1)
    try:
        records = list(SeqIO.parse(in_handle, "fasta"))
    finally:
        in_handle.close()
    # Try using keyword argument,
    cmdline = PrankCommandline(prank_exe, d=self.infile1, noxml=True)
    # Try using a property,
    cmdline.d = self.infile1
    cmdline.f = 17  # NEXUS format
    cmdline.set_parameter("notree", True)
    self.assertEqual(str(cmdline),
                     prank_exe + " -d=Fasta/fa01 -f=17 -noxml -notree")
    # The repr must evaluate back to an equivalent command line.
    self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
    result, stdout, stderr = Application.generic_run(cmdline)
    self.assertEqual(result.return_code, 0)
    output = stdout.read()
    # assertTrue replaces the deprecated assert_ alias; the captured output
    # is passed as the failure message to aid debugging.
    self.assertTrue("Total time" in output, output)
    self.assertEqual(stderr.read(), "")
    self.assertEqual(str(result._cl), str(cmdline))
    out_handle = open("output.2.nex", "r")
    try:
        align = AlignIO.read(out_handle, "nexus")
    finally:
        # Close the handle even when parsing fails.
        out_handle.close()
    for old, new in zip(records, align):
        # Prank automatically reduces name to 9 chars
        self.assertEqual(old.id[:9], new.id)
        # infile1 has alignment gaps in it
        self.assertEqual(str(new.seq).replace("-", ""),
                         str(old.seq).replace("-", ""))
def test_Prank_simple_with_NEXUS_output(self):
    """Simple round-trip through app with infile, output in NEXUS.

    output.?.??? files written to cwd - no way to redirect
    """
    # Close the input handle explicitly instead of relying on garbage
    # collection.
    in_handle = open(self.infile1)
    try:
        records = list(SeqIO.parse(in_handle, "fasta"))
    finally:
        in_handle.close()
    # Try using keyword argument,
    cmdline = PrankCommandline(prank_exe, d=self.infile1, noxml=True)
    # Try using a property,
    cmdline.d = self.infile1
    cmdline.f = 17  # NEXUS format
    cmdline.set_parameter("notree", True)
    self.assertEqual(str(cmdline), prank_exe + " -d=Fasta/fa01 -f=17 -noxml -notree")
    # The repr must evaluate back to an equivalent command line.
    self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
    child = subprocess.Popen(
        str(cmdline),
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        # Text mode, so the captured output compares against str literals.
        universal_newlines=True,
        shell=(sys.platform != "win32"),
    )
    # communicate() drains both pipes while waiting; wait() alone can
    # deadlock once the child fills a pipe buffer.
    stdout, stderr = child.communicate()
    self.assertEqual(child.returncode, 0)
    self.assertTrue("Total time" in stdout, stdout)
    self.assertEqual(stderr, "")
    out_handle = open("output.2.nex")
    try:
        align = AlignIO.read(out_handle, "nexus")
    finally:
        out_handle.close()
    for old, new in zip(records, align):
        # Prank automatically reduces name to 9 chars
        self.assertEqual(old.id[:9], new.id)
        # infile1 has alignment gaps in it
        self.assertEqual(str(new.seq).replace("-", ""), str(old.seq).replace("-", ""))
def test_Prank_simple(self):
    """Simple round-trip through app with infile.

    output.?.??? files written to cwd - no way to redirect
    """
    cmdline = PrankCommandline(prank_exe)
    cmdline.set_parameter("d", self.infile1)
    self.assertEqual(str(cmdline), _escape_filename(prank_exe) + " -d=Fasta/fa01")
    # The repr must evaluate back to an equivalent command line.
    self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
    output, error = cmdline()
    self.assertEqual(error, "")
    # assertIn reports both operands on failure, unlike assertTrue(a in b).
    self.assertIn("Total time", output)
def test_Prank_simple(self):
    """Simple round-trip through app with infile.

    output.?.??? files written to cwd - no way to redirect
    """
    cmdline = PrankCommandline(prank_exe)
    cmdline.set_parameter("d", self.infile1)
    self.assertEqual(str(cmdline), prank_exe + " -d=Fasta/fa01")
    # The repr must evaluate back to an equivalent command line.
    self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
    output, error = cmdline()
    self.assertEqual(error, "")
    # assertIn reports both operands on failure, unlike assertTrue(a in b).
    self.assertIn("Total time", output)
def test_Prank_simple(self):
    """Simple round-trip through app with infile.

    output.?.??? files written to cwd - no way to redirect
    """
    cmdline = PrankCommandline(prank_exe)
    cmdline.set_parameter("d", self.infile1)
    self.assertEqual(str(cmdline), prank_exe + " -d=Fasta/fa01")
    # The repr must evaluate back to an equivalent command line.
    self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
    result, stdout, stderr = Application.generic_run(cmdline)
    self.assertEqual(result.return_code, 0)
    output = stdout.read()
    # assertTrue replaces the deprecated assert_ alias; the captured output
    # is passed as the failure message to aid debugging.
    self.assertTrue("Total time" in output, output)
    self.assertEqual(stderr.read(), "")
    self.assertEqual(str(result._cl), str(cmdline))
def test_Prank_simple(self):
    """Simple round-trip through app with infile.

    output.?.??? files written to cwd - no way to redirect
    """
    cmdline = PrankCommandline(prank_exe)
    cmdline.set_parameter("d", self.infile1)
    self.assertEqual(str(cmdline), prank_exe + " -d=Fasta/fa01")
    # The repr must evaluate back to an equivalent command line.
    self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
    child = subprocess.Popen(
        str(cmdline),
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        # Text mode, so the captured output compares against str literals.
        universal_newlines=True,
        shell=(sys.platform != "win32"),
    )
    # communicate() drains both pipes while waiting; wait() alone can
    # deadlock once the child fills a pipe buffer.
    stdout, stderr = child.communicate()
    self.assertEqual(child.returncode, 0)
    self.assertTrue("Total time" in stdout, stdout)
    self.assertEqual(stderr, "")
def conversion(self, prank_number, prank_ext, format):
    """Have PRANK convert self.input and verify the result with AlignIO.

    prank_number is passed to PRANK as its -f value, prank_ext is the
    extension of the file PRANK is expected to write, and format is the
    AlignIO format name used to read that file back.
    """
    filename = f"{self.output}.{prank_ext}"
    # Remove any stale output so the existence check below is meaningful.
    if os.path.isfile(filename):
        os.remove(filename)

    cmdline = PrankCommandline(
        prank_exe,
        d=self.input,
        convert=True,
        f=prank_number,
        o=f'"{self.output}"',
    )
    expected_text = (
        _escape_filename(prank_exe)
        + f' -d={self.input} -o="{self.output}" -f={prank_number} -convert'
    )
    self.assertEqual(str(cmdline), expected_text)
    # The repr must evaluate back to an equivalent command line.
    self.assertEqual(str(eval(repr(cmdline))), str(cmdline))

    message, error = cmdline()
    self.assertIn("PRANK", message)
    progress_line = f"converting '{self.input}' to '{filename}'"
    self.assertIn(progress_line, message, message)
    self.assertEqual(error, "")
    self.assertTrue(os.path.isfile(filename))

    original = AlignIO.read(self.input, "fasta")
    # Hack... truncate the reference identifiers to 10 characters before
    # comparing against PHYLIP output.
    if format == "phylip":
        for entry in original:
            entry.id = entry.id[:10]
    converted = AlignIO.read(filename, format)

    self.assertEqual(len(original), len(converted))
    for before, after in zip(original, converted):
        self.assertEqual(before.id, after.id)
        self.assertEqual(before.seq, after.seq)
    os.remove(filename)
def get_alignment_commands(fastafile_name, outdir, aligner, threads):
    """Build the command-line wrapper object for aligning one FASTA file.

    Args:
        fastafile_name: Path of the FASTA file to align. The gene name is
            the text before the first "." in its last "/"-separated
            component.
        outdir: Prefix prepended verbatim to
            "aligned_gene_sequences/<gene>.aln.fas" for the Clustal Omega
            output file, so it must already end with a path separator.
        aligner: One of "prank", "mafft" or "clustal".
        threads: Thread count. It is forwarded to MAFFT / Clustal Omega
            only when it is 3 or fewer; PRANK never receives it.

    Returns:
        A ``(command, fastafile_name)`` tuple.

    Raises:
        ValueError: If ``aligner`` is not one of the supported names
            (previously this surfaced as an UnboundLocalError).
    """
    geneName = fastafile_name.split('/')[-1].split('.')[0]

    if aligner == "prank":
        command = PrankCommandline(d=fastafile_name, o=geneName, f=8,
                                   codon=True)
    elif aligner == "mafft":
        # NOTE(review): the thread option is passed only for threads <= 3;
        # this mirrors the original branches - confirm it is intentional.
        if threads <= 3:
            command = MafftCommandline(input=fastafile_name, auto=True,
                                       thread=threads, nuc=True)
        else:
            command = MafftCommandline(input=fastafile_name, auto=True,
                                       nuc=True)
    elif aligner == "clustal":
        outfile = outdir + "aligned_gene_sequences/" + geneName + ".aln.fas"
        if threads <= 3:
            command = ClustalOmegaCommandline(infile=fastafile_name,
                                              outfile=outfile,
                                              seqtype="DNA",
                                              threads=threads)
        else:
            command = ClustalOmegaCommandline(infile=fastafile_name,
                                              outfile=outfile,
                                              seqtype="DNA")
    else:
        raise ValueError(
            "Unsupported aligner %r; expected 'prank', 'mafft' or 'clustal'"
            % (aligner,)
        )
    return (command, fastafile_name)
def conversion(self, prank_number, prank_ext, format):
    """Have PRANK convert self.input and verify the result with AlignIO.

    prank_number is passed to PRANK as its -f value, prank_ext is the
    extension of the file PRANK is expected to write, and format is the
    AlignIO format name used to read that file back.
    """
    filename = "%s.%s" % (self.output, prank_ext)
    # Remove any stale output so the existence check below is meaningful.
    if os.path.isfile(filename):
        os.remove(filename)

    cmdline = PrankCommandline(
        prank_exe,
        d=self.input,
        convert=True,
        f=prank_number,
        o='"%s"' % self.output,
    )
    expected_parts = [
        _escape_filename(prank_exe),
        " -d=%s" % self.input,
        ' -o="%s"' % self.output,
        " -f=%i" % prank_number,
        " -convert",
    ]
    self.assertEqual(str(cmdline), "".join(expected_parts))
    # The repr must evaluate back to an equivalent command line.
    self.assertEqual(str(eval(repr(cmdline))), str(cmdline))

    message, error = cmdline()
    self.assertIn("PRANK", message)
    progress_line = "converting '%s' to '%s'" % (self.input, filename)
    self.assertIn(progress_line, message, message)
    self.assertEqual(error, "")
    self.assertTrue(os.path.isfile(filename))

    original = AlignIO.read(self.input, "fasta")
    # Hack... truncate the reference identifiers to 10 characters before
    # comparing against PHYLIP output.
    if format == "phylip":
        for entry in original:
            entry.id = entry.id[:10]
    converted = AlignIO.read(filename, format)

    self.assertEqual(len(original), len(converted))
    for before, after in zip(original, converted):
        self.assertEqual(before.id, after.id)
        self.assertEqual(before.seq, after.seq)
    os.remove(filename)
def conversion(self, prank_number, prank_ext, format):
    """Get PRANK to do a conversion, and check it with AlignIO.

    prank_number is passed to PRANK as its -f value, prank_ext is the
    extension of the file PRANK is expected to write, and format is the
    AlignIO format name used to read that file back.
    """
    filename = "%s.%s" % (self.output, prank_ext)
    # Remove any stale output so the existence check below is meaningful.
    if os.path.isfile(filename):
        os.remove(filename)
    cmdline = PrankCommandline(prank_exe, d=self.input,
                               convert=True, f=prank_number,
                               o='"%s"' % self.output)
    self.assertEqual(str(cmdline), prank_exe
                     + ' -d=%s' % self.input
                     + ' -o="%s"' % self.output
                     + ' -f=%i' % prank_number
                     + ' -convert')
    # The repr must evaluate back to an equivalent command line.
    self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
    result, stdout, stderr = Application.generic_run(cmdline)
    self.assertEqual(result.return_code, 0)
    message = stdout.read().strip()
    # assertTrue replaces the deprecated assert_ alias.
    self.assertTrue(("PRANK: converting '%s' to '%s'"
                     % (self.input, filename)) in message, message)
    self.assertEqual(stderr.read(), "")
    self.assertEqual(str(result._cl), str(cmdline))
    self.assertTrue(os.path.isfile(filename))
    in_handle = open(self.input)
    try:
        old = AlignIO.read(in_handle, "fasta")
    finally:
        in_handle.close()
    # Hack... truncate the reference identifiers to 10 characters before
    # comparing against PHYLIP output.
    if format == "phylip":
        for record in old:
            record.id = record.id[:10]
    out_handle = open(filename)
    try:
        new = AlignIO.read(out_handle, format)
    finally:
        # Must be closed before os.remove() below; an open handle blocks
        # deletion on some platforms.
        out_handle.close()
    # A unittest assertion, because a bare assert is stripped under -O.
    self.assertEqual(len(old), len(new))
    for old_r, new_r in zip(old, new):
        self.assertEqual(old_r.id, new_r.id)
        self.assertEqual(str(old_r.seq), str(new_r.seq))
    os.remove(filename)
def test_Prank_simple_with_NEXUS_output(self):
    """Simple round-trip through app with infile, output in NEXUS.

    output.?.??? files written to cwd - no way to redirect
    """
    records = list(SeqIO.parse(self.infile1, "fasta"))
    # Try using keyword argument,
    cmdline = PrankCommandline(prank_exe, d=self.infile1, noxml=True)
    # Try using a property,
    cmdline.d = self.infile1
    cmdline.f = 17  # NEXUS format
    cmdline.set_parameter("notree", True)
    self.assertEqual(
        str(cmdline),
        _escape_filename(prank_exe) + " -d=Fasta/fa01 -f=17 -noxml -notree")
    # The repr must evaluate back to an equivalent command line.
    self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
    stdout, stderr = cmdline()
    # assertIn reports both operands on failure, unlike assertTrue(a in b).
    self.assertIn("Total time", stdout)
    self.assertEqual(stderr, "")
    try:
        # Only the parse itself is guarded, so nothing else can be
        # mistaken for the known parser limitation.
        align = AlignIO.read("output.2.nex", "nexus")
    except NexusError:
        # See bug 3119,
        # Bio.Nexus can't parse output from prank v100701 (1 July 2010)
        return
    for old, new in zip(records, align):
        # Old versions of Prank reduced name to 9 chars
        self.assertTrue(old.id == new.id or old.id[:9] == new.id)
        # infile1 has alignment gaps in it
        self.assertEqual(
            str(new.seq).replace("-", ""), str(old.seq).replace("-", ""))
def test_Prank_simple_with_NEXUS_output(self):
    """Simple round-trip through app with infile, output in NEXUS.

    output.?.??? files written to cwd - no way to redirect
    """
    # Close the input handle explicitly instead of relying on garbage
    # collection.
    in_handle = open(self.infile1)
    try:
        records = list(SeqIO.parse(in_handle, "fasta"))
    finally:
        in_handle.close()
    # Try using keyword argument,
    cmdline = PrankCommandline(prank_exe, d=self.infile1, noxml=True)
    # Try using a property,
    cmdline.d = self.infile1
    cmdline.f = 17  # NEXUS format
    cmdline.set_parameter("notree", True)
    self.assertEqual(str(cmdline),
                     prank_exe + " -d=Fasta/fa01 -f=17 -noxml -notree")
    # The repr must evaluate back to an equivalent command line.
    self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
    result, stdout, stderr = Application.generic_run(cmdline)
    self.assertEqual(result.return_code, 0)
    output = stdout.read()
    # assertTrue replaces the deprecated assert_ alias; the captured output
    # is passed as the failure message to aid debugging.
    self.assertTrue("Total time" in output, output)
    self.assertEqual(stderr.read(), "")
    self.assertEqual(str(result._cl), str(cmdline))
    out_handle = open("output.2.nex", "r")
    try:
        align = AlignIO.read(out_handle, "nexus")
    finally:
        # Close the handle even when parsing fails.
        out_handle.close()
    for old, new in zip(records, align):
        # Prank automatically reduces name to 9 chars
        self.assertEqual(old.id[:9], new.id)
        # infile1 has alignment gaps in it
        self.assertEqual(
            str(new.seq).replace("-", ""), str(old.seq).replace("-", ""))
def test_Prank_simple_with_NEXUS_output(self):
    """Simple round-trip through app with infile, output in NEXUS.

    output.?.??? files written to cwd - no way to redirect
    """
    # Close the input handle explicitly instead of relying on garbage
    # collection.
    in_handle = open(self.infile1)
    try:
        records = list(SeqIO.parse(in_handle, "fasta"))
    finally:
        in_handle.close()
    # Try using keyword argument,
    cmdline = PrankCommandline(prank_exe, d=self.infile1, noxml=True)
    # Try using a property,
    cmdline.d = self.infile1
    cmdline.f = 17  # NEXUS format
    cmdline.set_parameter("notree", True)
    self.assertEqual(str(cmdline),
                     prank_exe + " -d=Fasta/fa01 -f=17 -noxml -notree")
    # The repr must evaluate back to an equivalent command line.
    self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
    child = subprocess.Popen(str(cmdline),
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE,
                             universal_newlines=True,
                             shell=(sys.platform != "win32"))
    # communicate() drains both pipes while waiting; wait() alone can
    # deadlock once the child fills a pipe buffer.
    stdout, stderr = child.communicate()
    self.assertEqual(child.returncode, 0)
    self.assertTrue("Total time" in stdout, stdout)
    self.assertEqual(stderr, "")
    out_handle = open("output.2.nex")
    try:
        # Only the parse itself is guarded, so nothing else can be
        # mistaken for the known parser limitation.
        align = AlignIO.read(out_handle, "nexus")
    except NexusError:
        # See bug 3119,
        # Bio.Nexus can't parse output from prank v100701 (1 July 2010)
        return
    finally:
        out_handle.close()
    for old, new in zip(records, align):
        # Old versions of Prank reduced name to 9 chars
        self.assertTrue(old.id == new.id or old.id[:9] == new.id)
        # infile1 has alignment gaps in it
        self.assertEqual(str(new.seq).replace("-", ""),
                         str(old.seq).replace("-", ""))
def test_Prank_simple_with_NEXUS_output(self):
    """Simple round-trip through app with infile, output in NEXUS.

    output.?.??? files written to cwd - no way to redirect
    """
    records = list(SeqIO.parse(self.infile1, "fasta"))
    # Try using keyword argument,
    cmdline = PrankCommandline(prank_exe, d=self.infile1)
    # Try using a property,
    cmdline.d = self.infile1
    cmdline.f = 17  # NEXUS format
    cmdline.set_parameter("dots", True)
    self.assertEqual(
        str(cmdline),
        _escape_filename(prank_exe) + " -d=Fasta/fa01 -f=17 -dots")
    # The repr must evaluate back to an equivalent command line.
    self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
    stdout, stderr = cmdline()
    self.assertIn("Total time", stdout)
    self.assertEqual(stderr, "")
    # Locate the output file outside the try block; a missing file is a
    # plain error and has nothing to do with NEXUS parsing.
    if os.path.isfile("output.best.nex"):
        # Prank v.130820 and perhaps earlier use ".best.*" output names
        nex_fname = "output.best.nex"
    elif os.path.isfile("output.2.nex"):
        # Older Prank versions use ".2.*" output names
        nex_fname = "output.2.nex"
    else:
        raise RuntimeError("Can't find PRANK's NEXUS output (*.nex)")
    try:
        # Only the parse itself is guarded, so nothing else can be
        # mistaken for the known parser limitation.
        align = AlignIO.read(nex_fname, "nexus")
    except NexusError:
        # See bug 3119,
        # Bio.Nexus can't parse output from prank v100701 (1 July 2010)
        return
    for old, new in zip(records, align):
        # Old versions of Prank reduced name to 9 chars
        self.assertTrue(old.id == new.id or old.id[:9] == new.id)
        # infile1 has alignment gaps in it
        self.assertEqual(
            str(new.seq).replace("-", ""), str(old.seq).replace("-", ""))
def test_Prank_complex_command_line(self):
    """Round-trip with complex command line."""
    cmdline = PrankCommandline(prank_exe)
    # Option names are given both with and without the leading dash.
    cmdline.set_parameter("d", self.infile1)
    cmdline.set_parameter("-gaprate", 0.321)
    cmdline.set_parameter("gapext", 0.6)
    cmdline.set_parameter("-dots", 1)  # i.e. True
    # Try using a property:
    cmdline.kappa = 3
    cmdline.skipins = True
    cmdline.set_parameter("-once", True)
    cmdline.realbranches = True
    self.assertEqual(str(cmdline),
                     _escape_filename(prank_exe)
                     + " -d=Fasta/fa01"
                     + " -dots -gaprate=0.321 -gapext=0.6 -kappa=3"
                     + " -once -skipins -realbranches")
    # The repr must evaluate back to an equivalent command line.
    self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
    stdout, stderr = cmdline()
    # assertIn reports both operands on failure, so no explicit message
    # argument is needed.
    self.assertIn("Total time", stdout)
def run_multiple_sequence_alignment(records, workdir, msa):
    """Write ``records`` to FASTA and align them with the chosen MSA tool.

    The user can choose between emma, clustalw (old and busted), clustal
    omega (recommended for proteins and also uses HMM), MUSCLE or MAFFT
    (recommended for nucleotide data, and MUSCLE should be pretty fast),
    T-Coffee (good for distantly related sequences), PRANK, MSAProbs,
    ProbCons and Dialign.

    The sequences are written to ``<workdir>/msa.fasta``. The alignment
    target is ``<workdir>/msa.aln`` and, for tools that take one, the guide
    tree target is ``<workdir>/msa.dnd``. For MAFFT and ProbCons the captured
    standard output is written to the alignment file.

    Args:
        records: Sequence records handed to ``SeqIO.write``.
        workdir: Directory in which the input and output files are placed.
        msa: Case-insensitive algorithm name. Accepted spellings: emma,
            clustalo / clustal_omega / clustal-omega, t-coffee / t_coffee,
            muscle, mafft, clustalw / clustalw2, prank, msaprobs, probcons,
            dialign.

    Raises:
        ValueError: If ``msa`` is not a supported name. This is checked
            before anything is written to disk.

    FUTURE: Add more iterative methods to improve runtime? Add HMMER?
    HHpred is also quite fast
    """
    # Map every accepted spelling onto one canonical algorithm name.
    aliases = {
        "emma": "emma",
        "clustalo": "clustalo",
        "clustal_omega": "clustalo",
        "clustal-omega": "clustalo",
        "t-coffee": "t_coffee",
        "t_coffee": "t_coffee",
        "muscle": "muscle",
        "mafft": "mafft",
        "clustalw": "clustalw",
        "clustalw2": "clustalw",
        "prank": "prank",
        "msaprobs": "msaprobs",
        "probcons": "probcons",
        "dialign": "dialign",
    }
    algorithm = aliases.get(msa.lower())
    if algorithm is None:
        # Fail fast with an ordinary exception and a complete list of names.
        raise ValueError(
            "Unsupported Multiple Sequence Alignment algorithm %r; "
            "supported algorithms are: %s"
            % (msa, ", ".join(sorted(set(aliases.values()))))
        )

    # get filename for fasta file
    sequence_list_file = os.path.join(workdir, "msa.fasta")
    # write sequences
    SeqIO.write(records, sequence_list_file, "fasta")
    # prepare filenames for MSA output
    outfile = os.path.join(workdir, "msa.aln")
    treefile = os.path.join(workdir, "msa.dnd")

    # Prepare command line according to chosen algorithm.
    # print(...) with one string argument behaves the same on Python 2 and 3.
    if algorithm == "emma":  # output is fasta
        print("Aligning by emma")
        cmd = EmmaCommandline(sequence=sequence_list_file, outseq=outfile,
                              dendoutfile=treefile)
    elif algorithm == "clustalo":
        print("Aligning by Clustal Omega")
        cmd = ClustalOmegaCommandline(infile=sequence_list_file,
                                      outfile=outfile, verbose=True,
                                      auto=True, guidetree_out=treefile,
                                      outfmt="clu", force=True)
    elif algorithm == "t_coffee":  # should output tree file automatically
        print("Aligning by T-Coffeee")
        cmd = TCoffeeCommandline(infile=sequence_list_file,
                                 output="clustalw", outfile=outfile)
    elif algorithm == "muscle":
        print("Aligning by MUSCLE")
        cmd = MuscleCommandline(input=sequence_list_file, out=outfile,
                                tree2=treefile)
    elif algorithm == "mafft":  # probably gonna save tree as input.tree
        print("Aligning by MAFFT")
        cmd = MafftCommandline(input=sequence_list_file, clustalout=True,
                               treeout=True)
    elif algorithm == "clustalw":
        print("Aligning by ClustalW2")
        cmd = ClustalwCommandline("clustalw", infile=sequence_list_file,
                                  outfile=outfile, tree=True,
                                  newtree=treefile)
    elif algorithm == "prank":
        # output is fasta, tree will be outputted to .dnd file?
        print("Aligning by PRANK")
        cmd = PrankCommandline(d=sequence_list_file, o=outfile, f=8,
                               showtree=True, noxml=True)
    elif algorithm == "msaprobs":  # doesn't use a guide tree
        print("Aligning by MSAprobs")
        cmd = MSAProbsCommandline(infile=sequence_list_file,
                                  outfile=outfile, clustalw=True)
    elif algorithm == "probcons":
        print("Aligning by ProbCons")
        cmd = ProbconsCommandline(input=sequence_list_file, clustalw=True)
    else:  # "dialign" - the only canonical name left
        # phylip tree should be created automatically, names are a mystery?
        print("Aligning by Dialign")
        cmd = DialignCommandline(input=sequence_list_file, cw=True,
                                 fn=outfile)

    # Execute the command
    stdout, stderr = cmd()

    # For algorithms that don't have an option to save output to file,
    # capture the stdout
    if algorithm in ("mafft", "probcons"):
        with open(outfile, "w") as handle:
            handle.write(stdout)
def test_Prank_complex_command_line(self):
    """Build a many-option PRANK command line, check its text, then run it."""
    cmdline = PrankCommandline(prank_exe)
    # Options supplied through set_parameter, with and without the
    # leading dash.
    for option, value in (
        ("d", self.infile1),
        ("-gaprate", 0.321),
        ("gapext", 0.6),
        ("-dots", 1),  # i.e. True
    ):
        cmdline.set_parameter(option, value)
    # Options supplied as properties, mixed with one more set_parameter.
    cmdline.kappa = 3
    cmdline.skipins = True
    cmdline.set_parameter("-once", True)
    cmdline.realbranches = True

    expected_text = (
        _escape_filename(prank_exe)
        + " -d=Fasta/fa01"
        + " -dots -gaprate=0.321 -gapext=0.6 -kappa=3"
        + " -once -skipins -realbranches"
    )
    self.assertEqual(str(cmdline), expected_text)
    # The repr must evaluate back to an equivalent command line.
    self.assertEqual(str(eval(repr(cmdline))), str(cmdline))

    stdout, stderr = cmdline()
    self.assertIn("Total time", stdout)
def test_Prank_complex_command_line(self):
    """Round-trip with complex command line."""
    cmdline = PrankCommandline(prank_exe)
    # Option names are given both with and without the leading dash.
    cmdline.set_parameter("d", self.infile1)
    cmdline.set_parameter("-noxml", True)
    cmdline.set_parameter("notree", True)
    cmdline.set_parameter("-gaprate", 0.321)
    cmdline.set_parameter("gapext", 0.6)
    cmdline.set_parameter("-dots", 1)  # i.e. True
    # Try using a property:
    cmdline.kappa = 3
    cmdline.skipins = True
    cmdline.set_parameter("-once", True)
    cmdline.realbranches = True
    self.assertEqual(
        str(cmdline),
        prank_exe
        + " -d=Fasta/fa01 -noxml"
        + " -notree -dots -gaprate=0.321 -gapext=0.6 -kappa=3"
        + " -once -skipins -realbranches",
    )
    # The repr must evaluate back to an equivalent command line.
    self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
    child = subprocess.Popen(
        str(cmdline),
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        # Text mode, so the captured output compares against str literals.
        universal_newlines=True,
        shell=(sys.platform != "win32"),
    )
    # communicate() drains both pipes while waiting; wait() alone can
    # deadlock once the child fills a pipe buffer.
    stdout, stderr = child.communicate()
    self.assertEqual(child.returncode, 0)
    self.assertTrue("Total time" in stdout, stdout)
    self.assertEqual(stderr, "")
def test_Prank_complex_command_line(self):
    """Round-trip with complex command line."""
    cmdline = PrankCommandline(prank_exe)
    # Option names are given both with and without the leading dash.
    cmdline.set_parameter("d", self.infile1)
    cmdline.set_parameter("-noxml", True)
    cmdline.set_parameter("notree", True)
    cmdline.set_parameter("-gaprate", 0.321)
    cmdline.set_parameter("gapext", 0.6)
    cmdline.set_parameter("-dots", 1)  # i.e. True
    # Try using a property:
    cmdline.kappa = 3
    cmdline.skipins = True
    cmdline.set_parameter("-once", True)
    cmdline.realbranches = True
    self.assertEqual(str(cmdline),
                     prank_exe + " -d=Fasta/fa01 -noxml"
                     + " -notree -dots -gaprate=0.321 -gapext=0.6 -kappa=3"
                     + " -once -skipins -realbranches")
    # The repr must evaluate back to an equivalent command line.
    self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
    result, stdout, stderr = Application.generic_run(cmdline)
    self.assertEqual(result.return_code, 0)
    output = stdout.read()
    # assertTrue replaces the deprecated assert_ alias; the captured output
    # is passed as the failure message to aid debugging.
    self.assertTrue("Total time" in output, output)
    self.assertEqual(stderr.read(), "")
    self.assertEqual(str(result._cl), str(cmdline))