def test_blastp(self):
    """Pairwise BLASTP search using a query file and a subject file."""
    blastp = exe_names["blastp"]
    cline = Applications.NcbiblastpCommandline(
        blastp,
        query="Fasta/rose.pro",
        subject="GenBank/NC_005816.faa",
        evalue=1)
    expected = "".join([
        _escape_filename(blastp),
        " -query Fasta/rose.pro -evalue 1",
        " -subject GenBank/NC_005816.faa",
    ])
    self.assertEqual(str(cline), expected)
    # The command string goes through the shell everywhere except Windows.
    child = subprocess.Popen(
        str(cline),
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True,
        shell=(sys.platform != "win32"))
    stdoutdata, stderrdata = child.communicate()
    return_code = child.returncode
    self.assertEqual(
        return_code, 0,
        "Got error code %i back from:\n%s" % (return_code, cline))
    self.assertEqual(10, stdoutdata.count("Query= "))
    no_hit_count = stdoutdata.count("***** No hits found *****")
    # A count of 7 happens with BLAST 2.2.26+ (which is potentially a bug),
    # so it is tolerated; any other count must be exactly 9.
    if no_hit_count != 7:
        self.assertEqual(9, no_hit_count)
def get_emboss_version():
    """Return the EMBOSS version as a tuple of three ints, e.g. (6, 1, 0).

    Runs the ``embossversion`` executable (path taken from ``exes``) with
    stdout and stderr merged, then scans the output line by line for a
    dotted version number. A four part version such as 6.2.0.1 is
    truncated to its first three parts.

    Raises MissingExternalDependencyError if there is no output at all, or
    if a line cannot be understood as a version number.
    """
    # Windows and Unix versions of EMBOSS seem to differ in
    # which lines go to stdout and stderr - so merge them.
    child = subprocess.Popen(_escape_filename(exes["embossversion"]),
                             stdout=subprocess.PIPE,
                             stderr=subprocess.STDOUT,
                             universal_newlines=True,
                             shell=(sys.platform != "win32"))
    # communicate() reads to EOF, waits for the process and closes the pipe.
    stdout, stderr = child.communicate()
    assert stderr is None  # stderr was sent to stdout instead
    for line in stdout.split("\n"):
        if not line.strip():
            # Blank lines (including the single empty string produced by
            # splitting empty output) carry no information - skip them so
            # that "no output at all" reaches the final raise below.
            continue
        if line.strip() == "Reports the current EMBOSS version number":
            continue
        if line.startswith("Writes the current EMBOSS version number"):
            continue
        if line.count(".") in (2, 3):
            # e.g. mEMBOSS-6.2.0.1-setup.exe reports 6.2.0.1,
            # for which we return (6, 2, 0)
            try:
                return tuple(int(v) for v in line.strip().split("."))[:3]
            except ValueError:
                # Dotted text which is not a version number, fall through
                # and report it like any other unexpected output.
                pass
        # Either we can't understand the output, or this is really
        # an error message not caught earlier (e.g. not in English)
        raise MissingExternalDependencyError(
            "Install EMBOSS if you want to use Bio.Emboss (%s)." % line)
    # In case there was no output at all...
    raise MissingExternalDependencyError("Could not get EMBOSS version")
def test_Prank_simple_with_NEXUS_output(self):
    """Simple round-trip through app with infile, output in NEXUS.

    output.?.??? files written to cwd - no way to redirect
    """
    import os  # local import, needed for the output filename check below

    records = list(SeqIO.parse(self.infile1, "fasta"))
    # Try using keyword argument,
    cmdline = PrankCommandline(prank_exe, d=self.infile1)
    # Try using a property,
    cmdline.d = self.infile1
    cmdline.f = 17  # NEXUS format
    cmdline.set_parameter("dots", True)
    self.assertEqual(
        str(cmdline),
        _escape_filename(prank_exe) + " -d=Fasta/fa01 -f=17 -dots")
    self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
    stdout, stderr = cmdline()
    self.assertIn("Total time", stdout)
    self.assertEqual(stderr, "")
    # Newer PRANK releases use ".best.*" output names while older ones
    # use ".2.*", so prefer the newer name when that file exists.
    if os.path.isfile("output.best.nex"):
        nex_fname = "output.best.nex"
    else:
        nex_fname = "output.2.nex"
    try:
        align = AlignIO.read(nex_fname, "nexus")
        for old, new in zip(records, align):
            # Old versions of Prank reduced name to 9 chars
            self.assertTrue(old.id == new.id or old.id[:9] == new.id)
            # infile1 has alignment gaps in it
            self.assertEqual(
                str(new.seq).replace("-", ""),
                str(old.seq).replace("-", ""))
    except NexusError:
        # See bug 3119,
        # Bio.Nexus can't parse output from prank v100701 (1 July 2010)
        pass
def test_tblastn(self):
    """Pairwise TBLASTN search using a query file and a subject file."""
    tblastn = exe_names["tblastn"]
    cline = Applications.NcbitblastnCommandline(
        tblastn,
        query="GenBank/NC_005816.faa",
        subject="GenBank/NC_005816.fna",
        evalue="1e-6",
    )
    expected = "".join(
        [
            _escape_filename(tblastn),
            " -query GenBank/NC_005816.faa -evalue 1e-6",
            " -subject GenBank/NC_005816.fna",
        ]
    )
    self.assertEqual(str(cline), expected)
    # The command string goes through the shell everywhere except Windows.
    use_shell = sys.platform != "win32"
    child = subprocess.Popen(
        str(cline),
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True,
        shell=use_shell,
    )
    stdoutdata, stderrdata = child.communicate()
    return_code = child.returncode
    failure_note = "Got error code %i back from:\n%s" % (return_code, cline)
    self.assertEqual(return_code, 0, failure_note)
    # Ten queries are expected in the report, every one of them with hits.
    self.assertEqual(10, stdoutdata.count("Query= "))
    self.assertEqual(0, stdoutdata.count("***** No hits found *****"))
def test_fasta_db_nucl(self):
    """Test makeblastdb wrapper with nucleotide database."""
    cline = Applications.NcbimakeblastdbCommandline(
        exe_names["makeblastdb"],
        input_file="GenBank/NC_005816.fna",
        dbtype="nucl",
        hash_index=True,
        max_file_sz="20MB",
        parse_seqids=True,
        taxid=10)
    self.assertEqual(str(cline),
                     _escape_filename(exe_names["makeblastdb"]) +
                     " -dbtype nucl -in GenBank/NC_005816.fna"
                     " -parse_seqids -hash_index -max_file_sz 20MB"
                     " -taxid 10")
    child = subprocess.Popen(str(cline),
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE,
                             universal_newlines=True,
                             shell=(sys.platform != "win32"))
    stdoutdata, stderrdata = child.communicate()
    return_code = child.returncode
    # Check the exit status (as the BLAST search tests do) so a failed run
    # is reported as such rather than only as a missing output file.
    self.assertEqual(return_code, 0,
                     "Got error code %i back from:\n%s"
                     % (return_code, cline))
    # makeblastdb writes its index files next to the input file.
    for extension in ("nhd", "nhi", "nhr", "nin",
                      "nog", "nsd", "nsi", "nsq"):
        filename = "GenBank/NC_005816.fna." + extension
        self.assertTrue(os.path.isfile(filename),
                        "Missing makeblastdb output file %s" % filename)
def _as_list(self):
    """Return this argument as a list holding zero or one item.

    An unset value (None) gives an empty list. Otherwise the list holds
    the value, passed through _escape_filename when it is a filename.
    """
    value = self.value
    if value is None:
        return []
    return [_escape_filename(value)] if self.is_filename else [value]
def test_using_stdin(self):
    """Simple alignment with the sequences supplied on stdin."""
    input_file = "Fasta/f002"
    self.assertTrue(os.path.isfile(input_file))
    records = list(SeqIO.parse(input_file, "fasta"))
    # Prepare the command... use Clustal output (with a MUSCLE header)
    cline = MuscleCommandline(muscle_exe, clw=True)
    expected = _escape_filename(muscle_exe) + " -clw"
    self.assertEqual(str(cline).rstrip(), expected)
    self.assertEqual(str(eval(repr(cline))), str(cline))
    child = subprocess.Popen(
        str(cline),
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True,
        shell=(sys.platform != "win32"))
    # Feed the sequences in, then close stdin to signal the end of input.
    SeqIO.write(records, child.stdin, "fasta")
    child.stdin.close()
    # Alignment will now run...
    align = AlignIO.read(child.stdout, "clustal")
    align.sort()
    records.sort(key=lambda rec: rec.id)
    self.assertEqual(len(records), len(align))
    for original, aligned in zip(records, align):
        self.assertEqual(original.id, aligned.id)
        # Removing the gaps should give back the input sequence
        ungapped = str(aligned.seq).replace("-", "")
        self.assertEqual(ungapped, str(original.seq))
    self.assertEqual(0, child.wait())
    child.stdout.close()
    child.stderr.close()
def test_fasta_db_nucl(self):
    """Test makeblastdb wrapper with nucleotide database."""
    cline = Applications.NcbimakeblastdbCommandline(
        exe_names["makeblastdb"],
        input_file="GenBank/NC_005816.fna",
        dbtype="nucl",
        hash_index=True,
        max_file_sz="20MB",
        parse_seqids=True,
        taxid=10)
    self.assertEqual(str(cline),
                     _escape_filename(exe_names["makeblastdb"]) +
                     " -dbtype nucl -in GenBank/NC_005816.fna"
                     " -parse_seqids -hash_index -max_file_sz 20MB"
                     " -taxid 10")
    child = subprocess.Popen(str(cline),
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE,
                             universal_newlines=True,
                             shell=(sys.platform != "win32"))
    stdoutdata, stderrdata = child.communicate()
    return_code = child.returncode
    # Check the exit status (as the BLAST search tests do) so a failed run
    # is reported as such rather than only as a missing output file.
    self.assertEqual(return_code, 0,
                     "Got error code %i back from:\n%s"
                     % (return_code, cline))
    # makeblastdb writes its index files next to the input file.
    for extension in ("nhd", "nhi", "nhr", "nin",
                      "nog", "nsd", "nsi", "nsq"):
        filename = "GenBank/NC_005816.fna." + extension
        self.assertTrue(os.path.isfile(filename),
                        "Missing makeblastdb output file %s" % filename)
def test_Prank_simple_with_NEXUS_output(self):
    """Simple round-trip through app with infile, output in NEXUS.

    output.?.??? files written to cwd - no way to redirect
    """
    import os  # local import, needed for the output filename check below

    records = list(SeqIO.parse(self.infile1, "fasta"))
    # Try using keyword argument,
    cmdline = PrankCommandline(prank_exe, d=self.infile1, noxml=True)
    # Try using a property,
    cmdline.d = self.infile1
    cmdline.f = 17  # NEXUS format
    cmdline.set_parameter("notree", True)
    self.assertEqual(str(cmdline), _escape_filename(prank_exe) +
                     " -d=Fasta/fa01 -f=17 -noxml -notree")
    self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
    stdout, stderr = cmdline()
    self.assertIn("Total time", stdout)
    self.assertEqual(stderr, "")
    # Newer PRANK releases use ".best.*" output names while older ones
    # use ".2.*", so prefer the newer name when that file exists.
    if os.path.isfile("output.best.nex"):
        nex_fname = "output.best.nex"
    else:
        nex_fname = "output.2.nex"
    try:
        align = AlignIO.read(nex_fname, "nexus")
        for old, new in zip(records, align):
            # Old versions of Prank reduced name to 9 chars
            self.assertTrue(old.id == new.id or old.id[:9] == new.id)
            # infile1 has alignment gaps in it
            self.assertEqual(str(new.seq).replace("-", ""),
                             str(old.seq).replace("-", ""))
    except NexusError:
        # See bug 3119,
        # Bio.Nexus can't parse output from prank v100701 (1 July 2010)
        pass
def conversion(self, prank_number, prank_ext, format):
    """Have PRANK convert the input alignment and check the result with AlignIO.

    prank_number is the value given to PRANK's -f option, prank_ext the
    extension of the file PRANK is expected to write, and format the
    AlignIO format name used to read that file back.
    """
    filename = f"{self.output}.{prank_ext}"
    # Start from a clean slate so a stale file cannot satisfy the checks.
    if os.path.isfile(filename):
        os.remove(filename)
    quoted_output = f'"{self.output}"'
    cmdline = PrankCommandline(
        prank_exe, d=self.input, convert=True, f=prank_number, o=quoted_output
    )
    expected = (
        _escape_filename(prank_exe)
        + f' -d={self.input} -o="{self.output}" -f={prank_number} -convert'
    )
    self.assertEqual(str(cmdline), expected)
    self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
    message, error = cmdline()
    self.assertIn("PRANK", message)
    progress_line = f"converting '{self.input}' to '{filename}'"
    self.assertIn(progress_line, message, message)
    self.assertEqual(error, "")
    self.assertTrue(os.path.isfile(filename))
    old = AlignIO.read(self.input, "fasta")
    # Hack... for PHYLIP, compare against identifiers cut to ten characters
    if format == "phylip":
        for record in old:
            record.id = record.id[:10]
    new = AlignIO.read(filename, format)
    self.assertEqual(len(old), len(new))
    for before, after in zip(old, new):
        self.assertEqual(before.id, after.id)
        self.assertEqual(before.seq, after.seq)
    os.remove(filename)
def conversion(self, prank_number, prank_ext, format):
    """Have PRANK convert the input alignment and check the result with AlignIO.

    prank_number is the value given to PRANK's -f option, prank_ext the
    extension of the file PRANK is expected to write, and format the
    AlignIO format name used to read that file back.
    """
    filename = "%s.%s" % (self.output, prank_ext)
    # Start from a clean slate so a stale file cannot satisfy the checks.
    if os.path.isfile(filename):
        os.remove(filename)
    cmdline = PrankCommandline(
        prank_exe,
        d=self.input,
        convert=True,
        f=prank_number,
        o='"%s"' % self.output)
    expected = "".join([
        _escape_filename(prank_exe),
        " -d=%s" % self.input,
        ' -o="%s"' % self.output,
        " -f=%i" % prank_number,
        " -convert",
    ])
    self.assertEqual(str(cmdline), expected)
    self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
    message, error = cmdline()
    self.assertIn("PRANK", message)
    progress_line = "converting '%s' to '%s'" % (self.input, filename)
    self.assertTrue(progress_line in message, message)
    self.assertEqual(error, "")
    self.assertTrue(os.path.isfile(filename))
    old = AlignIO.read(self.input, "fasta")
    # Hack... for PHYLIP, compare against identifiers cut to ten characters
    if format == "phylip":
        for record in old:
            record.id = record.id[:10]
    new = AlignIO.read(filename, format)
    self.assertEqual(len(old), len(new))
    for before, after in zip(old, new):
        self.assertEqual(before.id, after.id)
        self.assertEqual(str(before.seq), str(after.seq))
    os.remove(filename)
def test_simple_clustal_strict(self):
    """Simple muscle call producing strict Clustal output."""
    input_file = "Fasta/f002"
    self.assertTrue(os.path.isfile(input_file))
    records = sorted(SeqIO.parse(input_file, "fasta"),
                     key=lambda rec: rec.id)
    # Prepare the command...
    cmdline = MuscleCommandline(muscle_exe)
    cmdline.set_parameter("in", input_file)
    # Use clustal output (with a CLUSTAL header)
    cmdline.set_parameter("clwstrict", True)  # Default None treated as False!
    expected = _escape_filename(muscle_exe) + " -in Fasta/f002 -clwstrict"
    self.assertEqual(str(cmdline).rstrip(), expected)
    self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
    child = subprocess.Popen(
        str(cmdline),
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True,
        shell=(sys.platform != "win32"))
    align = AlignIO.read(child.stdout, "clustal")
    align.sort()
    # Didn't use -quiet so there should be progress reports on stderr,
    progress = child.stderr.read().strip()
    self.assertTrue(progress.startswith("MUSCLE"))
    self.assertEqual(len(records), len(align))
    for original, aligned in zip(records, align):
        self.assertEqual(original.id, aligned.id)
        # Removing the gaps should give back the input sequence
        ungapped = str(aligned.seq).replace("-", "")
        self.assertEqual(ungapped, str(original.seq))
    return_code = child.wait()
    self.assertEqual(return_code, 0)
    child.stdout.close()
    child.stderr.close()
def get_emboss_version():
    """Return the EMBOSS version as a tuple of three ints, e.g. (6, 1, 0).

    Runs the ``embossversion`` executable (path taken from ``exes``) with
    stdout and stderr merged, then scans the output line by line for a
    dotted version number. A four part version such as 6.2.0.1 is
    truncated to its first three parts.

    Raises MissingExternalDependencyError if there is no output at all, or
    if a line cannot be understood as a version number.
    """
    # Windows and Unix versions of EMBOSS seem to differ in
    # which lines go to stdout and stderr - so merge them.
    child = subprocess.Popen(_escape_filename(exes["embossversion"]),
                             stdout=subprocess.PIPE,
                             stderr=subprocess.STDOUT,
                             universal_newlines=True,
                             shell=(sys.platform != "win32"))
    # communicate() reads to EOF, waits for the process and closes the pipe.
    stdout, stderr = child.communicate()
    assert stderr is None  # stderr was sent to stdout instead
    for line in stdout.split("\n"):
        if not line.strip():
            # Blank lines (including the single empty string produced by
            # splitting empty output) carry no information - skip them so
            # that "no output at all" reaches the final raise below.
            continue
        if line.strip() == "Reports the current EMBOSS version number":
            continue
        if line.startswith("Writes the current EMBOSS version number"):
            continue
        if line.count(".") in (2, 3):
            # e.g. mEMBOSS-6.2.0.1-setup.exe reports 6.2.0.1,
            # for which we return (6, 2, 0)
            try:
                return tuple(int(v) for v in line.strip().split("."))[:3]
            except ValueError:
                # Dotted text which is not a version number, fall through
                # and report it like any other unexpected output.
                pass
        # Either we can't understand the output, or this is really
        # an error message not caught earlier (e.g. not in English)
        raise MissingExternalDependencyError(
            "Install EMBOSS if you want to use Bio.Emboss (%s)." % line)
    # In case there was no output at all...
    raise MissingExternalDependencyError("Could not get EMBOSS version")
def conversion(self, prank_number, prank_ext, format):
    """Have PRANK convert the input alignment and check the result with AlignIO.

    prank_number is the value given to PRANK's -f option, prank_ext the
    extension of the file PRANK is expected to write, and format the
    AlignIO format name used to read that file back.
    """
    filename = "%s.%s" % (self.output, prank_ext)
    # Start from a clean slate so a stale file cannot satisfy the checks.
    if os.path.isfile(filename):
        os.remove(filename)
    cmdline = PrankCommandline(
        prank_exe,
        d=self.input,
        convert=True,
        f=prank_number,
        o='"%s"' % self.output)
    expected = "".join([
        _escape_filename(prank_exe),
        ' -d=%s' % self.input,
        ' -o="%s"' % self.output,
        ' -f=%i' % prank_number,
        ' -convert',
    ])
    self.assertEqual(str(cmdline), expected)
    self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
    message, error = cmdline()
    self.assertTrue("PRANK" in message, message)
    progress_line = "converting '%s' to '%s'" % (self.input, filename)
    self.assertTrue(progress_line in message, message)
    self.assertEqual(error, "")
    self.assertTrue(os.path.isfile(filename))
    old = AlignIO.read(self.input, "fasta")
    # Hack... for PHYLIP, compare against identifiers cut to ten characters
    if format == "phylip":
        for record in old:
            record.id = record.id[:10]
    new = AlignIO.read(filename, format)
    self.assertEqual(len(old), len(new))
    for before, after in zip(old, new):
        self.assertEqual(before.id, after.id)
        self.assertEqual(str(before.seq), str(after.seq))
    os.remove(filename)
def test_Prank_simple_with_NEXUS_output(self):
    """Simple round-trip through app with infile, output in NEXUS.

    output.?.??? files written to cwd - no way to redirect
    """
    records = list(SeqIO.parse(self.infile1, "fasta"))
    # First set the input via a keyword argument,
    cmdline = PrankCommandline(prank_exe, d=self.infile1)
    # then again via the property,
    cmdline.d = self.infile1
    cmdline.f = 17  # NEXUS format
    cmdline.set_parameter("dots", True)
    expected = _escape_filename(prank_exe) + " -d=Fasta/fa01 -f=17 -dots"
    self.assertEqual(str(cmdline), expected)
    self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
    stdout, stderr = cmdline()
    self.assertTrue("Total time" in stdout)
    self.assertEqual(stderr, "")
    # Prank v.130820 and perhaps earlier use ".best.*" output names,
    # older Prank versions use ".2.*" output names.
    nex_fname = None
    for candidate in ("output.best.nex", "output.2.nex"):
        if os.path.isfile(candidate):
            nex_fname = candidate
            break
    if nex_fname is None:
        raise RuntimeError("Can't find PRANK's NEXUS output (*.nex)")
    try:
        align = AlignIO.read(nex_fname, "nexus")
        for original, aligned in zip(records, align):
            # Old versions of Prank reduced name to 9 chars
            same_id = (original.id == aligned.id
                       or original.id[:9] == aligned.id)
            self.assertTrue(same_id)
            # infile1 has alignment gaps in it
            self.assertEqual(str(aligned.seq).replace("-", ""),
                             str(original.seq).replace("-", ""))
    except NexusError:
        # See bug 3119,
        # Bio.Nexus can't parse output from prank v100701 (1 July 2010)
        pass
def test_simple_clustal(self):
    """Simple muscle call giving Clustal output with a MUSCLE header."""
    input_file = "Fasta/f002"
    self.assertTrue(os.path.isfile(input_file))
    records = sorted(SeqIO.parse(input_file, "fasta"),
                     key=lambda rec: rec.id)
    # Prepare the command... use Clustal output (with a MUSCLE header)
    cmdline = MuscleCommandline(muscle_exe, input=input_file, clw=True)
    expected = _escape_filename(muscle_exe) + " -in Fasta/f002 -clw"
    self.assertEqual(str(cmdline).rstrip(), expected)
    self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
    child = subprocess.Popen(
        str(cmdline),
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True,
        shell=(sys.platform != "win32"))
    align = AlignIO.read(child.stdout, "clustal")
    align.sort()  # by record.id
    # Didn't use -quiet so there should be progress reports on stderr,
    progress = child.stderr.read().strip()
    self.assertTrue(progress.startswith("MUSCLE"))
    return_code = child.wait()
    self.assertEqual(return_code, 0)
    child.stdout.close()
    child.stderr.close()
    del child
    self.assertEqual(len(records), len(align))
    for original, aligned in zip(records, align):
        self.assertEqual(original.id, aligned.id)
        # Removing the gaps should give back the input sequence
        ungapped = str(aligned.seq).replace("-", "")
        self.assertEqual(ungapped, str(original.seq))
def _as_list(self):
    """Return the values of this list argument as a flat list of tokens.

    self.value must be a non-empty list. When the argument holds
    filenames each entry is passed through _escape_filename, otherwise
    the entries are returned unchanged (as a new list).

    Raises AssertionError if self.value is not a list or is empty.
    """
    assert isinstance(self.value, list), \
        "Arguments should be a list"
    assert self.value, "Requires at least one filename"
    if self.is_filename:
        return [_escape_filename(v) for v in self.value]
    # Return the entries themselves rather than wrapping the whole list in
    # another list, so the result can be used to extend a flat token list.
    return list(self.value)
def _as_list(self):
    """Return the command line as a list of tokens, program name first."""
    self._validate()
    tokens = [_escape_filename(self.program_name)]
    # Only parameters which have been set contribute to the command line.
    for parameter in self.parameters:
        if not parameter.is_set:
            continue
        tokens.extend(parameter._as_list())
    return tokens
def test_with_multiple_output_formats(self):
    """Simple muscle call with multiple output formats."""
    input_file = "Fasta/f002"
    output_html = "temp_f002.html"
    output_clwstrict = "temp_f002.clw"
    self.assertTrue(os.path.isfile(input_file))
    records = list(SeqIO.parse(input_file, "fasta"))
    records.sort(key=lambda rec: rec.id)
    # Prepare the command... use Clustal output (with a MUSCLE header)
    cmdline = MuscleCommandline(
        muscle_exe,
        input=input_file,
        clw=True,
        htmlout=output_html,
        clwstrictout=output_clwstrict,
    )
    self.assertEqual(
        str(cmdline).rstrip(),
        _escape_filename(muscle_exe)
        + " -in Fasta/f002 -clw -htmlout temp_f002.html"
        + " -clwstrictout temp_f002.clw",
    )
    self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
    child = subprocess.Popen(
        str(cmdline),
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True,
        shell=(sys.platform != "win32"),
    )
    # Clustalw on stdout:
    align = AlignIO.read(child.stdout, "clustal")
    align.sort()
    # Didn't use -quiet so there should be progress reports on stderr,
    self.assertTrue(child.stderr.read().strip().startswith("MUSCLE"))
    return_code = child.wait()
    self.assertEqual(return_code, 0)
    self.assertEqual(len(records), len(align))
    for old, new in zip(records, align):
        self.assertEqual(old.id, new.id)
    child.stdout.close()
    child.stderr.close()
    del child
    # HTML output file; the context manager closes the handle even if
    # reading fails.
    with open(output_html) as handle:
        html = handle.read().strip().upper()
    self.assertTrue(html.startswith("<HTML"))
    self.assertTrue(html.endswith("</HTML>"))
    # ClustalW strict:
    align = AlignIO.read(output_clwstrict, "clustal")
    align.sort()
    self.assertEqual(len(records), len(align))
    for old, new in zip(records, align):
        self.assertEqual(old.id, new.id)
    os.remove(output_html)
    os.remove(output_clwstrict)
def test_Muscle_simple(self):
    """Simple round-trip through app with only an input and output file."""
    cmdline = MuscleCommandline(
        muscle_exe, input=self.infile1, out=self.outfile1)
    # The output filename contains spaces, so it is expected in quotes.
    expected = (_escape_filename(muscle_exe)
                + ' -in Fasta/f002 -out "Fasta/temp align out1.fa"')
    self.assertEqual(str(cmdline), expected)
    self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
    output, error = cmdline()
    self.assertEqual(output, "")
    self.assertNotIn("ERROR", error)
def test_Prank_simple(self):
    """Simple round-trip through app with infile.

    output.?.??? files written to cwd - no way to redirect
    """
    cmdline = PrankCommandline(prank_exe)
    cmdline.set_parameter("d", self.infile1)
    expected = _escape_filename(prank_exe) + " -d=Fasta/fa01"
    self.assertEqual(str(cmdline), expected)
    self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
    output, error = cmdline()
    self.assertEqual(error, "")
    self.assertIn("Total time", output)
def _as_list(self):
    """Return this option as a list of command line tokens.

    With no value only the option name is given. Otherwise the value
    (escaped if it is a filename, else converted with str) is either
    joined to the name with an equals sign or given as a second token,
    depending on self.equate.
    """
    name = self.names[0]
    if self.value is None:
        return [name]
    text = _escape_filename(self.value) if self.is_filename else str(self.value)
    if self.equate:
        return ["%s=%s" % (name, text)]
    return [name, text]
def test_Muscle_profile_with_options(self): """Profile alignment, and switch and valued options""" #Using some keyword arguments, note -stable isn't supported in v3.8 cmdline = MuscleCommandline(muscle_exe, out=self.outfile4, in1=self.infile2, in2=self.infile3, profile=True, stable=True, cluster1="neighborjoining") self.assertEqual(str(cmdline), _escape_filename(muscle_exe) + " -out Fasta/temp_align_out4.fa" + " -profile -in1 Fasta/fa01 -in2 Fasta/f001" + " -cluster1 neighborjoining -stable") self.assertEqual(str(eval(repr(cmdline))), str(cmdline)) """
def test_Muscle_profile_simple(self):
    """Simple round-trip through app doing a profile alignment."""
    cmdline = MuscleCommandline(muscle_exe)
    cmdline.set_parameter("out", self.outfile3)
    cmdline.set_parameter("profile", True)
    cmdline.set_parameter("in1", self.infile2)
    cmdline.set_parameter("in2", self.infile3)
    self.assertEqual(str(cmdline), _escape_filename(muscle_exe) +
                     " -out Fasta/temp_align_out3.fa" +
                     " -profile -in1 Fasta/fa01 -in2 Fasta/f001")
    self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
    output, error = cmdline()
    self.assertEqual(output, "")
    self.assertNotIn("ERROR", error)
    # Show stderr on failure - it is the text being checked here, whereas
    # stdout has already been asserted to be empty.
    self.assertTrue(error.strip().startswith("MUSCLE"), error)
def test_long(self):
    """Simple muscle call using a long input file."""
    # Create a large input file by converting some of another example file
    temp_large_fasta_file = "temp_cw_prot.fasta"
    records = list(SeqIO.parse("NBRF/Cw_prot.pir", "pir"))[:40]
    SeqIO.write(records, temp_large_fasta_file, "fasta")
    # Prepare the command...
    cmdline = MuscleCommandline(muscle_exe)
    cmdline.set_parameter("in", temp_large_fasta_file)
    # Fast options first,
    cmdline.set_parameter("maxiters", 1)
    cmdline.set_parameter("diags", True)  # Default None treated as False!
    # then clustal output,
    cmdline.set_parameter("clwstrict", True)  # Default None treated as False!
    # Shouldn't need this, but just to make sure it is accepted
    cmdline.set_parameter("maxhours", 0.1)
    # and finally no progress reports to stderr
    cmdline.set_parameter("quiet", True)  # Default None treated as False!
    expected = "".join(
        [
            _escape_filename(muscle_exe),
            " -in temp_cw_prot.fasta -diags -maxhours 0.1",
            " -maxiters 1 -clwstrict -quiet",
        ]
    )
    self.assertEqual(str(cmdline).rstrip(), expected)
    self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
    child = subprocess.Popen(
        str(cmdline),
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True,
        shell=(sys.platform != "win32"),
    )
    align = AlignIO.read(child.stdout, "clustal")
    align.sort()
    records.sort(key=lambda rec: rec.id)
    self.assertEqual(len(records), len(align))
    for original, aligned in zip(records, align):
        self.assertEqual(original.id, aligned.id)
        # Removing the gaps should give back the input sequence
        ungapped = str(aligned.seq).replace("-", "")
        self.assertEqual(ungapped, str(original.seq))
    # See if quiet worked:
    self.assertEqual("", child.stderr.read().strip())
    return_code = child.wait()
    self.assertEqual(return_code, 0)
    child.stdout.close()
    child.stderr.close()
    os.remove(temp_large_fasta_file)
def test_with_multiple_output_formats(self):
    """Simple muscle call with multiple output formats."""
    input_file = "Fasta/f002"
    output_html = "temp_f002.html"
    output_clwstrict = "temp_f002.clw"
    self.assertTrue(os.path.isfile(input_file))
    records = list(SeqIO.parse(input_file, "fasta"))
    records.sort(key=lambda rec: rec.id)
    # Prepare the command... use Clustal output (with a MUSCLE header)
    cmdline = MuscleCommandline(muscle_exe, input=input_file, clw=True,
                                htmlout=output_html,
                                clwstrictout=output_clwstrict)
    self.assertEqual(str(cmdline).rstrip(), _escape_filename(muscle_exe) +
                     " -in Fasta/f002 -clw -htmlout temp_f002.html" +
                     " -clwstrictout temp_f002.clw")
    self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
    child = subprocess.Popen(str(cmdline),
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE,
                             universal_newlines=True,
                             shell=(sys.platform != "win32"))
    # Clustalw on stdout:
    align = AlignIO.read(child.stdout, "clustal")
    align.sort()
    # Didn't use -quiet so there should be progress reports on stderr,
    self.assertTrue(child.stderr.read().strip().startswith("MUSCLE"))
    return_code = child.wait()
    self.assertEqual(return_code, 0)
    self.assertEqual(len(records), len(align))
    for old, new in zip(records, align):
        self.assertEqual(old.id, new.id)
    child.stdout.close()
    child.stderr.close()
    del child
    # HTML output file. Opened in the default text mode because the "U"
    # mode flag is rejected by Python 3.11 onwards, and the checks below
    # do not depend on how newlines are translated. The context manager
    # closes the handle even if reading fails.
    with open(output_html) as handle:
        html = handle.read().strip().upper()
    self.assertTrue(html.startswith("<HTML"))
    self.assertTrue(html.endswith("</HTML>"))
    # ClustalW strict:
    align = AlignIO.read(output_clwstrict, "clustal")
    align.sort()
    self.assertEqual(len(records), len(align))
    for old, new in zip(records, align):
        self.assertEqual(old.id, new.id)
    os.remove(output_html)
    os.remove(output_clwstrict)
def test_fasta_db_prot_legacy(self):
    """Test makeblastdb wrapper with protein database legacy, version 4."""
    cline = Applications.NcbimakeblastdbCommandline(
        exe_names["makeblastdb"],
        blastdb_version=4,
        input_file="GenBank/NC_005816.faa",
        dbtype="prot",
        hash_index=True,
        max_file_sz="20MB",
        parse_seqids=True,
        taxid=10,
    )
    self.assertEqual(
        str(cline),
        _escape_filename(exe_names["makeblastdb"]) + " -blastdb_version 4"
        " -dbtype prot -in GenBank/NC_005816.faa"
        " -parse_seqids -hash_index -max_file_sz 20MB"
        " -taxid 10",
    )
    child = subprocess.Popen(
        str(cline),
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True,
        shell=(sys.platform != "win32"),
    )
    stdoutdata, stderrdata = child.communicate()
    return_code = child.returncode
    # Check the exit status (as the BLAST search tests do) so a failed run
    # is reported as such rather than only as a missing output file.
    self.assertEqual(
        return_code, 0, "Got error code %i back from:\n%s" % (return_code, cline)
    )
    # Each entry lists the acceptable extensions for one expected output
    # file; for two of them either of a pair of names is accepted.
    expected_outputs = [
        ("phd",),
        ("phi",),
        ("phr",),
        ("pin",),
        ("pog",),
        ("psd", "pnd"),
        ("psi", "pni"),
        ("psq",),
    ]
    for alternatives in expected_outputs:
        candidates = ["GenBank/NC_005816.faa." + ext for ext in alternatives]
        self.assertTrue(
            any(os.path.isfile(name) for name in candidates),
            "Missing makeblastdb output file %s" % " or ".join(candidates),
        )
def test_Muscle_with_options(self):
    """Round-trip through app with a switch and valued option."""
    cmdline = MuscleCommandline(muscle_exe)
    cmdline.set_parameter("input", self.infile1)  # "input" is alias for "in"
    cmdline.set_parameter("out", self.outfile2)
    # Use property:
    cmdline.objscore = "sp"
    cmdline.noanchors = True
    self.assertEqual(str(cmdline), _escape_filename(muscle_exe) +
                     " -in Fasta/f002" +
                     " -out Fasta/temp_align_out2.fa" +
                     " -objscore sp -noanchors")
    self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
    output, error = cmdline()
    self.assertEqual(output, "")
    self.assertNotIn("ERROR", error)
    # Show stderr on failure - it is the text being checked here, whereas
    # stdout has already been asserted to be empty.
    self.assertTrue(error.strip().startswith("MUSCLE"), error)
def test_Prank_complex_command_line(self):
    """Round-trip with a command line using many options."""
    cmdline = PrankCommandline(prank_exe)
    # Options given by name, with and without the leading dash,
    cmdline.set_parameter("d", self.infile1)
    cmdline.set_parameter("-gaprate", 0.321)
    cmdline.set_parameter("gapext", 0.6)
    cmdline.set_parameter("-dots", 1)  # i.e. True
    # and some given as properties:
    cmdline.kappa = 3
    cmdline.skipins = True
    cmdline.set_parameter("-once", True)
    cmdline.realbranches = True
    expected = "".join([
        _escape_filename(prank_exe),
        " -d=Fasta/fa01",
        " -dots -gaprate=0.321 -gapext=0.6 -kappa=3",
        " -once -skipins -realbranches",
    ])
    self.assertEqual(str(cmdline), expected)
    self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
    stdout, stderr = cmdline()
    self.assertTrue("Total time" in stdout, stdout)
def test_Prank_complex_command_line(self):
    """Round-trip with a command line using many options."""
    cmdline = PrankCommandline(prank_exe)
    # Options given by name, with and without the leading dash,
    cmdline.set_parameter("d", self.infile1)
    cmdline.set_parameter("-gaprate", 0.321)
    cmdline.set_parameter("gapext", 0.6)
    cmdline.set_parameter("-dots", 1)  # i.e. True
    # and some given as properties:
    cmdline.kappa = 3
    cmdline.skipins = True
    cmdline.set_parameter("-once", True)
    cmdline.realbranches = True
    expected = "".join([
        _escape_filename(prank_exe),
        " -d=Fasta/fa01",
        " -dots -gaprate=0.321 -gapext=0.6 -kappa=3",
        " -once -skipins -realbranches",
    ])
    self.assertEqual(str(cmdline), expected)
    self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
    stdout, stderr = cmdline()
    self.assertTrue("Total time" in stdout, stdout)
def test_long(self):
    """Simple muscle call using a long input file."""
    # Create a large input file by converting some of another example file
    temp_large_fasta_file = "temp_cw_prot.fasta"
    records = list(SeqIO.parse("NBRF/Cw_prot.pir", "pir"))[:40]
    SeqIO.write(records, temp_large_fasta_file, "fasta")
    # Prepare the command...
    cmdline = MuscleCommandline(muscle_exe)
    cmdline.set_parameter("in", temp_large_fasta_file)
    # Fast options first,
    cmdline.set_parameter("maxiters", 1)
    cmdline.set_parameter("diags", True)  # Default None treated as False!
    # then clustal output,
    cmdline.set_parameter("clwstrict", True)  # Default None treated as False!
    # Shouldn't need this, but just to make sure it is accepted
    cmdline.set_parameter("maxhours", 0.1)
    # and finally no progress reports to stderr
    cmdline.set_parameter("quiet", True)  # Default None treated as False!
    expected = "".join([
        _escape_filename(muscle_exe),
        " -in temp_cw_prot.fasta -diags -maxhours 0.1",
        " -maxiters 1 -clwstrict -quiet",
    ])
    self.assertEqual(str(cmdline).rstrip(), expected)
    self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
    child = subprocess.Popen(
        str(cmdline),
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True,
        shell=(sys.platform != "win32"))
    align = AlignIO.read(child.stdout, "clustal")
    align.sort()
    records.sort(key=lambda rec: rec.id)
    self.assertEqual(len(records), len(align))
    for original, aligned in zip(records, align):
        self.assertEqual(original.id, aligned.id)
        # Removing the gaps should give back the input sequence
        ungapped = str(aligned.seq).replace("-", "")
        self.assertEqual(ungapped, str(original.seq))
    # See if quiet worked:
    self.assertEqual("", child.stderr.read().strip())
    return_code = child.wait()
    self.assertEqual(return_code, 0)
    child.stdout.close()
    child.stderr.close()
    os.remove(temp_large_fasta_file)
def test_tblastn(self):
    """Pairwise TBLASTN search using a query file and a subject file."""
    tblastn = exe_names["tblastn"]
    cline = Applications.NcbitblastnCommandline(
        tblastn,
        query="GenBank/NC_005816.faa",
        subject="GenBank/NC_005816.fna",
        evalue="1e-6")
    expected = "".join([
        _escape_filename(tblastn),
        " -query GenBank/NC_005816.faa -evalue 1e-6",
        " -subject GenBank/NC_005816.fna",
    ])
    self.assertEqual(str(cline), expected)
    # The command string goes through the shell everywhere except Windows.
    use_shell = sys.platform != "win32"
    child = subprocess.Popen(
        str(cline),
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True,
        shell=use_shell)
    stdoutdata, stderrdata = child.communicate()
    return_code = child.returncode
    failure_note = "Got error code %i back from:\n%s" % (return_code, cline)
    self.assertEqual(return_code, 0, failure_note)
    # Ten queries are expected in the report, every one of them with hits.
    self.assertEqual(10, stdoutdata.count("Query= "))
    self.assertEqual(0, stdoutdata.count("***** No hits found *****"))
def __str__(self):
    """Return the clustalw command line as a single string.

    The executable and all filenames are passed through
    _escape_filename. Each option is only included when the matching
    attribute is set.
    """
    # Notes on how clustalw 1.83 accepts its input file argument
    # (thanks to Emanuel Hey for flagging this on the mailing list).
    #
    # On Linux, these work:
    #   clustalw input.faa
    #   clustalw /full/path/input.faa
    #   clustalw -INFILE=input.faa
    #   clustalw -INFILE=/full/path/input.faa
    # while these fail (using DOS style slashes):
    #   clustalw /INFILE=input.faa
    #   clustalw /INFILE=/full/path/input.faa
    #
    # On Windows XP with clustalw.exe 1.83, these work at the command
    # prompt:
    #   clustalw.exe input.faa
    #   clustalw.exe /INFILE=input.faa
    #   clustalw.exe /INFILE="input.faa"
    #   clustalw.exe /INFILE="with space.faa"
    #   clustalw.exe /INFILE=C:\full\path\input.faa
    #   clustalw.exe /INFILE="C:\full path\with spaces.faa"
    # but sadly these fail:
    #   clustalw.exe "input.faa"
    #   clustalw.exe "with space.faa"
    #   clustalw.exe C:\full\path\input.faa
    #   clustalw.exe "C:\full path\with spaces.faa"
    #
    # In later testing (with a different binary of clustalw.exe 1.83)
    # using -INFILE seemed to work, although it had once been noted that
    # these fail even though a minus/dash does seem to work with other
    # options (!):
    #   clustalw.exe -INFILE=input.faa
    #   clustalw.exe -INFILE=C:\full\path\input.faa
    # Quoting the whole argument fails as well:
    #   clustalw.exe "/INFILE=input.faa"
    #   clustalw.exe "/INFILE=C:\full\path\input.faa"
    #
    # In addition, both self.command and self.sequence_file may contain
    # spaces, so should be quoted. But clustalw is fussy.
    args = [
        _escape_filename(self.command),
        "-INFILE=%s" % _escape_filename(self.sequence_file),
    ]
    add = args.append

    # general options
    if self.type:
        add("-TYPE=%s" % self.type)
    if self.is_quick == 1:
        # Some versions of clustalw are case sensitive,
        # and require -quicktree rather than -QUICKTREE
        add("-quicktree")
    if self.allow_negative == 1:
        add("-NEGATIVE")

    # output options
    if self.output_file:
        add("-OUTFILE=%s" % _escape_filename(self.output_file))
    if self.output_type:
        add("-OUTPUT=%s" % self.output_type)
    if self.output_order:
        add("-OUTORDER=%s" % self.output_order)
    if self.change_case:
        add("-CASE=%s" % self.change_case)
    if self.add_seqnos:
        add("-SEQNOS=%s" % self.add_seqnos)
    if self.new_tree:
        # clustal does not work if -align is written -ALIGN
        add("-NEWTREE=%s -align" % _escape_filename(self.new_tree))

    # multiple alignment options
    if self.guide_tree:
        add("-USETREE=%s" % _escape_filename(self.guide_tree))
    if self.protein_matrix:
        add("-MATRIX=%s" % self.protein_matrix)
    if self.dna_matrix:
        add("-DNAMATRIX=%s" % self.dna_matrix)
    if self.gap_open_pen:
        add("-GAPOPEN=%s" % self.gap_open_pen)
    if self.gap_ext_pen:
        add("-GAPEXT=%s" % self.gap_ext_pen)
    if self.is_no_end_pen == 1:
        add("-ENDGAPS")
    if self.gap_sep_range:
        add("-GAPDIST=%s" % self.gap_sep_range)
    if self.is_no_pgap == 1:
        add("-NOPGAP")
    if self.is_no_hgap == 1:
        add("-NOHGAP")
    if len(self.h_gap_residues) != 0:
        # the residues are run together into one string
        add("-HGAPRESIDUES=%s" % "".join(self.h_gap_residues))
    if self.max_div:
        add("-MAXDIV=%s" % self.max_div)
    if self.trans_weight:
        add("-TRANSWEIGHT=%s" % self.trans_weight)

    # Every piece is separated from the previous one by a single space.
    return " ".join(args)